From 05c57de7c77a8b77e900edaacdd39f491334552a Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 10 Jan 2025 17:10:34 +0800 Subject: [PATCH 01/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/analyzer/analyzer_controller.py | 33 ++-- profiler/advisor/analyzer/base_analyzer.py | 8 +- .../ai_core_performance/__init__.py | 0 .../ai_core_performance_analyzer.py | 57 +++++++ .../ai_core_performance_checker.py | 141 ++++++++++++++++++ .../computation/profiling_analyzer.py | 5 + profiler/advisor/common/analyzer_scopes.py | 1 + profiler/advisor/interface/interface.py | 5 +- profiler/cli/analyze_cli.py | 6 + profiler/cli/entrance.py | 7 + 10 files changed, 240 insertions(+), 23 deletions(-) create mode 100644 profiler/advisor/analyzer/computation/ai_core_performance/__init__.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py index 1a5a28b63..e8a62c69d 100644 --- a/profiler/advisor/analyzer/analyzer_controller.py +++ b/profiler/advisor/analyzer/analyzer_controller.py @@ -186,7 +186,6 @@ class AnalyzerController: return True - @staticmethod def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data, headers, dimension, get_max=False): @@ -256,10 +255,10 @@ class AnalyzerController: return dimensions, AsyncParams.user_total_params def do_analysis(self, dimensions, **kwargs): - pid = os.getpid() + pid = os.getpid() # 获取当前进程的pid resp = {"id": pid} - self.args_manager = AdditionalArgsManager() - self.args_manager.init(kwargs) + self.args_manager = AdditionalArgsManager() # 初始化参数管理器 + self.args_manager.init(kwargs) # 初始化参数管理器 output_path = kwargs.get("output_path") AnalyzerController._set_analysis_process_priority(pid) @@ -278,9 +277,9 @@ class AnalyzerController: PathManager.make_dir_safety(output_path) Config().set_config("_work_path", output_path) - Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") + Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") # 设置日志路径 - self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) + self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) # 执行分析 except Exception as e: self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.INNER_ERROR_STATUS_CODE, status=AsyncAnalysisStatus.FAILED, error_msg=str(e)) @@ -612,8 +611,8 @@ class AnalyzerController: return job_list def _do_analysis(self, dimensions, pid=0, async_resp=None, **kwargs): - self.dimensions = dimensions - self.kwargs = kwargs + self.dimensions = dimensions # 设置分析维度 + self.kwargs = kwargs # 设置分析参数 result_list = [] profiling_path = PathManager.get_realpath(self.kwargs.get("profiling_path")) benchmark_profiling_path = self.kwargs.get("benchmark_profiling_path") @@ -622,7 +621,7 @@ class AnalyzerController: benchmark_profiling_path = PathManager.get_realpath(benchmark_profiling_path) PathManager.check_path_owner_consistent([benchmark_profiling_path]) - if not self._check_profiling_path_valid(profiling_path): + if not self._check_profiling_path_valid(profiling_path): # 检查profiling路径是否有效 error_msg = f"Got invalid argument '-d/--profiling_path' {profiling_path}, skip analysis" 
self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE, @@ -630,8 +629,8 @@ class AnalyzerController: logger.error(error_msg) return - - if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path): + if benchmark_profiling_path and not self._check_profiling_path_valid( + benchmark_profiling_path): # 检查benchmark_profiling路径是否有效 error_msg = (f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, " f"skip analysis") self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, @@ -640,7 +639,7 @@ class AnalyzerController: logger.error(error_msg) return - self._is_cluster = self._is_cluster_profiling(profiling_path) + self._is_cluster = self._is_cluster_profiling(profiling_path) # 判断是否是集群profiling if benchmark_profiling_path: # 构建benchmark profiling的map,用于根据rank获取profiling路径,否则无法进行比对 is_benchmark_cluster = self._is_cluster_profiling(benchmark_profiling_path) @@ -655,16 +654,16 @@ class AnalyzerController: return if not self._is_cluster: - job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) + job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) # 单卡分析 else: self.slow_rank_analyzer = SlowRankAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) self.slow_link_analyzer = SlowLinkAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) - job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) + job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) # 集群分析 - for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): + for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): # dimension: 分析维度,scope: 分析器 result_list.append( - interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, - **kwargs) + # 获取分析结果 + interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, **kwargs) ) for result in result_list[::-1]: diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index 0391eb88a..adf82ab8a 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -105,7 +105,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def get_priority(self, max_mem_op_dur): pass - def identify_profiling_type(self, profiling_type_list): + def identify_profiling_type(self, profiling_type_list): # 确定分析类型 profiling_type = None if self.collection_path.endswith(ASCEND_MS): profiling_type = [elem for elem in profiling_type_list if Constant.MINDSPORE in elem][0] @@ -134,7 +134,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): profiling_type = profiling_type_list[0] return profiling_type - def identify_profiling_version(self): + def identify_profiling_version(self): # 确定分析版本 profiling_version = "" if Constant.MINDSPORE in self.profiling_type: ascend_dirs = [] @@ -166,7 +166,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): self.__class__.__name__, self.kwargs.get(Constant.TORCH_VERSION), profiling_version) return profiling_version - def init_dataset_list(self) -> None: + def init_dataset_list(self) -> None: # 初始化数据集列表 dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) @@ -184,7 +184,7 @@ class BaseAnalyzer(VersionControl, 
metaclass=ABCMeta): self.dataset_list[key] = [] self.dataset_list[key].append(dataset) - def get_priority_by_time_ratio(self, dur, step_dur): + def get_priority_by_time_ratio(self, dur, step_dur): # 根据时间比例确定优先级 time_ratio = safe_division(dur, step_dur) if time_ratio >= self.ANALYZER_HIGH_PRIORITY_TIME_RATIO: return PriorityBackgroundColor.high diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py b/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py new file mode 100644 index 000000000..f6f7e4f43 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker +from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset +from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICorePerformanceAnalyzer(BaseAnalyzer): + dataset_cls_list = [ComputationAnalysisDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = ComputationAnalysisDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + self.html = None + info = DeviceInfoParser(collection_path) + info.parse_data() + + @BaseAnalyzer.check_data((ComputationAnalysisDataset.get_key(),)) + def optimize(self, **kwargs): + if not Config().get_config("aic_frequency"): + logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") + return self.result + + add_render_list = kwargs.get("add_render_list", True) + ai_core_freq_checker = AICoreFreqChecker() + ai_core_freq_checker.check_ai_core_freq(self.dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) + ai_core_freq_checker.make_record(self.result) + self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), + rank=kwargs.get("rank")) + return self.result + + def get_priority(self, max_mem_op_dur=None): + return PriorityBackgroundColor.high \ No newline at end of file diff --git 
a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py new file mode 100644 index 000000000..5a94d131b --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -0,0 +1,141 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.utils.utils import convert_to_float +from profiler.prof_common.additional_args_manager import AdditionalArgsManager + +logger = logging.getLogger() + + +class AICorePerformanceChecker: + """ + operator performance checker + """ + # DECREASE_FREQ_RATIO = 0.05 + # SHOW_TOPK_OPS = 10 + # TOTAL_DURATION_INDEX = 2 + # DECREASE_FREQ_RATIO_INDEX = 3 + _ITEMS = [ + "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", + "output_data_types", "output_formats" + ] + + def __init__(self): + + self.ai_core_freq_issues = False + self.desc = "" + self.suggestions = "" + self.decrease_freq_ops = [] + self.headers = [] + self.op_freq = None + self.rank = None + self.stage = None + + def check_ai_core_freq(self, event_dataset: ComputationAnalysisDataset, rank=None, stage=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + logger.debug("Skip slow ai core frequency checker, " + "because no ai core frequency were recorded in trace_view.json") + return + + self.rank = rank + self.stage = stage + self.op_freq = event_dataset.op_freq + for op_name, op_info in self.op_freq.items(): + freq_list = op_info.get("freq_list", []) + if not freq_list: + continue + + op_count = op_info.get("count", 0) + op_total_duration = round(op_info.get("dur", 0), 2) + max_freq = convert_to_float(Config().get_config("aic_frequency")) + + if max_freq == 0: + raise ValueError("max_freq cannot be zero.") + decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) + if decrease_freq_ratio >= Config().get_config("frequency_threshold"): + self.ai_core_freq_issues = True + self.decrease_freq_ops.append([op_name, op_count, op_total_duration, + f"{round(decrease_freq_ratio, 4):.2%}", + round(sum(freq_list) / len(freq_list), 2), + max(freq_list), min(freq_list)]) + + if self.decrease_freq_ops: + # 按算子总耗时和降频比率 降序排列 + self.decrease_freq_ops.sort( + key=lambda x: (x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), reverse=True) + if not self.ai_core_freq_issues: + return + + def make_record(self, 
result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.ai_core_freq_issues: + return self.ai_core_freq_issues + + prompt_class = BasePrompt.get_prompt_class(self.__class__.__name__) + + problem = prompt_class.PROBLEM + if self.rank is not None: + problem += prompt_class.RANK_ID.format(self.rank) + + self.desc = prompt_class.DESCRIPTION.format(len(self.decrease_freq_ops), self.DECREASE_FREQ_RATIO) + if self.rank: + self.desc = prompt_class.RANK_DESCRIPTION.format(self.rank) + self.desc.lower() + + optimization_item = OptimizeItem(problem, self.desc, [prompt_class.SUGGESTION]) + result.add(OptimizeRecord(optimization_item)) + + self.headers = [ + "Operator name", + "Count", + "Total duration(us)", + "AI CORE frequency decreased ratio", + "Average frequency", + "Max frequency", + "Min frequency", + ] + result.add_detail(problem, headers=self.headers) + + for row in self.decrease_freq_ops: + result.add_detail(problem, detail=row) + return True + + def make_render(self, html_render, add_render_list=True, **kwargs): + if not self.ai_core_freq_issues: + return self.ai_core_freq_issues + + priority = kwargs.get("priority") + if self.SHOW_TOPK_OPS: + self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." + return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_frequency.html", + desc=self.desc, + suggestion=self.suggestions, + headers=self.headers, + data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + add_render_list=add_render_list, + priority_background_color=priority, + rank=kwargs.get("rank")) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index ccf671139..04f889854 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -116,3 +116,8 @@ class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = AicpuChecker(self.cann_version) + +class AicpuPerformanceAnalyzer(ProfilingAnalyzer): + def __init__(self, collection_path, **kwargs) -> None: + super().__init__(collection_path, **kwargs) + pass diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 2cad1a3ce..5dd8d6d35 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -40,3 +40,4 @@ class SupportedScopes: GC_ANALYSIS = "gc_analysis" CONJECTURED_GC_ANALYSIS = "conjectured_analysis" COMPARISON = "comparison" + OPERATOR_PERFORMANCE_ANALYSIS = "operator_performance_analysis" diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 7b9cb00fd..fee7203c3 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -25,7 +25,7 @@ sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os. 
from profiler.advisor.utils.utils import Timer from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.profiling_analyzer import AicpuAnalyzer, BlockDimAnalyzer, \ - DynamicShapeAnalyzer, OperatorBoundAnalyzer + DynamicShapeAnalyzer, OperatorBoundAnalyzer, AicpuPerformanceAnalyzer from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.analyzer.graph_fusion.graph_fusion_analyzer import FusionOPAnalyzer from profiler.advisor.common.analyzer_scopes import SupportedScopes @@ -76,7 +76,8 @@ class Interface: SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer, SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, SupportedScopes.GRAPH: FusionOPAnalyzer, - SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer + SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer, + SupportedScopes.OPERATOR_PERFORMANCE_ANALYSIS: AicpuPerformanceAnalyzer }), COMMUNICATION: OrderedDict({SupportedScopes.PACKET: PacketAnalyzer, SupportedScopes.COMMUNICATION_RETRANSMISSION_DETECTION: RDMARetransmissionAnalyzer, diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 9453de6ff..80441c274 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -145,6 +145,12 @@ def analyze_schedule(**kwargs) -> None: required=False, default="cn", help="Language of the profiling advisor.") +@click.option("-p", + "--performance", + metavar="", + required=False, + default=False, + help="Indicates whether to analyze operator performance.") @debug_option def analyze_computation(**kwargs) -> None: try: diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index c6d72837b..079792ea9 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -66,3 +66,10 @@ msprof_analyze_cli.add_command(compare_cli, name="compare") msprof_analyze_cli.add_command(cluster_cli, name="cluster") msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") +if __name__ == "__main__": + msprof_analyze_cli.main( + [ + "analyze","all","-d", + r"D:\da","-l","cn" + ] + ) -- Gitee From 53ee3c6032d3e228e5fd5d69acb63bb53c275446 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 13 Jan 2025 16:35:28 +0800 Subject: [PATCH 02/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 5 +- profiler/advisor/common/analyzer_scopes.py | 2 +- profiler/advisor/interface/interface.py | 6 +- .../advisor/rules/cn/aicore_performance.yaml | 109 ++++++++++++++++++ profiler/cli/entrance.py | 2 +- 5 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 profiler/advisor/rules/cn/aicore_performance.yaml diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index f6f7e4f43..6992845af 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -15,6 +15,7 @@ import logging from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import 
AICoreFreqChecker from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor @@ -27,11 +28,11 @@ logger = logging.getLogger() class AICorePerformanceAnalyzer(BaseAnalyzer): - dataset_cls_list = [ComputationAnalysisDataset] + dataset_cls_list = [ProfilingDataset] def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: super().__init__(collection_path, n_processes, **kwargs) - key = ComputationAnalysisDataset.get_key() + key = ProfilingDataset.get_key() self.dataset = self.get_first_data_by_key(self.dataset_list, key) self.result = OptimizeResult() self.html_render = HTMLRender() diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 5dd8d6d35..40a8d99bc 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -40,4 +40,4 @@ class SupportedScopes: GC_ANALYSIS = "gc_analysis" CONJECTURED_GC_ANALYSIS = "conjectured_analysis" COMPARISON = "comparison" - OPERATOR_PERFORMANCE_ANALYSIS = "operator_performance_analysis" + AICORE_PERFORMANCE_ANALYSIS = "ai_core_performance_analysis" diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index fee7203c3..ebcf56806 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -25,7 +25,7 @@ sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os. from profiler.advisor.utils.utils import Timer from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.profiling_analyzer import AicpuAnalyzer, BlockDimAnalyzer, \ - DynamicShapeAnalyzer, OperatorBoundAnalyzer, AicpuPerformanceAnalyzer + DynamicShapeAnalyzer, OperatorBoundAnalyzer from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.analyzer.graph_fusion.graph_fusion_analyzer import FusionOPAnalyzer from profiler.advisor.common.analyzer_scopes import SupportedScopes @@ -47,6 +47,8 @@ from profiler.advisor.analyzer.communication.alignment.byte_alignment_analyzer i from profiler.advisor.analyzer.schedule.gc.gc_analyzer import GcAnalyzer from profiler.advisor.analyzer.schedule.conjectured_gc.conjectured_gc_analyzer import ConjecturedGcAnalyzer from profiler.advisor.analyzer.comparison.comparison_analyzer import ComparisonAnalyzer +from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \ + AICorePerformanceAnalyzer logger = logging.getLogger() @@ -77,7 +79,7 @@ class Interface: SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, SupportedScopes.GRAPH: FusionOPAnalyzer, SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer, - SupportedScopes.OPERATOR_PERFORMANCE_ANALYSIS: AicpuPerformanceAnalyzer + SupportedScopes.AICORE_PERFORMANCE_ANALYSIS: AICorePerformanceAnalyzer }), COMMUNICATION: OrderedDict({SupportedScopes.PACKET: PacketAnalyzer, SupportedScopes.COMMUNICATION_RETRANSMISSION_DETECTION: RDMARetransmissionAnalyzer, diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml new file mode 100644 index 000000000..7eef1598a --- /dev/null +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -0,0 +1,109 @@ +problem: "AICPU算子" +description: "一些算子和任务执行时间超过了{}us,比如:\n" +suggestion: "修改代码避免使用aicpu类算子" +double_suggestion: "尝试将double类型的算子转换成float,比如{}" +DataTypeSuggestion: &DataTypeSuggestion "数据类型{}在{}算子中可能会造成AICpu问题, 如果可以,尝试转换成{}。" 
+AICPU_DOC_URL: &AICPU_DOC_URL "https://gitee.com/ascend/mstt/blob/master/profiler/advisor/doc/Samples%20of%20AI%20CPU%20Operator%20Replacement.md" + +CommonChecker: + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensoraequal, equal, nonzero, mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ tensorequal ] + input: [ float, float32, float16, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ equal ] + input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ nonzero ] + input: [ float16, bool, dt_bf16 ] + output: [ int64 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ tensorequal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ equal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + suggestion: *DataTypeSuggestion + +ExampleGuideChecker: + - IndexPutChecker: + op_type: [index] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换index算子。" + + - NonzeroChecker: + op_type: [ indexput, indexputv2 ] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换indexput算子。" + + - CastChecker: + op_type: [ argmin ] + url: *AICPU_DOC_URL + suggestion: "请参考链接更新cann-tookit包到7.0.RC1及以上的版本。" + + - CastChecker: + op_type: [ nonzero ] + url: *AICPU_DOC_URL + 
suggestion: "请参考链接修改源码,尝试用等价的算子替换nonzero算子。" + + diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 079792ea9..89ac8187d 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -70,6 +70,6 @@ if __name__ == "__main__": msprof_analyze_cli.main( [ "analyze","all","-d", - r"D:\da","-l","cn" + r"D:\data\file","-l","cn" ] ) -- Gitee From 762118c65ce74c7159ac8f4192b24035b30add0b Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 13 Jan 2025 16:37:15 +0800 Subject: [PATCH 03/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/analyzer/computation/profiling_analyzer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index 04f889854..bbea136f0 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -115,9 +115,4 @@ class OperatorBoundAnalyzer(ProfilingAnalyzer): class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) - -class AicpuPerformanceAnalyzer(ProfilingAnalyzer): - def __init__(self, collection_path, **kwargs) -> None: - super().__init__(collection_path, **kwargs) - pass + self.checker = AicpuChecker(self.cann_version) \ No newline at end of file -- Gitee From a9b229d83dbbf6f0f95300b8e34431b576c0e1b0 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 14 Jan 2025 10:14:58 +0800 Subject: [PATCH 04/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 15 +++-- .../ai_core_performance_checker.py | 16 ++++- .../html/templates/ai_core_performance.html | 62 +++++++++++++++++++ 3 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 profiler/advisor/display/html/templates/ai_core_performance.html diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 6992845af..b68386bdf 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -15,6 +15,8 @@ import logging from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \ + AICorePerformanceChecker from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker @@ -32,27 +34,24 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: super().__init__(collection_path, n_processes, **kwargs) - key = ProfilingDataset.get_key() - self.dataset = self.get_first_data_by_key(self.dataset_list, key) + profiling_key = ProfilingDataset.get_key() + self.profiling_dataset = self.get_first_data_by_key(self.dataset_list, profiling_key) self.result = 
OptimizeResult() self.html_render = HTMLRender() self.html = None - info = DeviceInfoParser(collection_path) - info.parse_data() - @BaseAnalyzer.check_data((ComputationAnalysisDataset.get_key(),)) def optimize(self, **kwargs): if not Config().get_config("aic_frequency"): logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") return self.result add_render_list = kwargs.get("add_render_list", True) - ai_core_freq_checker = AICoreFreqChecker() - ai_core_freq_checker.check_ai_core_freq(self.dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) + ai_core_freq_checker = AICorePerformanceChecker() + ai_core_freq_checker.check_ai_core_freq(self.profiling_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), rank=kwargs.get("rank")) return self.result def get_priority(self, max_mem_op_dur=None): - return PriorityBackgroundColor.high \ No newline at end of file + return PriorityBackgroundColor.high # todo 未知内容 \ No newline at end of file diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5a94d131b..4d595d7d1 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -15,6 +15,7 @@ import logging from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord @@ -48,19 +49,28 @@ class AICorePerformanceChecker: self.op_freq = None self.rank = None self.stage = None + self.cube_dict = {} + self.fa_dict = {} + self.vector_dict = {} - def check_ai_core_freq(self, event_dataset: ComputationAnalysisDataset, rank=None, stage=None): + + + def data_filter(self, profiling_dataset: ProfilingDataset): + self.cude_dict = {} + self.fa_dict = {} + self.vector_dict = {} + + def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): """ :Param event_dataset: dataset of timeline event """ - if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + if not hasattr(profiling_dataset, "op_freq") or not getattr(profiling_dataset, "op_freq"): logger.debug("Skip slow ai core frequency checker, " "because no ai core frequency were recorded in trace_view.json") return self.rank = rank self.stage = stage - self.op_freq = event_dataset.op_freq for op_name, op_info in self.op_freq.items(): freq_list = op_info.get("freq_list", []) if not freq_list: diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html new file mode 100644 index 000000000..a009f073d --- /dev/null +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -0,0 +1,62 @@ +{% if data|length > 0 %} +
+<div class="content">
+    <details open>
+        <summary>AICORE Performance Analysis</summary>
+        <div>
+            MatMul.
+            <table>
+                <tr>
+                    {% for header in headers %}
+                    <th>{{ header }}</th>
+                    {% endfor %}
+                </tr>
+                {% for row in data %}
+                <tr>
+                    {% for element in row %}
+                    <td>{{ element|safe }}</td>
+                    {% endfor %}
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+        <div>
+            FlashAttention
+            <table>
+                <tr>
+                    {% for header in headers %}
+                    <th>{{ header }}</th>
+                    {% endfor %}
+                </tr>
+                {% for row in data %}
+                <tr>
+                    {% for element in row %}
+                    <td>{{ element|safe }}</td>
+                    {% endfor %}
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+        <div>
+            Vector
+            <table>
+                <tr>
+                    {% for header in headers %}
+                    <th>{{ header }}</th>
+                    {% endfor %}
+                </tr>
+                {% for row in data %}
+                <tr>
+                    {% for element in row %}
+                    <td>{{ element|safe }}</td>
+                    {% endfor %}
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+    </details>
+</div>
+{% endif %} \ No newline at end of file -- Gitee From f4963e92bd98b26046cb66198a9b27e78b6e34ba Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 14 Jan 2025 11:04:11 +0800 Subject: [PATCH 05/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_analyzer.py | 1 + .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index b68386bdf..801cf1f7c 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -47,6 +47,7 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): add_render_list = kwargs.get("add_render_list", True) ai_core_freq_checker = AICorePerformanceChecker() + ai_core_freq_checker.data_filter(self.profiling_dataset) ai_core_freq_checker.check_ai_core_freq(self.profiling_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 4d595d7d1..eefa57303 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -56,9 +56,9 @@ class AICorePerformanceChecker: def data_filter(self, profiling_dataset: ProfilingDataset): - self.cude_dict = {} - self.fa_dict = {} - self.vector_dict = {} + profiling_key = profiling_dataset.get_key() + for item in profiling_key: + print(item) def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): """ -- Gitee From bf527424ec0c51da19e34ada36e77a1c2878325b Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 14 Jan 2025 17:52:49 +0800 Subject: [PATCH 06/72] =?UTF-8?q?data=5Ffilter=E4=BF=AE=E6=94=B9=EF=BC=9A?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=9D=E6=AD=A5=E7=9A=84=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 10 ++--- .../ai_core_performance_checker.py | 44 ++++++++++++++----- profiler/cli/analyze_cli.py | 6 --- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 801cf1f7c..c7884f15a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -41,18 +41,16 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): self.html = None def optimize(self, **kwargs): - if not Config().get_config("aic_frequency"): - logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") - return self.result - 
add_render_list = kwargs.get("add_render_list", True) ai_core_freq_checker = AICorePerformanceChecker() ai_core_freq_checker.data_filter(self.profiling_dataset) - ai_core_freq_checker.check_ai_core_freq(self.profiling_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) + if not ai_core_freq_checker.ai_core_performance_issues: + return self.result + ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), rank=kwargs.get("rank")) return self.result def get_priority(self, max_mem_op_dur=None): - return PriorityBackgroundColor.high # todo 未知内容 \ No newline at end of file + return PriorityBackgroundColor.high # html 底色设置 \ No newline at end of file diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index eefa57303..5d1c0acf4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -38,10 +38,12 @@ class AICorePerformanceChecker: "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", "output_data_types", "output_formats" ] + _CHECKER = "AICorePerformanceChecker" + CUBE_OPERATOR_MEMORY_SIZE = 52428800 def __init__(self): - self.ai_core_freq_issues = False + self.ai_core_performance_issues = False self.desc = "" self.suggestions = "" self.decrease_freq_ops = [] @@ -49,16 +51,29 @@ class AICorePerformanceChecker: self.op_freq = None self.rank = None self.stage = None - self.cube_dict = {} - self.fa_dict = {} - self.vector_dict = {} - - + self.cube_list = [] + self.fa_list = [] + self.vector_list = [] def data_filter(self, profiling_dataset: ProfilingDataset): - profiling_key = profiling_dataset.get_key() - for item in profiling_key: - print(item) + if not self.check_task_dict(profiling_dataset): + return + operator_list = profiling_dataset.op_summary.op_list + total_duration = sum(operator.task_duration / 1000 for operator in operator_list + if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) + centi_of_total_duration = total_duration / 100 + for operator in operator_list: + if operator.task_type == "AI_CORE" and "matmul" in operator.op_type.lower(): + mm = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) + mm += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) + if mm >= self.CUBE_OPERATOR_MEMORY_SIZE: + self.cube_list.append(operator) + elif operator.op_type == "FlashAttentionScore" and "varlen" in operator.op_name.lower(): + self.fa_list.append(operator) + elif operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"] and operator.task_duration > centi_of_total_duration: + self.vector_list.append(operator) + if any([self.cube_list, self.fa_list, self.vector_list]): + self.ai_core_performance_issues = True def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): """ @@ -69,8 +84,6 @@ class AICorePerformanceChecker: "because no ai core frequency were recorded in trace_view.json") return - self.rank = rank - self.stage = stage for op_name, op_info in self.op_freq.items(): freq_list = op_info.get("freq_list", []) if not freq_list: @@ -149,3 +162,12 @@ class AICorePerformanceChecker: 
add_render_list=add_render_list, priority_background_color=priority, rank=kwargs.get("rank")) + + def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool: + if not hasattr(profiling_dataset, "op_summary"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + return False + if not hasattr(profiling_dataset.op_summary, "task_dict"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + return False + return True diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 80441c274..9453de6ff 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -145,12 +145,6 @@ def analyze_schedule(**kwargs) -> None: required=False, default="cn", help="Language of the profiling advisor.") -@click.option("-p", - "--performance", - metavar="", - required=False, - default=False, - help="Indicates whether to analyze operator performance.") @debug_option def analyze_computation(**kwargs) -> None: try: -- Gitee From 5e900c853b17dbdba43b07e54c55a9653edfd29c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 15 Jan 2025 15:06:36 +0800 Subject: [PATCH 07/72] =?UTF-8?q?data=5Ffilter=E4=BF=AE=E6=94=B9=EF=BC=9A?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=9D=E6=AD=A5=E7=9A=84=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 6 +- .../ai_core_performance_checker.py | 74 +++++++++---------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index c7884f15a..eb46b8549 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -46,9 +46,11 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): ai_core_freq_checker.data_filter(self.profiling_dataset) if not ai_core_freq_checker.ai_core_performance_issues: return self.result - ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) + ai_core_freq_checker.check_ai_core_performance() ai_core_freq_checker.make_record(self.result) - self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), + self.html = ai_core_freq_checker.make_render(self.html_render, + add_render_list, + priority=self.get_priority(), rank=kwargs.get("rank")) return self.result diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5d1c0acf4..b9107d522 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +from queue import PriorityQueue from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset @@ -59,14 +60,14 @@ class AICorePerformanceChecker: if not self.check_task_dict(profiling_dataset): return operator_list = profiling_dataset.op_summary.op_list - total_duration = sum(operator.task_duration / 1000 for operator in operator_list - if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) - centi_of_total_duration = total_duration / 100 + centi_of_total_duration = sum(operator.task_duration / 1000 for operator in operator_list + if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) / 100 for operator in operator_list: if operator.task_type == "AI_CORE" and "matmul" in operator.op_type.lower(): - mm = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) - mm += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) - if mm >= self.CUBE_OPERATOR_MEMORY_SIZE: + memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) + memory += int(operator.output_shapes[1:-1].split(",")[0]) * int( + operator.output_shapes[1:-1].split(",")[1]) + if memory >= self.CUBE_OPERATOR_MEMORY_SIZE: self.cube_list.append(operator) elif operator.op_type == "FlashAttentionScore" and "varlen" in operator.op_name.lower(): self.fa_list.append(operator) @@ -75,40 +76,35 @@ class AICorePerformanceChecker: if any([self.cube_list, self.fa_list, self.vector_list]): self.ai_core_performance_issues = True - def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): + def check_ai_core_performance(self): """ - :Param event_dataset: dataset of timeline event + :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if not hasattr(profiling_dataset, "op_freq") or not getattr(profiling_dataset, "op_freq"): - logger.debug("Skip slow ai core frequency checker, " - "because no ai core frequency were recorded in trace_view.json") - return + if self.cube_list: + self.check_cube_operator() + if self.fa_list: + self.check_fa_operator() + if self.vector_list: + self.check_vector_operator() + + + + def check_cube_operator(self): + cube_list = self.cube_list + performance_queue = PriorityQueue() + bound_queue = PriorityQueue() + affinity_queue = PriorityQueue() + # for operator in cube_list: + + pass + + def check_fa_operator(self): + pass + + def check_vector_operator(self): + pass + - for op_name, op_info in self.op_freq.items(): - freq_list = op_info.get("freq_list", []) - if not freq_list: - continue - - op_count = op_info.get("count", 0) - op_total_duration = round(op_info.get("dur", 0), 2) - max_freq = convert_to_float(Config().get_config("aic_frequency")) - - if max_freq == 0: - raise ValueError("max_freq cannot be zero.") - decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) - if decrease_freq_ratio >= Config().get_config("frequency_threshold"): - self.ai_core_freq_issues = True - self.decrease_freq_ops.append([op_name, op_count, op_total_duration, - f"{round(decrease_freq_ratio, 4):.2%}", - round(sum(freq_list) / len(freq_list), 2), - max(freq_list), min(freq_list)]) - - if self.decrease_freq_ops: - # 按算子总耗时和降频比率 降序排列 - self.decrease_freq_ops.sort( - key=lambda x: (x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), reverse=True) - if not self.ai_core_freq_issues: - return def make_record(self, result: OptimizeResult): """ 
@@ -154,7 +150,7 @@ class AICorePerformanceChecker: self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." return html_render.render_template(key="computation", template_dir="templates", - template_name="ai_core_frequency.html", + template_name="ai_core_performance.html", desc=self.desc, suggestion=self.suggestions, headers=self.headers, @@ -167,7 +163,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict"): + if not hasattr(profiling_dataset.op_summary, "task_dict") or hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False return True -- Gitee From 1281640a8d300e9bc3298f9bc1cedfe133e64449 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 15 Jan 2025 17:41:37 +0800 Subject: [PATCH 08/72] =?UTF-8?q?data=5Ffilter=E4=BF=AE=E6=94=B9=EF=BC=9A?= =?UTF-8?q?=E9=87=8D=E5=86=99=E7=AE=97=E5=AD=90=E8=BF=87=E6=BB=A4=EF=BC=8C?= =?UTF-8?q?=E6=8C=89shap=E6=88=96type=E5=88=86=E7=BB=84=E5=AD=98=E5=82=A8?= =?UTF-8?q?=E5=88=B0=E5=AD=97=E5=85=B8=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 68 ++++++++++++------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b9107d522..6716d21a3 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -40,7 +40,7 @@ class AICorePerformanceChecker: "output_data_types", "output_formats" ] _CHECKER = "AICorePerformanceChecker" - CUBE_OPERATOR_MEMORY_SIZE = 52428800 + CUBE_OPERATOR_MEMORY_SIZE_MB = 100 def __init__(self): @@ -52,30 +52,51 @@ class AICorePerformanceChecker: self.op_freq = None self.rank = None self.stage = None - self.cube_list = [] - self.fa_list = [] - self.vector_list = [] + self.cube_dict = {} + self.fa_dict = {} + self.vector_dict = {} def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_dict(profiling_dataset): return operator_list = profiling_dataset.op_summary.op_list - centi_of_total_duration = sum(operator.task_duration / 1000 for operator in operator_list - if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) / 100 - for operator in operator_list: - if operator.task_type == "AI_CORE" and "matmul" in operator.op_type.lower(): - memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) - memory += int(operator.output_shapes[1:-1].split(",")[0]) * int( - operator.output_shapes[1:-1].split(",")[1]) - if memory >= self.CUBE_OPERATOR_MEMORY_SIZE: - self.cube_list.append(operator) - elif operator.op_type == "FlashAttentionScore" and "varlen" in operator.op_name.lower(): - self.fa_list.append(operator) - elif operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"] and operator.task_duration > centi_of_total_duration: - self.vector_list.append(operator) - if any([self.cube_list, self.fa_list, self.vector_list]): + total_duration = sum(float(operator.task_duration) for operator in operator_list) + 
cube_memory_dict = {} + vector_type_dict = {} + # filter cube operator and fa operator + for op in operator_list: + shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower(): + cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0) + cube_memory_dict[op.op_name][shapes] += self.memory_size(op) + elif op.op_type == "FlashAttentionScore" and "varlen" in op.op_name.lower(): + self.fa_dict.setdefault(op.op_name, set()).add(shapes) + elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: + vector_type_dict.setdefault(op.op_type, set()).add(op) + + # filter cube operator + for op_name in cube_memory_dict: + for shapes in cube_memory_dict[op_name]: + if cube_memory_dict[op_name][shapes] >= self.CUBE_OPERATOR_MEMORY_SIZE_MB: + self.cube_dict.setdefault(op_name, set()).add(shapes) + + # filter vector operator + for op_type in vector_type_dict: + duration_group_by_time = sum(float(op.task_duration) for op in vector_type_dict[op_type]) + if (duration_group_by_time / total_duration) >= 0.01 or duration_group_by_time >= 1000000: + for op in vector_type_dict[op_type]: + shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + self.vector_dict.setdefault(op.op_name, set()).add(shapes) + + if any([self.cube_dict, self.fa_dict, self.vector_dict]): self.ai_core_performance_issues = True + def memory_size(self, operator): + input_shapes = operator.input_shapes[1:-1].split(";") + memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) + memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) + return memory * 2 / 1024 / 1024 + def check_ai_core_performance(self): """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv @@ -87,13 +108,16 @@ class AICorePerformanceChecker: if self.vector_list: self.check_vector_operator() - - def check_cube_operator(self): - cube_list = self.cube_list + cube_dict = self.cube_dict performance_queue = PriorityQueue() bound_queue = PriorityQueue() affinity_queue = PriorityQueue() + for name in cube_dict: + cube_list = cube_dict[name] + for shape in cube_list: + pass + # for operator in cube_list: pass @@ -104,8 +128,6 @@ class AICorePerformanceChecker: def check_vector_operator(self): pass - - def make_record(self, result: OptimizeResult): """ make record for what and how to optimize -- Gitee From f9959be733173dd6eb26e982d37f8b9f1c4182c7 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 16 Jan 2025 16:40:28 +0800 Subject: [PATCH 09/72] =?UTF-8?q?=E7=AE=97=E5=AD=90=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BF=AE=E6=94=B9=EF=BC=8C=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?Cube=E7=AE=97=E5=AD=90=E5=92=8CFa=E7=AE=97=E5=AD=90=E7=9A=84?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 6 +- .../ai_core_performance_checker.py | 238 +++++++++++++++--- 2 files changed, 198 insertions(+), 46 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index eb46b8549..76189af1a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -19,12 +19,8 @@ from 
profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performan AICorePerformanceChecker from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset -from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser -from profiler.advisor.config.config import Config logger = logging.getLogger() @@ -46,7 +42,7 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): ai_core_freq_checker.data_filter(self.profiling_dataset) if not ai_core_freq_checker.ai_core_performance_issues: return self.result - ai_core_freq_checker.check_ai_core_performance() + ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 6716d21a3..f8cccbb72 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,16 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from queue import PriorityQueue -from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.utils.utils import convert_to_float -from profiler.prof_common.additional_args_manager import AdditionalArgsManager +from queue import PriorityQueue logger = logging.getLogger() @@ -31,14 +27,6 @@ class AICorePerformanceChecker: """ operator performance checker """ - # DECREASE_FREQ_RATIO = 0.05 - # SHOW_TOPK_OPS = 10 - # TOTAL_DURATION_INDEX = 2 - # DECREASE_FREQ_RATIO_INDEX = 3 - _ITEMS = [ - "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", - "output_data_types", "output_formats" - ] _CHECKER = "AICorePerformanceChecker" CUBE_OPERATOR_MEMORY_SIZE_MB = 100 @@ -47,14 +35,12 @@ class AICorePerformanceChecker: self.ai_core_performance_issues = False self.desc = "" self.suggestions = "" - self.decrease_freq_ops = [] - self.headers = [] - self.op_freq = None - self.rank = None - self.stage = None self.cube_dict = {} + self.cube_list = [] self.fa_dict = {} + self.fa_list = [] self.vector_dict = {} + self.vector_list = [] def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_dict(profiling_dataset): @@ -69,8 +55,12 @@ class AICorePerformanceChecker: if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower(): cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0) cube_memory_dict[op.op_name][shapes] += 
self.memory_size(op) - elif op.op_type == "FlashAttentionScore" and "varlen" in op.op_name.lower(): + elif op.op_type == "FlashAttentionScore": self.fa_dict.setdefault(op.op_name, set()).add(shapes) + self.fa_list.append(op) + elif op.op_type == "FlashAttentionScoreGrad": + self.fa_dict.setdefault(op.op_name, set()).add(shapes + "-grad") + self.fa_list.append(op) elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: vector_type_dict.setdefault(op.op_type, set()).add(op) @@ -91,41 +81,207 @@ class AICorePerformanceChecker: if any([self.cube_dict, self.fa_dict, self.vector_dict]): self.ai_core_performance_issues = True - def memory_size(self, operator): + @staticmethod + def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) return memory * 2 / 1024 / 1024 - def check_ai_core_performance(self): + def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if self.cube_list: - self.check_cube_operator() - if self.fa_list: - self.check_fa_operator() - if self.vector_list: - self.check_vector_operator() - - def check_cube_operator(self): + self.result = dict() + if self.cube_dict: + self.result["cube"] = self.check_cube_operator(promoting_dataset) + if self.fa_dict: + self.result["fa"] = self.check_fa_operator(promoting_dataset) + if self.vector_dict: + self.result["vector"] = self.check_vector_operator(promoting_dataset) + + def check_cube_operator(self, profiling_dataset: ProfilingDataset): + # todo 未处理ND、NZ格式 cube_dict = self.cube_dict - performance_queue = PriorityQueue() - bound_queue = PriorityQueue() - affinity_queue = PriorityQueue() - for name in cube_dict: - cube_list = cube_dict[name] - for shape in cube_list: - pass + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + operator_list = [] + for op in profiling_dataset.op_summary.op_list: + if (op.op_name in cube_dict and + op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]): + operator_list.append(op) + for op in cube_dict: + shap_list = [] + for shape in cube_dict[op]: + dtype = None + shape_duration = 0. 
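For orientation, a minimal standalone sketch of the shape-based memory estimate used by the cube filter above; the shape strings are hypothetical and follow the quoted kernel_details.csv format the checker assumes (two bytes per element, i.e. fp16/bf16), with 100 MB as the CUBE_OPERATOR_MEMORY_SIZE_MB threshold:

    def estimate_matmul_memory_mb(input_shapes: str, output_shapes: str) -> float:
        # Strip the surrounding quotes: '"1024,4096;4096,8192"' -> '1024,4096;4096,8192'
        in_shapes = input_shapes[1:-1].split(";")
        elements = sum(int(s.split(",")[0]) * int(s.split(",")[1]) for s in in_shapes)
        out_dims = output_shapes[1:-1].split(",")
        elements += int(out_dims[0]) * int(out_dims[1])
        # 2 bytes per element, converted to MB
        return elements * 2 / 1024 / 1024

    # Hypothetical MatMul: (1024, 4096) x (4096, 8192) -> (1024, 8192)
    print(estimate_matmul_memory_mb('"1024,4096;4096,8192"', '"1024,8192"'))  # 88.0 MB, below the 100 MB threshold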
+ # 判断输入shape内轴是否为256的倍数 + affinity_flag = (int(shape.split("-")[0].split(";")[0].split(",")[1]) + + int(shape.split("-")[0].split(";")[1].split(",")[0])) % 256 != 0 + if affinity_flag: + for operator in operator_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + dtype = operator.input_data_types + shape_duration += float(operator.task_duration) + affinity_queue.append( + {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration}) + continue + else: + for operator in operator_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shap_list.append(operator) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + aic_mac_ratio = sum(operator.aic_mac_ratio for operator in shap_list) / len(shap_list) + aic_mte2_ratio = sum(operator.aic_mte2_ratio for operator in shap_list) / len(shap_list) + if (aic_mac_ratio >= 0.8) and aic_mte2_ratio >= 0.95: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_and_mte2_bound", + "duration": shape_duration}) + elif aic_mac_ratio >= 0.8: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_bound", + "duration": shape_duration}) + elif aic_mte2_ratio >= 0.95: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mte2_bound", + "duration": shape_duration}) + else: + optimization_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - # for operator in cube_list: + def check_fa_operator(self, profiling_dataset: ProfilingDataset): + fa_list = self.fa_list + fa_dict = self.fa_dict + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + # 不亲和算子筛选 + for op in fa_dict: + for shape in fa_dict[op]: + affinity_flag = False + shape_duration = 0. 
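The cube classification above reduces to two pipeline-utilisation thresholds; a condensed sketch follows, assuming the ratios passed in are per-shape averages taken over every instance of one op_name/shape pair:

    def classify_cube_shape(aic_mac_ratio: float, aic_mte2_ratio: float) -> dict:
        # Thresholds mirror the checker: mac-bound at >= 0.8, mte2-bound at >= 0.95.
        if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
            return {"bound": "mac_and_mte2_bound"}
        if aic_mac_ratio >= 0.8:
            return {"bound": "mac_bound"}
        if aic_mte2_ratio >= 0.95:
            return {"bound": "mte2_bound"}
        # Otherwise the larger gap to either threshold is reported as optimization headroom.
        return {"optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}

    print(classify_cube_shape(0.55, 0.97))  # {'bound': 'mte2_bound'}
    print(classify_cube_shape(0.55, 0.60))  # optimization headroom of roughly 0.35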
+ dtype = None + suggestion = "" + if "varlen" in op.lower(): + # 处理变长算子 如果不亲和则affinity_flag为False + if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + else: + # 处理定长算子 如果不亲和则affinity_flag为False + head_dim = 0 + seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) + input_first_tensor = shape.split("-")[0].split(";")[0].split(",") + if len(input_first_tensor) == 3: + head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) + else: + head_dim = int(input_first_tensor[3]) + if head_dim % 128 != 0 and seq_len % 128 != 0: + affinity_flag = True + suggestion = "D和S均不能被128整除" + elif head_dim % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + elif seq_len % 128 != 0: + affinity_flag = True + suggestion = "S不能被128整除" + if affinity_flag: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types - pass + if affinity_flag: + # 不亲和算子 计算耗时,加入affinity_queue + affinity_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "suggestion": suggestion, + "duration": shape_duration}) + continue + else: + # 处理bound算子和优化算子 + aiv_vec_ratio = 0. + aic_fixpipe_ratio = 0. + aic_mte2_ratio = 0. + bound = "" + optimization = 0. + if len(shape.split("-")) > 2: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[ + 1:-1] + "-grad" == shape): + aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: + bound = "mte2_and_fixpipe_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" + else: + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + else: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aic_vec_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: + bound = "mte2_and_vec_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" + else: + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + if bound: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": bound, + "duration": shape_duration}) + else: + optimization_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": optimization}) - def check_fa_operator(self): - pass + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - def check_vector_operator(self): + def check_vector_operator(self, profiling_dataset: ProfilingDataset): pass def make_record(self, result: OptimizeResult): -- 
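The fixed-length FlashAttention check above comes down to a 128-alignment test on head_dim (D) and sequence length (S); an illustrative sketch with hypothetical values:

    def fa_affinity_hint(head_dim: int, seq_len: int) -> str:
        # Rule of thumb from the checker: both D (head_dim) and S (seq_len) should be multiples of 128.
        if head_dim % 128 != 0 and seq_len % 128 != 0:
            return "D和S均不能被128整除"
        if head_dim % 128 != 0:
            return "D不能被128整除"
        if seq_len % 128 != 0:
            return "S不能被128整除"
        return ""  # aligned; not flagged as an affinity issue

    print(fa_affinity_hint(80, 2048))   # 'D不能被128整除'
    print(fa_affinity_hint(128, 4096))  # '' (treated as affine)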
Gitee From 705dbad637d1e8706e46ee1aad5626ccf26ed00d Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 16 Jan 2025 17:18:44 +0800 Subject: [PATCH 10/72] =?UTF-8?q?=E7=AE=97=E5=AD=90=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BF=AE=E6=94=B9=EF=BC=8C=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?Cube=E7=AE=97=E5=AD=90=E3=80=81Fa=E7=AE=97=E5=AD=90=E5=92=8CVec?= =?UTF-8?q?tor=E7=AE=97=E5=AD=90=E7=9A=84=E6=80=A7=E8=83=BD=E5=88=A4?= =?UTF-8?q?=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index f8cccbb72..ea13cbaa7 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -18,7 +18,6 @@ from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDatase from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult -from queue import PriorityQueue logger = logging.getLogger() @@ -282,6 +281,59 @@ class AICorePerformanceChecker: sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] def check_vector_operator(self, profiling_dataset: ProfilingDataset): + vector_dict = self.vector_dict + vector_list = [] + optimization_queue = [] + bound_queue = [] + for op_name in vector_dict: + for shape in vector_dict[op_name]: + for operator in profiling_dataset.op_summary.op_list: + if (operator.op_name == op_name and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + vector_list.append(operator) + for op_name in vector_dict: + for shape in vector_dict[op_name]: + aiv_vec_ratio = 0. + aiv_met2_ratio = 0. + aiv_met3_ratio = 0. + bound = "" + shape_duration = 0. + optimization = 0. 
+ dtype = "" + for operator in vector_list: + if (operator.op_name == op_name and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aiv_met2_ratio += float(operator.aiv_met2_ratio) + aiv_met3_ratio += float(operator.aiv_met3_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + if aiv_vec_ratio + aiv_met2_ratio + aiv_met3_ratio >= 0.9: + bound = "vec_met2_met3_bound" + elif aiv_met2_ratio >= 0.7: + bound = "met2_bound" + elif aiv_met3_ratio >= 0.7: + bound = "met3_bound" + elif aiv_vec_ratio >= 0.7: + bound = "vec_bound" + else: + optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_met2_ratio, 0.7 - aiv_met3_ratio) + if bound: + bound_queue.append( + {"op_name": op_name, + "shape": shape, + "bound": bound, + "dtype": dtype, + "duration": shape_duration}) + else: + optimization_queue.append( + {"op_name": op_name, + "shape": shape, + "dtype": dtype, + "optimization": optimization}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] + pass def make_record(self, result: OptimizeResult): -- Gitee From b54252163619871b550c9bb098092e87cb68df17 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 17 Jan 2025 14:27:45 +0800 Subject: [PATCH 11/72] =?UTF-8?q?checker=E4=BB=A3=E7=A0=81=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index ea13cbaa7..883891b13 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -134,8 +134,8 @@ class AICorePerformanceChecker: shap_list.append(operator) shape_duration += float(operator.task_duration) dtype = operator.input_data_types - aic_mac_ratio = sum(operator.aic_mac_ratio for operator in shap_list) / len(shap_list) - aic_mte2_ratio = sum(operator.aic_mte2_ratio for operator in shap_list) / len(shap_list) + aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) + aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) if (aic_mac_ratio >= 0.8) and aic_mte2_ratio >= 0.95: bound_queue.append( {"op_name": op, @@ -251,7 +251,7 @@ class AICorePerformanceChecker: for operator in fa_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aic_vec_ratio) + aiv_vec_ratio += float(operator.aiv_vec_ratio) aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: @@ -294,8 +294,8 @@ class AICorePerformanceChecker: for op_name in vector_dict: for shape in vector_dict[op_name]: aiv_vec_ratio = 0. - aiv_met2_ratio = 0. - aiv_met3_ratio = 0. + aiv_mte2_ratio = 0. + aiv_mte3_ratio = 0. bound = "" shape_duration = 0. optimization = 0. 
@@ -304,20 +304,20 @@ class AICorePerformanceChecker: if (operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): aiv_vec_ratio += float(operator.aiv_vec_ratio) - aiv_met2_ratio += float(operator.aiv_met2_ratio) - aiv_met3_ratio += float(operator.aiv_met3_ratio) + aiv_mte2_ratio += float(operator.aiv_mte2_ratio) + aiv_mte3_ratio += float(operator.aiv_mte3_ratio) shape_duration += float(operator.task_duration) dtype = operator.input_data_types - if aiv_vec_ratio + aiv_met2_ratio + aiv_met3_ratio >= 0.9: - bound = "vec_met2_met3_bound" - elif aiv_met2_ratio >= 0.7: - bound = "met2_bound" - elif aiv_met3_ratio >= 0.7: - bound = "met3_bound" + if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: + bound = "vec_mte2_mte3_bound" + elif aiv_mte2_ratio >= 0.7: + bound = "mte2_bound" + elif aiv_mte3_ratio >= 0.7: + bound = "mte3_bound" elif aiv_vec_ratio >= 0.7: bound = "vec_bound" else: - optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_met2_ratio, 0.7 - aiv_met3_ratio) + optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) if bound: bound_queue.append( {"op_name": op_name, @@ -393,7 +393,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or hasattr(profiling_dataset.op_summary, "op_list"): + if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False return True -- Gitee From 0a70bd67befc94102d84f3f9efbc546018307c3c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 17 Jan 2025 16:09:44 +0800 Subject: [PATCH 12/72] =?UTF-8?q?checker=E4=BB=A3=E7=A0=81=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 155 +++++++++--------- 1 file changed, 75 insertions(+), 80 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 883891b13..97952f229 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -105,13 +105,10 @@ class AICorePerformanceChecker: optimization_queue = [] bound_queue = [] affinity_queue = [] - operator_list = [] - for op in profiling_dataset.op_summary.op_list: - if (op.op_name in cube_dict and - op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]): - operator_list.append(op) + operator_list = [op for op in profiling_dataset.op_summary.op_list + if op.op_name in cube_dict + and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] for op in cube_dict: - shap_list = [] for shape in cube_dict[op]: dtype = None shape_duration = 0. 
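The list comprehension above keys operators by an "<input_shapes>-<output_shapes>" string built from the quoted CSV fields; a small self-contained sketch of that selection, with made-up op names and shapes:

    def shape_key(input_shapes: str, output_shapes: str) -> str:
        # Same key the checker builds: strip the outer quotes and join inputs/outputs with "-".
        return input_shapes[1:-1] + "-" + output_shapes[1:-1]

    op_summary = [
        {"op_name": "MatMulV2", "input_shapes": '"4096,1024;1024,1024"', "output_shapes": '"4096,1024"'},
        {"op_name": "Add",      "input_shapes": '"4096,1024;4096,1024"', "output_shapes": '"4096,1024"'},
    ]
    cube_dict = {"MatMulV2": {"4096,1024;1024,1024-4096,1024"}}

    operator_list = [op for op in op_summary
                     if op["op_name"] in cube_dict
                     and shape_key(op["input_shapes"], op["output_shapes"]) in cube_dict[op["op_name"]]]
    print([op["op_name"] for op in operator_list])  # ['MatMulV2']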
@@ -124,45 +121,47 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): dtype = operator.input_data_types shape_duration += float(operator.task_duration) - affinity_queue.append( - {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration}) + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "duration": shape_duration}) continue else: - for operator in operator_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - shap_list.append(operator) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types + shap_list = [operator for operator in operator_list if + operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] + shape_duration = sum(float(operator.task_duration) for operator in shap_list) + dtype = shap_list[0].input_data_types if shap_list else None aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) - if (aic_mac_ratio >= 0.8) and aic_mte2_ratio >= 0.95: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_and_mte2_bound", - "duration": shape_duration}) + if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_and_mte2_bound", + "duration": shape_duration}) elif aic_mac_ratio >= 0.8: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_bound", - "duration": shape_duration}) + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_bound", + "duration": shape_duration}) elif aic_mte2_ratio >= 0.95: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mte2_bound", - "duration": shape_duration}) + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mte2_bound", + "duration": shape_duration}) else: - optimization_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + optimization_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] @@ -211,34 +210,33 @@ class AICorePerformanceChecker: if affinity_flag: for operator in fa_list: if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] == shape): shape_duration += float(operator.task_duration) dtype = operator.input_data_types if affinity_flag: # 不亲和算子 计算耗时,加入affinity_queue - affinity_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "suggestion": suggestion, - "duration": shape_duration}) + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "suggestion": suggestion, + "duration": 
shape_duration}) continue else: # 处理bound算子和优化算子 - aiv_vec_ratio = 0. - aic_fixpipe_ratio = 0. - aic_mte2_ratio = 0. + aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. bound = "" - optimization = 0. if len(shape.split("-")) > 2: for operator in fa_list: if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[ - 1:-1] + "-grad" == shape): + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] + "-grad" == shape): aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) + dtype = operator.input_data_types if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: bound = "mte2_and_fixpipe_bound" elif aic_mte2_ratio >= 0.8: @@ -263,18 +261,18 @@ class AICorePerformanceChecker: else: optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) if bound: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": bound, - "duration": shape_duration}) + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": bound, + "duration": shape_duration}) else: - optimization_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": optimization}) + optimization_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": optimization}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], @@ -285,21 +283,17 @@ class AICorePerformanceChecker: vector_list = [] optimization_queue = [] bound_queue = [] + vector_list.extend( + operator for op_name in vector_dict + for shape in vector_dict[op_name] + for operator in profiling_dataset.op_summary.op_list + if operator.op_name == op_name + and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape + ) for op_name in vector_dict: for shape in vector_dict[op_name]: - for operator in profiling_dataset.op_summary.op_list: - if (operator.op_name == op_name and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - vector_list.append(operator) - for op_name in vector_dict: - for shape in vector_dict[op_name]: - aiv_vec_ratio = 0. - aiv_mte2_ratio = 0. - aiv_mte3_ratio = 0. - bound = "" - shape_duration = 0. - optimization = 0. - dtype = "" + aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. 
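Each check finally keeps only the five most significant entries per queue via the sorted(...)[:5] returns; a tiny sketch of that selection with hypothetical entries:

    optimization_queue = [
        {"op_name": "MatMulV2",    "optimization": 0.12},
        {"op_name": "BatchMatMul", "optimization": 0.31},
        {"op_name": "MatMulV3",    "optimization": 0.05},
    ]
    top_entries = sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5]
    print([entry["op_name"] for entry in top_entries])  # ['BatchMatMul', 'MatMulV2', 'MatMulV3']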
+ bound, dtype = "", "" for operator in vector_list: if (operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): @@ -319,15 +313,15 @@ class AICorePerformanceChecker: else: optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) if bound: - bound_queue.append( - {"op_name": op_name, + bound_queue.append({ + "op_name": op_name, "shape": shape, "bound": bound, "dtype": dtype, "duration": shape_duration}) else: - optimization_queue.append( - {"op_name": op_name, + optimization_queue.append({ + "op_name": op_name, "shape": shape, "dtype": dtype, "optimization": optimization}) @@ -393,7 +387,8 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, "op_list"): + if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, + "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False return True -- Gitee From 4b7fc2b0ab497f2bbcda77062dae086d54c7fd36 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 17 Jan 2025 17:26:01 +0800 Subject: [PATCH 13/72] =?UTF-8?q?checker=E4=BB=A3=E7=A0=81=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 97952f229..26c3994db 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -31,11 +31,11 @@ class AICorePerformanceChecker: def __init__(self): + self.result = dict() self.ai_core_performance_issues = False self.desc = "" self.suggestions = "" self.cube_dict = {} - self.cube_list = [] self.fa_dict = {} self.fa_list = [] self.vector_dict = {} @@ -91,7 +91,6 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - self.result = dict() if self.cube_dict: self.result["cube"] = self.check_cube_operator(promoting_dataset) if self.fa_dict: -- Gitee From b8444dea415f1543534ffd099dfc4fb2293ca17a Mon Sep 17 00:00:00 2001 From: kiritorl Date: Sat, 18 Jan 2025 18:16:55 +0800 Subject: [PATCH 14/72] =?UTF-8?q?=E9=80=82=E9=85=8D=E8=A1=A8=E6=A0=BC?= =?UTF-8?q?=E5=92=8Chtml=E9=A1=B5=E9=9D=A2=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 12 +- .../ai_core_performance_checker.py | 132 +++++++++++----- .../html/templates/ai_core_performance.html | 141 +++++++++++++----- .../advisor/rules/cn/aicore_performance.yaml | 115 +------------- .../advisor/rules/en/aicore_performance.yaml | 6 + profiler/cli/entrance.py | 4 +- 6 files changed, 224 insertions(+), 186 deletions(-) create mode 100644 profiler/advisor/rules/en/aicore_performance.yaml diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py 
b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 76189af1a..03b0a8c6e 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -38,13 +38,13 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): def optimize(self, **kwargs): add_render_list = kwargs.get("add_render_list", True) - ai_core_freq_checker = AICorePerformanceChecker() - ai_core_freq_checker.data_filter(self.profiling_dataset) - if not ai_core_freq_checker.ai_core_performance_issues: + ai_core_perf_checker = AICorePerformanceChecker() + ai_core_perf_checker.data_filter(self.profiling_dataset) + if not ai_core_perf_checker.ai_core_performance_issues: return self.result - ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) - ai_core_freq_checker.make_record(self.result) - self.html = ai_core_freq_checker.make_render(self.html_render, + ai_core_perf_checker.check_ai_core_performance(self.profiling_dataset) + ai_core_perf_checker.make_record(self.result) + self.html = ai_core_perf_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), rank=kwargs.get("rank")) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 883891b13..8b0c9c224 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,11 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +import os from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult +from profiler.prof_common.additional_args_manager import AdditionalArgsManager +from profiler.prof_common.file_manager import FileManager logger = logging.getLogger() @@ -40,6 +43,27 @@ class AICorePerformanceChecker: self.fa_list = [] self.vector_dict = {} self.vector_list = [] + self.load_aicore_perf_rules() + + def load_aicore_perf_rules(self): + language = AdditionalArgsManager().language + rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))), + "rules", + language, + "aicore_performance.yaml" + ) + + if not os.path.exists(rule_path): + logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) + + self.aicore_rules = FileManager.read_yaml_file(rule_path) + self._PROBLEM = self.aicore_rules.get("problem") + self.desc = self.aicore_rules.get("description") + self.suggestion = self.aicore_rules.get("suggestion") + self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion") + self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion") + self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion") def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_dict(profiling_dataset): @@ -83,6 +107,9 @@ class AICorePerformanceChecker: @staticmethod def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") + # todo batchmatmul + if len(input_shapes) > 2: + return 1 memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) return memory * 2 / 1024 / 1024 @@ -106,6 +133,7 @@ class AICorePerformanceChecker: bound_queue = [] affinity_queue = [] operator_list = [] + suggestion = "内轴无法被256整除" for op in profiling_dataset.op_summary.op_list: if (op.op_name in cube_dict and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]): @@ -125,7 +153,7 @@ class AICorePerformanceChecker: dtype = operator.input_data_types shape_duration += float(operator.task_duration) affinity_queue.append( - {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration}) + {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration, "suggestion": suggestion}) continue else: for operator in operator_list: @@ -340,51 +368,85 @@ class AICorePerformanceChecker: """ make record for what and how to optimize """ - if not self.ai_core_freq_issues: - return self.ai_core_freq_issues - - prompt_class = BasePrompt.get_prompt_class(self.__class__.__name__) - - problem = prompt_class.PROBLEM - if self.rank is not None: - problem += prompt_class.RANK_ID.format(self.rank) - - self.desc = prompt_class.DESCRIPTION.format(len(self.decrease_freq_ops), self.DECREASE_FREQ_RATIO) - if self.rank: - self.desc = prompt_class.RANK_DESCRIPTION.format(self.rank) + self.desc.lower() + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues - optimization_item = OptimizeItem(problem, self.desc, [prompt_class.SUGGESTION]) - result.add(OptimizeRecord(optimization_item)) + cube_problem = "Cube算子性能分析" + fa_problem = "FA算子性能分析" + vector_problem = 
"Vector算子性能分析" + sugg_keys = ['opti', 'bound', 'affinity'] + cube_desc = dict.fromkeys(sugg_keys, "") + fa_desc = dict.fromkeys(sugg_keys, "") + vector_desc = dict.fromkeys(sugg_keys, "") + if self.result["cube"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(cube_problem, headers=headers) + for cube_opti_issue in self.result["cube"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) + cube_desc["opti"] += opti_sugg + result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) + for cube_bound_issue in self.result["cube"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) + cube_desc["bound"] += bound_sugg + result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) + for cube_affinity_issue in self.result["cube"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) + cube_desc["affinity"] += affinity_sugg + result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) - self.headers = [ - "Operator name", - "Count", - "Total duration(us)", - "AI CORE frequency decreased ratio", - "Average frequency", - "Max frequency", - "Min frequency", - ] - result.add_detail(problem, headers=self.headers) + if self.result["fa"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(fa_problem, headers=headers) + for fa_opti_issue in self.result["fa"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) + fa_desc["opti"] += opti_sugg + result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) + for fa_bound_issue in self.result["fa"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) + fa_desc["bound"] += bound_sugg + result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) + for fa_affinity_issue in self.result["fa"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) + fa_desc["affinity"] += affinity_sugg + result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) - for row in self.decrease_freq_ops: - result.add_detail(problem, detail=row) + if self.result["vector"]: + optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(vector_problem, headers=headers) + for vector_opti_issue in self.result["vector"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) + vector_desc["opti"] += opti_sugg + result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) + for vector_bound_issue in self.result["vector"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) + vector_desc["bound"] += bound_sugg + result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) return True def make_render(self, html_render, add_render_list=True, **kwargs): - if not self.ai_core_freq_issues: - return self.ai_core_freq_issues + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues priority = kwargs.get("priority") - if self.SHOW_TOPK_OPS: - self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." 
return html_render.render_template(key="computation", template_dir="templates", template_name="ai_core_performance.html", - desc=self.desc, - suggestion=self.suggestions, - headers=self.headers, - data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + format_result=self.result, add_render_list=add_render_list, priority_background_color=priority, rank=kwargs.get("rank")) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index a009f073d..7feb3e768 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -1,62 +1,135 @@ -{% if data|length > 0 %} +{% if format_result|length > 0 %}
[ai_core_performance.html, new template body (markup abridged): the heading "AI CORE Performance Analysis" replaces "AICORE Performance Analysis", and three conditional sections on format_result.cube, format_result.fa and format_result.vector replace the old MatMul / FlashAttention / Vector tables that looped over headers and data. Each section prints a lead line ("MatMul算子相关分析,参考如下:", "FA算子相关分析,参考如下:", "Vector算子相关分析,参考如下:") followed by a two-column table with headers 类别 and 描述及建议 and rows 性能优化算子集合, bound算子集合 and 不亲和算子集合 (the Vector section has no 不亲和 row). Each cell is accumulated in a Jinja2 namespace variable, one "<op_name>算子 shape: <shape> dtype: <dtype>" entry per item together with its 参考性能优化空间, bound类型 or 不亲和类型, entries joined with <br>.]
{% endif %} \ No newline at end of file diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index 7eef1598a..60d813e1d 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -1,109 +1,6 @@ -problem: "AICPU算子" -description: "一些算子和任务执行时间超过了{}us,比如:\n" -suggestion: "修改代码避免使用aicpu类算子" -double_suggestion: "尝试将double类型的算子转换成float,比如{}" -DataTypeSuggestion: &DataTypeSuggestion "数据类型{}在{}算子中可能会造成AICpu问题, 如果可以,尝试转换成{}。" -AICPU_DOC_URL: &AICPU_DOC_URL "https://gitee.com/ascend/mstt/blob/master/profiler/advisor/doc/Samples%20of%20AI%20CPU%20Operator%20Replacement.md" - -CommonChecker: - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ __ALL__ ] - ignore_type: [ cast, tensoraequal, equal, nonzero, mul ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ cast ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ tensorequal ] - input: [ float, float32, float16, bool, int32, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ equal ] - input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ nonzero ] - input: [ float16, bool, dt_bf16 ] - output: [ int64 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ mul ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ __ALL__ ] - ignore_type: [ cast, tensorequal, equal, nonzero, mul ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] - output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ cast ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ tensorequal ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ equal ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ mul ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] - output: [ float, float32, 
float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] - suggestion: *DataTypeSuggestion - -ExampleGuideChecker: - - IndexPutChecker: - op_type: [index] - url: *AICPU_DOC_URL - suggestion: "请参考链接修改源码,尝试用等价的算子替换index算子。" - - - NonzeroChecker: - op_type: [ indexput, indexputv2 ] - url: *AICPU_DOC_URL - suggestion: "请参考链接修改源码,尝试用等价的算子替换indexput算子。" - - - CastChecker: - op_type: [ argmin ] - url: *AICPU_DOC_URL - suggestion: "请参考链接更新cann-tookit包到7.0.RC1及以上的版本。" - - - CastChecker: - op_type: [ nonzero ] - url: *AICPU_DOC_URL - suggestion: "请参考链接修改源码,尝试用等价的算子替换nonzero算子。" - - +problem: "AICORE算子" +description: "提供一些AICORE算子的参考瓶颈" +suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子" +affinity_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 有不亲和特征: {suggestion}\n" +bound_suggestion: "{op_name}算子 shape{shape} dtype{dtype} bound类型为: {bound} bound\n" +optimization_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 疑似有性能优化空间,参考性能优化空间{optimization}\n" \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml new file mode 100644 index 000000000..247022214 --- /dev/null +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -0,0 +1,6 @@ +problem: "AICORE Operator" +description: "Provide some reference bottlenecks for the AICORE operator" +suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space" +affinity_suggestion: "{op_name} Op shape{shape} dtype{dtype} with disaffection characteristics: {suggestion}\n" +bound_suggestion: "{op_name} Op shape{shape} dtype{dtype} bound type: {bound} bound\n" +optimization_suggestion: "{op_name} Op shape{shape} dtype{dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n" \ No newline at end of file diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 89ac8187d..fa7d2421f 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -69,7 +69,7 @@ msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") if __name__ == "__main__": msprof_analyze_cli.main( [ - "analyze","all","-d", - r"D:\data\file","-l","cn" + "advisor","computation","-d", + r"E:\B站\910b-33f-cpsp4-add_contiguous\train-2184159-master-0_1058382_20240910063706363_ascend_pt","-l","cn" ] ) -- Gitee From 7aa2873f0b52469eae7dfdaa3b88b6b8d091981d Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 10:28:55 +0800 Subject: [PATCH 15/72] =?UTF-8?q?ND=20NZ=E6=A0=BC=E5=BC=8F=E8=B0=83?= =?UTF-8?q?=E6=95=B4=EF=BC=88=E5=86=85=E8=BD=B4=E8=AE=A1=E7=AE=97=E4=B8=8E?= =?UTF-8?q?=E5=86=85=E5=AD=98=E8=AE=A1=E7=AE=97=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e7267fdaf..6a6c2bbc7 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -107,11 +107,16 @@ class AICorePerformanceChecker: @staticmethod def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") - # todo batchmatmul - if len(input_shapes) > 2: - return 1 
- memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) - memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) + memory = 0 + if len(input_shapes.split(",")) == 4: + memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3]) + for shape in (shapes.split(",") for shapes in input_shapes)) + output_shape = operator.output_shapes[1:-1].split(",") + memory += (int(output_shape[0]) * int(output_shape[1]) * int(output_shape[2]) * int(output_shape[3])) + else: + memory += sum(int(shape[0]) * int(shape[1]) for shape in (shapes.split(",") for shapes in input_shapes)) + output_shape = operator.output_shapes[1:-1].split(",") + memory += (int(output_shape[0]) * int(output_shape[1])) return memory * 2 / 1024 / 1024 def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): @@ -126,7 +131,6 @@ class AICorePerformanceChecker: self.result["vector"] = self.check_vector_operator(promoting_dataset) def check_cube_operator(self, profiling_dataset: ProfilingDataset): - # todo 未处理ND、NZ格式 cube_dict = self.cube_dict optimization_queue = [] bound_queue = [] @@ -140,9 +144,22 @@ class AICorePerformanceChecker: dtype = None shape_duration = 0. # 判断输入shape内轴是否为256的倍数 - affinity_flag = (int(shape.split("-")[0].split(";")[0].split(",")[1]) + - int(shape.split("-")[0].split(";")[1].split(",")[0])) % 256 != 0 - if affinity_flag: + if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: + # NZ格式 + shapes = shape.split("-")[0].split(";") + b = shapes[0].split(",")[1] + c = shapes[0].split(",")[2] + + f = shapes[1].split(",")[1] + g = shapes[1].split(",")[2] + affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) + else: + # ND格式 + shapes = shape.split("-")[0].split(";") + l = shapes[0].split(",")[1] + k = shapes[1].split(",")[1] + affinity_flag = (l % 256 == 0) and (k % 256 == 0) + if not affinity_flag: for operator in operator_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): @@ -342,17 +359,17 @@ class AICorePerformanceChecker: optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) if bound: bound_queue.append({ - "op_name": op_name, - "shape": shape, - "bound": bound, - "dtype": dtype, - "duration": shape_duration}) + "op_name": op_name, + "shape": shape, + "bound": bound, + "dtype": dtype, + "duration": shape_duration}) else: optimization_queue.append({ - "op_name": op_name, - "shape": shape, - "dtype": dtype, - "optimization": optimization}) + "op_name": op_name, + "shape": shape, + "dtype": dtype, + "optimization": optimization}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] -- Gitee From b6dd0cc690cda6e263b5ca63146aea1bcf3abcbb Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 11:15:33 +0800 Subject: [PATCH 16/72] =?UTF-8?q?ND=20NZ=E9=94=99=E8=AF=AF=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 6a6c2bbc7..cde048844 100644 --- 
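With this adjustment the estimate walks either four NZ axes or two ND axes per tensor; a standalone sketch of the revised calculation with hypothetical shape strings (two bytes per element):

    def estimate_memory_mb(input_shapes: str, output_shapes: str) -> float:
        in_shapes = input_shapes[1:-1].split(";")
        out_dims = output_shapes[1:-1].split(",")
        if len(in_shapes[0].split(",")) == 4:   # NZ-style 4-D shapes
            elements = sum(int(a) * int(b) * int(c) * int(d)
                           for a, b, c, d in (s.split(",") for s in in_shapes))
            elements += int(out_dims[0]) * int(out_dims[1]) * int(out_dims[2]) * int(out_dims[3])
        else:                                   # ND-style 2-D shapes
            elements = sum(int(a) * int(b) for a, b in (s.split(",") for s in in_shapes))
            elements += int(out_dims[0]) * int(out_dims[1])
        return elements * 2 / 1024 / 1024

    # Hypothetical NZ MatMul: two 4-D inputs and one 4-D output.
    print(estimate_memory_mb('"64,64,16,16;64,512,16,16"', '"64,512,16,16"'))  # 34.0 MB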
a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -108,7 +108,7 @@ class AICorePerformanceChecker: def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") memory = 0 - if len(input_shapes.split(",")) == 4: + if len(input_shapes[0].split(",")) == 4: memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3]) for shape in (shapes.split(",") for shapes in input_shapes)) output_shape = operator.output_shapes[1:-1].split(",") @@ -147,17 +147,17 @@ class AICorePerformanceChecker: if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: # NZ格式 shapes = shape.split("-")[0].split(";") - b = shapes[0].split(",")[1] - c = shapes[0].split(",")[2] + b = int(shapes[0].split(",")[1]) + c = int(shapes[0].split(",")[2]) - f = shapes[1].split(",")[1] - g = shapes[1].split(",")[2] + f = int(shapes[1].split(",")[1]) + g = int(shapes[1].split(",")[2]) affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) else: # ND格式 shapes = shape.split("-")[0].split(";") - l = shapes[0].split(",")[1] - k = shapes[1].split(",")[1] + l = int(shapes[0].split(",")[1]) + k = int(shapes[1].split(",")[1]) affinity_flag = (l % 256 == 0) and (k % 256 == 0) if not affinity_flag: for operator in operator_list: -- Gitee From 2711f076df26d09d553d34467c1c839c3fb6b22f Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 14:09:09 +0800 Subject: [PATCH 17/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 658 +++++++++--------- 1 file changed, 330 insertions(+), 328 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index cde048844..5514753ff 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -123,351 +123,353 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if self.cube_dict: - self.result["cube"] = self.check_cube_operator(promoting_dataset) - if self.fa_dict: - self.result["fa"] = self.check_fa_operator(promoting_dataset) - if self.vector_dict: - self.result["vector"] = self.check_vector_operator(promoting_dataset) - - def check_cube_operator(self, profiling_dataset: ProfilingDataset): - cube_dict = self.cube_dict - optimization_queue = [] - bound_queue = [] - affinity_queue = [] - operator_list = [op for op in profiling_dataset.op_summary.op_list - if op.op_name in cube_dict - and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] - suggestion = "内轴无法被256整除" - for op in cube_dict: - for shape in cube_dict[op]: - dtype = None - shape_duration = 0. 
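After the int() fixes above, the cube affinity test is a 256-alignment check on the inner axes of both input tensors; an illustrative sketch over hypothetical ND and NZ shape keys:

    def cube_inner_axes_affine(shape_key: str) -> bool:
        # shape_key is "<input_shapes>-<output_shapes>"; inputs are separated by ";".
        inputs = shape_key.split("-")[0].split(";")
        if len(inputs[0].split(",")) == 4:      # NZ: products of the two inner axes per input
            b, c = int(inputs[0].split(",")[1]), int(inputs[0].split(",")[2])
            f, g = int(inputs[1].split(",")[1]), int(inputs[1].split(",")[2])
            return (b * c % 256 == 0) and (f * g % 256 == 0)
        # ND: the second axis of each input must itself be a multiple of 256
        k0, k1 = int(inputs[0].split(",")[1]), int(inputs[1].split(",")[1])
        return (k0 % 256 == 0) and (k1 % 256 == 0)

    print(cube_inner_axes_affine("4096,1024;1024,512-4096,512"))   # True: 1024 and 512 are multiples of 256
    print(cube_inner_axes_affine("4096,1000;1000,512-4096,512"))   # False: 1000 is not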
- # 判断输入shape内轴是否为256的倍数 - if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: - # NZ格式 - shapes = shape.split("-")[0].split(";") - b = int(shapes[0].split(",")[1]) - c = int(shapes[0].split(",")[2]) - - f = int(shapes[1].split(",")[1]) - g = int(shapes[1].split(",")[2]) - affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) - else: - # ND格式 - shapes = shape.split("-")[0].split(";") - l = int(shapes[0].split(",")[1]) - k = int(shapes[1].split(",")[1]) - affinity_flag = (l % 256 == 0) and (k % 256 == 0) - if not affinity_flag: - for operator in operator_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - dtype = operator.input_data_types - shape_duration += float(operator.task_duration) - affinity_queue.append({ + self.result["cube"] = self.check_cube_operator(promoting_dataset) + self.result["fa"] = self.check_fa_operator(promoting_dataset) + self.result["vector"] = self.check_vector_operator(promoting_dataset) + + +def check_cube_operator(self, profiling_dataset: ProfilingDataset): + cube_dict = self.cube_dict + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + operator_list = [op for op in profiling_dataset.op_summary.op_list + if op.op_name in cube_dict + and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] + suggestion = "内轴无法被256整除" + for op in cube_dict: + for shape in cube_dict[op]: + dtype = None + shape_duration = 0. + # 判断输入shape内轴是否为256的倍数 + if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: + # NZ格式 + shapes = shape.split("-")[0].split(";") + b = int(shapes[0].split(",")[1]) + c = int(shapes[0].split(",")[2]) + + f = int(shapes[1].split(",")[1]) + g = int(shapes[1].split(",")[2]) + affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) + else: + # ND格式 + shapes = shape.split("-")[0].split(";") + l = int(shapes[0].split(",")[1]) + k = int(shapes[1].split(",")[1]) + affinity_flag = (l % 256 == 0) and (k % 256 == 0) + if not affinity_flag: + for operator in operator_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + dtype = operator.input_data_types + shape_duration += float(operator.task_duration) + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "duration": shape_duration, + "suggestion": suggestion}) + continue + else: + shap_list = [operator for operator in operator_list if + operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] + shape_duration = sum(float(operator.task_duration) for operator in shap_list) + dtype = shap_list[0].input_data_types if shap_list else None + aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) + aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) + if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: + bound_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "duration": shape_duration, - "suggestion": suggestion}) - continue - else: - shap_list = [operator for operator in operator_list if - operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] - shape_duration = sum(float(operator.task_duration) for operator in shap_list) - dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) - 
aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) - if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_and_mte2_bound", - "duration": shape_duration}) - elif aic_mac_ratio >= 0.8: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_bound", - "duration": shape_duration}) - elif aic_mte2_ratio >= 0.95: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mte2_bound", - "duration": shape_duration}) - else: - optimization_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) - return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], - sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], - sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - - def check_fa_operator(self, profiling_dataset: ProfilingDataset): - fa_list = self.fa_list - fa_dict = self.fa_dict - optimization_queue = [] - bound_queue = [] - affinity_queue = [] - # 不亲和算子筛选 - for op in fa_dict: - for shape in fa_dict[op]: - affinity_flag = False - shape_duration = 0. - dtype = None - suggestion = "" - if "varlen" in op.lower(): - # 处理变长算子 如果不亲和则affinity_flag为False - if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: - affinity_flag = True - suggestion = "D不能被128整除" - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - else: - # 处理定长算子 如果不亲和则affinity_flag为False - head_dim = 0 - seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) - input_first_tensor = shape.split("-")[0].split(";")[0].split(",") - if len(input_first_tensor) == 3: - head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) - else: - head_dim = int(input_first_tensor[3]) - if head_dim % 128 != 0 and seq_len % 128 != 0: - affinity_flag = True - suggestion = "D和S均不能被128整除" - elif head_dim % 128 != 0: - affinity_flag = True - suggestion = "D不能被128整除" - elif seq_len % 128 != 0: - affinity_flag = True - suggestion = "S不能被128整除" - if affinity_flag: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + - operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - - if affinity_flag: - # 不亲和算子 计算耗时,加入affinity_queue - affinity_queue.append({ + "bound": "mac_and_mte2_bound", + "duration": shape_duration}) + elif aic_mac_ratio >= 0.8: + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_bound", + "duration": shape_duration}) + elif aic_mte2_ratio >= 0.95: + bound_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "suggestion": suggestion, + "bound": "mte2_bound", "duration": shape_duration}) - continue else: - # 处理bound算子和优化算子 - aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. 
- bound = "" - if len(shape.split("-")) > 2: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + - operator.output_shapes[1:-1] + "-grad" == shape): - aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: - bound = "mte2_and_fixpipe_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" - else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) - else: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: - bound = "mte2_and_vec_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" - else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) - if bound: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": bound, - "duration": shape_duration}) + optimization_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + + +def check_fa_operator(self, profiling_dataset: ProfilingDataset): + fa_list = self.fa_list + fa_dict = self.fa_dict + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + # 不亲和算子筛选 + for op in fa_dict: + for shape in fa_dict[op]: + affinity_flag = False + shape_duration = 0. 
+ dtype = None + suggestion = "" + if "varlen" in op.lower(): + # 处理变长算子 如果不亲和则affinity_flag为False + if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + else: + # 处理定长算子 如果不亲和则affinity_flag为False + head_dim = 0 + seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) + input_first_tensor = shape.split("-")[0].split(";")[0].split(",") + if len(input_first_tensor) == 3: + head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) + else: + head_dim = int(input_first_tensor[3]) + if head_dim % 128 != 0 and seq_len % 128 != 0: + affinity_flag = True + suggestion = "D和S均不能被128整除" + elif head_dim % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + elif seq_len % 128 != 0: + affinity_flag = True + suggestion = "S不能被128整除" + if affinity_flag: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + + if affinity_flag: + # 不亲和算子 计算耗时,加入affinity_queue + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "suggestion": suggestion, + "duration": shape_duration}) + continue + else: + # 处理bound算子和优化算子 + aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. + bound = "" + if len(shape.split("-")) > 2: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] + "-grad" == shape): + aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: + bound = "mte2_and_fixpipe_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" else: - optimization_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": optimization}) - - return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], - sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], - sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - - def check_vector_operator(self, profiling_dataset: ProfilingDataset): - vector_dict = self.vector_dict - vector_list = [] - optimization_queue = [] - bound_queue = [] - vector_list.extend( - operator for op_name in vector_dict - for shape in vector_dict[op_name] - for operator in profiling_dataset.op_summary.op_list - if operator.op_name == op_name - and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape - ) - for op_name in vector_dict: - for shape in vector_dict[op_name]: - aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. 
- bound, dtype = "", "" - for operator in vector_list: - if (operator.op_name == op_name and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aiv_mte2_ratio += float(operator.aiv_mte2_ratio) - aiv_mte3_ratio += float(operator.aiv_mte3_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: - bound = "vec_mte2_mte3_bound" - elif aiv_mte2_ratio >= 0.7: - bound = "mte2_bound" - elif aiv_mte3_ratio >= 0.7: - bound = "mte3_bound" - elif aiv_vec_ratio >= 0.7: - bound = "vec_bound" + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) else: - optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: + bound = "mte2_and_vec_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" + else: + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) if bound: bound_queue.append({ - "op_name": op_name, - "shape": shape, - "bound": bound, + "op_name": op, + "shape": shape.split("-")[0], "dtype": dtype, + "bound": bound, "duration": shape_duration}) else: optimization_queue.append({ - "op_name": op_name, - "shape": shape, + "op_name": op, + "shape": shape.split("-")[0], "dtype": dtype, "optimization": optimization}) - return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], - sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] - pass + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - def make_record(self, result: OptimizeResult): - """ - make record for what and how to optimize - """ - if not self.ai_core_performance_issues: - return self.ai_core_performance_issues - - cube_problem = "Cube算子性能分析" - fa_problem = "FA算子性能分析" - vector_problem = "Vector算子性能分析" - sugg_keys = ['opti', 'bound', 'affinity'] - cube_desc = dict.fromkeys(sugg_keys, "") - fa_desc = dict.fromkeys(sugg_keys, "") - vector_desc = dict.fromkeys(sugg_keys, "") - if self.result["cube"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(cube_problem, headers=headers) - for cube_opti_issue in self.result["cube"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) - cube_desc["opti"] += opti_sugg - result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) - for cube_bound_issue in self.result["cube"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) - cube_desc["bound"] += bound_sugg - result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) - for cube_affinity_issue in self.result["cube"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) - cube_desc["affinity"] += affinity_sugg - result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) - - if 
self.result["fa"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(fa_problem, headers=headers) - for fa_opti_issue in self.result["fa"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) - fa_desc["opti"] += opti_sugg - result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) - for fa_bound_issue in self.result["fa"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) - fa_desc["bound"] += bound_sugg - result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) - for fa_affinity_issue in self.result["fa"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) - fa_desc["affinity"] += affinity_sugg - result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) - - if self.result["vector"]: - optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(vector_problem, headers=headers) - for vector_opti_issue in self.result["vector"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) - vector_desc["opti"] += opti_sugg - result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) - for vector_bound_issue in self.result["vector"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) - vector_desc["bound"] += bound_sugg - result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) - return True - - def make_render(self, html_render, add_render_list=True, **kwargs): - if not self.ai_core_performance_issues: - return self.ai_core_performance_issues - - priority = kwargs.get("priority") - return html_render.render_template(key="computation", - template_dir="templates", - template_name="ai_core_performance.html", - format_result=self.result, - add_render_list=add_render_list, - priority_background_color=priority, - rank=kwargs.get("rank")) - - def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool: - if not hasattr(profiling_dataset, "op_summary"): - logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") - return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, - "op_list"): - logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") - return False - return True + +def check_vector_operator(self, profiling_dataset: ProfilingDataset): + vector_dict = self.vector_dict + vector_list = [] + optimization_queue = [] + bound_queue = [] + vector_list.extend( + operator for op_name in vector_dict + for shape in vector_dict[op_name] + for operator in profiling_dataset.op_summary.op_list + if operator.op_name == op_name + and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape + ) + for op_name in vector_dict: + for shape in vector_dict[op_name]: + aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. 
+ bound, dtype = "", "" + for operator in vector_list: + if (operator.op_name == op_name and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aiv_mte2_ratio += float(operator.aiv_mte2_ratio) + aiv_mte3_ratio += float(operator.aiv_mte3_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: + bound = "vec_mte2_mte3_bound" + elif aiv_mte2_ratio >= 0.7: + bound = "mte2_bound" + elif aiv_mte3_ratio >= 0.7: + bound = "mte3_bound" + elif aiv_vec_ratio >= 0.7: + bound = "vec_bound" + else: + optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) + if bound: + bound_queue.append({ + "op_name": op_name, + "shape": shape, + "bound": bound, + "dtype": dtype, + "duration": shape_duration}) + else: + optimization_queue.append({ + "op_name": op_name, + "shape": shape, + "dtype": dtype, + "optimization": optimization}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] + + pass + + +def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues + + cube_problem = "Cube算子性能分析" + fa_problem = "FA算子性能分析" + vector_problem = "Vector算子性能分析" + sugg_keys = ['opti', 'bound', 'affinity'] + cube_desc = dict.fromkeys(sugg_keys, "") + fa_desc = dict.fromkeys(sugg_keys, "") + vector_desc = dict.fromkeys(sugg_keys, "") + if self.result["cube"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(cube_problem, headers=headers) + for cube_opti_issue in self.result["cube"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) + cube_desc["opti"] += opti_sugg + result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) + for cube_bound_issue in self.result["cube"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) + cube_desc["bound"] += bound_sugg + result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) + for cube_affinity_issue in self.result["cube"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) + cube_desc["affinity"] += affinity_sugg + result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) + + if self.result["fa"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(fa_problem, headers=headers) + for fa_opti_issue in self.result["fa"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) + fa_desc["opti"] += opti_sugg + result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) + for fa_bound_issue in self.result["fa"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) + fa_desc["bound"] += bound_sugg + result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) + for fa_affinity_issue in self.result["fa"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) + fa_desc["affinity"] += affinity_sugg + result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) + + if self.result["vector"]: + 
optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(vector_problem, headers=headers) + for vector_opti_issue in self.result["vector"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) + vector_desc["opti"] += opti_sugg + result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) + for vector_bound_issue in self.result["vector"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) + vector_desc["bound"] += bound_sugg + result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) + return True + + +def make_render(self, html_render, add_render_list=True, **kwargs): + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues + + priority = kwargs.get("priority") + return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_performance.html", + format_result=self.result, + add_render_list=add_render_list, + priority_background_color=priority, + rank=kwargs.get("rank")) + + +def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool: + if not hasattr(profiling_dataset, "op_summary"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + return False + if not hasattr(profiling_dataset.op_summary, "op_list"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") + return False + return True -- Gitee From d0e72de18d3164e9019156b7880ef451a0d55bc0 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Mon, 20 Jan 2025 15:16:02 +0800 Subject: [PATCH 18/72] =?UTF-8?q?=E9=80=82=E9=85=8D=E8=8B=B1=E6=96=87?= =?UTF-8?q?=E7=89=88=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 54 ++++++----- .../html/templates/ai_core_performance.html | 90 ++++++++++++------- .../advisor/rules/cn/aicore_performance.yaml | 17 +++- .../advisor/rules/en/aicore_performance.yaml | 17 +++- profiler/cli/entrance.py | 4 +- 5 files changed, 116 insertions(+), 66 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e7267fdaf..e6c6e382b 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -57,9 +57,19 @@ class AICorePerformanceChecker: if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) + self.language = language self.aicore_rules = FileManager.read_yaml_file(rule_path) - self._PROBLEM = self.aicore_rules.get("problem") + self._CUBE_PROBLEM = self.aicore_rules.get("cube_problem") + self._FA_PROBLEM = self.aicore_rules.get("fa_problem") + self._VECTOR_PROBLEM = self.aicore_rules.get("vector_problem") self.desc = self.aicore_rules.get("description") + self._BOUND_DESC = self.aicore_rules.get("bound_description") + self._OPTI_DESC = self.aicore_rules.get("optimization_description") + self._AFFINITY_DESC = self.aicore_rules.get("affinity_description") + self._CUBE_AFFINITY_DESC = self.aicore_rules.get("cube_affinity_desc") + self._FA_AFFINITY_DESC_TYPE1 = self.aicore_rules.get("fa_affinity_desc_type1") 
+ self._FA_AFFINITY_DESC_TYPE2 = self.aicore_rules.get("fa_affinity_desc_type2") + self._FA_AFFINITY_DESC_TYPE3 = self.aicore_rules.get("fa_affinity_desc_type3") self.suggestion = self.aicore_rules.get("suggestion") self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion") self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion") @@ -134,7 +144,7 @@ class AICorePerformanceChecker: operator_list = [op for op in profiling_dataset.op_summary.op_list if op.op_name in cube_dict and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] - suggestion = "内轴无法被256整除" + suggestion = self._CUBE_AFFINITY_DESC for op in cube_dict: for shape in cube_dict[op]: dtype = None @@ -211,7 +221,7 @@ class AICorePerformanceChecker: # 处理变长算子 如果不亲和则affinity_flag为False if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: affinity_flag = True - suggestion = "D不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE1 for operator in fa_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): @@ -228,13 +238,13 @@ class AICorePerformanceChecker: head_dim = int(input_first_tensor[3]) if head_dim % 128 != 0 and seq_len % 128 != 0: affinity_flag = True - suggestion = "D和S均不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE3 elif head_dim % 128 != 0: affinity_flag = True - suggestion = "D不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE1 elif seq_len % 128 != 0: affinity_flag = True - suggestion = "S不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE2 if affinity_flag: for operator in fa_list: if (operator.op_name == op and @@ -365,71 +375,68 @@ class AICorePerformanceChecker: if not self.ai_core_performance_issues: return self.ai_core_performance_issues - cube_problem = "Cube算子性能分析" - fa_problem = "FA算子性能分析" - vector_problem = "Vector算子性能分析" sugg_keys = ['opti', 'bound', 'affinity'] cube_desc = dict.fromkeys(sugg_keys, "") fa_desc = dict.fromkeys(sugg_keys, "") vector_desc = dict.fromkeys(sugg_keys, "") if self.result["cube"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ "Type", "Description and Suggestion", ] - result.add_detail(cube_problem, headers=headers) + result.add_detail(self._CUBE_PROBLEM, headers=headers) for cube_opti_issue in self.result["cube"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) cube_desc["opti"] += opti_sugg - result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) + result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) for cube_bound_issue in self.result["cube"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) cube_desc["bound"] += bound_sugg - result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) + result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) for cube_affinity_issue in self.result["cube"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) cube_desc["affinity"] += affinity_sugg - result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) + result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) if self.result["fa"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) 
result.add(OptimizeRecord(optimization_item)) headers = [ "Type", "Description and Suggestion", ] - result.add_detail(fa_problem, headers=headers) + result.add_detail(self._FA_PROBLEM, headers=headers) for fa_opti_issue in self.result["fa"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) fa_desc["opti"] += opti_sugg - result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) for fa_bound_issue in self.result["fa"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) fa_desc["bound"] += bound_sugg - result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) for fa_affinity_issue in self.result["fa"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) fa_desc["affinity"] += affinity_sugg - result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) if self.result["vector"]: - optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) + optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ "Type", "Description and Suggestion", ] - result.add_detail(vector_problem, headers=headers) + result.add_detail(self._VECTOR_PROBLEM, headers=headers) for vector_opti_issue in self.result["vector"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) vector_desc["opti"] += opti_sugg - result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) + result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) for vector_bound_issue in self.result["vector"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) vector_desc["bound"] += bound_sugg - result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) + result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) return True def make_render(self, html_render, add_render_list=True, **kwargs): @@ -441,6 +448,7 @@ class AICorePerformanceChecker: template_dir="templates", template_name="ai_core_performance.html", format_result=self.result, + language=self.language, add_render_list=add_render_list, priority_background_color=priority, rank=kwargs.get("rank")) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index 7feb3e768..48e62ad6c 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -2,132 +2,156 @@

AI CORE Performance Analysis

+ {% if language == "cn" %} + {% set title_ns = namespace(type='类别', desc='描述及建议', opti_set='性能优化算子集合', bound_set='bound算子集合', affinity_set='不亲和算子集合', + opti_refer=' 参考性能优化空间: ', bound_refer=' bound类型为: ', affinity_refer=' 不亲和类型为: ', title_desc='算子相关分析,参考如下: ') %} + {% else %} + {% set title_ns = namespace(type='Type', desc='Description and Suggestion', opti_set='set of performance optimization operators', + bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ', + bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %} + {% endif %} {% if format_result.cube is not none %} - MatMul算子相关分析,参考如下: + MatMul{{ title_ns.title_desc }}
- - + + {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.cube[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% endif %} {% endfor %} + {% if opti_ns.total_opti|length > 0 %} - + + {% endif %} {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.cube[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} + {% if bound_ns.total_bound|length > 0 %} - + + {% endif %} {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.cube[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} + {% if affinity_ns.total_affinity|length > 0 %} - + + {% endif %}
类别描述及建议{{ title_ns.type }}{{ title_ns.desc }}
性能优化算子集合{{ title_ns.opti_set }} {{ opti_ns.total_opti | safe }}
bound算子集合{{ title_ns.bound_set }} {{ bound_ns.total_bound | safe }}
bound算子集合{{ title_ns.affinity_set }} {{ affinity_ns.total_affinity | safe }}
{% endif %} {% if format_result.fa is not none %} - FA算子相关分析,参考如下: + FA{{ title_ns.title_desc }}
- - + + {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.fa[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% endif %} {% endfor %} + {% if opti_ns.total_opti|length > 0 %} - + + {% endif %} {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.fa[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} + {% if bound_ns.total_bound|length > 0 %} - + + {% endif %} {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.fa[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} + {% if affinity_ns.total_affinity|length > 0 %} - + + {% endif %}
类别描述及建议{{ title_ns.type }}{{ title_ns.desc }}
性能优化算子集合{{ title_ns.opti_set }} {{ opti_ns.total_opti | safe }}
bound算子集合{{ title_ns.bound_set }} {{ bound_ns.total_bound | safe }}
不亲和算子集合{{ title_ns.affinity_set }} {{ affinity_ns.total_affinity | safe }}
{% endif %} {% if format_result.cube is not none %} - Vector算子相关分析,参考如下: + Vector{{ title_ns.title_desc }}
- - + + {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.vector[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% endif %} {% endfor %} + {% if opti_ns.total_opti|length > 0 %} - + + {% endif %} {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.vector[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} + {% if bound_ns.total_bound|length > 0 %} - + + {% endif %}
类别描述及建议{{ title_ns.type }}{{ title_ns.desc }}
性能优化算子集合{{ title_ns.opti_set }} {{ opti_ns.total_opti | safe }}
bound算子集合{{ title_ns.bound_set }} {{ bound_ns.total_bound | safe }}
{% endif %}
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index 60d813e1d..f00f0a4b7 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -1,6 +1,15 @@
-problem: "AICORE算子"
+cube_problem: "Cube算子性能分析"
+fa_problem: "FA算子性能分析"
+vector_problem: "Vector算子性能分析"
 description: "提供一些AICORE算子的参考瓶颈"
+bound_description: "bound算子集合"
+optimization_description: "性能优化算子集合"
+affinity_description: "不亲和算子集合"
+cube_affinity_desc: "内轴无法被256整除"
+fa_affinity_desc_type1: "D不能被128整除"
+fa_affinity_desc_type2: "S不能被128整除"
+fa_affinity_desc_type3: "D和S均不能被128整除"
 suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子"
-affinity_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 有不亲和特征: {suggestion}\n"
-bound_suggestion: "{op_name}算子 shape{shape} dtype{dtype} bound类型为: {bound} bound\n"
-optimization_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 疑似有性能优化空间,参考性能优化空间{optimization}\n"
\ No newline at end of file
+affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n"
+bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n"
+optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}\n"
\ No newline at end of file
diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml
index 247022214..28f52f1ed 100644
--- a/profiler/advisor/rules/en/aicore_performance.yaml
+++ b/profiler/advisor/rules/en/aicore_performance.yaml
@@ -1,6 +1,15 @@
-problem: "AICORE Operator"
+cube_problem: "Cube operator performance analysis"
+fa_problem: "FA operator performance analysis"
+vector_problem: "Vector operator performance analysis"
 description: "Provide some reference bottlenecks for the AICORE operator"
+bound_description: "set of bound operators"
+optimization_description: "set of performance optimization operators"
+affinity_description: "set of unaffine operators"
+cube_affinity_desc: "The inner axis is not divisible by 256"
+fa_affinity_desc_type1: "D is not divisible by 128"
+fa_affinity_desc_type2: "S is not divisible by 128"
+fa_affinity_desc_type3: "Neither D nor S is divisible by 128"
 suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space"
-affinity_suggestion: "{op_name} Op shape{shape} dtype{dtype} with disaffection characteristics: {suggestion}\n"
-bound_suggestion: "{op_name} Op shape{shape} dtype{dtype} bound type: {bound} bound\n"
-optimization_suggestion: "{op_name} Op shape{shape} dtype{dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
+affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n"
+bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n"
+optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py
index fa7d2421f..89ac8187d 100644
--- a/profiler/cli/entrance.py
+++ b/profiler/cli/entrance.py
@@ -69,7 +69,7 @@ msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
 if __name__ == "__main__":
     msprof_analyze_cli.main(
         [
-            "advisor","computation","-d",
-            
r"E:\B站\910b-33f-cpsp4-add_contiguous\train-2184159-master-0_1058382_20240910063706363_ascend_pt","-l","cn" + "analyze","all","-d", + r"D:\data\file","-l","cn" ] ) -- Gitee From 295e6ee731585815c0a49adb9bcfc470377cd315 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 15:36:34 +0800 Subject: [PATCH 19/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 2baa00d19..5b8e14550 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -133,12 +133,9 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if self.cube_dict: - self.result["cube"] = self.check_cube_operator(promoting_dataset) - if self.fa_dict: - self.result["fa"] = self.check_fa_operator(promoting_dataset) - if self.vector_dict: - self.result["vector"] = self.check_vector_operator(promoting_dataset) + self.result["cube"] = self.check_cube_operator(promoting_dataset) + self.result["fa"] = self.check_fa_operator(promoting_dataset) + self.result["vector"] = self.check_vector_operator(promoting_dataset) def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict -- Gitee From 2313499b7ad30c325f21acc62db3090c74c56d91 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:18:06 +0800 Subject: [PATCH 20/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5b8e14550..bbf8b3b1a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -116,17 +116,19 @@ class AICorePerformanceChecker: @staticmethod def memory_size(operator): - input_shapes = operator.input_shapes[1:-1].split(";") memory = 0 - if len(input_shapes[0].split(",")) == 4: - memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3]) - for shape in (shapes.split(",") for shapes in input_shapes)) - output_shape = operator.output_shapes[1:-1].split(",") - memory += (int(output_shape[0]) * int(output_shape[1]) * int(output_shape[2]) * int(output_shape[3])) - else: - memory += sum(int(shape[0]) * int(shape[1]) for shape in (shapes.split(",") for shapes in input_shapes)) - output_shape = operator.output_shapes[1:-1].split(",") - memory += (int(output_shape[0]) * int(output_shape[1])) + input_shapes = operator.input_shapes[1:-1].split(";") + for shapes in input_shapes: + start = 1 + for shape in shapes.split(","): + start *= int(shape) + memory += start + + output_shape = operator.output_shapes[1:-1].split(",") + start = 1 + for 
shapes in output_shape: + start *= int(shapes) + memory += int(start) return memory * 2 / 1024 / 1024 def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): @@ -471,8 +473,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, - "op_list"): - logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + if not not hasattr(profiling_dataset.op_summary, "op_list"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False return True -- Gitee From 078be76380bf4ff0db578c7e5e52fd8180aa769f Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:22:19 +0800 Subject: [PATCH 21/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index bbf8b3b1a..8bdc92c80 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -473,7 +473,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not not hasattr(profiling_dataset.op_summary, "op_list"): + if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False return True -- Gitee From 4718bb7517d7ca0b898817b127b52cf39b2b02dc Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:25:56 +0800 Subject: [PATCH 22/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 03b0a8c6e..89b6be779 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -51,4 +51,4 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): return self.result def get_priority(self, max_mem_op_dur=None): - return PriorityBackgroundColor.high # html 底色设置 \ No newline at end of file + return PriorityBackgroundColor.low \ No newline at end of file -- Gitee From 4dee540ee5d220afa742c46be7da04f862d45605 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:50:43 +0800 Subject: [PATCH 23/72] =?UTF-8?q?=E5=86=85=E5=AD=98=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
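Note (illustrative, not part of the change): the sketch below mirrors the estimate that memory_size() arrives at with this patch — the element counts of every input tensor and the output tensor are summed, a single-value (bias) entry is counted twice, and the total is converted to MB assuming 2-byte elements, as in the existing `* 2 / 1024 / 1024` conversion. The helper name and the example shape strings are made up for illustration only.

    # Illustrative sketch only: helper name and example shapes are hypothetical.
    def estimate_memory_mb(input_shapes: str, output_shape: str) -> float:
        elements = 0
        for shape in input_shapes.split(";"):
            if "," not in shape:
                # a single-value entry is treated as a bias and counted twice
                elements += int(shape) * 2
                continue
            count = 1
            for dim in shape.split(","):
                count *= int(dim)
            elements += count
        count = 1
        for dim in output_shape.split(","):
            count *= int(dim)
        elements += count
        return elements * 2 / 1024 / 1024  # 2 bytes per element, converted to MB

    # e.g. two matmul inputs plus a bias vector, and one 2-D output
    print(estimate_memory_mb("64,128;128,256;256", "64,256"))
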
.../ai_core_performance/ai_core_performance_checker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 8bdc92c80..647ef0c7f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -119,6 +119,10 @@ class AICorePerformanceChecker: memory = 0 input_shapes = operator.input_shapes[1:-1].split(";") for shapes in input_shapes: + if not "," in shapes: + # 多的一维是 bias ,预先乘2 + memory += int (shapes) * 2 + continue start = 1 for shape in shapes.split(","): start *= int(shape) -- Gitee From 61390688f533d13b6f53abc6448243d66e809b54 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 17:07:52 +0800 Subject: [PATCH 24/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 647ef0c7f..f70a3c815 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -16,7 +16,6 @@ import logging import os from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult from profiler.prof_common.additional_args_manager import AdditionalArgsManager @@ -119,7 +118,7 @@ class AICorePerformanceChecker: memory = 0 input_shapes = operator.input_shapes[1:-1].split(";") for shapes in input_shapes: - if not "," in shapes: + if not "," in shapes and shapes != "": # 多的一维是 bias ,预先乘2 memory += int (shapes) * 2 continue @@ -184,7 +183,6 @@ class AICorePerformanceChecker: "dtype": dtype, "duration": shape_duration, "suggestion": suggestion}) - continue else: shap_list = [operator for operator in operator_list if operator.op_name == op and @@ -281,7 +279,6 @@ class AICorePerformanceChecker: "dtype": dtype, "suggestion": suggestion, "duration": shape_duration}) - continue else: # 处理bound算子和优化算子 aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. 
-- Gitee From 4fe9ffd3d761152213dc933dbf79986aef8a0744 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 18:26:17 +0800 Subject: [PATCH 25/72] =?UTF-8?q?=E6=B8=85=E7=90=86=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/analyzer/analyzer_controller.py | 33 ++++++++++--------- profiler/advisor/analyzer/base_analyzer.py | 8 ++--- .../ai_core_performance_checker.py | 10 +++--- .../computation/profiling_analyzer.py | 2 +- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py index e8a62c69d..1a5a28b63 100644 --- a/profiler/advisor/analyzer/analyzer_controller.py +++ b/profiler/advisor/analyzer/analyzer_controller.py @@ -186,6 +186,7 @@ class AnalyzerController: return True + @staticmethod def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data, headers, dimension, get_max=False): @@ -255,10 +256,10 @@ class AnalyzerController: return dimensions, AsyncParams.user_total_params def do_analysis(self, dimensions, **kwargs): - pid = os.getpid() # 获取当前进程的pid + pid = os.getpid() resp = {"id": pid} - self.args_manager = AdditionalArgsManager() # 初始化参数管理器 - self.args_manager.init(kwargs) # 初始化参数管理器 + self.args_manager = AdditionalArgsManager() + self.args_manager.init(kwargs) output_path = kwargs.get("output_path") AnalyzerController._set_analysis_process_priority(pid) @@ -277,9 +278,9 @@ class AnalyzerController: PathManager.make_dir_safety(output_path) Config().set_config("_work_path", output_path) - Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") # 设置日志路径 + Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") - self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) # 执行分析 + self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) except Exception as e: self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.INNER_ERROR_STATUS_CODE, status=AsyncAnalysisStatus.FAILED, error_msg=str(e)) @@ -611,8 +612,8 @@ class AnalyzerController: return job_list def _do_analysis(self, dimensions, pid=0, async_resp=None, **kwargs): - self.dimensions = dimensions # 设置分析维度 - self.kwargs = kwargs # 设置分析参数 + self.dimensions = dimensions + self.kwargs = kwargs result_list = [] profiling_path = PathManager.get_realpath(self.kwargs.get("profiling_path")) benchmark_profiling_path = self.kwargs.get("benchmark_profiling_path") @@ -621,7 +622,7 @@ class AnalyzerController: benchmark_profiling_path = PathManager.get_realpath(benchmark_profiling_path) PathManager.check_path_owner_consistent([benchmark_profiling_path]) - if not self._check_profiling_path_valid(profiling_path): # 检查profiling路径是否有效 + if not self._check_profiling_path_valid(profiling_path): error_msg = f"Got invalid argument '-d/--profiling_path' {profiling_path}, skip analysis" self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE, @@ -629,8 +630,8 @@ class AnalyzerController: logger.error(error_msg) return - if benchmark_profiling_path and not self._check_profiling_path_valid( - benchmark_profiling_path): # 检查benchmark_profiling路径是否有效 + + if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path): error_msg = (f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, 
" f"skip analysis") self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, @@ -639,7 +640,7 @@ class AnalyzerController: logger.error(error_msg) return - self._is_cluster = self._is_cluster_profiling(profiling_path) # 判断是否是集群profiling + self._is_cluster = self._is_cluster_profiling(profiling_path) if benchmark_profiling_path: # 构建benchmark profiling的map,用于根据rank获取profiling路径,否则无法进行比对 is_benchmark_cluster = self._is_cluster_profiling(benchmark_profiling_path) @@ -654,16 +655,16 @@ class AnalyzerController: return if not self._is_cluster: - job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) # 单卡分析 + job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) else: self.slow_rank_analyzer = SlowRankAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) self.slow_link_analyzer = SlowLinkAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) - job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) # 集群分析 + job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) - for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): # dimension: 分析维度,scope: 分析器 + for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): result_list.append( - # 获取分析结果 - interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, **kwargs) + interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, + **kwargs) ) for result in result_list[::-1]: diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index adf82ab8a..0391eb88a 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -105,7 +105,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def get_priority(self, max_mem_op_dur): pass - def identify_profiling_type(self, profiling_type_list): # 确定分析类型 + def identify_profiling_type(self, profiling_type_list): profiling_type = None if self.collection_path.endswith(ASCEND_MS): profiling_type = [elem for elem in profiling_type_list if Constant.MINDSPORE in elem][0] @@ -134,7 +134,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): profiling_type = profiling_type_list[0] return profiling_type - def identify_profiling_version(self): # 确定分析版本 + def identify_profiling_version(self): profiling_version = "" if Constant.MINDSPORE in self.profiling_type: ascend_dirs = [] @@ -166,7 +166,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): self.__class__.__name__, self.kwargs.get(Constant.TORCH_VERSION), profiling_version) return profiling_version - def init_dataset_list(self) -> None: # 初始化数据集列表 + def init_dataset_list(self) -> None: dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) @@ -184,7 +184,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): self.dataset_list[key] = [] self.dataset_list[key].append(dataset) - def get_priority_by_time_ratio(self, dur, step_dur): # 根据时间比例确定优先级 + def get_priority_by_time_ratio(self, dur, step_dur): time_ratio = safe_division(dur, step_dur) if time_ratio >= self.ANALYZER_HIGH_PRIORITY_TIME_RATIO: return PriorityBackgroundColor.high diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 
f70a3c815..1784c9ce3 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -36,12 +36,10 @@ class AICorePerformanceChecker: self.result = dict() self.ai_core_performance_issues = False self.desc = "" - self.suggestions = "" self.cube_dict = {} self.fa_dict = {} self.fa_list = [] self.vector_dict = {} - self.vector_list = [] self.load_aicore_perf_rules() def load_aicore_perf_rules(self): @@ -392,10 +390,10 @@ class AICorePerformanceChecker: if not self.ai_core_performance_issues: return self.ai_core_performance_issues - sugg_keys = ['opti', 'bound', 'affinity'] - cube_desc = dict.fromkeys(sugg_keys, "") - fa_desc = dict.fromkeys(sugg_keys, "") - vector_desc = dict.fromkeys(sugg_keys, "") + suggestion_keys = ['opti', 'bound', 'affinity'] + cube_desc = dict.fromkeys(suggestion_keys, "") + fa_desc = dict.fromkeys(suggestion_keys, "") + vector_desc = dict.fromkeys(suggestion_keys, "") if self.result["cube"]: optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index bbea136f0..ccf671139 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -115,4 +115,4 @@ class OperatorBoundAnalyzer(ProfilingAnalyzer): class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) \ No newline at end of file + self.checker = AicpuChecker(self.cann_version) -- Gitee From 657d436b30cc81f319f54e0a2dac783ea75b6762 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 14:47:23 +0800 Subject: [PATCH 26/72] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=87=87=E9=9B=86=E7=AD=89=E7=BA=A7=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 1784c9ce3..e2ca19405 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -73,7 +73,7 @@ class AICorePerformanceChecker: self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion") def data_filter(self, profiling_dataset: ProfilingDataset): - if not self.check_task_dict(profiling_dataset): + if not self.check_task_list(profiling_dataset): return operator_list = profiling_dataset.op_summary.op_list total_duration = sum(float(operator.task_duration) for operator in operator_list) @@ -118,7 +118,7 @@ class AICorePerformanceChecker: for shapes in input_shapes: if not "," in shapes and shapes != "": # 多的一维是 bias ,预先乘2 - memory += int (shapes) * 2 + memory += int(shapes) * 2 continue start = 1 for shape in shapes.split(","): @@ -468,11 +468,16 @@ class AICorePerformanceChecker: priority_background_color=priority, rank=kwargs.get("rank")) - def check_task_dict(self, 
profiling_dataset: ProfilingDataset) -> bool: + def check_task_list(self, profiling_dataset: ProfilingDataset) -> bool: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False + if (not hasattr(profiling_dataset.op_summary, "input_shapes") or + not hasattr(profiling_dataset.op_summary, "input_data_types")): + logger.warning("Skip %s checker because of not containing input datas, " + "Please use L1 and above", self._CHECKER) + return False return True -- Gitee From e8d3759a197baaaa79a6b8143bded9e8a45db95f Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 15:14:54 +0800 Subject: [PATCH 27/72] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=87=87=E9=9B=86=E7=AD=89=E7=BA=A7=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e2ca19405..47a90e98a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -477,7 +477,6 @@ class AICorePerformanceChecker: return False if (not hasattr(profiling_dataset.op_summary, "input_shapes") or not hasattr(profiling_dataset.op_summary, "input_data_types")): - logger.warning("Skip %s checker because of not containing input datas, " - "Please use L1 and above", self._CHECKER) + logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True -- Gitee From edd6abcbf81abaad4ce119478eba7987e3ddfe42 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:07:21 +0800 Subject: [PATCH 28/72] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=9D=87=E5=80=BC?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 47a90e98a..c9f6e039f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -281,6 +281,7 @@ class AICorePerformanceChecker: # 处理bound算子和优化算子 aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. 
bound = "" + length = 0 if len(shape.split("-")) > 2: for operator in fa_list: if (operator.op_name == op and @@ -290,6 +291,9 @@ class AICorePerformanceChecker: aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) dtype = operator.input_data_types + length += 1 + aic_fixpipe_ratio = aic_fixpipe_ratio / length + aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: bound = "mte2_and_fixpipe_bound" elif aic_mte2_ratio >= 0.8: @@ -305,6 +309,9 @@ class AICorePerformanceChecker: aiv_vec_ratio += float(operator.aiv_vec_ratio) aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) + length += 1 + aiv_vec_ratio = aiv_vec_ratio / length + aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: bound = "mte2_and_vec_bound" elif aic_mte2_ratio >= 0.8: @@ -346,6 +353,7 @@ class AICorePerformanceChecker: for op_name in vector_dict: for shape in vector_dict[op_name]: aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. + length = 0 bound, dtype = "", "" for operator in vector_list: if (operator.op_name == op_name and @@ -355,6 +363,11 @@ class AICorePerformanceChecker: aiv_mte3_ratio += float(operator.aiv_mte3_ratio) shape_duration += float(operator.task_duration) dtype = operator.input_data_types + length += 1 + # todo 取平均值 + aiv_vec_ratio = aiv_vec_ratio / length + aiv_mte2_ratio = aiv_mte2_ratio / length + aiv_mte2_ratio = aiv_mte2_ratio / length if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: bound = "vec_mte2_mte3_bound" elif aiv_mte2_ratio >= 0.7: -- Gitee From f9cfeeae8589c37cc2e491051a5bea1163eefe26 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:07:45 +0800 Subject: [PATCH 29/72] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=9D=87=E5=80=BC?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index c9f6e039f..0517083c3 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -364,7 +364,6 @@ class AICorePerformanceChecker: shape_duration += float(operator.task_duration) dtype = operator.input_data_types length += 1 - # todo 取平均值 aiv_vec_ratio = aiv_vec_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length -- Gitee From c5359dd7e27a7be5f7e61f677d0a0d1bb4ccb1b7 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:12:01 +0800 Subject: [PATCH 30/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 0517083c3..1c341e4fb 100644 --- 
a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -487,8 +487,8 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False - if (not hasattr(profiling_dataset.op_summary, "input_shapes") or - not hasattr(profiling_dataset.op_summary, "input_data_types")): + if (not hasattr(profiling_dataset.op_summary.op_list, "input_shapes") or + not hasattr(profiling_dataset.op_summary.op_list, "input_data_types")): logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True -- Gitee From 1ceadac5a761201d7396940d0fdb600ac169306c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:37:08 +0800 Subject: [PATCH 31/72] =?UTF-8?q?Checker=E5=BC=82=E5=B8=B8=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 1c341e4fb..6057df7aa 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -136,9 +136,28 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - self.result["cube"] = self.check_cube_operator(promoting_dataset) - self.result["fa"] = self.check_fa_operator(promoting_dataset) - self.result["vector"] = self.check_vector_operator(promoting_dataset) + try: + self.result["cube"] = self.check_cube_operator(promoting_dataset) + except (IndexError, ValueError, AttributeError): + logger.error("Failed to check ai core performance, cube operator incorrect shapes value.") + self.result["cube"] = [] + + try: + self.result["fa"] = self.check_fa_operator(promoting_dataset) + except (IndexError, ValueError, AttributeError): + logger.error("Failed to check ai core performance, fa operator incorrect shapes value.") + self.result["fa"] = [] + + try: + self.result["vector"] = self.check_vector_operator(promoting_dataset) + except (IndexError, ValueError, AttributeError): + logger.error("Failed to check ai core performance, vector operator incorrect shapes value.") + self.result["vector"] = [] + + if not any([self.result["cube"], self.result["fa"], self.result["vector"]]): + self.ai_core_performance_issues = False + + def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict -- Gitee From d815739679f562ea492e75733b85aae4dd3cdddc Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 10:28:38 +0800 Subject: [PATCH 32/72] UT --- .../test_ai_core_performance_advice.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py new file mode 100644 index 000000000..4782ee635 
--- /dev/null +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -0,0 +1,93 @@ +import csv +import os +import shutil +import stat + +import unittest +from profiler.advisor.interface.interface import Interface +from profiler.advisor.common.analyzer_scopes import SupportedScopes +from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset + + +class TestAICorePerformanceAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT" + interface = None + err_interface = None + + def tearDown(self): + if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): + shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) + self.clear_htmls() + + def setUp(self): + if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): + shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) + if not os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): + os.makedirs(TestAICorePerformanceAdvice.TMP_DIR) + if not os.path.exists(TestAICorePerformanceAdvice.OUTPUT_DIR): + os.makedirs(TestAICorePerformanceAdvice.OUTPUT_DIR) + self.clear_htmls() + + @classmethod + def clear_htmls(cls): + current_path = os.path.dirname(os.path.abspath(__file__)) + for filename in os.listdir(current_path): + # 检查文件是否以“att”开头 + if filename.startswith("att"): + # 构建文件的完整路径 + file_path = os.path.join(current_path, filename) + # 删除文件 + os.remove(file_path) + + + @classmethod + def create_kernel_details(cls): + # create csv files + csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core', + 'Start Time(us)', + 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types', + 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID', + 'aicore_time(us)', + 'aic_total_cycles', 'aic_mac_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops', + 'aic_vector_fops', + 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio', + 'aiv_vec_int32_ratio', + 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops'] + csv_row1 = [1, 4294967295, 1265, 16, 'MatMul56', 'MatMul', 'AI_CORE', "172317\t", 21.2, 261.56, 9, + 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 0, 0, 0, 0, 0, 0, + 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] + csv_row2 = [1, 4294967295, 1265, 16, 'Add2', 'Add', 'AI_VECTOR_CORE', "183317\t", 1.5, 261.56, 9, + 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 0, 0, 0, 0, 0, 0, + 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] + csv_row3 = [1, 4294967295, 1265, 16, 'MatMul57', 'MatMul', 'AI_CORE', "189233\t", 3.14, 261.56, 9, 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 0, 0, 0, 0, 0, 0, + 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] + csv_row4 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "189933\t", 3.14, 261.56, 9, 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 2.3, 28888, 0.2, 0.1, 0.1, 0.7, + 0, 0, 0, 0, 0, 0, 0, 0] + + with os.fdopen(os.open(f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w', newline='') as fp: + csv_writer = csv.writer(fp) + csv_writer.writerow(csv_header) + csv_writer.writerow(csv_row1) + csv_writer.writerow(csv_row2) + csv_writer.writerow(csv_row3) + csv_writer.writerow(csv_row4) + + def test_ai_core_performance_data(self): + self.create_kernel_details() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = 
Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(2, len(result.data.get("带宽分析", []))) + self.assertEqual(1, len(result.data.get("带宽分析", []).get('data'))) + result.clear() \ No newline at end of file -- Gitee From 2dabcd38c699653dccb9cd571911df4df4532535 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 10:51:15 +0800 Subject: [PATCH 33/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 6057df7aa..ef360ee12 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -506,8 +506,8 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False - if (not hasattr(profiling_dataset.op_summary.op_list, "input_shapes") or - not hasattr(profiling_dataset.op_summary.op_list, "input_data_types")): + if (not hasattr(profiling_dataset.op_summary.op_list[0], "input_shapes") or + not hasattr(profiling_dataset.op_summary.op_list[0], "input_data_types")): logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True -- Gitee From 118abaff82f2f10c610347f552c0e648bea51f1a Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 11:39:21 +0800 Subject: [PATCH 34/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../compute_advice/data/kernel_details.csv | 0 .../test_ai_core_performance_advice.py | 96 ++++++++++--------- 2 files changed, 49 insertions(+), 47 deletions(-) create mode 100644 profiler/test/ut/advisor/compute_advice/data/kernel_details.csv diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv new file mode 100644 index 000000000..e69de29bb diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 4782ee635..aef4e6ed1 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -6,7 +6,6 @@ import stat import unittest from profiler.advisor.interface.interface import Interface from profiler.advisor.common.analyzer_scopes import SupportedScopes -from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset class TestAICorePerformanceAdvice(unittest.TestCase): @@ -33,61 +32,64 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def clear_htmls(cls): current_path = os.path.dirname(os.path.abspath(__file__)) for filename in os.listdir(current_path): - # 检查文件是否以“att”开头 - if filename.startswith("att"): + # 检查文件是否以“mstt”开头 + if filename.startswith("mstt"): # 
构建文件的完整路径 file_path = os.path.join(current_path, filename) # 删除文件 os.remove(file_path) - @classmethod - def create_kernel_details(cls): - # create csv files - csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core', - 'Start Time(us)', - 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types', - 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID', - 'aicore_time(us)', - 'aic_total_cycles', 'aic_mac_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops', - 'aic_vector_fops', - 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio', - 'aiv_vec_int32_ratio', - 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops'] - csv_row1 = [1, 4294967295, 1265, 16, 'MatMul56', 'MatMul', 'AI_CORE', "172317\t", 21.2, 261.56, 9, - 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 0, 0, 0, 0, 0, 0, - 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] - csv_row2 = [1, 4294967295, 1265, 16, 'Add2', 'Add', 'AI_VECTOR_CORE', "183317\t", 1.5, 261.56, 9, - 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 0, 0, 0, 0, 0, 0, - 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] - csv_row3 = [1, 4294967295, 1265, 16, 'MatMul57', 'MatMul', 'AI_CORE', "189233\t", 3.14, 261.56, 9, 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 0, 0, 0, 0, 0, 0, - 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] - csv_row4 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "189933\t", 3.14, 261.56, 9, 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 2.3, 28888, 0.2, 0.1, 0.1, 0.7, - 0, 0, 0, 0, 0, 0, 0, 0] + def copy_kernel_details(cls,path): + # Define source and destination paths + source_csv_path = f"./data/{path}" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + + # Check if source CSV file exists + if not os.path.exists(source_csv_path): + raise FileNotFoundError(f"test data file not found:{source_csv_path}") + + # Ensure the output directory exists + if not os.path.exists(TestAICorePerformanceAdvice.OUTPUT_DIR): + os.makedirs(TestAICorePerformanceAdvice.OUTPUT_DIR) + + # Copy the CSV file from source to destination + shutil.copyfile(source_csv_path, destination_csv_path) + + def test_ai_core_performance_total(self): + file_path = "kernel_details.csv" + self.copy_kernel_details(file_path) + interface = Interface(profiling_path=self.TMP_DIR) + dimension = Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + # TODO 测试结果验证 + result.clear() + + def test_ai_core_performance_cube_operator(self): + self.copy_kernel_details() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + # TODO 测试结果验证 + result.clear() + + def test_ai_core_performance_fa_operator(self): + self.copy_kernel_details() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + # TODO 测试结果验证 + result.clear() - with 
os.fdopen(os.open(f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv", - os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w', newline='') as fp: - csv_writer = csv.writer(fp) - csv_writer.writerow(csv_header) - csv_writer.writerow(csv_row1) - csv_writer.writerow(csv_row2) - csv_writer.writerow(csv_row3) - csv_writer.writerow(csv_row4) - def test_ai_core_performance_data(self): - self.create_kernel_details() + def test_ai_core_performance_vector_operator(self): + self.copy_kernel_details() interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMMUNICATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - self.assertEqual(2, len(result.data.get("带宽分析", []))) - self.assertEqual(1, len(result.data.get("带宽分析", []).get('data'))) - result.clear() \ No newline at end of file + # TODO 测试结果验证 + result.clear() -- Gitee From 1eeb72ea1f7999ae8bf75be3ff6c40b1757d74e5 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 14:27:25 +0800 Subject: [PATCH 35/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index aef4e6ed1..ff372d2d5 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -58,27 +58,32 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def test_ai_core_performance_total(self): file_path = "kernel_details.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() def test_ai_core_performance_cube_operator(self): - self.copy_kernel_details() + file_path = "kernel_details_cube.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() def test_ai_core_performance_fa_operator(self): - self.copy_kernel_details() + file_path = "kernel_details_fa.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 @@ -86,9 +91,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def 
test_ai_core_performance_vector_operator(self): - self.copy_kernel_details() + file_path = "kernel_details_vector.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 -- Gitee From 776aaf9db5bccc21d7a7940f55d6bc8483bf21d7 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 14:40:10 +0800 Subject: [PATCH 36/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index ff372d2d5..ac0ba3807 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -17,7 +17,6 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def tearDown(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) - self.clear_htmls() def setUp(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): @@ -58,7 +57,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def test_ai_core_performance_total(self): file_path = "kernel_details.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -66,10 +65,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() def test_ai_core_performance_cube_operator(self): file_path = "kernel_details_cube.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -77,10 +77,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() def test_ai_core_performance_fa_operator(self): file_path = "kernel_details_fa.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -88,11 +89,12 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() def 
test_ai_core_performance_vector_operator(self): file_path = "kernel_details_vector.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -100,3 +102,4 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() -- Gitee From b58e60ab5c76abf6624944253db4859c48aa06b9 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 14:52:22 +0800 Subject: [PATCH 37/72] =?UTF-8?q?UT=20=E9=97=AE=E9=A2=98=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/compute_advice/test_ai_core_performance_advice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index ac0ba3807..7849391f1 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -42,7 +42,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def copy_kernel_details(cls,path): # Define source and destination paths source_csv_path = f"./data/{path}" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" # Check if source CSV file exists if not os.path.exists(source_csv_path): -- Gitee From 0abef7750282ad08013f1cde031cefe9fb996994 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 15:08:42 +0800 Subject: [PATCH 38/72] =?UTF-8?q?checker=20=E6=8A=A5=E9=94=99=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index ef360ee12..c28675e9f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -138,27 +138,25 @@ class AICorePerformanceChecker: """ try: self.result["cube"] = self.check_cube_operator(promoting_dataset) - except (IndexError, ValueError, AttributeError): - logger.error("Failed to check ai core performance, cube operator incorrect shapes value.") + except (IndexError, ValueError, AttributeError) as e: + logger.error(f"Failed to check ai core performance, {e}.") self.result["cube"] = [] try: self.result["fa"] = self.check_fa_operator(promoting_dataset) - except (IndexError, ValueError, AttributeError): - logger.error("Failed to check ai core performance, fa operator incorrect shapes value.") + except (IndexError, ValueError, AttributeError) as e: + logger.error(f"Failed to check ai core performance, {e}.") self.result["fa"] = [] try: self.result["vector"] = 
self.check_vector_operator(promoting_dataset) - except (IndexError, ValueError, AttributeError): - logger.error("Failed to check ai core performance, vector operator incorrect shapes value.") + except (IndexError, ValueError, AttributeError) as e: + logger.error(f"Failed to check ai core performance, {e}.") self.result["vector"] = [] if not any([self.result["cube"], self.result["fa"], self.result["vector"]]): self.ai_core_performance_issues = False - - def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict optimization_queue = [] -- Gitee From 6db14fa52598cd74eb7a8e0bb4ca31365b191d4c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 15:35:47 +0800 Subject: [PATCH 39/72] =?UTF-8?q?UT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 42 +------------------ 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 7849391f1..6ff49cad1 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -17,6 +17,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def tearDown(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) + self.clear_htmls() def setUp(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): @@ -57,49 +58,10 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def test_ai_core_performance_total(self): file_path = "kernel_details.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" - self.copy_kernel_details(file_path) - interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMPUTATION - scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS - result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 - result.clear() - self.clear_htmls() - - def test_ai_core_performance_cube_operator(self): - file_path = "kernel_details_cube.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 - result.clear() - self.clear_htmls() - - def test_ai_core_performance_fa_operator(self): - file_path = "kernel_details_fa.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" - self.copy_kernel_details(file_path) - interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMPUTATION - scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS - result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 - result.clear() - self.clear_htmls() - - - def test_ai_core_performance_vector_operator(self): - file_path = "kernel_details_vector.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" - self.copy_kernel_details(file_path) - interface = Interface(profiling_path=self.TMP_DIR) - 
dimension = Interface.COMPUTATION - scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS - result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 - result.clear() - self.clear_htmls() + result.clear() \ No newline at end of file -- Gitee From 5a23b7934c3bf5579aa0a7bc9032f7722374c022 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 22 Jan 2025 15:37:09 +0800 Subject: [PATCH 40/72] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E6=B2=A1=E6=9C=89?= =?UTF-8?q?=E5=BB=BA=E8=AE=AE=E9=A1=B9=E6=97=B6=E7=9A=84=E7=A9=BA=E8=A1=A8?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 24 ++++++++++++------- .../html/templates/ai_core_performance.html | 6 ++--- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index c28675e9f..a0e3fcd2b 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -434,15 +434,18 @@ class AICorePerformanceChecker: for cube_opti_issue in self.result["cube"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) cube_desc["opti"] += opti_sugg - result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) + if cube_desc["opti"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) for cube_bound_issue in self.result["cube"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) cube_desc["bound"] += bound_sugg - result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) + if cube_desc["bound"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) for cube_affinity_issue in self.result["cube"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) cube_desc["affinity"] += affinity_sugg - result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) + if cube_desc["affinity"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) if self.result["fa"]: optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) @@ -455,15 +458,18 @@ class AICorePerformanceChecker: for fa_opti_issue in self.result["fa"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) fa_desc["opti"] += opti_sugg - result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) + if fa_desc["opti"]: + result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) for fa_bound_issue in self.result["fa"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) fa_desc["bound"] += bound_sugg - result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) + if fa_desc["bound"]: + result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) for fa_affinity_issue in self.result["fa"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) fa_desc["affinity"] += affinity_sugg - result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) + if fa_desc["affinity"]: + result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) if 
self.result["vector"]: optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) @@ -476,11 +482,13 @@ class AICorePerformanceChecker: for vector_opti_issue in self.result["vector"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) vector_desc["opti"] += opti_sugg - result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) + if vector_desc["opti"]: + result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) for vector_bound_issue in self.result["vector"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) vector_desc["bound"] += bound_sugg - result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) + if vector_desc["bound"]: + result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) return True def make_render(self, html_render, add_render_list=True, **kwargs): diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index 48e62ad6c..5bf133550 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -10,7 +10,7 @@ bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ', bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %} {% endif %} - {% if format_result.cube is not none %} + {% if format_result.cube|length > 0 %} MatMul{{ title_ns.title_desc }}
@@ -63,7 +63,7 @@
{% endif %} - {% if format_result.fa is not none %} + {% if format_result.fa|length > 0 %} FA{{ title_ns.title_desc }}
@@ -116,7 +116,7 @@
{% endif %} - {% if format_result.cube is not none %} + {% if format_result.vector|length > 0 %} Vector{{ title_ns.title_desc }}
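The guard rewrite in PATCH 40 above works because the per-category results (format_result.cube, format_result.fa, format_result.vector) appear to be empty lists rather than None when nothing is found, so the old "is not none" test still rendered a heading plus an empty table. The sketch below is not taken from the patches; it is a minimal standalone illustration of the difference, assuming only the jinja2 package, with illustrative variable names and made-up data:

from jinja2 import Template

# Old guard: an empty list is still "not none", so the section renders anyway.
old_guard = Template("{% if rows is not none %}SECTION{% endif %}")
# New guard: a zero-length list fails the check, so the empty section is skipped.
new_guard = Template("{% if rows|length > 0 %}SECTION{% endif %}")

print(old_guard.render(rows=[]))                         # prints SECTION
print(new_guard.render(rows=[]))                         # prints an empty string
print(new_guard.render(rows=[{"op_name": "MatMul56"}]))  # prints SECTION again

The checker-side half of the same patch applies the same idea in Python: result.add_detail is only called once the accumulated suggestion string for that category is non-empty.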
-- Gitee From 5bc247663f11862050123ffe82c527d4de3fd55f Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 22 Jan 2025 15:52:36 +0800 Subject: [PATCH 41/72] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dhtml=E7=A9=BA=E8=A1=A8?= =?UTF-8?q?=E7=9A=84=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/display/html/templates/ai_core_performance.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index 5bf133550..d5ab1a3fa 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -10,7 +10,7 @@ bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ', bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %} {% endif %} - {% if format_result.cube|length > 0 %} + {% if format_result.cube[0]|length + format_result.cube[1]|length + format_result.cube[2]|length > 0 %} MatMul{{ title_ns.title_desc }}
@@ -63,7 +63,7 @@
{% endif %} - {% if format_result.fa|length > 0 %} + {% if format_result.fa[0]|length + format_result.fa[1]|length + format_result.fa[2]|length > 0 %} FA{{ title_ns.title_desc }}
@@ -116,7 +116,7 @@
{% endif %} - {% if format_result.vector|length > 0 %} + {% if format_result.vector[0]|length + format_result.vector[1]|length > 0 %} Vector{{ title_ns.title_desc }}
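PATCH 41 then tightens the same guards because each category result is itself a list of sub-queues built by the checker (optimization, bound and, for Cube and FA, affinity), so the outer list from the previous guard always has a positive length even when every sub-queue is empty; summing the sub-queue lengths renders a section only when it has at least one concrete suggestion. Again a small hypothetical sketch, assuming only jinja2 and using invented data:

from jinja2 import Template

# Mirrors the template condition from the patch, with a placeholder body.
guard = Template("{% if queues[0]|length + queues[1]|length + queues[2]|length > 0 %}SECTION{% endif %}")

all_empty = [[], [], []]                                            # no optimization, bound or affinity items
one_hit = [[{"op_name": "MatMul57", "optimization": 12.5}], [], []]

print(len(all_empty))                   # 3, so a plain "|length > 0" check would still render
print(guard.render(queues=all_empty))   # prints an empty string
print(guard.render(queues=one_hit))     # prints SECTION

The Vector section in the patch sums only queues[0] and queues[1], since its checker builds two sub-queues rather than three.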
-- Gitee From 1aefa6d6f6083327266c1563621b10f40e5ff839 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 16:01:41 +0800 Subject: [PATCH 42/72] =?UTF-8?q?checker=20optimization=20=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index c28675e9f..5e562349d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -232,7 +232,7 @@ class AICorePerformanceChecker: "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + "optimization": round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] @@ -349,7 +349,7 @@ class AICorePerformanceChecker: "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": optimization}) + "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], @@ -406,7 +406,7 @@ class AICorePerformanceChecker: "op_name": op_name, "shape": shape, "dtype": dtype, - "optimization": optimization}) + "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] -- Gitee From 71c43f4f0208e155affa522ba2b57b9348ef28a6 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 22 Jan 2025 16:07:34 +0800 Subject: [PATCH 43/72] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E7=A9=BA=E9=97=B4=E6=94=B9=E4=B8=BA=E7=99=BE=E5=88=86=E6=AF=94?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../html/templates/ai_core_performance.html | 32 +++++++++---------- .../advisor/rules/cn/aicore_performance.yaml | 2 +- .../advisor/rules/en/aicore_performance.yaml | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index d5ab1a3fa..77e5e0cb5 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -21,9 +21,9 @@ {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.cube[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% endif %} {% endfor %} {% if opti_ns.total_opti|length > 0 %} @@ -35,9 +35,9 @@ {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.cube[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} {% if bound_ns.total_bound|length > 0 %} @@ -49,9 +49,9 @@ {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.cube[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} {% if affinity_ns.total_affinity|length > 0 %} @@ -74,9 +74,9 @@ {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.fa[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% endif %} {% endfor %} {% if opti_ns.total_opti|length > 0 %} @@ -88,9 +88,9 @@ {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.fa[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} {% if bound_ns.total_bound|length > 0 %} @@ -102,9 +102,9 @@ {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.fa[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} {% if affinity_ns.total_affinity|length > 0 %} @@ -127,9 +127,9 @@ {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.vector[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% endif %} {% endfor %} {% if opti_ns.total_opti|length > 0 %} @@ -141,9 +141,9 @@ {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.vector[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} {% if bound_ns.total_bound|length > 0 %} diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index f00f0a4b7..8d44aaab2 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -12,4 +12,4 @@ fa_affinity_desc_type3: "D和S均不能被128整除" suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子" affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n" bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n" -optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}\n" \ No newline at end of file +optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n" \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml index 28f52f1ed..e85a919ab 100644 --- a/profiler/advisor/rules/en/aicore_performance.yaml +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -12,4 +12,4 @@ fa_affinity_desc_type3: "Neither D nor S is not divisible by 128" suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space" affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n" bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n" -optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n" \ No newline at end of file +optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n" \ No newline at end of file -- Gitee From 2f5acce4e8ab4e9402d4924a706fd46465a2c491 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 16:55:30 +0800 Subject: [PATCH 44/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=8EUT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 2 +- .../advisor/compute_advice/test_ai_core_performance_advice.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 270c43d41..0ef45f52c 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -252,7 +252,7 @@ class AICorePerformanceChecker: suggestion = "" if "varlen" in op.lower(): # 处理变长算子 如果不亲和则affinity_flag为False - if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: + if 
int(shape.split("-")[0].split(";")[0].split(",")[2]) % 128 != 0: affinity_flag = True suggestion = self._FA_AFFINITY_DESC_TYPE1 for operator in fa_list: diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 6ff49cad1..922d4b4c0 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -63,5 +63,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 + self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0])) + self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1])) + self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2])) result.clear() \ No newline at end of file -- Gitee From d98ef93f50206a0fd3142d49c32eee92a380c394 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 17:26:53 +0800 Subject: [PATCH 45/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=8EUT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- .../compute_advice/test_ai_core_performance_advice.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 0ef45f52c..09bf91349 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -397,14 +397,14 @@ class AICorePerformanceChecker: if bound: bound_queue.append({ "op_name": op_name, - "shape": shape, + "shape": shape.split("-")[0], "bound": bound, "dtype": dtype, "duration": shape_duration}) else: optimization_queue.append({ "op_name": op_name, - "shape": shape, + "shape": shape.split("-")[0], "dtype": dtype, "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 922d4b4c0..40fa81837 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -66,4 +66,9 @@ class TestAICorePerformanceAdvice(unittest.TestCase): self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0])) self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1])) self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2])) + self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[0])) + self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[1])) + self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[2])) + self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[0])) + self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[1])) result.clear() \ No newline at end of file -- Gitee From 
a8e535fddd557ddac6f1cd7cd01aa7fa06b1cd92 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 17:31:09 +0800 Subject: [PATCH 46/72] =?UTF-8?q?UT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 40fa81837..61ae35d13 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -40,7 +40,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): os.remove(file_path) @classmethod - def copy_kernel_details(cls,path): + def copy_kernel_details(cls, path): # Define source and destination paths source_csv_path = f"./data/{path}" destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" @@ -63,12 +63,12 @@ class TestAICorePerformanceAdvice(unittest.TestCase): dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0])) - self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1])) - self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2])) - self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[0])) - self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[1])) - self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[2])) - self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[0])) - self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[1])) - result.clear() \ No newline at end of file + self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[0])) + self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[1])) + self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[2])) + self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[0])) + self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[1])) + self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[2])) + self.assertLess(1, len(result.data.get("Vector算子性能分析").get("data")[0])) + self.assertLess(1, len(result.data.get("Vector算子性能分析").get("data")[1])) + result.clear() -- Gitee From 6440ccc765205eb29310a30cfb1e817831306736 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 23 Jan 2025 11:21:09 +0800 Subject: [PATCH 47/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 09bf91349..3175168e4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -75,10 +75,12 @@ class AICorePerformanceChecker: def data_filter(self, profiling_dataset: ProfilingDataset): if not 
self.check_task_list(profiling_dataset): return + operator_list = profiling_dataset.op_summary.op_list total_duration = sum(float(operator.task_duration) for operator in operator_list) cube_memory_dict = {} vector_type_dict = {} + # filter cube operator and fa operator for op in operator_list: shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] @@ -204,8 +206,10 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] shape_duration = sum(float(operator.task_duration) for operator in shap_list) dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) - aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) + aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list + if operator.aic_mac_ratio != "N/A") / len(shap_list) + aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list + if operator.aic_mac_ratio != "N/A") / len(shap_list) if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: bound_queue.append({ "op_name": op, @@ -304,11 +308,14 @@ class AICorePerformanceChecker: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] + "-grad" == shape): - aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - length += 1 + try: + aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + length += 1 + except ValueError: + continue aic_fixpipe_ratio = aic_fixpipe_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: @@ -323,10 +330,13 @@ class AICorePerformanceChecker: for operator in fa_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - length += 1 + try: + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + length += 1 + except ValueError: + continue aiv_vec_ratio = aiv_vec_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: @@ -375,12 +385,15 @@ class AICorePerformanceChecker: for operator in vector_list: if (operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aiv_mte2_ratio += float(operator.aiv_mte2_ratio) - aiv_mte3_ratio += float(operator.aiv_mte3_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - length += 1 + try: + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aiv_mte2_ratio += float(operator.aiv_mte2_ratio) + aiv_mte3_ratio += float(operator.aiv_mte3_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + length += 1 + except ValueError: + continue aiv_vec_ratio = aiv_vec_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length -- Gitee From 43932f3a4f6f6362d44185f8b90eb76c0f521f02 Mon Sep 17 
00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 23 Jan 2025 15:50:59 +0800 Subject: [PATCH 48/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 3175168e4..8964e93c4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -141,19 +141,19 @@ class AICorePerformanceChecker: try: self.result["cube"] = self.check_cube_operator(promoting_dataset) except (IndexError, ValueError, AttributeError) as e: - logger.error(f"Failed to check ai core performance, {e}.") + logger.error(f"Failed to check ai core performance cube operator, {e}.") self.result["cube"] = [] try: self.result["fa"] = self.check_fa_operator(promoting_dataset) except (IndexError, ValueError, AttributeError) as e: - logger.error(f"Failed to check ai core performance, {e}.") + logger.error(f"Failed to check ai core performance fa operator, {e}.") self.result["fa"] = [] try: self.result["vector"] = self.check_vector_operator(promoting_dataset) except (IndexError, ValueError, AttributeError) as e: - logger.error(f"Failed to check ai core performance, {e}.") + logger.error(f"Failed to check ai core performance vector operator, {e}.") self.result["vector"] = [] if not any([self.result["cube"], self.result["fa"], self.result["vector"]]): @@ -206,10 +206,19 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] shape_duration = sum(float(operator.task_duration) for operator in shap_list) dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list - if operator.aic_mac_ratio != "N/A") / len(shap_list) - aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list - if operator.aic_mac_ratio != "N/A") / len(shap_list) + aic_mac_ratio, aic_mte2_ratio = 0., 0. 
+ length = 0 + for operator in shap_list: + try: + aic_mac_ratio += float(operator.aic_mac_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + length += 1 + except ValueError: + continue + if length == 0: + continue + aic_mac_ratio = aic_mac_ratio / length + aic_mte2_ratio = aic_mte2_ratio / length if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: bound_queue.append({ "op_name": op, @@ -316,6 +325,8 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue + if length == 0: + continue aic_fixpipe_ratio = aic_fixpipe_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: @@ -337,6 +348,8 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue + if length == 0: + continue aiv_vec_ratio = aiv_vec_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: @@ -394,6 +407,8 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue + if length == 0: + continue aiv_vec_ratio = aiv_vec_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length -- Gitee From 1d1394fe9e2d48e36ea882112f8112ec5ed8cf41 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 5 Feb 2025 17:14:35 +0800 Subject: [PATCH 49/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=89=93=E5=B1=8F?= =?UTF-8?q?=E5=92=8C=E8=A1=A8=E6=A0=BC=E9=87=8C=E6=98=BE=E7=A4=BA=E7=A9=BA?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 270c43d41..32bd1da84 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -423,7 +423,7 @@ class AICorePerformanceChecker: cube_desc = dict.fromkeys(suggestion_keys, "") fa_desc = dict.fromkeys(suggestion_keys, "") vector_desc = dict.fromkeys(suggestion_keys, "") - if self.result["cube"]: + if any(self.result["cube"]): optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ @@ -447,7 +447,7 @@ class AICorePerformanceChecker: if cube_desc["affinity"]: result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) - if self.result["fa"]: + if any(self.result["fa"]): optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ @@ -471,7 +471,7 @@ class AICorePerformanceChecker: if fa_desc["affinity"]: result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) - if self.result["vector"]: + if any(self.result["vector"]): optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ -- Gitee From e2c29e98910cd5d97a78d77df07a684033f8ed27 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 14:46:57 +0800 Subject: [PATCH 50/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../ai_core_performance/ai_core_performance_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 90d7daa09..3316cf43d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -276,7 +276,7 @@ class AICorePerformanceChecker: else: # 处理定长算子 如果不亲和则affinity_flag为False head_dim = 0 - seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) + seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2]) input_first_tensor = shape.split("-")[0].split(";")[0].split(",") if len(input_first_tensor) == 3: head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) -- Gitee From a22dbe4b438ace270e8e84a25f4e0a039fe7d717 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 14:55:54 +0800 Subject: [PATCH 51/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 3316cf43d..9fd7f9a7f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -219,33 +219,29 @@ class AICorePerformanceChecker: continue aic_mac_ratio = aic_mac_ratio / length aic_mte2_ratio = aic_mte2_ratio / length + bound = "" + optimization = 0. 
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_and_mte2_bound", - "duration": shape_duration}) + bound = "mac_and_mte2_bound" elif aic_mac_ratio >= 0.8: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_bound", - "duration": shape_duration}) + bound = "mac_bound" elif aic_mte2_ratio >= 0.95: + bound = "mte2_bound" + else: + optimization = round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2) + if bound: bound_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "bound": "mte2_bound", + "bound": bound, "duration": shape_duration}) else: optimization_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)}) + "optimization": optimization}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] -- Gitee From b455912d39ba8251100b4a6eeb06fcc2dcd1aa36 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 15:02:29 +0800 Subject: [PATCH 52/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 9fd7f9a7f..5aecaaf03 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -1,7 +1,6 @@ # Copyright (c) 2024, Huawei Technologies Co., Ltd. # All rights reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # @@ -434,8 +433,6 @@ class AICorePerformanceChecker: return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] - pass - def make_record(self, result: OptimizeResult): """ make record for what and how to optimize -- Gitee From d7d4c7c2d66f8018a4dcf96ecd4a3c89b15a9d6c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 16:34:24 +0800 Subject: [PATCH 53/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5aecaaf03..b1d14ef58 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,6 +13,7 @@ # limitations under the License. 
import logging import os +from functools import reduce from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord @@ -80,20 +81,26 @@ class AICorePerformanceChecker: cube_memory_dict = {} vector_type_dict = {} - # filter cube operator and fa operator for op in operator_list: shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + # preliminary filter cube operator if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower(): cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0) cube_memory_dict[op.op_name][shapes] += self.memory_size(op) - elif op.op_type == "FlashAttentionScore": + continue + + # preliminary filter vector operator + if op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: + vector_type_dict.setdefault(op.op_type, set()).add(op) + continue + + # filter fa operator + if op.op_type == "FlashAttentionScore": self.fa_dict.setdefault(op.op_name, set()).add(shapes) self.fa_list.append(op) elif op.op_type == "FlashAttentionScoreGrad": self.fa_dict.setdefault(op.op_name, set()).add(shapes + "-grad") self.fa_list.append(op) - elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: - vector_type_dict.setdefault(op.op_type, set()).add(op) # filter cube operator for op_name in cube_memory_dict: @@ -116,21 +123,15 @@ class AICorePerformanceChecker: def memory_size(operator): memory = 0 input_shapes = operator.input_shapes[1:-1].split(";") + output_shapes = operator.output_shapes[1:-1] for shapes in input_shapes: if not "," in shapes and shapes != "": # 多的一维是 bias ,预先乘2 memory += int(shapes) * 2 continue - start = 1 - for shape in shapes.split(","): - start *= int(shape) - memory += start - - output_shape = operator.output_shapes[1:-1].split(",") - start = 1 - for shapes in output_shape: - start *= int(shapes) - memory += int(start) + memory += reduce(lambda x, y: x*y, map(int, shapes.split(","))) + memory += reduce(lambda x, y: x * y, map(int, output_shapes.split(","))) + return memory * 2 / 1024 / 1024 def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): -- Gitee From 4a6625cc7f8a4c132439d7efbf1d0af71bc066d1 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 16:35:08 +0800 Subject: [PATCH 54/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b1d14ef58..a3622ebdf 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, Huawei Technologies Co., Ltd. # All rights reserved. # +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -- Gitee From 3f0b15adb831e6d468f738a318417796a71b1c73 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:01:59 +0000 Subject: [PATCH 55/72] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20pr?= =?UTF-8?q?ofiler/cli/entrance.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/cli/entrance.py | 75 ---------------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 profiler/cli/entrance.py diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py deleted file mode 100644 index 89ac8187d..000000000 --- a/profiler/cli/entrance.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -import click - -from profiler.cli.analyze_cli import analyze_cli -from profiler.cli.complete_cli import auto_complete_cli -from profiler.cli.compare_cli import compare_cli -from profiler.cli.cluster_cli import cluster_cli -from profiler.advisor.version import print_version_callback, cli_version - -logger = logging.getLogger() -CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], - max_content_width=160) - -COMMAND_PRIORITY = { - "advisor": 1, - "compare": 2, - "cluster": 3, - "auto-completion": 4 -} - - -class SpecialHelpOrder(click.Group): - - def __init__(self, *args, **kwargs): - super(SpecialHelpOrder, self).__init__(*args, **kwargs) - - def list_commands_for_help(self, ctx): - """ - reorder the list of commands when listing the help - """ - commands = super(SpecialHelpOrder, self).list_commands(ctx) - return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')), - command) for command in commands)] - - def get_help(self, ctx): - self.list_commands = self.list_commands_for_help - return super(SpecialHelpOrder, self).get_help(ctx) - - -@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder) -@click.option('--version', '-V', '-v', is_flag=True, - callback=print_version_callback, expose_value=False, - is_eager=True, help=cli_version()) -def msprof_analyze_cli(**kwargs): - pass - - -msprof_analyze_cli.add_command(analyze_cli, name="advisor") -msprof_analyze_cli.add_command(compare_cli, name="compare") -msprof_analyze_cli.add_command(cluster_cli, name="cluster") -msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") - -if __name__ == "__main__": - msprof_analyze_cli.main( - [ - "analyze","all","-d", - r"D:\data\file","-l","cn" - ] - ) -- Gitee From 353723271a2e967a4d9e4495db343b9b4aef2cc0 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:07:41 +0000 Subject: [PATCH 56/72] =?UTF-8?q?Revert=20"=E5=88=A0=E9=99=A4=E6=96=87?= =?UTF-8?q?=E4=BB=B6=20profiler/cli/entrance.py"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit This reverts commit 3f0b15adb831e6d468f738a318417796a71b1c73. --- profiler/cli/entrance.py | 75 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 profiler/cli/entrance.py diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py new file mode 100644 index 000000000..89ac8187d --- /dev/null +++ b/profiler/cli/entrance.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import click + +from profiler.cli.analyze_cli import analyze_cli +from profiler.cli.complete_cli import auto_complete_cli +from profiler.cli.compare_cli import compare_cli +from profiler.cli.cluster_cli import cluster_cli +from profiler.advisor.version import print_version_callback, cli_version + +logger = logging.getLogger() +CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], + max_content_width=160) + +COMMAND_PRIORITY = { + "advisor": 1, + "compare": 2, + "cluster": 3, + "auto-completion": 4 +} + + +class SpecialHelpOrder(click.Group): + + def __init__(self, *args, **kwargs): + super(SpecialHelpOrder, self).__init__(*args, **kwargs) + + def list_commands_for_help(self, ctx): + """ + reorder the list of commands when listing the help + """ + commands = super(SpecialHelpOrder, self).list_commands(ctx) + return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')), + command) for command in commands)] + + def get_help(self, ctx): + self.list_commands = self.list_commands_for_help + return super(SpecialHelpOrder, self).get_help(ctx) + + +@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder) +@click.option('--version', '-V', '-v', is_flag=True, + callback=print_version_callback, expose_value=False, + is_eager=True, help=cli_version()) +def msprof_analyze_cli(**kwargs): + pass + + +msprof_analyze_cli.add_command(analyze_cli, name="advisor") +msprof_analyze_cli.add_command(compare_cli, name="compare") +msprof_analyze_cli.add_command(cluster_cli, name="cluster") +msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") + +if __name__ == "__main__": + msprof_analyze_cli.main( + [ + "analyze","all","-d", + r"D:\data\file","-l","cn" + ] + ) -- Gitee From 3b444579f0280889b0d77c7a3e2e018401115e04 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:08:19 +0000 Subject: [PATCH 57/72] update profiler/cli/entrance.py. 
Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com> --- profiler/cli/entrance.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 89ac8187d..503cf9ea6 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -64,12 +64,4 @@ def msprof_analyze_cli(**kwargs): msprof_analyze_cli.add_command(analyze_cli, name="advisor") msprof_analyze_cli.add_command(compare_cli, name="compare") msprof_analyze_cli.add_command(cluster_cli, name="cluster") -msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") - -if __name__ == "__main__": - msprof_analyze_cli.main( - [ - "analyze","all","-d", - r"D:\data\file","-l","cn" - ] - ) +msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") \ No newline at end of file -- Gitee From 7308fe6d140b9b0fa099381aeb5265c6712d1e62 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:11:03 +0000 Subject: [PATCH 58/72] update profiler/test/ut/advisor/compute_advice/data/kernel_details.csv. Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com> --- .../compute_advice/data/kernel_details.csv | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv index e69de29bb..020178358 100644 --- a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv +++ b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv @@ -0,0 +1,30 @@ +Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Time(us),Duration(us),Wait Time(us),Block Dim,Mix Block Dim,HF32 Eligible,Input Shapes,Input Data Types,Input Formats,Output Shapes,Output Data Types,Output Formats,Context ID,aicore_time(us),aic_total_cycles,aic_mac_time(us),aic_mac_ratio,aic_scalar_time(us),aic_scalar_ratio,aic_mte1_time(us),aic_mte1_ratio,aic_mte2_time(us),aic_mte2_ratio,aic_fixpipe_time(us),aic_fixpipe_ratio,aic_icache_miss_rate,aiv_time(us),aiv_total_cycles,aiv_vec_time(us),aiv_vec_ratio,aiv_scalar_time(us),aiv_scalar_ratio,aiv_mte2_time(us),aiv_mte2_ratio,aiv_mte3_time(us),aiv_mte3_ratio,aiv_icache_miss_rate,cube_utilization(%) +19,4294967295,61653,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971558972.912 ",185.504,1.087,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.87,5295467,151.425,0.824,88.03,0.479,119.148,0.648,177.314,0.964,5.736,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,79.295 +19,4294967295,61669,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971560588.764 ",501.17,2.2,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,478.701,17233251,356.349,0.744,118.087,0.247,296.009,0.618,452.112,0.944,35.833,0.075,0.001,0,0,0,0,0,0,0,0,0,0,0,95.517 +19,4294967295,61694,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971565213.257 ",186.823,1.178,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.728,5291376,151.502,0.825,87.902,0.478,118.519,0.645,177.654,0.967,5.773,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.675 +19,4294967295,61710,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971566843.489 
",516.991,2.33,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,491.775,17703905,356.249,0.724,118.59,0.241,295.046,0.6,463.696,0.943,37.671,0.077,0.001,0,0,0,0,0,0,0,0,0,0,0,95.123 +19,4294967295,61735,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971571596.404 ",187.724,0.766,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,184.904,5325221,151.489,0.819,87.893,0.475,118.63,0.642,178.815,0.967,5.77,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.798 +19,4294967295,61751,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971573223.437 ",514.87,2.15,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,486.931,17529512,356.117,0.731,118.847,0.244,295.529,0.607,457.002,0.939,37.938,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,94.574 +19,4294967295,61776,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971577931.851 ",190.544,1.367,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,187.073,5387702,151.741,0.811,87.935,0.47,117.467,0.628,181.043,0.968,5.803,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.543 +19,4294967295,61792,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971579566.403 ",504.071,2.28,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,485.542,17479517,356.283,0.734,117.755,0.243,296.421,0.61,455.064,0.937,37.75,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,96.324 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,60679,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971411629.128 ",410.188,1.53,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,366.147,13181275,129.055,0.352,352.275,0.962,108.364,0.296,172.86,0.872,216.141,0.59,0.003,365.782,26336326,228.687,0.625,137.979,0.377,118.603,0.324,71.448,0.195,0.013,89.263 
+19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339 +19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377 +19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 +19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.78,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141 +19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.76,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294 +19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 
",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.779,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143 +19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.783,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238 +19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 +19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0 +19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0 +19,4294967295,60711,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971416743.250 ",27.021,1.246,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.304,1749862,5.986,0.246,1.258,0.052,20.424,0.84,3.23,0.133,0.027,0 +19,4294967295,60718,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971419318.962 ",25.08,0.984,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,22.47,1617840,5.989,0.267,2.009,0.089,18.809,0.837,3.191,0.142,0.024,0 +19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0 +19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 ",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0 +19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 
",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0 +19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0 \ No newline at end of file -- Gitee From 3fd8aae7004492e4215e3a78d4fa12570dd193f4 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 09:13:14 +0800 Subject: [PATCH 59/72] =?UTF-8?q?Cube=E4=B8=8Efa=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E5=86=85=E8=BD=B4=E5=88=A4=E6=96=AD=E6=96=B9=E6=B3=95=E6=8A=BD?= =?UTF-8?q?=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 123 ++++++++++-------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index a3622ebdf..598c3690f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -31,6 +31,8 @@ class AICorePerformanceChecker: """ _CHECKER = "AICorePerformanceChecker" CUBE_OPERATOR_MEMORY_SIZE_MB = 100 + INNER_AXIS_256 = 256 + INNER_AXIS_128 = 128 def __init__(self): @@ -130,7 +132,7 @@ class AICorePerformanceChecker: # 多的一维是 bias ,预先乘2 memory += int(shapes) * 2 continue - memory += reduce(lambda x, y: x*y, map(int, shapes.split(","))) + memory += reduce(lambda x, y: x * y, map(int, shapes.split(","))) memory += reduce(lambda x, y: x * y, map(int, output_shapes.split(","))) return memory * 2 / 1024 / 1024 @@ -173,22 +175,7 @@ class AICorePerformanceChecker: for shape in cube_dict[op]: dtype = None shape_duration = 0. 
- # 判断输入shape内轴是否为256的倍数 - if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: - # NZ格式 - shapes = shape.split("-")[0].split(";") - b = int(shapes[0].split(",")[1]) - c = int(shapes[0].split(",")[2]) - - f = int(shapes[1].split(",")[1]) - g = int(shapes[1].split(",")[2]) - affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) - else: - # ND格式 - shapes = shape.split("-")[0].split(";") - l = int(shapes[0].split(",")[1]) - k = int(shapes[1].split(",")[1]) - affinity_flag = (l % 256 == 0) and (k % 256 == 0) + affinity_flag = self.check_affinity(shape) if not affinity_flag: for operator in operator_list: if (operator.op_name == op and @@ -247,6 +234,24 @@ class AICorePerformanceChecker: sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + def _check_cube_inner_axis(self, shape): + # 判断输入shape内轴是否为256的倍数 + if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: + # NZ格式 + shapes = shape.split("-")[0].split(";") + b = int(shapes[0].split(",")[1]) + c = int(shapes[0].split(",")[2]) + + f = int(shapes[1].split(",")[1]) + g = int(shapes[1].split(",")[2]) + return (b * c % self.INNER_AXIS_256 == 0) and (f * g % self.INNER_AXIS_256 == 0) + else: + # ND格式 + shapes = shape.split("-")[0].split(";") + l = int(shapes[0].split(",")[1]) + k = int(shapes[1].split(",")[1]) + return (l % self.INNER_AXIS_256 == 0) and (k % self.INNER_AXIS_256 == 0) + def check_fa_operator(self, profiling_dataset: ProfilingDataset): fa_list = self.fa_list fa_dict = self.fa_dict @@ -256,46 +261,7 @@ class AICorePerformanceChecker: # 不亲和算子筛选 for op in fa_dict: for shape in fa_dict[op]: - affinity_flag = False - shape_duration = 0. - dtype = None - suggestion = "" - if "varlen" in op.lower(): - # 处理变长算子 如果不亲和则affinity_flag为False - if int(shape.split("-")[0].split(";")[0].split(",")[2]) % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE1 - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - else: - # 处理定长算子 如果不亲和则affinity_flag为False - head_dim = 0 - seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2]) - input_first_tensor = shape.split("-")[0].split(";")[0].split(",") - if len(input_first_tensor) == 3: - head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) - else: - head_dim = int(input_first_tensor[3]) - if head_dim % 128 != 0 and seq_len % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE3 - elif head_dim % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE1 - elif seq_len % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE2 - if affinity_flag: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + - operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - + affinity_flag, dtype, shape_duration, suggestion = self.method_name(fa_list, op, shape) if affinity_flag: # 不亲和算子 计算耗时,加入affinity_queue affinity_queue.append({ @@ -375,6 +341,49 @@ class AICorePerformanceChecker: sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + def _check_fa_inner_axis(self, fa_list, op, shape): + shape_duration = 0. 
+ affinity_flag = False + dtype = None + suggestion = "" + if "varlen" in op.lower(): + # 处理变长算子 如果不亲和则affinity_flag为False + inner_axis = int(shape.split("-")[0].split(";")[0].split(",")[2]) + if inner_axis % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE1 + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + else: + # 处理定长算子 如果不亲和则affinity_flag为False + head_dim = 0 + seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2]) + input_first_tensor = shape.split("-")[0].split(";")[0].split(",") + if len(input_first_tensor) == 3: + head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) + else: + head_dim = int(input_first_tensor[3]) + if head_dim % self.INNER_AXIS_128 != 0 and seq_len % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE3 + elif head_dim % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE1 + elif seq_len % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE2 + if affinity_flag: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + return affinity_flag, dtype, shape_duration, suggestion + def check_vector_operator(self, profiling_dataset: ProfilingDataset): vector_dict = self.vector_dict vector_list = [] -- Gitee From 1902efa711e28f6a24c2600253f90fd841e33aea Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 09:24:18 +0800 Subject: [PATCH 60/72] =?UTF-8?q?Cube=E4=B8=8Evector=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E6=89=A9=E5=B1=95=E5=B9=B6=E6=8F=90=E5=8F=96=E6=94=B6=E9=9B=86?= =?UTF-8?q?=E7=AE=97=E5=AD=90=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 598c3690f..baaa5e949 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -164,13 +164,11 @@ class AICorePerformanceChecker: def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict + suggestion = self._CUBE_AFFINITY_DESC optimization_queue = [] bound_queue = [] affinity_queue = [] - operator_list = [op for op in profiling_dataset.op_summary.op_list - if op.op_name in cube_dict - and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] - suggestion = self._CUBE_AFFINITY_DESC + operator_list = self._get_operator_list(cube_dict, profiling_dataset) for op in cube_dict: for shape in cube_dict[op]: dtype = None @@ -234,6 +232,16 @@ class AICorePerformanceChecker: sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + @staticmethod + def _get_operator_list(cube_dict, profiling_dataset): + operator_list = [] + 
for op in profiling_dataset.op_summary.op_list: + if op.op_name in cube_dict: + key = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + if key in cube_dict[op.op_name]: + operator_list.append(op) + return operator_list + def _check_cube_inner_axis(self, shape): # 判断输入shape内轴是否为256的倍数 if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: @@ -386,16 +394,9 @@ class AICorePerformanceChecker: def check_vector_operator(self, profiling_dataset: ProfilingDataset): vector_dict = self.vector_dict - vector_list = [] optimization_queue = [] bound_queue = [] - vector_list.extend( - operator for op_name in vector_dict - for shape in vector_dict[op_name] - for operator in profiling_dataset.op_summary.op_list - if operator.op_name == op_name - and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape - ) + vector_list = self._get_vector_list(profiling_dataset, vector_dict) for op_name in vector_dict: for shape in vector_dict[op_name]: aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. @@ -444,6 +445,17 @@ class AICorePerformanceChecker: return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] + @staticmethod + def _get_vector_list(profiling_dataset, vector_dict): + vector_list = [] + for op_name in vector_dict: + for shape in vector_dict[op_name]: + for operator in profiling_dataset.op_summary.op_list: + if operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[ + 1:-1] == shape: + vector_list.extend([operator]) + return vector_list + def make_record(self, result: OptimizeResult): """ make record for what and how to optimize -- Gitee From cec50141aaa01bc64497f6ec0cf3a9ee58c5475c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 09:41:30 +0800 Subject: [PATCH 61/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=99=A4=E9=9B=B6?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index baaa5e949..dcd87f1c4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -192,8 +192,7 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] shape_duration = sum(float(operator.task_duration) for operator in shap_list) dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio, aic_mte2_ratio = 0., 0. - length = 0 + aic_mac_ratio, aic_mte2_ratio, length = 0., 0., 0 for operator in shap_list: try: aic_mac_ratio += float(operator.aic_mac_ratio) @@ -201,10 +200,10 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aic_mac_ratio = self.safe_divide(aic_mac_ratio, length) + aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) + if aic_mac_ratio is None or aic_mte2_ratio is None: continue - aic_mac_ratio = aic_mac_ratio / length - aic_mte2_ratio = aic_mte2_ratio / length bound = "" optimization = 0. 
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: @@ -296,10 +295,10 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aic_fixpipe_ratio = self.safe_divide(aic_fixpipe_ratio, length) + aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) + if aic_mte2_ratio is None or aic_fixpipe_ratio is None: continue - aic_fixpipe_ratio = aic_fixpipe_ratio / length - aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: bound = "mte2_and_fixpipe_bound" elif aic_mte2_ratio >= 0.8: @@ -414,11 +413,11 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aiv_vec_ratio = self.safe_divide(aiv_vec_ratio,length) + aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio,length) + aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio,length) + if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None: continue - aiv_vec_ratio = aiv_vec_ratio / length - aiv_mte2_ratio = aiv_mte2_ratio / length - aiv_mte2_ratio = aiv_mte2_ratio / length if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: bound = "vec_mte2_mte3_bound" elif aiv_mte2_ratio >= 0.7: @@ -561,3 +560,10 @@ class AICorePerformanceChecker: logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True + + @staticmethod + def safe_divide(numerator, denominator): + if denominator == 0: + logger.warning("Warning: Division by zero is not allowed.") + return None + return numerator / denominator -- Gitee From a601933b53899387cc8fb9bad30ea59669bafa52 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 10:34:16 +0800 Subject: [PATCH 62/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=99=A4=E9=9B=B6?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C=EF=BC=8C=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 13 +++---- .../advisor/rules/cn/aicore_performance.yaml | 34 ++++++++++++++++++- .../advisor/rules/en/aicore_performance.yaml | 34 ++++++++++++++++++- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index dcd87f1c4..445b5f761 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -16,6 +16,7 @@ import logging import os from functools import reduce +from debug.accuracy_tools.msprobe.core.common.utils import convert_tuple from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult @@ -318,10 +319,10 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aiv_vec_ratio = self.safe_divide(aiv_vec_ratio, length) + aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) + if aiv_vec_ratio is None or aic_mte2_ratio is None: continue - aiv_vec_ratio = aiv_vec_ratio / length - aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: bound = "mte2_and_vec_bound" elif aic_mte2_ratio >= 0.8: @@ -413,9 +414,9 @@ class AICorePerformanceChecker: length += 1 except ValueError: 
continue - aiv_vec_ratio = self.safe_divide(aiv_vec_ratio,length) - aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio,length) - aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio,length) + aiv_vec_ratio = self.safe_divide(aiv_vec_ratio, length) + aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio, length) + aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio, length) if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None: continue if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index 8d44aaab2..382de6db5 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -12,4 +12,36 @@ fa_affinity_desc_type3: "D和S均不能被128整除" suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子" affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n" bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n" -optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n" \ No newline at end of file +optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n" + +cube_operators: + - target: aic_mac_ratio + bound: mac + threshold_value: 0.8 + - target: aic_mte2_ratio + bound: mte2 + threshold_value: 0.95 + +fa_operators: + - target: aic_mte2_ratio + bound: mac + threshold_value: 0.8 + - target: aic_fixpipe_ratio + bound: fixpipe + threshold_value: 0.75 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.75 + +vector_operators: + - target: total + threshold_value: 0.9 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.7 + - target: aiv_mte2_ratio + bound: mte2 + threshold_value: 0.7 + - target: aiv_mte3_ratio + bound: mte3 + threshold_value: 0.7 \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml index e85a919ab..cae3700b2 100644 --- a/profiler/advisor/rules/en/aicore_performance.yaml +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -12,4 +12,36 @@ fa_affinity_desc_type3: "Neither D nor S is not divisible by 128" suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space" affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n" bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n" -optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n" \ No newline at end of file +optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n" + +cube_operators: + - target: aic_mac_ratio + bound: mac + threshold_value: 0.8 + - target: aic_mte2_ratio + bound: mte2 + threshold_value: 0.95 + +fa_operators: + - target: aic_mte2_ratio + bound: mac + threshold_value: 0.8 + - target: aic_fixpipe_ratio + bound: fixpipe + threshold_value: 0.75 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.75 + +vector_operators: + - target: total + threshold_value: 0.9 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.7 + - target: aiv_mte2_ratio + bound: mte2 + threshold_value: 0.7 + - target: 
aiv_mte3_ratio + bound: mte3 + threshold_value: 0.7 \ No newline at end of file -- Gitee From af7d34f13a7d02c230582b0c132a85605b8ca5df Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:27:38 +0800 Subject: [PATCH 63/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=A7=84=E5=88=99?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE=EF=BC=8C=E5=B0=86=E9=98=88=E5=80=BC=E6=94=BE?= =?UTF-8?q?=E5=85=A5=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 102 ++++++++++++------ .../advisor/rules/cn/aicore_performance.yaml | 18 ++-- .../advisor/rules/en/aicore_performance.yaml | 19 ++-- 3 files changed, 89 insertions(+), 50 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 445b5f761..00e35c84d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -16,7 +16,6 @@ import logging import os from functools import reduce -from debug.accuracy_tools.msprobe.core.common.utils import convert_tuple from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult @@ -75,6 +74,9 @@ class AICorePerformanceChecker: self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion") self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion") self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion") + self._OPERATOR_RULES = {"cube_operators": self.aicore_rules.get("cube_operators"), + "fa_operators": self.aicore_rules.get("fa_operators"), + "vector_operators": self.aicore_rules.get("vector_operators")} def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_list(profiling_dataset): @@ -207,14 +209,22 @@ class AICorePerformanceChecker: continue bound = "" optimization = 0. 
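# Editorial sketch, not a diff line: the hunk below swaps the hard-coded
# 0.8 / 0.95 cube thresholds for lookups into the rules loaded from
# aicore_performance.yaml. A stand-alone illustration of that lookup pattern
# follows; `pick_rule` and `classify_cube` are illustrative names, not the
# project's API.
def pick_rule(rules, target):
    # Return the first rule whose "target" matches, or None when absent.
    return next((rule for rule in rules if rule.get("target") == target), None)


cube_rules = [
    {"target": "aic_mac_ratio", "bound": "mac", "threshold": 0.8},
    {"target": "aic_mte2_ratio", "bound": "mte2", "threshold": 0.95},
]
mac_rule = pick_rule(cube_rules, "aic_mac_ratio")
mte2_rule = pick_rule(cube_rules, "aic_mte2_ratio")


def classify_cube(aic_mac_ratio, aic_mte2_ratio):
    # Bound when a metric clears its configured threshold; otherwise report the
    # largest remaining gap (in percent) as the optimization headroom.
    if aic_mac_ratio >= mac_rule["threshold"] and aic_mte2_ratio >= mte2_rule["threshold"]:
        return mac_rule["bound"] + "_and_" + mte2_rule["bound"] + "_bound", 0.0
    if aic_mac_ratio >= mac_rule["threshold"]:
        return mac_rule["bound"], 0.0
    if aic_mte2_ratio >= mte2_rule["threshold"]:
        return mte2_rule["bound"], 0.0
    gap = max(mac_rule["threshold"] - aic_mac_ratio, mte2_rule["threshold"] - aic_mte2_ratio)
    return "", round(gap * 100, 2)


assert classify_cube(0.85, 0.96)[0] == "mac_and_mte2_bound"
bound_label, headroom_pct = classify_cube(0.5, 0.5)
assert bound_label == "" and headroom_pct > 0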
- if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: - bound = "mac_and_mte2_bound" - elif aic_mac_ratio >= 0.8: - bound = "mac_bound" - elif aic_mte2_ratio >= 0.95: - bound = "mte2_bound" + aic_mac_ratio_rule, aic_mte2_ratio_rule = None, None + for operator_rule in self._OPERATOR_RULES["cube_operators"]: + if operator_rule["target"] == "aic_mac_ratio": + aic_mac_ratio_rule = operator_rule + elif operator_rule["target"] == "aic_mte2_ratio": + aic_mte2_ratio_rule = operator_rule + if (aic_mac_ratio >= aic_mac_ratio_rule["threshold"] + and aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]): + bound = aic_mac_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound" + elif aic_mac_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mac_ratio_rule["bound"] + elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mte2_ratio_rule["bound"] else: - optimization = round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2) + optimization = max(aic_mac_ratio["threshold"] - aic_mac_ratio, + aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) if bound: bound_queue.append({ "op_name": op, @@ -227,7 +237,7 @@ class AICorePerformanceChecker: "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": optimization}) + "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] @@ -300,14 +310,22 @@ class AICorePerformanceChecker: aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) if aic_mte2_ratio is None or aic_fixpipe_ratio is None: continue - if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: - bound = "mte2_and_fixpipe_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" + aic_fixpipe_ratio_rule, aic_mte2_ratio_rule = None, None + for rule in self._OPERATOR_RULES["fa_operators"]: + if rule["target"] == "aic_fixpipe_ratio": + aic_fixpipe_ratio_rule = rule + elif rule["target"] == "aic_mte2_ratio": + aic_mte2_ratio_rule = rule + if (aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"] and + aic_fixpipe_ratio >= aic_fixpipe_ratio_rule["threshold"]): + bound = aic_fixpipe_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound" + elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mte2_ratio_rule["bound"] + elif aic_fixpipe_ratio >= aic_fixpipe_ratio_rule["threshold"]: + bound = aic_fixpipe_ratio_rule["bound"] else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + optimization = max(aic_fixpipe_ratio_rule["threshold"] - aic_fixpipe_ratio, + aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) else: for operator in fa_list: if (operator.op_name == op and @@ -323,14 +341,22 @@ class AICorePerformanceChecker: aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) if aiv_vec_ratio is None or aic_mte2_ratio is None: continue - if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: - bound = "mte2_and_vec_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" + aiv_vec_ratio_rule, aic_mte2_ratio_rule = None, None + for rule in self._OPERATOR_RULES["fa_operators"]: + if rule["target"] == "aiv_vec_ratio": + aiv_vec_ratio_rule = rule + elif rule["target"] == "aic_mte2_ratio": + aic_mte2_ratio_rule = rule + if (aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"] + and 
aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]): + bound = aic_mte2_ratio_rule["bound"] + "_and_" + aiv_vec_ratio_rule["bound"] + "_bound" + elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mte2_ratio_rule["bound"] + elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]: + bound = aiv_vec_ratio_rule["bound"] else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio, + aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) if bound: bound_queue.append({ "op_name": op, @@ -419,16 +445,28 @@ class AICorePerformanceChecker: aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio, length) if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None: continue - if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: - bound = "vec_mte2_mte3_bound" - elif aiv_mte2_ratio >= 0.7: - bound = "mte2_bound" - elif aiv_mte3_ratio >= 0.7: - bound = "mte3_bound" - elif aiv_vec_ratio >= 0.7: - bound = "vec_bound" + aiv_vec_ratio_rule, aiv_mte2_ratio_rule, aiv_mte3_ratio_rule, total_rule = None, None, None, None + for operator_rule in self._OPERATOR_RULES["vector_operators"]: + if operator_rule["target"] == "aiv_vec_ratio": + aiv_vec_ratio_rule = operator_rule + elif operator_rule["target"] == "aic_mte2_ratio": + aiv_mte2_ratio_rule = operator_rule + elif operator_rule["target"] == "aic_mte3_ratio": + aiv_mte3_ratio_rule = operator_rule + elif operator_rule["target"] == "total": + total_rule = operator_rule + if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= total_rule["threshold"]: + bound = total_rule["bound"] + elif aiv_mte2_ratio >= aiv_mte2_ratio_rule["threshold"]: + bound = aiv_mte2_ratio_rule["bound"] + elif aiv_mte3_ratio >= aiv_mte3_ratio_rule["threshold"]: + bound = aiv_mte3_ratio_rule["bound"] + elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]: + bound = aiv_vec_ratio_rule["bound"] else: - optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) + optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio, + aiv_mte2_ratio_rule["threshold"] - aiv_mte2_ratio, + aiv_mte3_ratio_rule["threshold"] - aiv_mte3_ratio) if bound: bound_queue.append({ "op_name": op_name, diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index 382de6db5..f6fd914ac 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -17,31 +17,31 @@ optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似 cube_operators: - target: aic_mac_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_mte2_ratio bound: mte2 - threshold_value: 0.95 + threshold: 0.95 fa_operators: - target: aic_mte2_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_fixpipe_ratio bound: fixpipe - threshold_value: 0.75 + threshold: 0.75 - target: aiv_vec_ratio bound: vec - threshold_value: 0.75 + threshold: 0.75 vector_operators: - target: total - threshold_value: 0.9 + threshold: 0.9 - target: aiv_vec_ratio bound: vec - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte2_ratio bound: mte2 - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte3_ratio bound: mte3 - threshold_value: 0.7 \ No newline at end of file + threshold: 0.7 \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml index cae3700b2..b1e5e4701 100644 --- 
a/profiler/advisor/rules/en/aicore_performance.yaml +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -17,31 +17,32 @@ optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect the cube_operators: - target: aic_mac_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_mte2_ratio bound: mte2 - threshold_value: 0.95 + threshold: 0.95 fa_operators: - target: aic_mte2_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_fixpipe_ratio bound: fixpipe - threshold_value: 0.75 + threshold: 0.75 - target: aiv_vec_ratio bound: vec - threshold_value: 0.75 + threshold: 0.75 vector_operators: - target: total - threshold_value: 0.9 + bound: vec_mte2_mte3 + threshold: 0.9 - target: aiv_vec_ratio bound: vec - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte2_ratio bound: mte2 - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte3_ratio bound: mte3 - threshold_value: 0.7 \ No newline at end of file + threshold: 0.7 \ No newline at end of file -- Gitee From f32c30bda3489fa385fdf246da4ffc2a2dd7b289 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:40:36 +0800 Subject: [PATCH 64/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 00e35c84d..4d51da450 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -176,7 +176,7 @@ class AICorePerformanceChecker: for shape in cube_dict[op]: dtype = None shape_duration = 0. 
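# Editorial sketch, not a diff line: the `shape` key checked below is built as
# op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1], with ";" separating
# tensors and "," separating axes. A minimal parse of that convention, taking
# the 256 alignment and "last axis is the inner axis" as assumptions:
CUBE_INNER_AXIS_ALIGN = 256


def inner_axis_misaligned(shape_key):
    # Inspect the last axis of the first input tensor in the key.
    first_input_tensor = shape_key.split("-")[0].split(";")[0]
    inner_axis = int(first_input_tensor.split(",")[-1])
    return inner_axis % CUBE_INNER_AXIS_ALIGN != 0


assert not inner_axis_misaligned("4096,2,512-4096,2,512")   # 512 is a multiple of 256
assert inner_axis_misaligned("4096,2,500-4096,2,512")       # 500 is not, so flag it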
- affinity_flag = self.check_affinity(shape) + affinity_flag = self._check_cube_inner_axis(shape) if not affinity_flag: for operator in operator_list: if (operator.op_name == op and @@ -279,7 +279,7 @@ class AICorePerformanceChecker: # 不亲和算子筛选 for op in fa_dict: for shape in fa_dict[op]: - affinity_flag, dtype, shape_duration, suggestion = self.method_name(fa_list, op, shape) + affinity_flag, dtype, shape_duration, suggestion = self._check_fa_inner_axis(fa_list, op, shape) if affinity_flag: # 不亲和算子 计算耗时,加入affinity_queue affinity_queue.append({ -- Gitee From 89a5eeecb71f391249c5075bfaa2f2f32fdb1aba Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:47:26 +0800 Subject: [PATCH 65/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 4d51da450..eaf6340cd 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -223,7 +223,7 @@ class AICorePerformanceChecker: elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: bound = aic_mte2_ratio_rule["bound"] else: - optimization = max(aic_mac_ratio["threshold"] - aic_mac_ratio, + optimization = max(aic_mac_ratio_rule["threshold"] - aic_mac_ratio, aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) if bound: bound_queue.append({ -- Gitee From f5d71f751ebe5fdd67431e213f3ba6e67c088b46 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:49:26 +0800 Subject: [PATCH 66/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/advisor/rules/cn/aicore_performance.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index f6fd914ac..3f60747b2 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -35,6 +35,7 @@ fa_operators: vector_operators: - target: total + bound: vec_mte2_mte3 threshold: 0.9 - target: aiv_vec_ratio bound: vec -- Gitee From ffe03877b86e44b239817c663162b58ed2c27148 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:59:39 +0800 Subject: [PATCH 67/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index eaf6340cd..b58f734c6 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -449,9 +449,9 @@ class AICorePerformanceChecker: for operator_rule in self._OPERATOR_RULES["vector_operators"]: if 
operator_rule["target"] == "aiv_vec_ratio": aiv_vec_ratio_rule = operator_rule - elif operator_rule["target"] == "aic_mte2_ratio": + elif operator_rule["target"] == "aiv_mte2_ratio": aiv_mte2_ratio_rule = operator_rule - elif operator_rule["target"] == "aic_mte3_ratio": + elif operator_rule["target"] == "aiv_mte3_ratio": aiv_mte3_ratio_rule = operator_rule elif operator_rule["target"] == "total": total_rule = operator_rule -- Gitee From f6c52443206feb83f8b2744afbdcdd124e947778 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Wed, 12 Feb 2025 06:28:49 +0000 Subject: [PATCH 68/72] update profiler/test/ut/advisor/compute_advice/data/kernel_details.csv. Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com> --- .../ut/advisor/compute_advice/data/kernel_details.csv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv index 020178358..f22cb8008 100644 --- a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv +++ b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv @@ -15,10 +15,10 @@ Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Tim 19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339 19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377 19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 -19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 
",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.78,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141 -19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.76,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294 -19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.779,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143 -19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.783,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238 +19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.525,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141 +19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 
",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.513,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294 +19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.523,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143 +19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.531,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238 19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0 19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0 @@ -27,4 +27,4 @@ Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Tim 19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0 19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 
",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0 19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 ",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0 -19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0 \ No newline at end of file +19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0 -- Gitee From 97ecae67dc91b58b00c8647b677873edade33b89 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 12 Feb 2025 22:49:58 +0800 Subject: [PATCH 69/72] =?UTF-8?q?=E6=8F=90=E5=8F=96make=5Frecord=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E4=B8=AD=E7=9A=84=E5=8A=9F=E8=83=BD=E9=A1=B9=EF=BC=8C?= =?UTF-8?q?=E7=BC=A9=E5=87=8F=E6=96=B9=E6=B3=95=E5=86=85=E9=95=BF=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 102 +++++++----------- 1 file changed, 37 insertions(+), 65 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b58f734c6..20fac2d92 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -494,6 +494,39 @@ class AICorePerformanceChecker: vector_list.extend([operator]) return vector_list + def draw_record(self, op_type: str, result: OptimizeResult): + suggestion_keys = ['opti', 'bound', 'affinity'] + desc = dict.fromkeys(suggestion_keys, "") + problem_map = { + 'cube': self._CUBE_PROBLEM, + 'fa': self._FA_PROBLEM, + 'vector': self._VECTOR_PROBLEM + } + optimization_item = OptimizeItem(problem_map[op_type], self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(problem_map[op_type], headers=headers) + for opti_issue in self.result[op_type][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**opti_issue) + desc["opti"] += opti_sugg + if desc["opti"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, desc["opti"]]) + for bound_issue in self.result[op_type][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue) + desc["bound"] += bound_sugg + if desc["bound"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) + if op_type == "vector": # vector 类型没有亲和性建议 + return + for affinity_issue in self.result[op_type][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue) + desc["affinity"] += affinity_sugg + if desc["affinity"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) + def make_record(self, result: OptimizeResult): """ make record for what and how to optimize @@ -501,76 +534,15 @@ class AICorePerformanceChecker: if not self.ai_core_performance_issues: return 
self.ai_core_performance_issues - suggestion_keys = ['opti', 'bound', 'affinity'] - cube_desc = dict.fromkeys(suggestion_keys, "") - fa_desc = dict.fromkeys(suggestion_keys, "") - vector_desc = dict.fromkeys(suggestion_keys, "") if any(self.result["cube"]): - optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(self._CUBE_PROBLEM, headers=headers) - for cube_opti_issue in self.result["cube"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) - cube_desc["opti"] += opti_sugg - if cube_desc["opti"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) - for cube_bound_issue in self.result["cube"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) - cube_desc["bound"] += bound_sugg - if cube_desc["bound"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) - for cube_affinity_issue in self.result["cube"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) - cube_desc["affinity"] += affinity_sugg - if cube_desc["affinity"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) + self.draw_record("cube", result) if any(self.result["fa"]): - optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(self._FA_PROBLEM, headers=headers) - for fa_opti_issue in self.result["fa"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) - fa_desc["opti"] += opti_sugg - if fa_desc["opti"]: - result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) - for fa_bound_issue in self.result["fa"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) - fa_desc["bound"] += bound_sugg - if fa_desc["bound"]: - result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) - for fa_affinity_issue in self.result["fa"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) - fa_desc["affinity"] += affinity_sugg - if fa_desc["affinity"]: - result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) + self.draw_record("fa", result) if any(self.result["vector"]): - optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(self._VECTOR_PROBLEM, headers=headers) - for vector_opti_issue in self.result["vector"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) - vector_desc["opti"] += opti_sugg - if vector_desc["opti"]: - result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) - for vector_bound_issue in self.result["vector"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) - vector_desc["bound"] += bound_sugg - if vector_desc["bound"]: - result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) + self.draw_record("vector", result) + return True def make_render(self, html_render, add_render_list=True, **kwargs): -- Gitee From 7e5e5936afb2e1d972e1896ab047cebff1b6cc64 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 13 Feb 2025 15:47:42 +0800 Subject: [PATCH 70/72] 
=?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 20fac2d92..e947b140d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -518,14 +518,14 @@ class AICorePerformanceChecker: bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue) desc["bound"] += bound_sugg if desc["bound"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) if op_type == "vector": # vector 类型没有亲和性建议 return for affinity_issue in self.result[op_type][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue) desc["affinity"] += affinity_sugg if desc["affinity"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) + result.add_detail(self._VECTOR_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) def make_record(self, result: OptimizeResult): """ -- Gitee From 863b681c903a3299c024a7ffc5ec0b6ebab65dbd Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 14 Feb 2025 10:22:52 +0800 Subject: [PATCH 71/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e947b140d..e3c3defc6 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -513,19 +513,19 @@ class AICorePerformanceChecker: opti_sugg = self._OPTI_SUGGESTION.format(**opti_issue) desc["opti"] += opti_sugg if desc["opti"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, desc["opti"]]) + result.add_detail(problem_map[op_type], detail=[self._OPTI_DESC, desc["opti"]]) for bound_issue in self.result[op_type][1]: bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue) desc["bound"] += bound_sugg if desc["bound"]: - result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) + result.add_detail(problem_map[op_type], detail=[self._BOUND_DESC, desc["bound"]]) if op_type == "vector": # vector 类型没有亲和性建议 return for affinity_issue in self.result[op_type][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue) desc["affinity"] += affinity_sugg if desc["affinity"]: - result.add_detail(self._VECTOR_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) + result.add_detail(problem_map[op_type], detail=[self._AFFINITY_DESC, desc["affinity"]]) def make_record(self, result: OptimizeResult): """ -- Gitee From e418fac51c15fbd29832ce6658d1e1efb4d027ff Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 14 Feb 2025 15:51:10 +0800 Subject: [PATCH 72/72] 
=?UTF-8?q?=E7=9B=AE=E5=BD=95=E8=BF=81=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../computation/ai_core_performance/__init__.py | 0 .../ai_core_performance_analyzer.py | 12 ++++++------ .../ai_core_performance_checker.py | 0 .../msprof_analyze/advisor/interface/interface.py | 2 +- .../test_ai_core_performance_advice.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/__init__.py (100%) rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py (78%) rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py (100%) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computation/ai_core_performance/__init__.py rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py similarity index 78% rename from profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 89b6be779..a648fb074 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -14,13 +14,13 @@ # limitations under the License. 
import logging -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \ +from profiler.msprof_analyze.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \ AICorePerformanceChecker -from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor -from profiler.advisor.display.html.render import HTMLRender +from profiler.msprof_analyze.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.msprof_analyze.advisor.result.result import OptimizeResult +from profiler.msprof_analyze.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.msprof_analyze.advisor.display.html.render import HTMLRender logger = logging.getLogger() diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py similarity index 100% rename from profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py diff --git a/profiler/msprof_analyze/advisor/interface/interface.py b/profiler/msprof_analyze/advisor/interface/interface.py index 30d9d0eef..cce2de625 100644 --- a/profiler/msprof_analyze/advisor/interface/interface.py +++ b/profiler/msprof_analyze/advisor/interface/interface.py @@ -44,7 +44,7 @@ from msprof_analyze.advisor.analyzer.schedule.gc.gc_analyzer import GcAnalyzer from msprof_analyze.advisor.analyzer.schedule.conjectured_gc.conjectured_gc_analyzer import ConjecturedGcAnalyzer from msprof_analyze.advisor.analyzer.comparison.comparison_analyzer import ComparisonAnalyzer from msprof_analyze.advisor.analyzer.schedule.fusible_ops.fusible_operator_analyzer import FusibleOperatorAnalyzer -from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \ +from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance import \ AICorePerformanceAnalyzer logger = logging.getLogger() diff --git a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 61ae35d13..e45f6ea3b 100644 --- a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -4,8 +4,8 @@ import shutil import stat import unittest -from profiler.advisor.interface.interface import Interface -from profiler.advisor.common.analyzer_scopes import SupportedScopes +from profiler.msprof_analyze.advisor.interface.interface import Interface +from profiler.msprof_analyze.advisor.common.analyzer_scopes import SupportedScopes class TestAICorePerformanceAdvice(unittest.TestCase): -- Gitee
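Across the patches above the checker's thresholds move out of the Python source and into rules/cn/aicore_performance.yaml and rules/en/aicore_performance.yaml, where each entry under cube_operators, fa_operators and vector_operators names a target metric, a bound label and a threshold. As a rough sketch of how such a file can be consumed (PyYAML is assumed to be available; the loader name and the example path are illustrative, not the project's actual API), the snippet below indexes every section by its target field so a caller can fetch a threshold or bound label by metric name:

import yaml  # assumption: PyYAML is available, as the YAML rule files imply

RULE_SECTIONS = ("cube_operators", "fa_operators", "vector_operators")


def load_aicore_rules(path):
    # Parse the rule file and index each section's entries by their "target"
    # metric so thresholds and bound labels can be looked up directly.
    with open(path, encoding="utf-8") as rule_file:
        raw_rules = yaml.safe_load(rule_file)
    return {
        section: {rule["target"]: rule for rule in raw_rules.get(section, [])}
        for section in RULE_SECTIONS
    }


# Hypothetical usage against the Chinese rule file shown above:
#   rules = load_aicore_rules("profiler/advisor/rules/cn/aicore_performance.yaml")
#   rules["vector_operators"]["aiv_mte2_ratio"]["threshold"]   # 0.7
#   rules["vector_operators"]["total"]["bound"]                # "vec_mte2_mte3"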