diff --git a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..a648fb074e9c443865e07cf44380abede307f317
--- /dev/null
+++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from profiler.msprof_analyze.advisor.analyzer.base_analyzer import BaseAnalyzer
+from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \
+ AICorePerformanceChecker
+from profiler.msprof_analyze.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
+from profiler.msprof_analyze.advisor.result.result import OptimizeResult
+from profiler.msprof_analyze.advisor.display.html.priority_background_color import PriorityBackgroundColor
+from profiler.msprof_analyze.advisor.display.html.render import HTMLRender
+
+logger = logging.getLogger()
+
+
+class AICorePerformanceAnalyzer(BaseAnalyzer):
+    """Analyzer that runs AICorePerformanceChecker on the profiling dataset and collects its output."""
+    dataset_cls_list = [ProfilingDataset]
+
+    def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None:
+        super().__init__(collection_path, n_processes, **kwargs)
+        self.profiling_dataset = self.get_first_data_by_key(self.dataset_list, ProfilingDataset.get_key())
+        self.result = OptimizeResult()
+        self.html_render = HTMLRender()
+        # Holds the return value of the checker's make_render once optimize() found issues.
+        self.html = None
+
+    def optimize(self, **kwargs):
+        """
+        Filter the operators, run the checks and record/render the findings.
+
+        :Param kwargs: "add_render_list" (defaults to True) and "rank" are forwarded to the renderer
+        :Return: self.result, left untouched when the filter step selects no operator
+        """
+        checker = AICorePerformanceChecker()
+        checker.data_filter(self.profiling_dataset)
+        if checker.ai_core_performance_issues:
+            checker.check_ai_core_performance(self.profiling_dataset)
+            checker.make_record(self.result)
+            self.html = checker.make_render(
+                self.html_render,
+                kwargs.get("add_render_list", True),
+                priority=self.get_priority(),
+                rank=kwargs.get("rank"),
+            )
+        return self.result
+
+    def get_priority(self, max_mem_op_dur=None):
+        """Return the fixed low priority; max_mem_op_dur is accepted but not used."""
+        return PriorityBackgroundColor.low
\ No newline at end of file
diff --git a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3c3defc667726ff8b370cd6ab57e52f7cc6c5e3
--- /dev/null
+++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -0,0 +1,580 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import os
+from functools import reduce
+
+# The advisor modules are imported through the same package path the sibling analyzer module uses.
+from profiler.msprof_analyze.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
+from profiler.msprof_analyze.advisor.result.item import OptimizeItem, OptimizeRecord
+from profiler.msprof_analyze.advisor.result.result import OptimizeResult
+# NOTE(review): confirm that profiler.prof_common is still the right package for these two helpers.
+from profiler.prof_common.additional_args_manager import AdditionalArgsManager
+from profiler.prof_common.file_manager import FileManager
+
+logger = logging.getLogger()
+
+
+class AICorePerformanceChecker:
+ """
+ operator performance checker
+ """
+ _CHECKER = "AICorePerformanceChecker"
+ CUBE_OPERATOR_MEMORY_SIZE_MB = 100
+ INNER_AXIS_256 = 256
+ INNER_AXIS_128 = 128
+
+    def __init__(self):
+        """Start with empty findings and load the rule texts and thresholds."""
+        # Findings per category, filled by check_ai_core_performance.
+        self.result = {}
+        self.ai_core_performance_issues = False
+        self.desc = ""
+        # Operators selected by data_filter: op name -> set of shape keys.
+        self.cube_dict, self.fa_dict, self.vector_dict = {}, {}, {}
+        # The FlashAttention operator objects behind fa_dict.
+        self.fa_list = []
+        self.load_aicore_perf_rules()
+
+    def load_aicore_perf_rules(self):
+        """
+        Read the language-specific aicore_performance.yaml and cache its entries on the instance.
+
+        The file is looked up in rules/<language>/ below the directory four levels above this module.
+        Keys that are absent from the file are stored as None.
+        """
+        language = AdditionalArgsManager().language
+        rule_path = os.path.join(
+            os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))),
+            "rules",
+            language,
+            "aicore_performance.yaml"
+        )
+
+        if not os.path.exists(rule_path):
+            # The message used to name "aicpu", which is a different checker.
+            logger.warning("Skip analyze ai core performance issues, because %s does not exist.", rule_path)
+
+        self.language = language
+        # NOTE(review): the read is still attempted for a missing file, exactly as before; the outcome
+        # depends on FileManager.read_yaml_file - confirm whether an early exit is wanted here.
+        # A falsy result is replaced by an empty mapping so the lookups below cannot fail on it.
+        self.aicore_rules = FileManager.read_yaml_file(rule_path) or {}
+        rules = self.aicore_rules
+        self._CUBE_PROBLEM = rules.get("cube_problem")
+        self._FA_PROBLEM = rules.get("fa_problem")
+        self._VECTOR_PROBLEM = rules.get("vector_problem")
+        self.desc = rules.get("description")
+        self._BOUND_DESC = rules.get("bound_description")
+        self._OPTI_DESC = rules.get("optimization_description")
+        self._AFFINITY_DESC = rules.get("affinity_description")
+        self._CUBE_AFFINITY_DESC = rules.get("cube_affinity_desc")
+        self._FA_AFFINITY_DESC_TYPE1 = rules.get("fa_affinity_desc_type1")
+        self._FA_AFFINITY_DESC_TYPE2 = rules.get("fa_affinity_desc_type2")
+        self._FA_AFFINITY_DESC_TYPE3 = rules.get("fa_affinity_desc_type3")
+        self.suggestion = rules.get("suggestion")
+        self._AFFINITY_SUGGESTION = rules.get("affinity_suggestion")
+        self._BOUND_SUGGESTION = rules.get("bound_suggestion")
+        self._OPTI_SUGGESTION = rules.get("optimization_suggestion")
+        # Threshold rules per operator category, each expected to be a list of rule mappings.
+        self._OPERATOR_RULES = {"cube_operators": rules.get("cube_operators"),
+                                "fa_operators": rules.get("fa_operators"),
+                                "vector_operators": rules.get("vector_operators")}
+
+    def data_filter(self, profiling_dataset: ProfilingDataset):
+        """
+        Select the cube, FA and vector operators that are worth analysing.
+
+        Fills self.cube_dict, self.fa_dict / self.fa_list and self.vector_dict and sets
+        self.ai_core_performance_issues when at least one of the three dicts is non-empty.
+
+        :Param profiling_dataset: dataset whose op_summary.op_list holds the profiled operators
+        """
+        if not self.check_task_list(profiling_dataset):
+            return
+
+        operator_list = profiling_dataset.op_summary.op_list
+        total_duration = sum(float(operator.task_duration) for operator in operator_list)
+        cube_memory_dict = {}
+        vector_type_dict = {}
+
+        for op in operator_list:
+            shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1]
+            # preliminary filter cube operator: sum the memory estimate per op name and shape key
+            if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower():
+                memory_by_shape = cube_memory_dict.setdefault(op.op_name, {})
+                memory_by_shape[shapes] = memory_by_shape.get(shapes, 0) + self.memory_size(op)
+                continue
+
+            # preliminary filter vector operator: group the operators by op type
+            if op.task_type in ("AI_VECTOR_CORE", "MIX_AIV"):
+                vector_type_dict.setdefault(op.op_type, set()).add(op)
+                continue
+
+            # filter fa operator: backward operators get a "-grad" suffix on their shape key
+            if op.op_type == "FlashAttentionScore":
+                self.fa_dict.setdefault(op.op_name, set()).add(shapes)
+                self.fa_list.append(op)
+            elif op.op_type == "FlashAttentionScoreGrad":
+                self.fa_dict.setdefault(op.op_name, set()).add(shapes + "-grad")
+                self.fa_list.append(op)
+
+        # filter cube operator: keep the shapes whose summed estimate reaches the size limit
+        for op_name, memory_by_shape in cube_memory_dict.items():
+            for shapes, memory in memory_by_shape.items():
+                if memory >= self.CUBE_OPERATOR_MEMORY_SIZE_MB:
+                    self.cube_dict.setdefault(op_name, set()).add(shapes)
+
+        # filter vector operator: keep op types that take at least 1% of the total duration
+        # or whose summed duration reaches 1000000
+        for vector_ops in vector_type_dict.values():
+            duration_group_by_time = sum(float(op.task_duration) for op in vector_ops)
+            # A profile whose durations are all zero must not divide by zero; it has no share at all.
+            duration_share = duration_group_by_time / total_duration if total_duration else 0.
+            if duration_share >= 0.01 or duration_group_by_time >= 1000000:
+                for op in vector_ops:
+                    shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1]
+                    self.vector_dict.setdefault(op.op_name, set()).add(shapes)
+
+        if any([self.cube_dict, self.fa_dict, self.vector_dict]):
+            self.ai_core_performance_issues = True
+
+    @staticmethod
+    def memory_size(operator):
+        """
+        Estimate the memory footprint of an operator from its input and output shapes.
+
+        The first and last character of both shape strings are dropped, tensors are separated by ";"
+        and dimensions by ",". Empty segments (for example after a trailing ";") are skipped.
+
+        :Param operator: object with the string attributes input_shapes and output_shapes
+        :Return: element count * 2 / 1024 / 1024 (presumably 2 bytes per element expressed in MB - confirm)
+        """
+        def element_count(shape):
+            count = 1
+            for dim in shape.split(","):
+                count *= int(dim)
+            return count
+
+        memory = 0
+        for shape in operator.input_shapes[1:-1].split(";"):
+            if not shape:
+                continue
+            if "," not in shape:
+                # a one-dimensional input is taken to be the bias and is counted twice
+                memory += int(shape) * 2
+                continue
+            memory += element_count(shape)
+        # Every output tensor is counted, not only a single one.
+        for shape in operator.output_shapes[1:-1].split(";"):
+            if shape:
+                memory += element_count(shape)
+
+        return memory * 2 / 1024 / 1024
+
+    def check_ai_core_performance(self, promoting_dataset: ProfilingDataset):
+        """
+        Run the cube, fa and vector checks and store their findings in self.result.
+
+        A category whose check fails is logged and stored as an empty list. When no category
+        produced a finding, self.ai_core_performance_issues is reset to False.
+
+        :Param promoting_dataset: dataset of operator performance from kernel_details.csv
+        """
+        checks = (
+            ("cube", self.check_cube_operator),
+            ("fa", self.check_fa_operator),
+            ("vector", self.check_vector_operator),
+        )
+        for category, check in checks:
+            try:
+                self.result[category] = check(promoting_dataset)
+            # KeyError/TypeError cover missing or malformed threshold rules, so one broken
+            # category cannot abort the other two.
+            except (IndexError, ValueError, AttributeError, KeyError, TypeError, ZeroDivisionError) as e:
+                logger.error(f"Failed to check ai core performance {category} operator, {e}.")
+                self.result[category] = []
+
+        # Each result is a list of queues; a list of empty queues is itself truthy, so the
+        # queues have to be inspected one level down.
+        if not any(any(self.result[category]) for category, _ in checks):
+            self.ai_core_performance_issues = False
+
+    def check_cube_operator(self, profiling_dataset: ProfilingDataset):
+        """
+        Classify every selected cube operator shape.
+
+        Shapes that fail the inner-axis check go to the affinity queue. The others are averaged
+        over their aic_mac_ratio / aic_mte2_ratio samples and compared with the "cube_operators"
+        thresholds: reaching a threshold puts them in the bound queue, otherwise they go to the
+        optimization queue with the largest gap to a threshold (in percent).
+
+        :Param profiling_dataset: dataset whose op_summary.op_list holds the profiled operators
+        :Return: [optimization_queue, bound_queue, affinity_queue], each sorted descending, top five
+        """
+        cube_dict = self.cube_dict
+        suggestion = self._CUBE_AFFINITY_DESC
+        optimization_queue, bound_queue, affinity_queue = [], [], []
+        operator_list = self._get_operator_list(cube_dict, profiling_dataset)
+
+        # The thresholds are the same for every operator, so they are resolved once.
+        aic_mac_ratio_rule, aic_mte2_ratio_rule = None, None
+        for operator_rule in self._OPERATOR_RULES["cube_operators"] or []:
+            if operator_rule["target"] == "aic_mac_ratio":
+                aic_mac_ratio_rule = operator_rule
+            elif operator_rule["target"] == "aic_mte2_ratio":
+                aic_mte2_ratio_rule = operator_rule
+
+        for op, shapes in cube_dict.items():
+            for shape in shapes:
+                affinity_flag = self._check_cube_inner_axis(shape)
+                shape_list = [operator for operator in operator_list if
+                              operator.op_name == op and
+                              operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape]
+                if not affinity_flag:
+                    affinity_queue.append({
+                        "op_name": op,
+                        "shape": shape.split("-")[0],
+                        "dtype": shape_list[-1].input_data_types if shape_list else None,
+                        "duration": sum((float(operator.task_duration) for operator in shape_list), 0.),
+                        "suggestion": suggestion})
+                    continue
+
+                shape_duration = sum(float(operator.task_duration) for operator in shape_list)
+                dtype = shape_list[0].input_data_types if shape_list else None
+                mac_total, mte2_total, length = 0., 0., 0
+                for operator in shape_list:
+                    # Parse both ratios before adding either, so a sample with one unparsable
+                    # value does not leak its other value into the sums.
+                    try:
+                        mac_value = float(operator.aic_mac_ratio)
+                        mte2_value = float(operator.aic_mte2_ratio)
+                    except ValueError:
+                        continue
+                    mac_total += mac_value
+                    mte2_total += mte2_value
+                    length += 1
+                aic_mac_ratio = self.safe_divide(mac_total, length)
+                aic_mte2_ratio = self.safe_divide(mte2_total, length)
+                if aic_mac_ratio is None or aic_mte2_ratio is None:
+                    continue
+
+                bound = ""
+                optimization = 0.
+                if (aic_mac_ratio >= aic_mac_ratio_rule["threshold"]
+                        and aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]):
+                    bound = aic_mac_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound"
+                # The mac ratio is compared with its own threshold, not with the mte2 one.
+                elif aic_mac_ratio >= aic_mac_ratio_rule["threshold"]:
+                    bound = aic_mac_ratio_rule["bound"]
+                elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]:
+                    bound = aic_mte2_ratio_rule["bound"]
+                else:
+                    optimization = max(aic_mac_ratio_rule["threshold"] - aic_mac_ratio,
+                                       aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio)
+                if bound:
+                    bound_queue.append({
+                        "op_name": op,
+                        "shape": shape.split("-")[0],
+                        "dtype": dtype,
+                        "bound": bound,
+                        "duration": shape_duration})
+                else:
+                    optimization_queue.append({
+                        "op_name": op,
+                        "shape": shape.split("-")[0],
+                        "dtype": dtype,
+                        "optimization": round(optimization * 100, 2)})
+        return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
+                sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
+                sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+
+    @staticmethod
+    def _get_operator_list(cube_dict, profiling_dataset):
+        """Return, in op_list order, the operators whose name and shape key were selected in cube_dict."""
+        return [
+            candidate
+            for candidate in profiling_dataset.op_summary.op_list
+            if candidate.op_name in cube_dict
+            and candidate.input_shapes[1:-1] + "-" + candidate.output_shapes[1:-1] in cube_dict[candidate.op_name]
+        ]
+
+    def _check_cube_inner_axis(self, shape):
+        """
+        Tell whether the inner axes of the first two input tensors are multiples of 256.
+
+        :Param shape: shape key "<inputs>-<outputs>", tensors separated by ";" and dims by ","
+        :Return: True when both inner axes are divisible by INNER_AXIS_256
+        """
+        input_tensors = shape.split("-")[0].split(";")
+        first_dims = input_tensors[0].split(",")
+        if len(first_dims) == 4:
+            # NZ format: the inner axis is the product of the second and third dimension
+            first_inner = int(first_dims[1]) * int(first_dims[2])
+            second_dims = input_tensors[1].split(",")
+            second_inner = int(second_dims[1]) * int(second_dims[2])
+        else:
+            # ND format: the inner axis is the second dimension
+            first_inner = int(first_dims[1])
+            second_inner = int(input_tensors[1].split(",")[1])
+        return (first_inner % self.INNER_AXIS_256 == 0) and (second_inner % self.INNER_AXIS_256 == 0)
+
+    def check_fa_operator(self, profiling_dataset: ProfilingDataset):
+        """
+        Classify every selected FlashAttention operator shape.
+
+        Shapes flagged by _check_fa_inner_axis go to the affinity queue. The others are averaged
+        over their samples and compared with the "fa_operators" thresholds: backward ("-grad")
+        shapes use aic_fixpipe_ratio and aic_mte2_ratio, forward shapes use aiv_vec_ratio and
+        aic_mte2_ratio.
+
+        :Param profiling_dataset: not read here; the operators come from self.fa_list
+        :Return: [optimization_queue, bound_queue, affinity_queue], each sorted descending, top five
+        """
+        fa_list = self.fa_list
+        fa_dict = self.fa_dict
+        optimization_queue, bound_queue, affinity_queue = [], [], []
+        # Thresholds do not depend on the operator; a later duplicate target overrides an earlier one.
+        rules_by_target = {rule["target"]: rule for rule in self._OPERATOR_RULES["fa_operators"] or []}
+        for op in fa_dict:
+            for shape in fa_dict[op]:
+                affinity_flag, dtype, shape_duration, suggestion = self._check_fa_inner_axis(fa_list, op, shape)
+                if affinity_flag:
+                    # non-affine operator: report it with the duration computed by the helper
+                    affinity_queue.append({
+                        "op_name": op,
+                        "shape": shape.split("-")[0],
+                        "dtype": dtype,
+                        "suggestion": suggestion,
+                        "duration": shape_duration})
+                    continue
+
+                # bound / optimization handling: a backward key has a third "-grad" component
+                is_grad = len(shape.split("-")) > 2
+                paired_target = "aic_fixpipe_ratio" if is_grad else "aiv_vec_ratio"
+                key_suffix = "-grad" if is_grad else ""
+                paired_total, mte2_total, length = 0., 0., 0
+                for operator in fa_list:
+                    if not (operator.op_name == op and
+                            operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] + key_suffix == shape):
+                        continue
+                    # Parse everything first so an unparsable sample contributes to no sum at all.
+                    try:
+                        paired_value = float(getattr(operator, paired_target))
+                        mte2_value = float(operator.aic_mte2_ratio)
+                        duration = float(operator.task_duration)
+                    except ValueError:
+                        continue
+                    paired_total += paired_value
+                    mte2_total += mte2_value
+                    shape_duration += duration
+                    # The data type is recorded for forward operators as well.
+                    dtype = operator.input_data_types
+                    length += 1
+                paired_ratio = self.safe_divide(paired_total, length)
+                aic_mte2_ratio = self.safe_divide(mte2_total, length)
+                if paired_ratio is None or aic_mte2_ratio is None:
+                    continue
+
+                paired_rule = rules_by_target.get(paired_target)
+                aic_mte2_ratio_rule = rules_by_target.get("aic_mte2_ratio")
+                bound, optimization = "", 0.
+                mte2_reached = aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]
+                paired_reached = paired_ratio >= paired_rule["threshold"]
+                if mte2_reached and paired_reached:
+                    # the combined label lists the fixpipe bound first for backward operators
+                    # and the mte2 bound first for forward operators
+                    if is_grad:
+                        bound = paired_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound"
+                    else:
+                        bound = aic_mte2_ratio_rule["bound"] + "_and_" + paired_rule["bound"] + "_bound"
+                elif mte2_reached:
+                    bound = aic_mte2_ratio_rule["bound"]
+                elif paired_reached:
+                    bound = paired_rule["bound"]
+                else:
+                    optimization = max(paired_rule["threshold"] - paired_ratio,
+                                       aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio)
+                if bound:
+                    bound_queue.append({
+                        "op_name": op,
+                        "shape": shape.split("-")[0],
+                        "dtype": dtype,
+                        "bound": bound,
+                        "duration": shape_duration})
+                else:
+                    optimization_queue.append({
+                        "op_name": op,
+                        "shape": shape.split("-")[0],
+                        "dtype": dtype,
+                        "optimization": round(optimization * 100, 2)})
+
+        return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
+                sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
+                sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+
+    def _check_fa_inner_axis(self, fa_list, op, shape):
+        """
+        Check whether a FlashAttention shape is non-affine (an axis is not a multiple of 128).
+
+        :Param fa_list: FlashAttention operators collected by data_filter
+        :Param op: operator name; names containing "varlen" use the variable-length rule
+        :Param shape: shape key "<inputs>-<outputs>", with a trailing "-grad" for backward operators
+        :Return: (affinity_flag, dtype, shape_duration, suggestion); affinity_flag is True when the
+                 shape is NOT affine, and only then are dtype and shape_duration filled in
+        """
+        shape_duration = 0.
+        affinity_flag = False
+        dtype = None
+        suggestion = ""
+        key_parts = shape.split("-")
+        input_first_tensor = key_parts[0].split(";")[0].split(",")
+        if "varlen" in op.lower():
+            # variable-length operator: affinity_flag becomes True when the third dim is not affine
+            inner_axis = int(input_first_tensor[2])
+            if inner_axis % self.INNER_AXIS_128 != 0:
+                affinity_flag = True
+                suggestion = self._FA_AFFINITY_DESC_TYPE1
+        else:
+            # fixed-length operator: affinity_flag becomes True when head_dim and/or seq_len is not affine
+            output_first_tensor = key_parts[1].split(";")[0].split(",")
+            seq_len = int(output_first_tensor[2])
+            if len(input_first_tensor) == 3:
+                # three input dims: the last one is divided by the second output dim
+                # (presumably hidden size / head count - confirm)
+                head_dim = int(input_first_tensor[2]) / int(output_first_tensor[1])
+            else:
+                head_dim = int(input_first_tensor[3])
+            if head_dim % self.INNER_AXIS_128 != 0 and seq_len % self.INNER_AXIS_128 != 0:
+                affinity_flag = True
+                suggestion = self._FA_AFFINITY_DESC_TYPE3
+            elif head_dim % self.INNER_AXIS_128 != 0:
+                affinity_flag = True
+                suggestion = self._FA_AFFINITY_DESC_TYPE1
+            elif seq_len % self.INNER_AXIS_128 != 0:
+                affinity_flag = True
+                suggestion = self._FA_AFFINITY_DESC_TYPE2
+
+        if affinity_flag:
+            # Backward keys end in "-grad"; without re-adding the suffix no operator would
+            # ever match them and duration/dtype would stay empty.
+            key_suffix = "-grad" if shape.endswith("-grad") else ""
+            for operator in fa_list:
+                if (operator.op_name == op and
+                        operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] + key_suffix == shape):
+                    shape_duration += float(operator.task_duration)
+                    dtype = operator.input_data_types
+        return affinity_flag, dtype, shape_duration, suggestion
+
+    def check_vector_operator(self, profiling_dataset: ProfilingDataset):
+        """
+        Classify every selected vector operator shape.
+
+        The aiv_vec_ratio / aiv_mte2_ratio / aiv_mte3_ratio samples of a shape are averaged and
+        compared with the "vector_operators" thresholds ("total" first, then mte2, mte3, vec).
+        Shapes reaching a threshold go to the bound queue, the rest to the optimization queue
+        with the largest gap to a threshold (in percent).
+
+        :Param profiling_dataset: dataset whose op_summary.op_list holds the profiled operators
+        :Return: [optimization_queue, bound_queue], each sorted descending, top five
+        """
+        vector_dict = self.vector_dict
+        optimization_queue, bound_queue = [], []
+        vector_list = self._get_vector_list(profiling_dataset, vector_dict)
+        # Thresholds do not depend on the operator; a later duplicate target overrides an earlier one.
+        rules_by_target = {rule["target"]: rule for rule in self._OPERATOR_RULES["vector_operators"] or []}
+        aiv_vec_ratio_rule = rules_by_target.get("aiv_vec_ratio")
+        aiv_mte2_ratio_rule = rules_by_target.get("aiv_mte2_ratio")
+        aiv_mte3_ratio_rule = rules_by_target.get("aiv_mte3_ratio")
+        total_rule = rules_by_target.get("total")
+
+        for op_name, shapes in vector_dict.items():
+            for shape in shapes:
+                vec_total, mte2_total, mte3_total, shape_duration, optimization = 0., 0., 0., 0., 0.
+                length = 0
+                bound, dtype = "", ""
+                for operator in vector_list:
+                    if not (operator.op_name == op_name and
+                            operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape):
+                        continue
+                    # Parse everything first so an unparsable sample contributes to no sum at all.
+                    try:
+                        vec_value = float(operator.aiv_vec_ratio)
+                        mte2_value = float(operator.aiv_mte2_ratio)
+                        mte3_value = float(operator.aiv_mte3_ratio)
+                        duration = float(operator.task_duration)
+                    except ValueError:
+                        continue
+                    vec_total += vec_value
+                    mte2_total += mte2_value
+                    mte3_total += mte3_value
+                    shape_duration += duration
+                    dtype = operator.input_data_types
+                    length += 1
+                aiv_vec_ratio = self.safe_divide(vec_total, length)
+                aiv_mte2_ratio = self.safe_divide(mte2_total, length)
+                aiv_mte3_ratio = self.safe_divide(mte3_total, length)
+                if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None:
+                    continue
+
+                if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= total_rule["threshold"]:
+                    bound = total_rule["bound"]
+                elif aiv_mte2_ratio >= aiv_mte2_ratio_rule["threshold"]:
+                    bound = aiv_mte2_ratio_rule["bound"]
+                elif aiv_mte3_ratio >= aiv_mte3_ratio_rule["threshold"]:
+                    bound = aiv_mte3_ratio_rule["bound"]
+                elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]:
+                    bound = aiv_vec_ratio_rule["bound"]
+                else:
+                    optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio,
+                                       aiv_mte2_ratio_rule["threshold"] - aiv_mte2_ratio,
+                                       aiv_mte3_ratio_rule["threshold"] - aiv_mte3_ratio)
+                if bound:
+                    bound_queue.append({
+                        "op_name": op_name,
+                        "shape": shape.split("-")[0],
+                        "bound": bound,
+                        "dtype": dtype,
+                        "duration": shape_duration})
+                else:
+                    optimization_queue.append({
+                        "op_name": op_name,
+                        "shape": shape.split("-")[0],
+                        "dtype": dtype,
+                        "optimization": round(optimization * 100, 2)})
+        return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
+                sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+
+    @staticmethod
+    def _get_vector_list(profiling_dataset, vector_dict):
+        """Collect, grouped by op name and shape key, the operators that were selected in vector_dict."""
+        return [
+            candidate
+            for op_name, shapes in vector_dict.items()
+            for shape in shapes
+            for candidate in profiling_dataset.op_summary.op_list
+            if candidate.op_name == op_name
+            and candidate.input_shapes[1:-1] + "-" + candidate.output_shapes[1:-1] == shape
+        ]
+
+    def draw_record(self, op_type: str, result: OptimizeResult):
+        """
+        Add the findings of one category to the result as a record plus detail rows.
+
+        :Param op_type: "cube", "fa" or "vector"
+        :Param result: OptimizeResult receiving the record, the table headers and one row per
+                       non-empty finding group
+        """
+        problem = {
+            'cube': self._CUBE_PROBLEM,
+            'fa': self._FA_PROBLEM,
+            'vector': self._VECTOR_PROBLEM
+        }[op_type]
+        result.add(OptimizeRecord(OptimizeItem(problem, self.desc, [self.suggestion])))
+        result.add_detail(problem, headers=["Type", "Description and Suggestion"])
+
+        optimization_text = "".join(self._OPTI_SUGGESTION.format(**issue) for issue in self.result[op_type][0])
+        if optimization_text:
+            result.add_detail(problem, detail=[self._OPTI_DESC, optimization_text])
+
+        bound_text = "".join(self._BOUND_SUGGESTION.format(**issue) for issue in self.result[op_type][1])
+        if bound_text:
+            result.add_detail(problem, detail=[self._BOUND_DESC, bound_text])
+
+        # the vector category has no affinity suggestions
+        if op_type != "vector":
+            affinity_text = "".join(
+                self._AFFINITY_SUGGESTION.format(**issue) for issue in self.result[op_type][2])
+            if affinity_text:
+                result.add_detail(problem, detail=[self._AFFINITY_DESC, affinity_text])
+
+    def make_record(self, result: OptimizeResult):
+        """
+        make record for what and how to optimize
+
+        :Return: the (falsy) issue flag when nothing was found, otherwise True after every
+                 category with at least one non-empty queue has been written to result
+        """
+        if not self.ai_core_performance_issues:
+            return self.ai_core_performance_issues
+
+        for category in ("cube", "fa", "vector"):
+            if any(self.result[category]):
+                self.draw_record(category, result)
+        return True
+
+    def make_render(self, html_render, add_render_list=True, **kwargs):
+        """
+        Render the findings with the ai_core_performance.html template.
+
+        :Param html_render: object providing render_template
+        :Param add_render_list: forwarded unchanged to render_template
+        :Param kwargs: "priority" and "rank" are forwarded to render_template
+        :Return: the (falsy) issue flag when nothing was found, otherwise what render_template returns
+        """
+        if self.ai_core_performance_issues:
+            render_arguments = {
+                "key": "computation",
+                "template_dir": "templates",
+                "template_name": "ai_core_performance.html",
+                "format_result": self.result,
+                "language": self.language,
+                "add_render_list": add_render_list,
+                "priority_background_color": kwargs.get("priority"),
+                "rank": kwargs.get("rank"),
+            }
+            return html_render.render_template(**render_arguments)
+        return self.ai_core_performance_issues
+
+    def check_task_list(self, profiling_dataset: ProfilingDataset) -> bool:
+        """
+        Check that the dataset carries the operator data this checker reads.
+
+        :Param profiling_dataset: dataset expected to expose op_summary.op_list
+        :Return: True when op_list is non-empty and its first operator has input_shapes and
+                 input_data_types; otherwise a warning is logged and False is returned
+        """
+        if not hasattr(profiling_dataset, "op_summary"):
+            logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary")
+            return False
+        if not hasattr(profiling_dataset.op_summary, "op_list"):
+            logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
+            return False
+        op_list = profiling_dataset.op_summary.op_list
+        # An empty operator list has no first element to inspect; treat it like missing data.
+        if not op_list:
+            logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
+            return False
+        if (not hasattr(op_list[0], "input_shapes") or
+                not hasattr(op_list[0], "input_data_types")):
+            logger.warning("Skip %s checker because of not containing input datas", self._CHECKER)
+            return False
+        return True
+
+    @staticmethod
+    def safe_divide(numerator, denominator):
+        """Return numerator / denominator, or None (after logging a warning) for a zero denominator."""
+        if denominator != 0:
+            return numerator / denominator
+        logger.warning("Warning: Division by zero is not allowed.")
+        return None
diff --git a/profiler/msprof_analyze/advisor/common/analyzer_scopes.py b/profiler/msprof_analyze/advisor/common/analyzer_scopes.py
index 07ceef769440b39c93aeaaf15ded5ad99fc3f4b3..683dc575d02ef3def213001e66cd47d42d88d263 100644
--- a/profiler/msprof_analyze/advisor/common/analyzer_scopes.py
+++ b/profiler/msprof_analyze/advisor/common/analyzer_scopes.py
@@ -41,3 +41,4 @@ class SupportedScopes:
FUSIBLE_OPERATOR_ANALYSIS = "fusible_operator_analysis"
CONJECTURED_GC_ANALYSIS = "conjectured_analysis"
COMPARISON = "comparison"
+ AICORE_PERFORMANCE_ANALYSIS = "ai_core_performance_analysis"
diff --git a/profiler/msprof_analyze/advisor/display/html/templates/ai_core_performance.html b/profiler/msprof_analyze/advisor/display/html/templates/ai_core_performance.html
new file mode 100644
index 0000000000000000000000000000000000000000..77e5e0cb55200efdf5b854e03ac2844ddc631a8f
--- /dev/null
+++ b/profiler/msprof_analyze/advisor/display/html/templates/ai_core_performance.html
@@ -0,0 +1,159 @@
+{% if format_result|length > 0 %}
+
+
+
+ {% if language == "cn" %}
+ {% set title_ns = namespace(type='类别', desc='描述及建议', opti_set='性能优化算子集合', bound_set='bound算子集合', affinity_set='不亲和算子集合',
+ opti_refer=' 参考性能优化空间: ', bound_refer=' bound类型为: ', affinity_refer=' 不亲和类型为: ', title_desc='算子相关分析,参考如下: ') %}
+ {% else %}
+ {% set title_ns = namespace(type='Type', desc='Description and Suggestion', opti_set='set of performance optimization operators',
+ bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ',
+ bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %}
+ {% endif %}
+ {% if format_result.cube[0]|length + format_result.cube[1]|length + format_result.cube[2]|length > 0 %}
+
MatMul{{ title_ns.title_desc }}
+
+
+
+ {{ title_ns.type }} |
+ {{ title_ns.desc }} |
+
+ {% set opti_ns = namespace(total_opti='') %}
+ {% for opti in format_result.cube[0] %}
+ {% if not loop.first %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
+ {% else %}
+ {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
+ {% endif %}
+ {% endfor %}
+ {% if opti_ns.total_opti|length > 0 %}
+
+ {{ title_ns.opti_set }} |
+ {{ opti_ns.total_opti | safe }} |
+
+ {% endif %}
+ {% set bound_ns = namespace(total_bound='') %}
+ {% for bound in format_result.cube[1] %}
+ {% if not loop.first %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% else %}
+ {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% endif %}
+ {% endfor %}
+ {% if bound_ns.total_bound|length > 0 %}
+
+ {{ title_ns.bound_set }} |
+ {{ bound_ns.total_bound | safe }} |
+
+ {% endif %}
+ {% set affinity_ns = namespace(total_affinity='') %}
+ {% for affinity in format_result.cube[2] %}
+ {% if not loop.first %}
+ {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% else %}
+ {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% endif %}
+ {% endfor %}
+ {% if affinity_ns.total_affinity|length > 0 %}
+
+ {{ title_ns.affinity_set }} |
+ {{ affinity_ns.total_affinity | safe }} |
+
+ {% endif %}
+
+ {% endif %}
+
+ {% if format_result.fa[0]|length + format_result.fa[1]|length + format_result.fa[2]|length > 0 %}
+
FA{{ title_ns.title_desc }}
+
+
+
+ {{ title_ns.type }} |
+ {{ title_ns.desc }} |
+
+ {% set opti_ns = namespace(total_opti='') %}
+ {% for opti in format_result.fa[0] %}
+ {% if not loop.first %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
+ {% else %}
+ {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
+ {% endif %}
+ {% endfor %}
+ {% if opti_ns.total_opti|length > 0 %}
+
+ {{ title_ns.opti_set }} |
+ {{ opti_ns.total_opti | safe }} |
+
+ {% endif %}
+ {% set bound_ns = namespace(total_bound='') %}
+ {% for bound in format_result.fa[1] %}
+ {% if not loop.first %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% else %}
+ {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% endif %}
+ {% endfor %}
+ {% if bound_ns.total_bound|length > 0 %}
+
+ {{ title_ns.bound_set }} |
+ {{ bound_ns.total_bound | safe }} |
+
+ {% endif %}
+ {% set affinity_ns = namespace(total_affinity='') %}
+ {% for affinity in format_result.fa[2] %}
+ {% if not loop.first %}
+ {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% else %}
+ {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% endif %}
+ {% endfor %}
+ {% if affinity_ns.total_affinity|length > 0 %}
+
+ {{ title_ns.affinity_set }} |
+ {{ affinity_ns.total_affinity | safe }} |
+
+ {% endif %}
+
+ {% endif %}
+
+ {% if format_result.vector[0]|length + format_result.vector[1]|length > 0 %}
+
Vector{{ title_ns.title_desc }}
+
+
+
+ {{ title_ns.type }} |
+ {{ title_ns.desc }} |
+
+ {% set opti_ns = namespace(total_opti='') %}
+ {% for opti in format_result.vector[0] %}
+ {% if not loop.first %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
+ {% else %}
+ {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
+ {% endif %}
+ {% endfor %}
+ {% if opti_ns.total_opti|length > 0 %}
+
+ {{ title_ns.opti_set }} |
+ {{ opti_ns.total_opti | safe }} |
+
+ {% endif %}
+ {% set bound_ns = namespace(total_bound='') %}
+ {% for bound in format_result.vector[1] %}
+ {% if not loop.first %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% else %}
+ {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% endif %}
+ {% endfor %}
+ {% if bound_ns.total_bound|length > 0 %}
+
+ {{ title_ns.bound_set }} |
+ {{ bound_ns.total_bound | safe }} |
+
+ {% endif %}
+
+ {% endif %}
+
+
+{% endif %}
\ No newline at end of file
diff --git a/profiler/msprof_analyze/advisor/interface/interface.py b/profiler/msprof_analyze/advisor/interface/interface.py
index b3afefee57c8c62030af17130f79413238588f8f..cce2de62522bacc0d9bd26b3b14821409871a5f4 100644
--- a/profiler/msprof_analyze/advisor/interface/interface.py
+++ b/profiler/msprof_analyze/advisor/interface/interface.py
@@ -44,6 +44,8 @@ from msprof_analyze.advisor.analyzer.schedule.gc.gc_analyzer import GcAnalyzer
from msprof_analyze.advisor.analyzer.schedule.conjectured_gc.conjectured_gc_analyzer import ConjecturedGcAnalyzer
from msprof_analyze.advisor.analyzer.comparison.comparison_analyzer import ComparisonAnalyzer
from msprof_analyze.advisor.analyzer.schedule.fusible_ops.fusible_operator_analyzer import FusibleOperatorAnalyzer
+from msprof_analyze.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \
+    AICorePerformanceAnalyzer
logger = logging.getLogger()
@@ -74,7 +76,8 @@ class Interface:
SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer,
SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer,
SupportedScopes.GRAPH: FusionOPAnalyzer,
- SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer
+ SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer,
+ SupportedScopes.AICORE_PERFORMANCE_ANALYSIS: AICorePerformanceAnalyzer
}),
COMMUNICATION: OrderedDict({SupportedScopes.PACKET: PacketAnalyzer,
SupportedScopes.COMMUNICATION_RETRANSMISSION_DETECTION: RDMARetransmissionAnalyzer,
diff --git a/profiler/msprof_analyze/advisor/rules/cn/aicore_performance.yaml b/profiler/msprof_analyze/advisor/rules/cn/aicore_performance.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f60747b250a07d28b1aa33b9e2c05a06cac89e2
--- /dev/null
+++ b/profiler/msprof_analyze/advisor/rules/cn/aicore_performance.yaml
@@ -0,0 +1,48 @@
+cube_problem: "Cube算子性能分析"
+fa_problem: "FA算子性能分析"
+vector_problem: "Vector算子性能分析"
+description: "提供一些AICORE算子的参考瓶颈"
+bound_description: "bound算子集合"
+optimization_description: "性能优化算子集合"
+affinity_description: "不亲和算子集合"
+cube_affinity_desc: "内轴无法被256整除"
+fa_affinity_desc_type1: "D不能被128整除"
+fa_affinity_desc_type2: "S不能被128整除"
+fa_affinity_desc_type3: "D和S均不能被128整除"
+suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子"
+affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n"
+bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n"
+optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n"
+
+cube_operators:
+ - target: aic_mac_ratio
+ bound: mac
+ threshold: 0.8
+ - target: aic_mte2_ratio
+ bound: mte2
+ threshold: 0.95
+
+fa_operators:
+ - target: aic_mte2_ratio
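+ bound: mac  # NOTE(review): target is aic_mte2_ratio but the label is "mac" (cube_operators pairs aic_mte2_ratio with mte2) - confirm intended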
+ threshold: 0.8
+ - target: aic_fixpipe_ratio
+ bound: fixpipe
+ threshold: 0.75
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold: 0.75
+
+vector_operators:
+ - target: total
+ bound: vec_mte2_mte3
+ threshold: 0.9
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold: 0.7
+ - target: aiv_mte2_ratio
+ bound: mte2
+ threshold: 0.7
+ - target: aiv_mte3_ratio
+ bound: mte3
+ threshold: 0.7
\ No newline at end of file
diff --git a/profiler/msprof_analyze/advisor/rules/en/aicore_performance.yaml b/profiler/msprof_analyze/advisor/rules/en/aicore_performance.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1e5e4701a383be1c990f2f00013c0ee3b0cd896
--- /dev/null
+++ b/profiler/msprof_analyze/advisor/rules/en/aicore_performance.yaml
@@ -0,0 +1,48 @@
+cube_problem: "Cube operator performance analysis"
+fa_problem: "FA operator performance analysis"
+vector_problem: "Vector operator performance analysis"
+description: "Provide some reference bottlenecks for the AICORE operator"
+bound_description: "set of bound operators"
+optimization_description: "set of performance optimization operators"
+affinity_description: "set of unaffine operators"
+cube_affinity_desc: "The inner axis is not divisible by 256"
+fa_affinity_desc_type1: "D is not divisible by 128"
+fa_affinity_desc_type2: "S is not divisible by 128"
+fa_affinity_desc_type3: "Neither D nor S is divisible by 128"
+suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space"
+affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n"
+bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n"
+optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n"
+
+cube_operators:
+ - target: aic_mac_ratio
+ bound: mac
+ threshold: 0.8
+ - target: aic_mte2_ratio
+ bound: mte2
+ threshold: 0.95
+
+fa_operators:
+ - target: aic_mte2_ratio
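+ bound: mac  # NOTE(review): target is aic_mte2_ratio but the label is "mac" (cube_operators pairs aic_mte2_ratio with mte2) - confirm intended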
+ threshold: 0.8
+ - target: aic_fixpipe_ratio
+ bound: fixpipe
+ threshold: 0.75
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold: 0.75
+
+vector_operators:
+ - target: total
+ bound: vec_mte2_mte3
+ threshold: 0.9
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold: 0.7
+ - target: aiv_mte2_ratio
+ bound: mte2
+ threshold: 0.7
+ - target: aiv_mte3_ratio
+ bound: mte3
+ threshold: 0.7
\ No newline at end of file
diff --git a/profiler/msprof_analyze/cli/entrance.py b/profiler/msprof_analyze/cli/entrance.py
index 534a9b133c7e60d1442cb290490a79e9256ce43d..6a185edb974ed137a34e2e82690c5286b0194b70 100644
--- a/profiler/msprof_analyze/cli/entrance.py
+++ b/profiler/msprof_analyze/cli/entrance.py
@@ -66,5 +66,4 @@ def msprof_analyze_cli(**kwargs):
msprof_analyze_cli.add_command(analyze_cli, name="advisor")
msprof_analyze_cli.add_command(compare_cli, name="compare")
msprof_analyze_cli.add_command(cluster_cli, name="cluster")
-msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
-
+msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
\ No newline at end of file
diff --git a/profiler/msprof_analyze/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/msprof_analyze/test/ut/advisor/compute_advice/data/kernel_details.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f22cb80080b52cfc828b1b1a67ec319f6147191e
--- /dev/null
+++ b/profiler/msprof_analyze/test/ut/advisor/compute_advice/data/kernel_details.csv
@@ -0,0 +1,30 @@
+Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Time(us),Duration(us),Wait Time(us),Block Dim,Mix Block Dim,HF32 Eligible,Input Shapes,Input Data Types,Input Formats,Output Shapes,Output Data Types,Output Formats,Context ID,aicore_time(us),aic_total_cycles,aic_mac_time(us),aic_mac_ratio,aic_scalar_time(us),aic_scalar_ratio,aic_mte1_time(us),aic_mte1_ratio,aic_mte2_time(us),aic_mte2_ratio,aic_fixpipe_time(us),aic_fixpipe_ratio,aic_icache_miss_rate,aiv_time(us),aiv_total_cycles,aiv_vec_time(us),aiv_vec_ratio,aiv_scalar_time(us),aiv_scalar_ratio,aiv_mte2_time(us),aiv_mte2_ratio,aiv_mte3_time(us),aiv_mte3_ratio,aiv_icache_miss_rate,cube_utilization(%)
+19,4294967295,61653,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971558972.912 ",185.504,1.087,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.87,5295467,151.425,0.824,88.03,0.479,119.148,0.648,177.314,0.964,5.736,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,79.295
+19,4294967295,61669,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971560588.764 ",501.17,2.2,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,478.701,17233251,356.349,0.744,118.087,0.247,296.009,0.618,452.112,0.944,35.833,0.075,0.001,0,0,0,0,0,0,0,0,0,0,0,95.517
+19,4294967295,61694,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971565213.257 ",186.823,1.178,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.728,5291376,151.502,0.825,87.902,0.478,118.519,0.645,177.654,0.967,5.773,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.675
+19,4294967295,61710,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971566843.489 ",516.991,2.33,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,491.775,17703905,356.249,0.724,118.59,0.241,295.046,0.6,463.696,0.943,37.671,0.077,0.001,0,0,0,0,0,0,0,0,0,0,0,95.123
+19,4294967295,61735,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971571596.404 ",187.724,0.766,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,184.904,5325221,151.489,0.819,87.893,0.475,118.63,0.642,178.815,0.967,5.77,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.798
+19,4294967295,61751,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971573223.437 ",514.87,2.15,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,486.931,17529512,356.117,0.731,118.847,0.244,295.529,0.607,457.002,0.939,37.938,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,94.574
+19,4294967295,61776,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971577931.851 ",190.544,1.367,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,187.073,5387702,151.741,0.811,87.935,0.47,117.467,0.628,181.043,0.968,5.803,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.543
+19,4294967295,61792,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971579566.403 ",504.071,2.28,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,485.542,17479517,356.283,0.734,117.755,0.243,296.421,0.61,455.064,0.937,37.75,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,96.324
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,60679,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971411629.128 ",410.188,1.53,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,366.147,13181275,129.055,0.352,352.275,0.962,108.364,0.296,172.86,0.872,216.141,0.59,0.003,365.782,26336326,228.687,0.625,137.979,0.377,118.603,0.324,71.448,0.195,0.013,89.263
+19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339
+19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377
+19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788
+19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.525,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141
+19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.513,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294
+19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.523,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143
+19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.531,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238
+19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788
+19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0
+19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0
+19,4294967295,60711,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971416743.250 ",27.021,1.246,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.304,1749862,5.986,0.246,1.258,0.052,20.424,0.84,3.23,0.133,0.027,0
+19,4294967295,60718,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971419318.962 ",25.08,0.984,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,22.47,1617840,5.989,0.267,2.009,0.089,18.809,0.837,3.191,0.142,0.024,0
+19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0
+19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 ",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0
+19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 ",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0
+19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0
diff --git a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
new file mode 100644
index 0000000000000000000000000000000000000000..e45f6ea3b83d5927df03e73b642959a473dc522d
--- /dev/null
+++ b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -0,0 +1,90 @@
+import csv
+import os
+import shutil
+import stat
+
+import unittest
+from profiler.msprof_analyze.advisor.interface.interface import Interface
+from profiler.msprof_analyze.advisor.common.analyzer_scopes import SupportedScopes
+
+
+class TestAICorePerformanceAdvice(unittest.TestCase):
+    """Run the AI Core performance analysis on a sample kernel_details.csv.
+
+    Each test works inside a throw-away profiling directory (``TMP_DIR``) that
+    is recreated in ``setUp`` and removed again in ``tearDown``.
+    """
+
+    TMP_DIR = "./ascend_pt"
+    OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT"
+    interface = None
+    err_interface = None
+
+    def tearDown(self):
+        """Delete the temporary profiling directory and leftover "mstt*" files."""
+        if os.path.exists(self.TMP_DIR):
+            shutil.rmtree(self.TMP_DIR)
+        self.clear_htmls()
+
+    def setUp(self):
+        """Create an empty profiling directory tree for the test."""
+        if os.path.exists(self.TMP_DIR):
+            shutil.rmtree(self.TMP_DIR)
+        # OUTPUT_DIR is nested inside TMP_DIR, so one call creates both levels.
+        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
+        self.clear_htmls()
+
+    @classmethod
+    def clear_htmls(cls):
+        """Remove every entry whose name starts with "mstt" from this test's directory."""
+        current_path = os.path.dirname(os.path.abspath(__file__))
+        for filename in os.listdir(current_path):
+            if filename.startswith("mstt"):
+                os.remove(os.path.join(current_path, filename))
+
+    @classmethod
+    def copy_kernel_details(cls, path):
+        """Copy a fixture from the ``data`` directory to ``OUTPUT_DIR/kernel_details.csv``.
+
+        Args:
+            path: File name of the fixture inside the ``data`` directory that
+                sits next to this test module.
+
+        Raises:
+            FileNotFoundError: If the fixture file does not exist.
+        """
+        # Resolve the fixture relative to this module rather than the current
+        # working directory, so the test does not depend on where it is launched.
+        test_dir = os.path.dirname(os.path.abspath(__file__))
+        source_csv_path = os.path.join(test_dir, "data", path)
+        destination_csv_path = f"{cls.OUTPUT_DIR}/kernel_details.csv"
+
+        if not os.path.exists(source_csv_path):
+            raise FileNotFoundError(f"test data file not found:{source_csv_path}")
+
+        os.makedirs(cls.OUTPUT_DIR, exist_ok=True)
+        shutil.copyfile(source_csv_path, destination_csv_path)
+
+    def test_ai_core_performance_total(self):
+        """The checked result lists of the Cube, FA and Vector sections hold more than one entry."""
+        file_path = "kernel_details.csv"
+        self.copy_kernel_details(file_path)
+        interface = Interface(profiling_path=self.TMP_DIR)
+        dimension = Interface.COMPUTATION
+        scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
+        result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
+        # Section title -> how many leading "data" lists must hold more than one entry.
+        expected_lists = {
+            "Cube算子性能分析": 3,
+            "FA算子性能分析": 3,
+            "Vector算子性能分析": 2,
+        }
+        try:
+            for title, list_count in expected_lists.items():
+                section_data = result.data.get(title).get("data")
+                for index in range(list_count):
+                    with self.subTest(section=title, index=index):
+                        self.assertLess(1, len(section_data[index]))
+        finally:
+            # Clear the result even when a lookup above raises.
+            result.clear()