+ {% if language == "cn" %}
+ {% set title_ns = namespace(type='类别', desc='描述及建议', opti_set='性能优化算子集合', bound_set='bound算子集合', affinity_set='不亲和算子集合',
+ opti_refer=' 参考性能优化空间: ', bound_refer=' bound类型为: ', affinity_refer=' 不亲和类型为: ', title_desc='算子相关分析,参考如下: ') %}
+ {% else %}
+ {% set title_ns = namespace(type='Type', desc='Description and Suggestion', opti_set='set of performance optimization operators',
+ bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ',
+ bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %}
+ {% endif %}
{% if format_result.cube is not none %}
-
MatMul算子相关分析,参考如下:
+
MatMul{{ title_ns.title_desc }}
- 类别 |
- 描述及建议 |
+ {{ title_ns.type }} |
+ {{ title_ns.desc }} |
{% set opti_ns = namespace(total_opti='') %}
{% for opti in format_result.cube[0] %}
{% if not loop.first %}
- {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
{% else %}
- {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
{% endif %}
{% endfor %}
+ {% if opti_ns.total_opti|length > 0 %}
- 性能优化算子集合 |
+ {{ title_ns.opti_set }} |
{{ opti_ns.total_opti | safe }} |
+ {% endif %}
{% set bound_ns = namespace(total_bound='') %}
{% for bound in format_result.cube[1] %}
{% if not loop.first %}
- {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% else %}
- {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %}
+ {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% endif %}
{% endfor %}
+ {% if bound_ns.total_bound|length > 0 %}
- bound算子集合 |
+ {{ title_ns.bound_set }} |
{{ bound_ns.total_bound | safe }} |
+ {% endif %}
{% set affinity_ns = namespace(total_affinity='') %}
{% for affinity in format_result.cube[2] %}
{% if not loop.first %}
- {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% else %}
- {% set affinity_ns.total_affinity = affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% endif %}
{% endfor %}
+ {% if affinity_ns.total_affinity|length > 0 %}
- bound算子集合 |
+ {{ title_ns.affinity_set }} |
{{ affinity_ns.total_affinity | safe }} |
+ {% endif %}
{% endif %}
{% if format_result.fa is not none %}
-
FA算子相关分析,参考如下:
+
FA{{ title_ns.title_desc }}
- 类别 |
- 描述及建议 |
+ {{ title_ns.type }} |
+ {{ title_ns.desc }} |
{% set opti_ns = namespace(total_opti='') %}
{% for opti in format_result.fa[0] %}
{% if not loop.first %}
- {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
{% else %}
- {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
{% endif %}
{% endfor %}
+ {% if opti_ns.total_opti|length > 0 %}
- 性能优化算子集合 |
+ {{ title_ns.opti_set }} |
{{ opti_ns.total_opti | safe }} |
+ {% endif %}
{% set bound_ns = namespace(total_bound='') %}
{% for bound in format_result.fa[1] %}
{% if not loop.first %}
- {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% else %}
- {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %}
+ {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% endif %}
{% endfor %}
+ {% if bound_ns.total_bound|length > 0 %}
- bound算子集合 |
+ {{ title_ns.bound_set }} |
{{ bound_ns.total_bound | safe }} |
+ {% endif %}
{% set affinity_ns = namespace(total_affinity='') %}
{% for affinity in format_result.fa[2] %}
{% if not loop.first %}
- {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% else %}
- {% set affinity_ns.total_affinity = affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% endif %}
{% endfor %}
+ {% if affinity_ns.total_affinity|length > 0 %}
- 不亲和算子集合 |
+ {{ title_ns.affinity_set }} |
{{ affinity_ns.total_affinity | safe }} |
+ {% endif %}
{% endif %}
{% if format_result.cube is not none %}
-
Vector算子相关分析,参考如下:
+
Vector{{ title_ns.title_desc }}
- 类别 |
- 描述及建议 |
+ {{ title_ns.type }} |
+ {{ title_ns.desc }} |
{% set opti_ns = namespace(total_opti='') %}
{% for opti in format_result.vector[0] %}
{% if not loop.first %}
- {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
{% else %}
- {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
{% endif %}
{% endfor %}
+ {% if opti_ns.total_opti|length > 0 %}
- 性能优化算子集合 |
+ {{ title_ns.opti_set }} |
{{ opti_ns.total_opti | safe }} |
+ {% endif %}
{% set bound_ns = namespace(total_bound='') %}
{% for bound in format_result.vector[1] %}
{% if not loop.first %}
- {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% else %}
- {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %}
+ {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% endif %}
{% endfor %}
+ {% if bound_ns.total_bound|length > 0 %}
- bound算子集合 |
+ {{ title_ns.bound_set }} |
{{ bound_ns.total_bound | safe }} |
+ {% endif %}
{% endif %}
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index 60d813e1d..f00f0a4b7 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -1,6 +1,15 @@
-problem: "AICORE算子"
+cube_problem: "Cube算子性能分析"
+fa_problem: "FA算子性能分析"
+vector_problem: "Vector算子性能分析"
description: "提供一些AICORE算子的参考瓶颈"
+bound_description: "bound算子集合"
+optimization_description: "性能优化算子集合"
+affinity_description: "不亲和算子集合"
+cube_affinity_desc: "内轴无法被256整除"
+fa_affinity_desc_type1: "D不能被128整除"
+fa_affinity_desc_type2: "S不能被128整除"
+fa_affinity_desc_type3: "D和S均不能被128整除"
suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子"
-affinity_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 有不亲和特征: {suggestion}\n"
-bound_suggestion: "{op_name}算子 shape{shape} dtype{dtype} bound类型为: {bound} bound\n"
-optimization_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 疑似有性能优化空间,参考性能优化空间{optimization}\n"
\ No newline at end of file
+affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n"
+bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n"
+optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}\n"
\ No newline at end of file
diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml
index 247022214..28f52f1ed 100644
--- a/profiler/advisor/rules/en/aicore_performance.yaml
+++ b/profiler/advisor/rules/en/aicore_performance.yaml
@@ -1,6 +1,15 @@
-problem: "AICORE Operator"
+cube_problem: "Cube operator performance analysis"
+fa_problem: "FA operator performance analysis"
+vector_problem: "Vector operator performance analysis"
description: "Provide some reference bottlenecks for the AICORE operator"
+bound_description: "set of bound operators"
+optimization_description: "set of performance optimization operators"
+affinity_description: "set of unaffine operators"
+cube_affinity_desc: "The inner axis is not divisible by 256"
+fa_affinity_desc_type1: "D is not divisible by 128"
+fa_affinity_desc_type2: "S is not divisible by 128"
+fa_affinity_desc_type3: "Neither D nor S is divisible by 128"
suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space"
-affinity_suggestion: "{op_name} Op shape{shape} dtype{dtype} with disaffection characteristics: {suggestion}\n"
-bound_suggestion: "{op_name} Op shape{shape} dtype{dtype} bound type: {bound} bound\n"
-optimization_suggestion: "{op_name} Op shape{shape} dtype{dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
+affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n"
+bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n"
+optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py
index fa7d2421f..89ac8187d 100644
--- a/profiler/cli/entrance.py
+++ b/profiler/cli/entrance.py
@@ -69,7 +69,7 @@ msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
if __name__ == "__main__":
msprof_analyze_cli.main(
[
- "advisor","computation","-d",
- r"E:\B站\910b-33f-cpsp4-add_contiguous\train-2184159-master-0_1058382_20240910063706363_ascend_pt","-l","cn"
+ "analyze","all","-d",
+ r"D:\data\file","-l","cn"
]
)
--
Gitee
From 295e6ee731585815c0a49adb9bcfc470377cd315 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 15:36:34 +0800
Subject: [PATCH 19/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 2baa00d19..5b8e14550 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -133,12 +133,9 @@ class AICorePerformanceChecker:
"""
:Param profiling_dataset: dataset of operator performance from kernel_details.csv
"""
- if self.cube_dict:
- self.result["cube"] = self.check_cube_operator(promoting_dataset)
- if self.fa_dict:
- self.result["fa"] = self.check_fa_operator(promoting_dataset)
- if self.vector_dict:
- self.result["vector"] = self.check_vector_operator(promoting_dataset)
+ self.result["cube"] = self.check_cube_operator(promoting_dataset)
+ self.result["fa"] = self.check_fa_operator(promoting_dataset)
+ self.result["vector"] = self.check_vector_operator(promoting_dataset)
def check_cube_operator(self, profiling_dataset: ProfilingDataset):
cube_dict = self.cube_dict
--
Gitee
From 2313499b7ad30c325f21acc62db3090c74c56d91 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 16:18:06 +0800
Subject: [PATCH 20/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 27 ++++++++++---------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 5b8e14550..bbf8b3b1a 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -116,17 +116,19 @@ class AICorePerformanceChecker:
@staticmethod
def memory_size(operator):
- input_shapes = operator.input_shapes[1:-1].split(";")
memory = 0
- if len(input_shapes[0].split(",")) == 4:
- memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3])
- for shape in (shapes.split(",") for shapes in input_shapes))
- output_shape = operator.output_shapes[1:-1].split(",")
- memory += (int(output_shape[0]) * int(output_shape[1]) * int(output_shape[2]) * int(output_shape[3]))
- else:
- memory += sum(int(shape[0]) * int(shape[1]) for shape in (shapes.split(",") for shapes in input_shapes))
- output_shape = operator.output_shapes[1:-1].split(",")
- memory += (int(output_shape[0]) * int(output_shape[1]))
+ input_shapes = operator.input_shapes[1:-1].split(";")
+ for shapes in input_shapes:
+ start = 1
+ for shape in shapes.split(","):
+ start *= int(shape)
+ memory += start
+
+ output_shape = operator.output_shapes[1:-1].split(",")
+ start = 1
+ for shapes in output_shape:
+ start *= int(shapes)
+ memory += int(start)
return memory * 2 / 1024 / 1024
def check_ai_core_performance(self, promoting_dataset: ProfilingDataset):
@@ -471,8 +473,7 @@ class AICorePerformanceChecker:
if not hasattr(profiling_dataset, "op_summary"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary")
return False
- if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary,
- "op_list"):
- logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary")
+ if not not hasattr(profiling_dataset.op_summary, "op_list"):
+ logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
return False
return True
--
Gitee
From 078be76380bf4ff0db578c7e5e52fd8180aa769f Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 16:22:19 +0800
Subject: [PATCH 21/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index bbf8b3b1a..8bdc92c80 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -473,7 +473,7 @@ class AICorePerformanceChecker:
if not hasattr(profiling_dataset, "op_summary"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary")
return False
- if not not hasattr(profiling_dataset.op_summary, "op_list"):
+ if not hasattr(profiling_dataset.op_summary, "op_list"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
return False
return True
--
Gitee
From 4718bb7517d7ca0b898817b127b52cf39b2b02dc Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 16:25:56 +0800
Subject: [PATCH 22/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_analyzer.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
index 03b0a8c6e..89b6be779 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
@@ -51,4 +51,4 @@ class AICorePerformanceAnalyzer(BaseAnalyzer):
return self.result
def get_priority(self, max_mem_op_dur=None):
- return PriorityBackgroundColor.high # html 底色设置
\ No newline at end of file
+ return PriorityBackgroundColor.low
\ No newline at end of file
--
Gitee
From 4dee540ee5d220afa742c46be7da04f862d45605 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 16:50:43 +0800
Subject: [PATCH 23/72] =?UTF-8?q?=E5=86=85=E5=AD=98=E8=AE=A1=E7=AE=97?=
=?UTF-8?q?=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 8bdc92c80..647ef0c7f 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -119,6 +119,10 @@ class AICorePerformanceChecker:
memory = 0
input_shapes = operator.input_shapes[1:-1].split(";")
for shapes in input_shapes:
+ if not "," in shapes:
+ # 多的一维是 bias ,预先乘2
+ memory += int (shapes) * 2
+ continue
start = 1
for shape in shapes.split(","):
start *= int(shape)
--
Gitee
From 61390688f533d13b6f53abc6448243d66e809b54 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 17:07:52 +0800
Subject: [PATCH 24/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 647ef0c7f..f70a3c815 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -16,7 +16,6 @@ import logging
import os
from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
-from profiler.advisor.display.prompt.base_prompt import BasePrompt
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
from profiler.advisor.result.result import OptimizeResult
from profiler.prof_common.additional_args_manager import AdditionalArgsManager
@@ -119,7 +118,7 @@ class AICorePerformanceChecker:
memory = 0
input_shapes = operator.input_shapes[1:-1].split(";")
for shapes in input_shapes:
- if not "," in shapes:
+ if not "," in shapes and shapes != "":
# 多的一维是 bias ,预先乘2
memory += int (shapes) * 2
continue
@@ -184,7 +183,6 @@ class AICorePerformanceChecker:
"dtype": dtype,
"duration": shape_duration,
"suggestion": suggestion})
- continue
else:
shap_list = [operator for operator in operator_list if
operator.op_name == op and
@@ -281,7 +279,6 @@ class AICorePerformanceChecker:
"dtype": dtype,
"suggestion": suggestion,
"duration": shape_duration})
- continue
else:
# 处理bound算子和优化算子
aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0.
--
Gitee
From 4fe9ffd3d761152213dc933dbf79986aef8a0744 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Mon, 20 Jan 2025 18:26:17 +0800
Subject: [PATCH 25/72] =?UTF-8?q?=E6=B8=85=E7=90=86=E6=97=A0=E7=94=A8?=
=?UTF-8?q?=E5=86=85=E5=AE=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../advisor/analyzer/analyzer_controller.py | 33 ++++++++++---------
profiler/advisor/analyzer/base_analyzer.py | 8 ++---
.../ai_core_performance_checker.py | 10 +++---
.../computation/profiling_analyzer.py | 2 +-
4 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py
index e8a62c69d..1a5a28b63 100644
--- a/profiler/advisor/analyzer/analyzer_controller.py
+++ b/profiler/advisor/analyzer/analyzer_controller.py
@@ -186,6 +186,7 @@ class AnalyzerController:
return True
+
@staticmethod
def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data,
headers, dimension, get_max=False):
@@ -255,10 +256,10 @@ class AnalyzerController:
return dimensions, AsyncParams.user_total_params
def do_analysis(self, dimensions, **kwargs):
- pid = os.getpid() # 获取当前进程的pid
+ pid = os.getpid()
resp = {"id": pid}
- self.args_manager = AdditionalArgsManager() # 初始化参数管理器
- self.args_manager.init(kwargs) # 初始化参数管理器
+ self.args_manager = AdditionalArgsManager()
+ self.args_manager.init(kwargs)
output_path = kwargs.get("output_path")
AnalyzerController._set_analysis_process_priority(pid)
@@ -277,9 +278,9 @@ class AnalyzerController:
PathManager.make_dir_safety(output_path)
Config().set_config("_work_path", output_path)
- Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") # 设置日志路径
+ Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx")
- self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) # 执行分析
+ self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs)
except Exception as e:
self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.INNER_ERROR_STATUS_CODE,
status=AsyncAnalysisStatus.FAILED, error_msg=str(e))
@@ -611,8 +612,8 @@ class AnalyzerController:
return job_list
def _do_analysis(self, dimensions, pid=0, async_resp=None, **kwargs):
- self.dimensions = dimensions # 设置分析维度
- self.kwargs = kwargs # 设置分析参数
+ self.dimensions = dimensions
+ self.kwargs = kwargs
result_list = []
profiling_path = PathManager.get_realpath(self.kwargs.get("profiling_path"))
benchmark_profiling_path = self.kwargs.get("benchmark_profiling_path")
@@ -621,7 +622,7 @@ class AnalyzerController:
benchmark_profiling_path = PathManager.get_realpath(benchmark_profiling_path)
PathManager.check_path_owner_consistent([benchmark_profiling_path])
- if not self._check_profiling_path_valid(profiling_path): # 检查profiling路径是否有效
+ if not self._check_profiling_path_valid(profiling_path):
error_msg = f"Got invalid argument '-d/--profiling_path' {profiling_path}, skip analysis"
self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg,
status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE,
@@ -629,8 +630,8 @@ class AnalyzerController:
logger.error(error_msg)
return
- if benchmark_profiling_path and not self._check_profiling_path_valid(
- benchmark_profiling_path): # 检查benchmark_profiling路径是否有效
+
+ if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path):
error_msg = (f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, "
f"skip analysis")
self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg,
@@ -639,7 +640,7 @@ class AnalyzerController:
logger.error(error_msg)
return
- self._is_cluster = self._is_cluster_profiling(profiling_path) # 判断是否是集群profiling
+ self._is_cluster = self._is_cluster_profiling(profiling_path)
if benchmark_profiling_path:
# 构建benchmark profiling的map,用于根据rank获取profiling路径,否则无法进行比对
is_benchmark_cluster = self._is_cluster_profiling(benchmark_profiling_path)
@@ -654,16 +655,16 @@ class AnalyzerController:
return
if not self._is_cluster:
- job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) # 单卡分析
+ job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path)
else:
self.slow_rank_analyzer = SlowRankAnalyzer(profiling_path, output_path=self.kwargs.get("output_path"))
self.slow_link_analyzer = SlowLinkAnalyzer(profiling_path, output_path=self.kwargs.get("output_path"))
- job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) # 集群分析
+ job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path)
- for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): # dimension: 分析维度,scope: 分析器
+ for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]):
result_list.append(
- # 获取分析结果
- interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, **kwargs)
+ interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False,
+ **kwargs)
)
for result in result_list[::-1]:
diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py
index adf82ab8a..0391eb88a 100644
--- a/profiler/advisor/analyzer/base_analyzer.py
+++ b/profiler/advisor/analyzer/base_analyzer.py
@@ -105,7 +105,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta):
def get_priority(self, max_mem_op_dur):
pass
- def identify_profiling_type(self, profiling_type_list): # 确定分析类型
+ def identify_profiling_type(self, profiling_type_list):
profiling_type = None
if self.collection_path.endswith(ASCEND_MS):
profiling_type = [elem for elem in profiling_type_list if Constant.MINDSPORE in elem][0]
@@ -134,7 +134,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta):
profiling_type = profiling_type_list[0]
return profiling_type
- def identify_profiling_version(self): # 确定分析版本
+ def identify_profiling_version(self):
profiling_version = ""
if Constant.MINDSPORE in self.profiling_type:
ascend_dirs = []
@@ -166,7 +166,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta):
self.__class__.__name__, self.kwargs.get(Constant.TORCH_VERSION), profiling_version)
return profiling_version
- def init_dataset_list(self) -> None: # 初始化数据集列表
+ def init_dataset_list(self) -> None:
dataset_cls_list = self.dataset_cls_list
if len(dataset_cls_list) == 0:
logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__)
@@ -184,7 +184,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta):
self.dataset_list[key] = []
self.dataset_list[key].append(dataset)
- def get_priority_by_time_ratio(self, dur, step_dur): # 根据时间比例确定优先级
+ def get_priority_by_time_ratio(self, dur, step_dur):
time_ratio = safe_division(dur, step_dur)
if time_ratio >= self.ANALYZER_HIGH_PRIORITY_TIME_RATIO:
return PriorityBackgroundColor.high
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index f70a3c815..1784c9ce3 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -36,12 +36,10 @@ class AICorePerformanceChecker:
self.result = dict()
self.ai_core_performance_issues = False
self.desc = ""
- self.suggestions = ""
self.cube_dict = {}
self.fa_dict = {}
self.fa_list = []
self.vector_dict = {}
- self.vector_list = []
self.load_aicore_perf_rules()
def load_aicore_perf_rules(self):
@@ -392,10 +390,10 @@ class AICorePerformanceChecker:
if not self.ai_core_performance_issues:
return self.ai_core_performance_issues
- sugg_keys = ['opti', 'bound', 'affinity']
- cube_desc = dict.fromkeys(sugg_keys, "")
- fa_desc = dict.fromkeys(sugg_keys, "")
- vector_desc = dict.fromkeys(sugg_keys, "")
+ suggestion_keys = ['opti', 'bound', 'affinity']
+ cube_desc = dict.fromkeys(suggestion_keys, "")
+ fa_desc = dict.fromkeys(suggestion_keys, "")
+ vector_desc = dict.fromkeys(suggestion_keys, "")
if self.result["cube"]:
optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion])
result.add(OptimizeRecord(optimization_item))
diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py
index bbea136f0..ccf671139 100644
--- a/profiler/advisor/analyzer/computation/profiling_analyzer.py
+++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py
@@ -115,4 +115,4 @@ class OperatorBoundAnalyzer(ProfilingAnalyzer):
class AicpuAnalyzer(ProfilingAnalyzer):
def __init__(self, collection_path, **kwargs) -> None:
super().__init__(collection_path, **kwargs)
- self.checker = AicpuChecker(self.cann_version)
\ No newline at end of file
+ self.checker = AicpuChecker(self.cann_version)
--
Gitee
From 657d436b30cc81f319f54e0a2dac783ea75b6762 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Tue, 21 Jan 2025 14:47:23 +0800
Subject: [PATCH 26/72] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=B0=E6=8D=AE?=
=?UTF-8?q?=E9=87=87=E9=9B=86=E7=AD=89=E7=BA=A7=E6=A0=A1=E9=AA=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 1784c9ce3..e2ca19405 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -73,7 +73,7 @@ class AICorePerformanceChecker:
self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion")
def data_filter(self, profiling_dataset: ProfilingDataset):
- if not self.check_task_dict(profiling_dataset):
+ if not self.check_task_list(profiling_dataset):
return
operator_list = profiling_dataset.op_summary.op_list
total_duration = sum(float(operator.task_duration) for operator in operator_list)
@@ -118,7 +118,7 @@ class AICorePerformanceChecker:
for shapes in input_shapes:
if not "," in shapes and shapes != "":
# 多的一维是 bias ,预先乘2
- memory += int (shapes) * 2
+ memory += int(shapes) * 2
continue
start = 1
for shape in shapes.split(","):
@@ -468,11 +468,16 @@ class AICorePerformanceChecker:
priority_background_color=priority,
rank=kwargs.get("rank"))
- def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool:
+ def check_task_list(self, profiling_dataset: ProfilingDataset) -> bool:
if not hasattr(profiling_dataset, "op_summary"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary")
return False
if not hasattr(profiling_dataset.op_summary, "op_list"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
return False
+ if (not hasattr(profiling_dataset.op_summary, "input_shapes") or
+ not hasattr(profiling_dataset.op_summary, "input_data_types")):
+ logger.warning("Skip %s checker because of not containing input datas, "
+ "Please use L1 and above", self._CHECKER)
+ return False
return True
--
Gitee
From e8d3759a197baaaa79a6b8143bded9e8a45db95f Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Tue, 21 Jan 2025 15:14:54 +0800
Subject: [PATCH 27/72] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=B0=E6=8D=AE?=
=?UTF-8?q?=E9=87=87=E9=9B=86=E7=AD=89=E7=BA=A7=E6=A0=A1=E9=AA=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index e2ca19405..47a90e98a 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -477,7 +477,6 @@ class AICorePerformanceChecker:
return False
if (not hasattr(profiling_dataset.op_summary, "input_shapes") or
not hasattr(profiling_dataset.op_summary, "input_data_types")):
- logger.warning("Skip %s checker because of not containing input datas, "
- "Please use L1 and above", self._CHECKER)
+ logger.warning("Skip %s checker because of not containing input datas", self._CHECKER)
return False
return True
--
Gitee
From edd6abcbf81abaad4ce119478eba7987e3ddfe42 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Tue, 21 Jan 2025 16:07:21 +0800
Subject: [PATCH 28/72] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=9D=87=E5=80=BC?=
=?UTF-8?q?=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 47a90e98a..c9f6e039f 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -281,6 +281,7 @@ class AICorePerformanceChecker:
# 处理bound算子和优化算子
aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0.
bound = ""
+ length = 0
if len(shape.split("-")) > 2:
for operator in fa_list:
if (operator.op_name == op and
@@ -290,6 +291,9 @@ class AICorePerformanceChecker:
aic_mte2_ratio += float(operator.aic_mte2_ratio)
shape_duration += float(operator.task_duration)
dtype = operator.input_data_types
+ length += 1
+ aic_fixpipe_ratio = aic_fixpipe_ratio / length
+ aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75:
bound = "mte2_and_fixpipe_bound"
elif aic_mte2_ratio >= 0.8:
@@ -305,6 +309,9 @@ class AICorePerformanceChecker:
aiv_vec_ratio += float(operator.aiv_vec_ratio)
aic_mte2_ratio += float(operator.aic_mte2_ratio)
shape_duration += float(operator.task_duration)
+ length += 1
+ aiv_vec_ratio = aiv_vec_ratio / length
+ aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75:
bound = "mte2_and_vec_bound"
elif aic_mte2_ratio >= 0.8:
@@ -346,6 +353,7 @@ class AICorePerformanceChecker:
for op_name in vector_dict:
for shape in vector_dict[op_name]:
aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0.
+ length = 0
bound, dtype = "", ""
for operator in vector_list:
if (operator.op_name == op_name and
@@ -355,6 +363,11 @@ class AICorePerformanceChecker:
aiv_mte3_ratio += float(operator.aiv_mte3_ratio)
shape_duration += float(operator.task_duration)
dtype = operator.input_data_types
+ length += 1
+ # todo 取平均值
+ aiv_vec_ratio = aiv_vec_ratio / length
+ aiv_mte2_ratio = aiv_mte2_ratio / length
+ aiv_mte2_ratio = aiv_mte2_ratio / length
if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9:
bound = "vec_mte2_mte3_bound"
elif aiv_mte2_ratio >= 0.7:
--
Gitee
From f9cfeeae8589c37cc2e491051a5bea1163eefe26 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Tue, 21 Jan 2025 16:07:45 +0800
Subject: [PATCH 29/72] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=9D=87=E5=80=BC?=
=?UTF-8?q?=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index c9f6e039f..0517083c3 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -364,7 +364,6 @@ class AICorePerformanceChecker:
shape_duration += float(operator.task_duration)
dtype = operator.input_data_types
length += 1
- # todo 取平均值
aiv_vec_ratio = aiv_vec_ratio / length
aiv_mte2_ratio = aiv_mte2_ratio / length
aiv_mte2_ratio = aiv_mte2_ratio / length
--
Gitee
From c5359dd7e27a7be5f7e61f677d0a0d1bb4ccb1b7 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Tue, 21 Jan 2025 16:12:01 +0800
Subject: [PATCH 30/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 0517083c3..1c341e4fb 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -487,8 +487,8 @@ class AICorePerformanceChecker:
if not hasattr(profiling_dataset.op_summary, "op_list"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
return False
- if (not hasattr(profiling_dataset.op_summary, "input_shapes") or
- not hasattr(profiling_dataset.op_summary, "input_data_types")):
+ if (not hasattr(profiling_dataset.op_summary.op_list, "input_shapes") or
+ not hasattr(profiling_dataset.op_summary.op_list, "input_data_types")):
logger.warning("Skip %s checker because of not containing input datas", self._CHECKER)
return False
return True
--
Gitee
From 1ceadac5a761201d7396940d0fdb600ac169306c Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Tue, 21 Jan 2025 16:37:08 +0800
Subject: [PATCH 31/72] =?UTF-8?q?Checker=E5=BC=82=E5=B8=B8=E5=A4=84?=
=?UTF-8?q?=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 25 ++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 1c341e4fb..6057df7aa 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -136,9 +136,28 @@ class AICorePerformanceChecker:
"""
:Param profiling_dataset: dataset of operator performance from kernel_details.csv
"""
- self.result["cube"] = self.check_cube_operator(promoting_dataset)
- self.result["fa"] = self.check_fa_operator(promoting_dataset)
- self.result["vector"] = self.check_vector_operator(promoting_dataset)
+ try:
+ self.result["cube"] = self.check_cube_operator(promoting_dataset)
+ except (IndexError, ValueError, AttributeError):
+ logger.error("Failed to check ai core performance, cube operator incorrect shapes value.")
+ self.result["cube"] = []
+
+ try:
+ self.result["fa"] = self.check_fa_operator(promoting_dataset)
+ except (IndexError, ValueError, AttributeError):
+ logger.error("Failed to check ai core performance, fa operator incorrect shapes value.")
+ self.result["fa"] = []
+
+ try:
+ self.result["vector"] = self.check_vector_operator(promoting_dataset)
+ except (IndexError, ValueError, AttributeError):
+ logger.error("Failed to check ai core performance, vector operator incorrect shapes value.")
+ self.result["vector"] = []
+
+ if not any([self.result["cube"], self.result["fa"], self.result["vector"]]):
+ self.ai_core_performance_issues = False
+
+
def check_cube_operator(self, profiling_dataset: ProfilingDataset):
cube_dict = self.cube_dict
--
Gitee
From d815739679f562ea492e75733b85aae4dd3cdddc Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 10:28:38 +0800
Subject: [PATCH 32/72] UT
---
.../test_ai_core_performance_advice.py | 93 +++++++++++++++++++
1 file changed, 93 insertions(+)
create mode 100644 profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
new file mode 100644
index 000000000..4782ee635
--- /dev/null
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -0,0 +1,93 @@
+import csv
+import os
+import shutil
+import stat
+
+import unittest
+from profiler.advisor.interface.interface import Interface
+from profiler.advisor.common.analyzer_scopes import SupportedScopes
+from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset
+
+
+class TestAICorePerformanceAdvice(unittest.TestCase):
+ TMP_DIR = "./ascend_pt"
+ OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT"
+ interface = None
+ err_interface = None
+
+ def tearDown(self):
+ if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
+ shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR)
+ self.clear_htmls()
+
+ def setUp(self):
+ if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
+ shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR)
+ if not os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
+ os.makedirs(TestAICorePerformanceAdvice.TMP_DIR)
+ if not os.path.exists(TestAICorePerformanceAdvice.OUTPUT_DIR):
+ os.makedirs(TestAICorePerformanceAdvice.OUTPUT_DIR)
+ self.clear_htmls()
+
+ @classmethod
+ def clear_htmls(cls):
+ current_path = os.path.dirname(os.path.abspath(__file__))
+ for filename in os.listdir(current_path):
+ # 检查文件是否以“att”开头
+ if filename.startswith("att"):
+ # 构建文件的完整路径
+ file_path = os.path.join(current_path, filename)
+ # 删除文件
+ os.remove(file_path)
+
+
+ @classmethod
+ def create_kernel_details(cls):
+ # create csv files
+ csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core',
+ 'Start Time(us)',
+ 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types',
+ 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID',
+ 'aicore_time(us)',
+ 'aic_total_cycles', 'aic_mac_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops',
+ 'aic_vector_fops',
+ 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio',
+ 'aiv_vec_int32_ratio',
+ 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops']
+ csv_row1 = [1, 4294967295, 1265, 16, 'MatMul56', 'MatMul', 'AI_CORE', "172317\t", 21.2, 261.56, 9,
+ 0,
+ '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
+ 0, 0, 0, 0, 0, 0,
+ 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
+ csv_row2 = [1, 4294967295, 1265, 16, 'Add2', 'Add', 'AI_VECTOR_CORE', "183317\t", 1.5, 261.56, 9,
+ 0,
+ '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
+ 0, 0, 0, 0, 0, 0,
+ 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
+ csv_row3 = [1, 4294967295, 1265, 16, 'MatMul57', 'MatMul', 'AI_CORE', "189233\t", 3.14, 261.56, 9, 0,
+ '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
+ 0, 0, 0, 0, 0, 0,
+ 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
+ csv_row4 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "189933\t", 3.14, 261.56, 9, 0,
+ '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
+ 2.3, 28888, 0.2, 0.1, 0.1, 0.7,
+ 0, 0, 0, 0, 0, 0, 0, 0]
+
+ with os.fdopen(os.open(f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv",
+ os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w', newline='') as fp:
+ csv_writer = csv.writer(fp)
+ csv_writer.writerow(csv_header)
+ csv_writer.writerow(csv_row1)
+ csv_writer.writerow(csv_row2)
+ csv_writer.writerow(csv_row3)
+ csv_writer.writerow(csv_row4)
+
+ def test_ai_core_performance_data(self):
+ self.create_kernel_details()
+ interface = Interface(profiling_path=self.TMP_DIR)
+ dimension = Interface.COMMUNICATION
+ scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
+ result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
+ self.assertEqual(2, len(result.data.get("带宽分析", [])))
+ self.assertEqual(1, len(result.data.get("带宽分析", []).get('data')))
+ result.clear()
\ No newline at end of file
--
Gitee
From 2dabcd38c699653dccb9cd571911df4df4532535 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 10:51:15 +0800
Subject: [PATCH 33/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 6057df7aa..ef360ee12 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -506,8 +506,8 @@ class AICorePerformanceChecker:
if not hasattr(profiling_dataset.op_summary, "op_list"):
logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list")
return False
- if (not hasattr(profiling_dataset.op_summary.op_list, "input_shapes") or
- not hasattr(profiling_dataset.op_summary.op_list, "input_data_types")):
+ if (not hasattr(profiling_dataset.op_summary.op_list[0], "input_shapes") or
+ not hasattr(profiling_dataset.op_summary.op_list[0], "input_data_types")):
logger.warning("Skip %s checker because of not containing input datas", self._CHECKER)
return False
return True
--
Gitee
From 118abaff82f2f10c610347f552c0e648bea51f1a Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 11:39:21 +0800
Subject: [PATCH 34/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../compute_advice/data/kernel_details.csv | 0
.../test_ai_core_performance_advice.py | 96 ++++++++++---------
2 files changed, 49 insertions(+), 47 deletions(-)
create mode 100644 profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
new file mode 100644
index 000000000..e69de29bb
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index 4782ee635..aef4e6ed1 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -6,7 +6,6 @@ import stat
import unittest
from profiler.advisor.interface.interface import Interface
from profiler.advisor.common.analyzer_scopes import SupportedScopes
-from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset
class TestAICorePerformanceAdvice(unittest.TestCase):
@@ -33,61 +32,64 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def clear_htmls(cls):
current_path = os.path.dirname(os.path.abspath(__file__))
for filename in os.listdir(current_path):
- # 检查文件是否以“att”开头
- if filename.startswith("att"):
+ # 检查文件是否以“mstt”开头
+ if filename.startswith("mstt"):
# 构建文件的完整路径
file_path = os.path.join(current_path, filename)
# 删除文件
os.remove(file_path)
-
@classmethod
- def create_kernel_details(cls):
- # create csv files
- csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core',
- 'Start Time(us)',
- 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types',
- 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID',
- 'aicore_time(us)',
- 'aic_total_cycles', 'aic_mac_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops',
- 'aic_vector_fops',
- 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio',
- 'aiv_vec_int32_ratio',
- 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops']
- csv_row1 = [1, 4294967295, 1265, 16, 'MatMul56', 'MatMul', 'AI_CORE', "172317\t", 21.2, 261.56, 9,
- 0,
- '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
- 0, 0, 0, 0, 0, 0,
- 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
- csv_row2 = [1, 4294967295, 1265, 16, 'Add2', 'Add', 'AI_VECTOR_CORE', "183317\t", 1.5, 261.56, 9,
- 0,
- '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
- 0, 0, 0, 0, 0, 0,
- 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
- csv_row3 = [1, 4294967295, 1265, 16, 'MatMul57', 'MatMul', 'AI_CORE', "189233\t", 3.14, 261.56, 9, 0,
- '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
- 0, 0, 0, 0, 0, 0,
- 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
- csv_row4 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "189933\t", 3.14, 261.56, 9, 0,
- '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
- 2.3, 28888, 0.2, 0.1, 0.1, 0.7,
- 0, 0, 0, 0, 0, 0, 0, 0]
+ def copy_kernel_details(cls,path):
+ # Define source and destination paths
+ source_csv_path = f"./data/{path}"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+
+ # Check if source CSV file exists
+ if not os.path.exists(source_csv_path):
+ raise FileNotFoundError(f"test data file not found:{source_csv_path}")
+
+ # Ensure the output directory exists
+ if not os.path.exists(TestAICorePerformanceAdvice.OUTPUT_DIR):
+ os.makedirs(TestAICorePerformanceAdvice.OUTPUT_DIR)
+
+ # Copy the CSV file from source to destination
+ shutil.copyfile(source_csv_path, destination_csv_path)
+
+ def test_ai_core_performance_total(self):
+ file_path = "kernel_details.csv"
+ self.copy_kernel_details(file_path)
+ interface = Interface(profiling_path=self.TMP_DIR)
+ dimension = Interface.COMMUNICATION
+ scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
+ result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
+ # TODO 测试结果验证
+ result.clear()
+
+ def test_ai_core_performance_cube_operator(self):
+ self.copy_kernel_details()
+ interface = Interface(profiling_path=self.TMP_DIR)
+ dimension = Interface.COMMUNICATION
+ scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
+ result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
+ # TODO 测试结果验证
+ result.clear()
+
+ def test_ai_core_performance_fa_operator(self):
+ self.copy_kernel_details()
+ interface = Interface(profiling_path=self.TMP_DIR)
+ dimension = Interface.COMMUNICATION
+ scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
+ result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
+ # TODO 测试结果验证
+ result.clear()
- with os.fdopen(os.open(f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv",
- os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w', newline='') as fp:
- csv_writer = csv.writer(fp)
- csv_writer.writerow(csv_header)
- csv_writer.writerow(csv_row1)
- csv_writer.writerow(csv_row2)
- csv_writer.writerow(csv_row3)
- csv_writer.writerow(csv_row4)
- def test_ai_core_performance_data(self):
- self.create_kernel_details()
+ def test_ai_core_performance_vector_operator(self):
+ self.copy_kernel_details()
interface = Interface(profiling_path=self.TMP_DIR)
dimension = Interface.COMMUNICATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
- self.assertEqual(2, len(result.data.get("带宽分析", [])))
- self.assertEqual(1, len(result.data.get("带宽分析", []).get('data')))
- result.clear()
\ No newline at end of file
+ # TODO 测试结果验证
+ result.clear()
--
Gitee
From 1eeb72ea1f7999ae8bf75be3ff6c40b1757d74e5 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 14:27:25 +0800
Subject: [PATCH 35/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../test_ai_core_performance_advice.py | 21 ++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index aef4e6ed1..ff372d2d5 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -58,27 +58,32 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def test_ai_core_performance_total(self):
file_path = "kernel_details.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMMUNICATION
+ dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
result.clear()
def test_ai_core_performance_cube_operator(self):
- self.copy_kernel_details()
+ file_path = "kernel_details_cube.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMMUNICATION
+ dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
result.clear()
def test_ai_core_performance_fa_operator(self):
- self.copy_kernel_details()
+ file_path = "kernel_details_fa.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMMUNICATION
+ dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
@@ -86,9 +91,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def test_ai_core_performance_vector_operator(self):
- self.copy_kernel_details()
+ file_path = "kernel_details_vector.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMMUNICATION
+ dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
--
Gitee
From 776aaf9db5bccc21d7a7940f55d6bc8483bf21d7 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 14:40:10 +0800
Subject: [PATCH 36/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../test_ai_core_performance_advice.py | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index ff372d2d5..ac0ba3807 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -17,7 +17,6 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def tearDown(self):
if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR)
- self.clear_htmls()
def setUp(self):
if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
@@ -58,7 +57,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def test_ai_core_performance_total(self):
file_path = "kernel_details.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
dimension = Interface.COMPUTATION
@@ -66,10 +65,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
result.clear()
+ self.clear_htmls()
def test_ai_core_performance_cube_operator(self):
file_path = "kernel_details_cube.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
dimension = Interface.COMPUTATION
@@ -77,10 +77,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
result.clear()
+ self.clear_htmls()
def test_ai_core_performance_fa_operator(self):
file_path = "kernel_details_fa.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
dimension = Interface.COMPUTATION
@@ -88,11 +89,12 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
result.clear()
+ self.clear_htmls()
def test_ai_core_performance_vector_operator(self):
file_path = "kernel_details_vector.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
dimension = Interface.COMPUTATION
@@ -100,3 +102,4 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
result.clear()
+ self.clear_htmls()
--
Gitee
From b58e60ab5c76abf6624944253db4859c48aa06b9 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 14:52:22 +0800
Subject: [PATCH 37/72] =?UTF-8?q?UT=20=E9=97=AE=E9=A2=98=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../advisor/compute_advice/test_ai_core_performance_advice.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index ac0ba3807..7849391f1 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -42,7 +42,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def copy_kernel_details(cls,path):
# Define source and destination paths
source_csv_path = f"./data/{path}"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv"
+ destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
# Check if source CSV file exists
if not os.path.exists(source_csv_path):
--
Gitee
From 0abef7750282ad08013f1cde031cefe9fb996994 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 15:08:42 +0800
Subject: [PATCH 38/72] =?UTF-8?q?checker=20=E6=8A=A5=E9=94=99=E5=86=85?=
=?UTF-8?q?=E5=AE=B9=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index ef360ee12..c28675e9f 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -138,27 +138,25 @@ class AICorePerformanceChecker:
"""
try:
self.result["cube"] = self.check_cube_operator(promoting_dataset)
- except (IndexError, ValueError, AttributeError):
- logger.error("Failed to check ai core performance, cube operator incorrect shapes value.")
+ except (IndexError, ValueError, AttributeError) as e:
+ logger.error(f"Failed to check ai core performance, {e}.")
self.result["cube"] = []
try:
self.result["fa"] = self.check_fa_operator(promoting_dataset)
- except (IndexError, ValueError, AttributeError):
- logger.error("Failed to check ai core performance, fa operator incorrect shapes value.")
+ except (IndexError, ValueError, AttributeError) as e:
+ logger.error(f"Failed to check ai core performance, {e}.")
self.result["fa"] = []
try:
self.result["vector"] = self.check_vector_operator(promoting_dataset)
- except (IndexError, ValueError, AttributeError):
- logger.error("Failed to check ai core performance, vector operator incorrect shapes value.")
+ except (IndexError, ValueError, AttributeError) as e:
+ logger.error(f"Failed to check ai core performance, {e}.")
self.result["vector"] = []
if not any([self.result["cube"], self.result["fa"], self.result["vector"]]):
self.ai_core_performance_issues = False
-
-
def check_cube_operator(self, profiling_dataset: ProfilingDataset):
cube_dict = self.cube_dict
optimization_queue = []
--
Gitee
From 6db14fa52598cd74eb7a8e0bb4ca31365b191d4c Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 15:35:47 +0800
Subject: [PATCH 39/72] =?UTF-8?q?UT=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../test_ai_core_performance_advice.py | 42 +------------------
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index 7849391f1..6ff49cad1 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -17,6 +17,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def tearDown(self):
if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR)
+ self.clear_htmls()
def setUp(self):
if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR):
@@ -57,49 +58,10 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
def test_ai_core_performance_total(self):
file_path = "kernel_details.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
- self.copy_kernel_details(file_path)
- interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMPUTATION
- scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
- result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
- # TODO 测试结果验证
- result.clear()
- self.clear_htmls()
-
- def test_ai_core_performance_cube_operator(self):
- file_path = "kernel_details_cube.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
self.copy_kernel_details(file_path)
interface = Interface(profiling_path=self.TMP_DIR)
dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
# TODO 测试结果验证
- result.clear()
- self.clear_htmls()
-
- def test_ai_core_performance_fa_operator(self):
- file_path = "kernel_details_fa.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
- self.copy_kernel_details(file_path)
- interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMPUTATION
- scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
- result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
- # TODO 测试结果验证
- result.clear()
- self.clear_htmls()
-
-
- def test_ai_core_performance_vector_operator(self):
- file_path = "kernel_details_vector.csv"
- destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
- self.copy_kernel_details(file_path)
- interface = Interface(profiling_path=self.TMP_DIR)
- dimension = Interface.COMPUTATION
- scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
- result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
- # TODO 测试结果验证
- result.clear()
- self.clear_htmls()
+ result.clear()
\ No newline at end of file
--
Gitee
From 5a23b7934c3bf5579aa0a7bc9032f7722374c022 Mon Sep 17 00:00:00 2001
From: kiritorl
Date: Wed, 22 Jan 2025 15:37:09 +0800
Subject: [PATCH 40/72] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E6=B2=A1=E6=9C=89?=
=?UTF-8?q?=E5=BB=BA=E8=AE=AE=E9=A1=B9=E6=97=B6=E7=9A=84=E7=A9=BA=E8=A1=A8?=
=?UTF-8?q?=E5=B1=95=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 24 ++++++++++++-------
.../html/templates/ai_core_performance.html | 6 ++---
2 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index c28675e9f..a0e3fcd2b 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -434,15 +434,18 @@ class AICorePerformanceChecker:
for cube_opti_issue in self.result["cube"][0]:
opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue)
cube_desc["opti"] += opti_sugg
- result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]])
+ if cube_desc["opti"]:
+ result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]])
for cube_bound_issue in self.result["cube"][1]:
bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue)
cube_desc["bound"] += bound_sugg
- result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]])
+ if cube_desc["bound"]:
+ result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]])
for cube_affinity_issue in self.result["cube"][2]:
affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue)
cube_desc["affinity"] += affinity_sugg
- result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]])
+ if cube_desc["affinity"]:
+ result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]])
if self.result["fa"]:
optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion])
@@ -455,15 +458,18 @@ class AICorePerformanceChecker:
for fa_opti_issue in self.result["fa"][0]:
opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue)
fa_desc["opti"] += opti_sugg
- result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]])
+ if fa_desc["opti"]:
+ result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]])
for fa_bound_issue in self.result["fa"][1]:
bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue)
fa_desc["bound"] += bound_sugg
- result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]])
+ if fa_desc["bound"]:
+ result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]])
for fa_affinity_issue in self.result["fa"][2]:
affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue)
fa_desc["affinity"] += affinity_sugg
- result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]])
+ if fa_desc["affinity"]:
+ result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]])
if self.result["vector"]:
optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion])
@@ -476,11 +482,13 @@ class AICorePerformanceChecker:
for vector_opti_issue in self.result["vector"][0]:
opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue)
vector_desc["opti"] += opti_sugg
- result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]])
+ if vector_desc["opti"]:
+ result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]])
for vector_bound_issue in self.result["vector"][1]:
bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue)
vector_desc["bound"] += bound_sugg
- result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]])
+ if vector_desc["bound"]:
+ result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]])
return True
def make_render(self, html_render, add_render_list=True, **kwargs):
diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html
index 48e62ad6c..5bf133550 100644
--- a/profiler/advisor/display/html/templates/ai_core_performance.html
+++ b/profiler/advisor/display/html/templates/ai_core_performance.html
@@ -10,7 +10,7 @@
bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ',
bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %}
{% endif %}
- {% if format_result.cube is not none %}
+ {% if format_result.cube|length > 0 %}
MatMul{{ title_ns.title_desc }}
{% endif %}
- {% if format_result.fa is not none %}
+ {% if format_result.fa|length > 0 %}
FA{{ title_ns.title_desc }}
{% endif %}
- {% if format_result.cube is not none %}
+ {% if format_result.vector|length > 0 %}
Vector{{ title_ns.title_desc }}
--
Gitee
From 5bc247663f11862050123ffe82c527d4de3fd55f Mon Sep 17 00:00:00 2001
From: kiritorl
Date: Wed, 22 Jan 2025 15:52:36 +0800
Subject: [PATCH 41/72] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dhtml=E7=A9=BA=E8=A1=A8?=
=?UTF-8?q?=E7=9A=84=E5=B1=95=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../advisor/display/html/templates/ai_core_performance.html | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html
index 5bf133550..d5ab1a3fa 100644
--- a/profiler/advisor/display/html/templates/ai_core_performance.html
+++ b/profiler/advisor/display/html/templates/ai_core_performance.html
@@ -10,7 +10,7 @@
bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ',
bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %}
{% endif %}
- {% if format_result.cube|length > 0 %}
+ {% if format_result.cube[0]|length + format_result.cube[1]|length + format_result.cube[2]|length > 0 %}
MatMul{{ title_ns.title_desc }}
{% endif %}
- {% if format_result.fa|length > 0 %}
+ {% if format_result.fa[0]|length + format_result.fa[1]|length + format_result.fa[2]|length > 0 %}
FA{{ title_ns.title_desc }}
{% endif %}
- {% if format_result.vector|length > 0 %}
+ {% if format_result.vector[0]|length + format_result.vector[1]|length > 0 %}
Vector{{ title_ns.title_desc }}
--
Gitee
From 1aefa6d6f6083327266c1563621b10f40e5ff839 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 16:01:41 +0800
Subject: [PATCH 42/72] =?UTF-8?q?checker=20optimization=20=E6=A0=BC?=
=?UTF-8?q?=E5=BC=8F=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index c28675e9f..5e562349d 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -232,7 +232,7 @@ class AICorePerformanceChecker:
"op_name": op,
"shape": shape.split("-")[0],
"dtype": dtype,
- "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)})
+ "optimization": round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)})
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
@@ -349,7 +349,7 @@ class AICorePerformanceChecker:
"op_name": op,
"shape": shape.split("-")[0],
"dtype": dtype,
- "optimization": optimization})
+ "optimization": round(optimization * 100, 2)})
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
@@ -406,7 +406,7 @@ class AICorePerformanceChecker:
"op_name": op_name,
"shape": shape,
"dtype": dtype,
- "optimization": optimization})
+ "optimization": round(optimization * 100, 2)})
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]]
--
Gitee
From 71c43f4f0208e155affa522ba2b57b9348ef28a6 Mon Sep 17 00:00:00 2001
From: kiritorl
Date: Wed, 22 Jan 2025 16:07:34 +0800
Subject: [PATCH 43/72] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=80=A7=E8=83=BD?=
=?UTF-8?q?=E7=A9=BA=E9=97=B4=E6=94=B9=E4=B8=BA=E7=99=BE=E5=88=86=E6=AF=94?=
=?UTF-8?q?=E5=B1=95=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../html/templates/ai_core_performance.html | 32 +++++++++----------
.../advisor/rules/cn/aicore_performance.yaml | 2 +-
.../advisor/rules/en/aicore_performance.yaml | 2 +-
3 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html
index d5ab1a3fa..77e5e0cb5 100644
--- a/profiler/advisor/display/html/templates/ai_core_performance.html
+++ b/profiler/advisor/display/html/templates/ai_core_performance.html
@@ -21,9 +21,9 @@
{% set opti_ns = namespace(total_opti='') %}
{% for opti in format_result.cube[0] %}
{% if not loop.first %}
- {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
{% else %}
- {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
{% endif %}
{% endfor %}
{% if opti_ns.total_opti|length > 0 %}
@@ -35,9 +35,9 @@
{% set bound_ns = namespace(total_bound='') %}
{% for bound in format_result.cube[1] %}
{% if not loop.first %}
- {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% else %}
- {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% endif %}
{% endfor %}
{% if bound_ns.total_bound|length > 0 %}
@@ -49,9 +49,9 @@
{% set affinity_ns = namespace(total_affinity='') %}
{% for affinity in format_result.cube[2] %}
{% if not loop.first %}
- {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% else %}
- {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% endif %}
{% endfor %}
{% if affinity_ns.total_affinity|length > 0 %}
@@ -74,9 +74,9 @@
{% set opti_ns = namespace(total_opti='') %}
{% for opti in format_result.fa[0] %}
{% if not loop.first %}
- {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
{% else %}
- {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
{% endif %}
{% endfor %}
{% if opti_ns.total_opti|length > 0 %}
@@ -88,9 +88,9 @@
{% set bound_ns = namespace(total_bound='') %}
{% for bound in format_result.fa[1] %}
{% if not loop.first %}
- {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% else %}
- {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% endif %}
{% endfor %}
{% if bound_ns.total_bound|length > 0 %}
@@ -102,9 +102,9 @@
{% set affinity_ns = namespace(total_affinity='') %}
{% for affinity in format_result.fa[2] %}
{% if not loop.first %}
- {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% else %}
- {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
+ {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %}
{% endif %}
{% endfor %}
{% if affinity_ns.total_affinity|length > 0 %}
@@ -127,9 +127,9 @@
{% set opti_ns = namespace(total_opti='') %}
{% for opti in format_result.vector[0] %}
{% if not loop.first %}
- {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
{% else %}
- {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %}
+ {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %}
{% endif %}
{% endfor %}
{% if opti_ns.total_opti|length > 0 %}
@@ -141,9 +141,9 @@
{% set bound_ns = namespace(total_bound='') %}
{% for bound in format_result.vector[1] %}
{% if not loop.first %}
- {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% else %}
- {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
+ {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %}
{% endif %}
{% endfor %}
{% if bound_ns.total_bound|length > 0 %}
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index f00f0a4b7..8d44aaab2 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -12,4 +12,4 @@ fa_affinity_desc_type3: "D和S均不能被128整除"
suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子"
affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n"
bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n"
-optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}\n"
\ No newline at end of file
+optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n"
\ No newline at end of file
diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml
index 28f52f1ed..e85a919ab 100644
--- a/profiler/advisor/rules/en/aicore_performance.yaml
+++ b/profiler/advisor/rules/en/aicore_performance.yaml
@@ -12,4 +12,4 @@ fa_affinity_desc_type3: "Neither D nor S is not divisible by 128"
suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space"
affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n"
bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n"
-optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
+optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n"
\ No newline at end of file
--
Gitee
From 2f5acce4e8ab4e9402d4924a706fd46465a2c491 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 16:55:30 +0800
Subject: [PATCH 44/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
=?UTF-8?q?=E4=B8=8EUT=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 2 +-
.../advisor/compute_advice/test_ai_core_performance_advice.py | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 270c43d41..0ef45f52c 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -252,7 +252,7 @@ class AICorePerformanceChecker:
suggestion = ""
if "varlen" in op.lower():
# 处理变长算子 如果不亲和则affinity_flag为False
- if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0:
+ if int(shape.split("-")[0].split(";")[0].split(",")[2]) % 128 != 0:
affinity_flag = True
suggestion = self._FA_AFFINITY_DESC_TYPE1
for operator in fa_list:
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index 6ff49cad1..922d4b4c0 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -63,5 +63,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
- # TODO 测试结果验证
+ self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0]))
+ self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1]))
+ self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2]))
result.clear()
\ No newline at end of file
--
Gitee
From d98ef93f50206a0fd3142d49c32eee92a380c394 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 17:26:53 +0800
Subject: [PATCH 45/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
=?UTF-8?q?=E4=B8=8EUT=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++--
.../compute_advice/test_ai_core_performance_advice.py | 5 +++++
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 0ef45f52c..09bf91349 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -397,14 +397,14 @@ class AICorePerformanceChecker:
if bound:
bound_queue.append({
"op_name": op_name,
- "shape": shape,
+ "shape": shape.split("-")[0],
"bound": bound,
"dtype": dtype,
"duration": shape_duration})
else:
optimization_queue.append({
"op_name": op_name,
- "shape": shape,
+ "shape": shape.split("-")[0],
"dtype": dtype,
"optimization": round(optimization * 100, 2)})
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index 922d4b4c0..40fa81837 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -66,4 +66,9 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0]))
self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1]))
self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2]))
+ self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[0]))
+ self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[1]))
+ self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[2]))
+ self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[0]))
+ self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[1]))
result.clear()
\ No newline at end of file
--
Gitee
From a8e535fddd557ddac6f1cd7cd01aa7fa06b1cd92 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 22 Jan 2025 17:31:09 +0800
Subject: [PATCH 46/72] =?UTF-8?q?UT=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../test_ai_core_performance_advice.py | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index 40fa81837..61ae35d13 100644
--- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -40,7 +40,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
os.remove(file_path)
@classmethod
- def copy_kernel_details(cls,path):
+ def copy_kernel_details(cls, path):
# Define source and destination paths
source_csv_path = f"./data/{path}"
destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv"
@@ -63,12 +63,12 @@ class TestAICorePerformanceAdvice(unittest.TestCase):
dimension = Interface.COMPUTATION
scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS
result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR)
- self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0]))
- self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1]))
- self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2]))
- self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[0]))
- self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[1]))
- self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[2]))
- self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[0]))
- self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[1]))
- result.clear()
\ No newline at end of file
+ self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[0]))
+ self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[1]))
+ self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[2]))
+ self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[0]))
+ self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[1]))
+ self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[2]))
+ self.assertLess(1, len(result.data.get("Vector算子性能分析").get("data")[0]))
+ self.assertLess(1, len(result.data.get("Vector算子性能分析").get("data")[1]))
+ result.clear()
--
Gitee
From 6440ccc765205eb29310a30cfb1e817831306736 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 23 Jan 2025 11:21:09 +0800
Subject: [PATCH 47/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 47 ++++++++++++-------
1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 09bf91349..3175168e4 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -75,10 +75,12 @@ class AICorePerformanceChecker:
def data_filter(self, profiling_dataset: ProfilingDataset):
if not self.check_task_list(profiling_dataset):
return
+
operator_list = profiling_dataset.op_summary.op_list
total_duration = sum(float(operator.task_duration) for operator in operator_list)
cube_memory_dict = {}
vector_type_dict = {}
+
# filter cube operator and fa operator
for op in operator_list:
shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1]
@@ -204,8 +206,10 @@ class AICorePerformanceChecker:
operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape]
shape_duration = sum(float(operator.task_duration) for operator in shap_list)
dtype = shap_list[0].input_data_types if shap_list else None
- aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list)
- aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list)
+ aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list
+ if operator.aic_mac_ratio != "N/A") / len(shap_list)
+ aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list
+ if operator.aic_mac_ratio != "N/A") / len(shap_list)
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
bound_queue.append({
"op_name": op,
@@ -304,11 +308,14 @@ class AICorePerformanceChecker:
if (operator.op_name == op and
operator.input_shapes[1:-1] + "-" +
operator.output_shapes[1:-1] + "-grad" == shape):
- aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio)
- aic_mte2_ratio += float(operator.aic_mte2_ratio)
- shape_duration += float(operator.task_duration)
- dtype = operator.input_data_types
- length += 1
+ try:
+ aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio)
+ aic_mte2_ratio += float(operator.aic_mte2_ratio)
+ shape_duration += float(operator.task_duration)
+ dtype = operator.input_data_types
+ length += 1
+ except ValueError:
+ continue
aic_fixpipe_ratio = aic_fixpipe_ratio / length
aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75:
@@ -323,10 +330,13 @@ class AICorePerformanceChecker:
for operator in fa_list:
if (operator.op_name == op and
operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape):
- aiv_vec_ratio += float(operator.aiv_vec_ratio)
- aic_mte2_ratio += float(operator.aic_mte2_ratio)
- shape_duration += float(operator.task_duration)
- length += 1
+ try:
+ aiv_vec_ratio += float(operator.aiv_vec_ratio)
+ aic_mte2_ratio += float(operator.aic_mte2_ratio)
+ shape_duration += float(operator.task_duration)
+ length += 1
+ except ValueError:
+ continue
aiv_vec_ratio = aiv_vec_ratio / length
aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75:
@@ -375,12 +385,15 @@ class AICorePerformanceChecker:
for operator in vector_list:
if (operator.op_name == op_name and
operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape):
- aiv_vec_ratio += float(operator.aiv_vec_ratio)
- aiv_mte2_ratio += float(operator.aiv_mte2_ratio)
- aiv_mte3_ratio += float(operator.aiv_mte3_ratio)
- shape_duration += float(operator.task_duration)
- dtype = operator.input_data_types
- length += 1
+ try:
+ aiv_vec_ratio += float(operator.aiv_vec_ratio)
+ aiv_mte2_ratio += float(operator.aiv_mte2_ratio)
+ aiv_mte3_ratio += float(operator.aiv_mte3_ratio)
+ shape_duration += float(operator.task_duration)
+ dtype = operator.input_data_types
+ length += 1
+ except ValueError:
+ continue
aiv_vec_ratio = aiv_vec_ratio / length
aiv_mte2_ratio = aiv_mte2_ratio / length
aiv_mte2_ratio = aiv_mte2_ratio / length
--
Gitee
From 43932f3a4f6f6362d44185f8b90eb76c0f521f02 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 23 Jan 2025 15:50:59 +0800
Subject: [PATCH 48/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 29 ++++++++++++++-----
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 3175168e4..8964e93c4 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -141,19 +141,19 @@ class AICorePerformanceChecker:
try:
self.result["cube"] = self.check_cube_operator(promoting_dataset)
except (IndexError, ValueError, AttributeError) as e:
- logger.error(f"Failed to check ai core performance, {e}.")
+ logger.error(f"Failed to check ai core performance cube operator, {e}.")
self.result["cube"] = []
try:
self.result["fa"] = self.check_fa_operator(promoting_dataset)
except (IndexError, ValueError, AttributeError) as e:
- logger.error(f"Failed to check ai core performance, {e}.")
+ logger.error(f"Failed to check ai core performance fa operator, {e}.")
self.result["fa"] = []
try:
self.result["vector"] = self.check_vector_operator(promoting_dataset)
except (IndexError, ValueError, AttributeError) as e:
- logger.error(f"Failed to check ai core performance, {e}.")
+ logger.error(f"Failed to check ai core performance vector operator, {e}.")
self.result["vector"] = []
if not any([self.result["cube"], self.result["fa"], self.result["vector"]]):
@@ -206,10 +206,19 @@ class AICorePerformanceChecker:
operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape]
shape_duration = sum(float(operator.task_duration) for operator in shap_list)
dtype = shap_list[0].input_data_types if shap_list else None
- aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list
- if operator.aic_mac_ratio != "N/A") / len(shap_list)
- aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list
- if operator.aic_mac_ratio != "N/A") / len(shap_list)
+ aic_mac_ratio, aic_mte2_ratio = 0., 0.
+ length = 0
+ for operator in shap_list:
+ try:
+ aic_mac_ratio += float(operator.aic_mac_ratio)
+ aic_mte2_ratio += float(operator.aic_mte2_ratio)
+ length += 1
+ except ValueError:
+ continue
+ if length == 0:
+ continue
+ aic_mac_ratio = aic_mac_ratio / length
+ aic_mte2_ratio = aic_mte2_ratio / length
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
bound_queue.append({
"op_name": op,
@@ -316,6 +325,8 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
+ if length == 0:
+ continue
aic_fixpipe_ratio = aic_fixpipe_ratio / length
aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75:
@@ -337,6 +348,8 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
+ if length == 0:
+ continue
aiv_vec_ratio = aiv_vec_ratio / length
aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75:
@@ -394,6 +407,8 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
+ if length == 0:
+ continue
aiv_vec_ratio = aiv_vec_ratio / length
aiv_mte2_ratio = aiv_mte2_ratio / length
aiv_mte2_ratio = aiv_mte2_ratio / length
--
Gitee
From 1d1394fe9e2d48e36ea882112f8112ec5ed8cf41 Mon Sep 17 00:00:00 2001
From: kiritorl
Date: Wed, 5 Feb 2025 17:14:35 +0800
Subject: [PATCH 49/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=89=93=E5=B1=8F?=
=?UTF-8?q?=E5=92=8C=E8=A1=A8=E6=A0=BC=E9=87=8C=E6=98=BE=E7=A4=BA=E7=A9=BA?=
=?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 270c43d41..32bd1da84 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -423,7 +423,7 @@ class AICorePerformanceChecker:
cube_desc = dict.fromkeys(suggestion_keys, "")
fa_desc = dict.fromkeys(suggestion_keys, "")
vector_desc = dict.fromkeys(suggestion_keys, "")
- if self.result["cube"]:
+ if any(self.result["cube"]):
optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion])
result.add(OptimizeRecord(optimization_item))
headers = [
@@ -447,7 +447,7 @@ class AICorePerformanceChecker:
if cube_desc["affinity"]:
result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]])
- if self.result["fa"]:
+ if any(self.result["fa"]):
optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion])
result.add(OptimizeRecord(optimization_item))
headers = [
@@ -471,7 +471,7 @@ class AICorePerformanceChecker:
if fa_desc["affinity"]:
result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]])
- if self.result["vector"]:
+ if any(self.result["vector"]):
optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion])
result.add(OptimizeRecord(optimization_item))
headers = [
--
Gitee
From e2c29e98910cd5d97a78d77df07a684033f8ed27 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 6 Feb 2025 14:46:57 +0800
Subject: [PATCH 50/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 90d7daa09..3316cf43d 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -276,7 +276,7 @@ class AICorePerformanceChecker:
else:
# 处理定长算子 如果不亲和则affinity_flag为False
head_dim = 0
- seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2])
+ seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2])
input_first_tensor = shape.split("-")[0].split(";")[0].split(",")
if len(input_first_tensor) == 3:
head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1])
--
Gitee
From a22dbe4b438ace270e8e84a25f4e0a039fe7d717 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 6 Feb 2025 14:55:54 +0800
Subject: [PATCH 51/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 24 ++++++++-----------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 3316cf43d..9fd7f9a7f 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -219,33 +219,29 @@ class AICorePerformanceChecker:
continue
aic_mac_ratio = aic_mac_ratio / length
aic_mte2_ratio = aic_mte2_ratio / length
+ bound = ""
+ optimization = 0.
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
- bound_queue.append({
- "op_name": op,
- "shape": shape.split("-")[0],
- "dtype": dtype,
- "bound": "mac_and_mte2_bound",
- "duration": shape_duration})
+ bound = "mac_and_mte2_bound"
elif aic_mac_ratio >= 0.8:
- bound_queue.append({
- "op_name": op,
- "shape": shape.split("-")[0],
- "dtype": dtype,
- "bound": "mac_bound",
- "duration": shape_duration})
+ bound = "mac_bound"
elif aic_mte2_ratio >= 0.95:
+ bound = "mte2_bound"
+ else:
+ optimization = round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)
+ if bound:
bound_queue.append({
"op_name": op,
"shape": shape.split("-")[0],
"dtype": dtype,
- "bound": "mte2_bound",
+ "bound": bound,
"duration": shape_duration})
else:
optimization_queue.append({
"op_name": op,
"shape": shape.split("-")[0],
"dtype": dtype,
- "optimization": round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)})
+ "optimization": optimization})
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
--
Gitee
From b455912d39ba8251100b4a6eeb06fcc2dcd1aa36 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 6 Feb 2025 15:02:29 +0800
Subject: [PATCH 52/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 9fd7f9a7f..5aecaaf03 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -1,7 +1,6 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd.
# All rights reserved.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
@@ -434,8 +433,6 @@ class AICorePerformanceChecker:
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]]
- pass
-
def make_record(self, result: OptimizeResult):
"""
make record for what and how to optimize
--
Gitee
From d7d4c7c2d66f8018a4dcf96ecd4a3c89b15a9d6c Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 6 Feb 2025 16:34:24 +0800
Subject: [PATCH 53/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 29 ++++++++++---------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 5aecaaf03..b1d14ef58 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -13,6 +13,7 @@
# limitations under the License.
import logging
import os
+from functools import reduce
from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
@@ -80,20 +81,26 @@ class AICorePerformanceChecker:
cube_memory_dict = {}
vector_type_dict = {}
- # filter cube operator and fa operator
for op in operator_list:
shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1]
+ # preliminary filter cube operator
if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower():
cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0)
cube_memory_dict[op.op_name][shapes] += self.memory_size(op)
- elif op.op_type == "FlashAttentionScore":
+ continue
+
+ # preliminary filter vector operator
+ if op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]:
+ vector_type_dict.setdefault(op.op_type, set()).add(op)
+ continue
+
+ # filter fa operator
+ if op.op_type == "FlashAttentionScore":
self.fa_dict.setdefault(op.op_name, set()).add(shapes)
self.fa_list.append(op)
elif op.op_type == "FlashAttentionScoreGrad":
self.fa_dict.setdefault(op.op_name, set()).add(shapes + "-grad")
self.fa_list.append(op)
- elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]:
- vector_type_dict.setdefault(op.op_type, set()).add(op)
# filter cube operator
for op_name in cube_memory_dict:
@@ -116,21 +123,15 @@ class AICorePerformanceChecker:
def memory_size(operator):
memory = 0
input_shapes = operator.input_shapes[1:-1].split(";")
+ output_shapes = operator.output_shapes[1:-1]
for shapes in input_shapes:
if not "," in shapes and shapes != "":
# 多的一维是 bias ,预先乘2
memory += int(shapes) * 2
continue
- start = 1
- for shape in shapes.split(","):
- start *= int(shape)
- memory += start
-
- output_shape = operator.output_shapes[1:-1].split(",")
- start = 1
- for shapes in output_shape:
- start *= int(shapes)
- memory += int(start)
+ memory += reduce(lambda x, y: x*y, map(int, shapes.split(",")))
+ memory += reduce(lambda x, y: x * y, map(int, output_shapes.split(",")))
+
return memory * 2 / 1024 / 1024
def check_ai_core_performance(self, promoting_dataset: ProfilingDataset):
--
Gitee
From 4a6625cc7f8a4c132439d7efbf1d0af71bc066d1 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 6 Feb 2025 16:35:08 +0800
Subject: [PATCH 54/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index b1d14ef58..a3622ebdf 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -1,6 +1,7 @@
# Copyright (c) 2024, Huawei Technologies Co., Ltd.
# All rights reserved.
#
+# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
--
Gitee
From 3f0b15adb831e6d468f738a318417796a71b1c73 Mon Sep 17 00:00:00 2001
From: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
Date: Tue, 11 Feb 2025 03:01:59 +0000
Subject: [PATCH 55/72] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20pr?=
=?UTF-8?q?ofiler/cli/entrance.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
profiler/cli/entrance.py | 75 ----------------------------------------
1 file changed, 75 deletions(-)
delete mode 100644 profiler/cli/entrance.py
diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py
deleted file mode 100644
index 89ac8187d..000000000
--- a/profiler/cli/entrance.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-# Copyright (c) 2024, Huawei Technologies Co., Ltd.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-import click
-
-from profiler.cli.analyze_cli import analyze_cli
-from profiler.cli.complete_cli import auto_complete_cli
-from profiler.cli.compare_cli import compare_cli
-from profiler.cli.cluster_cli import cluster_cli
-from profiler.advisor.version import print_version_callback, cli_version
-
-logger = logging.getLogger()
-CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'],
- max_content_width=160)
-
-COMMAND_PRIORITY = {
- "advisor": 1,
- "compare": 2,
- "cluster": 3,
- "auto-completion": 4
-}
-
-
-class SpecialHelpOrder(click.Group):
-
- def __init__(self, *args, **kwargs):
- super(SpecialHelpOrder, self).__init__(*args, **kwargs)
-
- def list_commands_for_help(self, ctx):
- """
- reorder the list of commands when listing the help
- """
- commands = super(SpecialHelpOrder, self).list_commands(ctx)
- return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')),
- command) for command in commands)]
-
- def get_help(self, ctx):
- self.list_commands = self.list_commands_for_help
- return super(SpecialHelpOrder, self).get_help(ctx)
-
-
-@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder)
-@click.option('--version', '-V', '-v', is_flag=True,
- callback=print_version_callback, expose_value=False,
- is_eager=True, help=cli_version())
-def msprof_analyze_cli(**kwargs):
- pass
-
-
-msprof_analyze_cli.add_command(analyze_cli, name="advisor")
-msprof_analyze_cli.add_command(compare_cli, name="compare")
-msprof_analyze_cli.add_command(cluster_cli, name="cluster")
-msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
-
-if __name__ == "__main__":
- msprof_analyze_cli.main(
- [
- "analyze","all","-d",
- r"D:\data\file","-l","cn"
- ]
- )
--
Gitee
From 353723271a2e967a4d9e4495db343b9b4aef2cc0 Mon Sep 17 00:00:00 2001
From: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
Date: Tue, 11 Feb 2025 03:07:41 +0000
Subject: [PATCH 56/72] =?UTF-8?q?Revert=20"=E5=88=A0=E9=99=A4=E6=96=87?=
=?UTF-8?q?=E4=BB=B6=20profiler/cli/entrance.py"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 3f0b15adb831e6d468f738a318417796a71b1c73.
---
profiler/cli/entrance.py | 75 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 profiler/cli/entrance.py
diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py
new file mode 100644
index 000000000..89ac8187d
--- /dev/null
+++ b/profiler/cli/entrance.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import click
+
+from profiler.cli.analyze_cli import analyze_cli
+from profiler.cli.complete_cli import auto_complete_cli
+from profiler.cli.compare_cli import compare_cli
+from profiler.cli.cluster_cli import cluster_cli
+from profiler.advisor.version import print_version_callback, cli_version
+
+logger = logging.getLogger()
+CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'],
+ max_content_width=160)
+
+COMMAND_PRIORITY = {
+ "advisor": 1,
+ "compare": 2,
+ "cluster": 3,
+ "auto-completion": 4
+}
+
+
+class SpecialHelpOrder(click.Group):
+
+ def __init__(self, *args, **kwargs):
+ super(SpecialHelpOrder, self).__init__(*args, **kwargs)
+
+ def list_commands_for_help(self, ctx):
+ """
+ reorder the list of commands when listing the help
+ """
+ commands = super(SpecialHelpOrder, self).list_commands(ctx)
+ return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')),
+ command) for command in commands)]
+
+ def get_help(self, ctx):
+ self.list_commands = self.list_commands_for_help
+ return super(SpecialHelpOrder, self).get_help(ctx)
+
+
+@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder)
+@click.option('--version', '-V', '-v', is_flag=True,
+ callback=print_version_callback, expose_value=False,
+ is_eager=True, help=cli_version())
+def msprof_analyze_cli(**kwargs):
+ pass
+
+
+msprof_analyze_cli.add_command(analyze_cli, name="advisor")
+msprof_analyze_cli.add_command(compare_cli, name="compare")
+msprof_analyze_cli.add_command(cluster_cli, name="cluster")
+msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
+
+if __name__ == "__main__":
+ msprof_analyze_cli.main(
+ [
+ "analyze","all","-d",
+ r"D:\data\file","-l","cn"
+ ]
+ )
--
Gitee
From 3b444579f0280889b0d77c7a3e2e018401115e04 Mon Sep 17 00:00:00 2001
From: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
Date: Tue, 11 Feb 2025 03:08:19 +0000
Subject: [PATCH 57/72] update profiler/cli/entrance.py.
Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
---
profiler/cli/entrance.py | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py
index 89ac8187d..503cf9ea6 100644
--- a/profiler/cli/entrance.py
+++ b/profiler/cli/entrance.py
@@ -64,12 +64,4 @@ def msprof_analyze_cli(**kwargs):
msprof_analyze_cli.add_command(analyze_cli, name="advisor")
msprof_analyze_cli.add_command(compare_cli, name="compare")
msprof_analyze_cli.add_command(cluster_cli, name="cluster")
-msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
-
-if __name__ == "__main__":
- msprof_analyze_cli.main(
- [
- "analyze","all","-d",
- r"D:\data\file","-l","cn"
- ]
- )
+msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
\ No newline at end of file
--
Gitee
From 7308fe6d140b9b0fa099381aeb5265c6712d1e62 Mon Sep 17 00:00:00 2001
From: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
Date: Tue, 11 Feb 2025 03:11:03 +0000
Subject: [PATCH 58/72] update
profiler/test/ut/advisor/compute_advice/data/kernel_details.csv.
Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
---
.../compute_advice/data/kernel_details.csv | 30 +++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
index e69de29bb..020178358 100644
--- a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
+++ b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
@@ -0,0 +1,30 @@
+Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Time(us),Duration(us),Wait Time(us),Block Dim,Mix Block Dim,HF32 Eligible,Input Shapes,Input Data Types,Input Formats,Output Shapes,Output Data Types,Output Formats,Context ID,aicore_time(us),aic_total_cycles,aic_mac_time(us),aic_mac_ratio,aic_scalar_time(us),aic_scalar_ratio,aic_mte1_time(us),aic_mte1_ratio,aic_mte2_time(us),aic_mte2_ratio,aic_fixpipe_time(us),aic_fixpipe_ratio,aic_icache_miss_rate,aiv_time(us),aiv_total_cycles,aiv_vec_time(us),aiv_vec_ratio,aiv_scalar_time(us),aiv_scalar_ratio,aiv_mte2_time(us),aiv_mte2_ratio,aiv_mte3_time(us),aiv_mte3_ratio,aiv_icache_miss_rate,cube_utilization(%)
+19,4294967295,61653,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971558972.912 ",185.504,1.087,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.87,5295467,151.425,0.824,88.03,0.479,119.148,0.648,177.314,0.964,5.736,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,79.295
+19,4294967295,61669,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971560588.764 ",501.17,2.2,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,478.701,17233251,356.349,0.744,118.087,0.247,296.009,0.618,452.112,0.944,35.833,0.075,0.001,0,0,0,0,0,0,0,0,0,0,0,95.517
+19,4294967295,61694,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971565213.257 ",186.823,1.178,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.728,5291376,151.502,0.825,87.902,0.478,118.519,0.645,177.654,0.967,5.773,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.675
+19,4294967295,61710,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971566843.489 ",516.991,2.33,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,491.775,17703905,356.249,0.724,118.59,0.241,295.046,0.6,463.696,0.943,37.671,0.077,0.001,0,0,0,0,0,0,0,0,0,0,0,95.123
+19,4294967295,61735,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971571596.404 ",187.724,0.766,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,184.904,5325221,151.489,0.819,87.893,0.475,118.63,0.642,178.815,0.967,5.77,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.798
+19,4294967295,61751,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971573223.437 ",514.87,2.15,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,486.931,17529512,356.117,0.731,118.847,0.244,295.529,0.607,457.002,0.939,37.938,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,94.574
+19,4294967295,61776,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971577931.851 ",190.544,1.367,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,187.073,5387702,151.741,0.811,87.935,0.47,117.467,0.628,181.043,0.968,5.803,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.543
+19,4294967295,61792,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971579566.403 ",504.071,2.28,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,485.542,17479517,356.283,0.734,117.755,0.243,296.421,0.61,455.064,0.937,37.75,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,96.324
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765
+19,4294967295,60679,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971411629.128 ",410.188,1.53,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,366.147,13181275,129.055,0.352,352.275,0.962,108.364,0.296,172.86,0.872,216.141,0.59,0.003,365.782,26336326,228.687,0.625,137.979,0.377,118.603,0.324,71.448,0.195,0.013,89.263
+19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339
+19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377
+19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788
+19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.78,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141
+19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.76,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294
+19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.779,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143
+19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.783,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238
+19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788
+19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0
+19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0
+19,4294967295,60711,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971416743.250 ",27.021,1.246,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.304,1749862,5.986,0.246,1.258,0.052,20.424,0.84,3.23,0.133,0.027,0
+19,4294967295,60718,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971419318.962 ",25.08,0.984,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,22.47,1617840,5.989,0.267,2.009,0.089,18.809,0.837,3.191,0.142,0.024,0
+19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0
+19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 ",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0
+19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 ",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0
+19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0
\ No newline at end of file
--
Gitee
From 3fd8aae7004492e4215e3a78d4fa12570dd193f4 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 09:13:14 +0800
Subject: [PATCH 59/72] =?UTF-8?q?Cube=E4=B8=8Efa=E7=AE=97=E5=AD=90?=
=?UTF-8?q?=E5=86=85=E8=BD=B4=E5=88=A4=E6=96=AD=E6=96=B9=E6=B3=95=E6=8A=BD?=
=?UTF-8?q?=E5=8F=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 123 ++++++++++--------
1 file changed, 66 insertions(+), 57 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index a3622ebdf..598c3690f 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -31,6 +31,8 @@ class AICorePerformanceChecker:
"""
_CHECKER = "AICorePerformanceChecker"
CUBE_OPERATOR_MEMORY_SIZE_MB = 100
+ INNER_AXIS_256 = 256
+ INNER_AXIS_128 = 128
def __init__(self):
@@ -130,7 +132,7 @@ class AICorePerformanceChecker:
# 多的一维是 bias ,预先乘2
memory += int(shapes) * 2
continue
- memory += reduce(lambda x, y: x*y, map(int, shapes.split(",")))
+ memory += reduce(lambda x, y: x * y, map(int, shapes.split(",")))
memory += reduce(lambda x, y: x * y, map(int, output_shapes.split(",")))
return memory * 2 / 1024 / 1024
@@ -173,22 +175,7 @@ class AICorePerformanceChecker:
for shape in cube_dict[op]:
dtype = None
shape_duration = 0.
- # 判断输入shape内轴是否为256的倍数
- if (len(shape.split("-")[0].split(";")[0].split(","))) == 4:
- # NZ格式
- shapes = shape.split("-")[0].split(";")
- b = int(shapes[0].split(",")[1])
- c = int(shapes[0].split(",")[2])
-
- f = int(shapes[1].split(",")[1])
- g = int(shapes[1].split(",")[2])
- affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0)
- else:
- # ND格式
- shapes = shape.split("-")[0].split(";")
- l = int(shapes[0].split(",")[1])
- k = int(shapes[1].split(",")[1])
- affinity_flag = (l % 256 == 0) and (k % 256 == 0)
+ affinity_flag = self.check_affinity(shape)
if not affinity_flag:
for operator in operator_list:
if (operator.op_name == op and
@@ -247,6 +234,24 @@ class AICorePerformanceChecker:
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+ def _check_cube_inner_axis(self, shape):
+ # 判断输入shape内轴是否为256的倍数
+ if (len(shape.split("-")[0].split(";")[0].split(","))) == 4:
+ # NZ格式
+ shapes = shape.split("-")[0].split(";")
+ b = int(shapes[0].split(",")[1])
+ c = int(shapes[0].split(",")[2])
+
+ f = int(shapes[1].split(",")[1])
+ g = int(shapes[1].split(",")[2])
+ return (b * c % self.INNER_AXIS_256 == 0) and (f * g % self.INNER_AXIS_256 == 0)
+ else:
+ # ND格式
+ shapes = shape.split("-")[0].split(";")
+ l = int(shapes[0].split(",")[1])
+ k = int(shapes[1].split(",")[1])
+ return (l % self.INNER_AXIS_256 == 0) and (k % self.INNER_AXIS_256 == 0)
+
def check_fa_operator(self, profiling_dataset: ProfilingDataset):
fa_list = self.fa_list
fa_dict = self.fa_dict
@@ -256,46 +261,7 @@ class AICorePerformanceChecker:
# 不亲和算子筛选
for op in fa_dict:
for shape in fa_dict[op]:
- affinity_flag = False
- shape_duration = 0.
- dtype = None
- suggestion = ""
- if "varlen" in op.lower():
- # 处理变长算子 如果不亲和则affinity_flag为False
- if int(shape.split("-")[0].split(";")[0].split(",")[2]) % 128 != 0:
- affinity_flag = True
- suggestion = self._FA_AFFINITY_DESC_TYPE1
- for operator in fa_list:
- if (operator.op_name == op and
- operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape):
- shape_duration += float(operator.task_duration)
- dtype = operator.input_data_types
- else:
- # 处理定长算子 如果不亲和则affinity_flag为False
- head_dim = 0
- seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2])
- input_first_tensor = shape.split("-")[0].split(";")[0].split(",")
- if len(input_first_tensor) == 3:
- head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1])
- else:
- head_dim = int(input_first_tensor[3])
- if head_dim % 128 != 0 and seq_len % 128 != 0:
- affinity_flag = True
- suggestion = self._FA_AFFINITY_DESC_TYPE3
- elif head_dim % 128 != 0:
- affinity_flag = True
- suggestion = self._FA_AFFINITY_DESC_TYPE1
- elif seq_len % 128 != 0:
- affinity_flag = True
- suggestion = self._FA_AFFINITY_DESC_TYPE2
- if affinity_flag:
- for operator in fa_list:
- if (operator.op_name == op and
- operator.input_shapes[1:-1] + "-" +
- operator.output_shapes[1:-1] == shape):
- shape_duration += float(operator.task_duration)
- dtype = operator.input_data_types
-
+ affinity_flag, dtype, shape_duration, suggestion = self.method_name(fa_list, op, shape)
if affinity_flag:
# 不亲和算子 计算耗时,加入affinity_queue
affinity_queue.append({
@@ -375,6 +341,49 @@ class AICorePerformanceChecker:
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+ def _check_fa_inner_axis(self, fa_list, op, shape):
+ shape_duration = 0.
+ affinity_flag = False
+ dtype = None
+ suggestion = ""
+ if "varlen" in op.lower():
+ # 处理变长算子 如果不亲和则affinity_flag为False
+ inner_axis = int(shape.split("-")[0].split(";")[0].split(",")[2])
+ if inner_axis % self.INNER_AXIS_128 != 0:
+ affinity_flag = True
+ suggestion = self._FA_AFFINITY_DESC_TYPE1
+ for operator in fa_list:
+ if (operator.op_name == op and
+ operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape):
+ shape_duration += float(operator.task_duration)
+ dtype = operator.input_data_types
+ else:
+ # 处理定长算子 如果不亲和则affinity_flag为False
+ head_dim = 0
+ seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2])
+ input_first_tensor = shape.split("-")[0].split(";")[0].split(",")
+ if len(input_first_tensor) == 3:
+ head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1])
+ else:
+ head_dim = int(input_first_tensor[3])
+ if head_dim % self.INNER_AXIS_128 != 0 and seq_len % self.INNER_AXIS_128 != 0:
+ affinity_flag = True
+ suggestion = self._FA_AFFINITY_DESC_TYPE3
+ elif head_dim % self.INNER_AXIS_128 != 0:
+ affinity_flag = True
+ suggestion = self._FA_AFFINITY_DESC_TYPE1
+ elif seq_len % self.INNER_AXIS_128 != 0:
+ affinity_flag = True
+ suggestion = self._FA_AFFINITY_DESC_TYPE2
+ if affinity_flag:
+ for operator in fa_list:
+ if (operator.op_name == op and
+ operator.input_shapes[1:-1] + "-" +
+ operator.output_shapes[1:-1] == shape):
+ shape_duration += float(operator.task_duration)
+ dtype = operator.input_data_types
+ return affinity_flag, dtype, shape_duration, suggestion
+
def check_vector_operator(self, profiling_dataset: ProfilingDataset):
vector_dict = self.vector_dict
vector_list = []
--
Gitee
From 1902efa711e28f6a24c2600253f90fd841e33aea Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 09:24:18 +0800
Subject: [PATCH 60/72] =?UTF-8?q?Cube=E4=B8=8Evector=E7=AE=97=E5=AD=90?=
=?UTF-8?q?=E6=89=A9=E5=B1=95=E5=B9=B6=E6=8F=90=E5=8F=96=E6=94=B6=E9=9B=86?=
=?UTF-8?q?=E7=AE=97=E5=AD=90=E5=88=97=E8=A1=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 36 ++++++++++++-------
1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 598c3690f..baaa5e949 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -164,13 +164,11 @@ class AICorePerformanceChecker:
def check_cube_operator(self, profiling_dataset: ProfilingDataset):
cube_dict = self.cube_dict
+ suggestion = self._CUBE_AFFINITY_DESC
optimization_queue = []
bound_queue = []
affinity_queue = []
- operator_list = [op for op in profiling_dataset.op_summary.op_list
- if op.op_name in cube_dict
- and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]]
- suggestion = self._CUBE_AFFINITY_DESC
+ operator_list = self._get_operator_list(cube_dict, profiling_dataset)
for op in cube_dict:
for shape in cube_dict[op]:
dtype = None
@@ -234,6 +232,16 @@ class AICorePerformanceChecker:
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+ @staticmethod
+ def _get_operator_list(cube_dict, profiling_dataset):
+ operator_list = []
+ for op in profiling_dataset.op_summary.op_list:
+ if op.op_name in cube_dict:
+ key = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1]
+ if key in cube_dict[op.op_name]:
+ operator_list.append(op)
+ return operator_list
+
def _check_cube_inner_axis(self, shape):
# 判断输入shape内轴是否为256的倍数
if (len(shape.split("-")[0].split(";")[0].split(","))) == 4:
@@ -386,16 +394,9 @@ class AICorePerformanceChecker:
def check_vector_operator(self, profiling_dataset: ProfilingDataset):
vector_dict = self.vector_dict
- vector_list = []
optimization_queue = []
bound_queue = []
- vector_list.extend(
- operator for op_name in vector_dict
- for shape in vector_dict[op_name]
- for operator in profiling_dataset.op_summary.op_list
- if operator.op_name == op_name
- and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape
- )
+ vector_list = self._get_vector_list(profiling_dataset, vector_dict)
for op_name in vector_dict:
for shape in vector_dict[op_name]:
aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0.
@@ -444,6 +445,17 @@ class AICorePerformanceChecker:
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]]
+ @staticmethod
+ def _get_vector_list(profiling_dataset, vector_dict):
+ vector_list = []
+ for op_name in vector_dict:
+ for shape in vector_dict[op_name]:
+ for operator in profiling_dataset.op_summary.op_list:
+ if operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[
+ 1:-1] == shape:
+ vector_list.extend([operator])
+ return vector_list
+
def make_record(self, result: OptimizeResult):
"""
make record for what and how to optimize
--
Gitee
From cec50141aaa01bc64497f6ec0cf3a9ee58c5475c Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 09:41:30 +0800
Subject: [PATCH 61/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=99=A4=E9=9B=B6?=
=?UTF-8?q?=E6=A0=A1=E9=AA=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 30 +++++++++++--------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index baaa5e949..dcd87f1c4 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -192,8 +192,7 @@ class AICorePerformanceChecker:
operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape]
shape_duration = sum(float(operator.task_duration) for operator in shap_list)
dtype = shap_list[0].input_data_types if shap_list else None
- aic_mac_ratio, aic_mte2_ratio = 0., 0.
- length = 0
+ aic_mac_ratio, aic_mte2_ratio, length = 0., 0., 0
for operator in shap_list:
try:
aic_mac_ratio += float(operator.aic_mac_ratio)
@@ -201,10 +200,10 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
- if length == 0:
+ aic_mac_ratio = self.safe_divide(aic_mac_ratio, length)
+ aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length)
+ if aic_mac_ratio is None or aic_mte2_ratio is None:
continue
- aic_mac_ratio = aic_mac_ratio / length
- aic_mte2_ratio = aic_mte2_ratio / length
bound = ""
optimization = 0.
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
@@ -296,10 +295,10 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
- if length == 0:
+ aic_fixpipe_ratio = self.safe_divide(aic_fixpipe_ratio, length)
+ aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length)
+ if aic_mte2_ratio is None or aic_fixpipe_ratio is None:
continue
- aic_fixpipe_ratio = aic_fixpipe_ratio / length
- aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75:
bound = "mte2_and_fixpipe_bound"
elif aic_mte2_ratio >= 0.8:
@@ -414,11 +413,11 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
- if length == 0:
+ aiv_vec_ratio = self.safe_divide(aiv_vec_ratio,length)
+ aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio,length)
+ aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio,length)
+ if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None:
continue
- aiv_vec_ratio = aiv_vec_ratio / length
- aiv_mte2_ratio = aiv_mte2_ratio / length
- aiv_mte2_ratio = aiv_mte2_ratio / length
if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9:
bound = "vec_mte2_mte3_bound"
elif aiv_mte2_ratio >= 0.7:
@@ -561,3 +560,10 @@ class AICorePerformanceChecker:
logger.warning("Skip %s checker because of not containing input datas", self._CHECKER)
return False
return True
+
+ @staticmethod
+ def safe_divide(numerator, denominator):
+ if denominator == 0:
+ logger.warning("Warning: Division by zero is not allowed.")
+ return None
+ return numerator / denominator
--
Gitee
From a601933b53899387cc8fb9bad30ea59669bafa52 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 10:34:16 +0800
Subject: [PATCH 62/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=99=A4=E9=9B=B6?=
=?UTF-8?q?=E6=A0=A1=E9=AA=8C=EF=BC=8C=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?=
=?UTF-8?q?=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 13 +++----
.../advisor/rules/cn/aicore_performance.yaml | 34 ++++++++++++++++++-
.../advisor/rules/en/aicore_performance.yaml | 34 ++++++++++++++++++-
3 files changed, 73 insertions(+), 8 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index dcd87f1c4..445b5f761 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -16,6 +16,7 @@ import logging
import os
from functools import reduce
+from debug.accuracy_tools.msprobe.core.common.utils import convert_tuple
from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
from profiler.advisor.result.result import OptimizeResult
@@ -318,10 +319,10 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
- if length == 0:
+ aiv_vec_ratio = self.safe_divide(aiv_vec_ratio, length)
+ aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length)
+ if aiv_vec_ratio is None or aic_mte2_ratio is None:
continue
- aiv_vec_ratio = aiv_vec_ratio / length
- aic_mte2_ratio = aic_mte2_ratio / length
if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75:
bound = "mte2_and_vec_bound"
elif aic_mte2_ratio >= 0.8:
@@ -413,9 +414,9 @@ class AICorePerformanceChecker:
length += 1
except ValueError:
continue
- aiv_vec_ratio = self.safe_divide(aiv_vec_ratio,length)
- aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio,length)
- aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio,length)
+ aiv_vec_ratio = self.safe_divide(aiv_vec_ratio, length)
+ aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio, length)
+ aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio, length)
if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None:
continue
if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9:
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index 8d44aaab2..382de6db5 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -12,4 +12,36 @@ fa_affinity_desc_type3: "D和S均不能被128整除"
suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子"
affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n"
bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n"
-optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n"
\ No newline at end of file
+optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n"
+
+cube_operators:
+ - target: aic_mac_ratio
+ bound: mac
+ threshold_value: 0.8
+ - target: aic_mte2_ratio
+ bound: mte2
+ threshold_value: 0.95
+
+fa_operators:
+ - target: aic_mte2_ratio
+ bound: mac
+ threshold_value: 0.8
+ - target: aic_fixpipe_ratio
+ bound: fixpipe
+ threshold_value: 0.75
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold_value: 0.75
+
+vector_operators:
+ - target: total
+ threshold_value: 0.9
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold_value: 0.7
+ - target: aiv_mte2_ratio
+ bound: mte2
+ threshold_value: 0.7
+ - target: aiv_mte3_ratio
+ bound: mte3
+ threshold_value: 0.7
\ No newline at end of file
diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml
index e85a919ab..cae3700b2 100644
--- a/profiler/advisor/rules/en/aicore_performance.yaml
+++ b/profiler/advisor/rules/en/aicore_performance.yaml
@@ -12,4 +12,36 @@ fa_affinity_desc_type3: "Neither D nor S is not divisible by 128"
suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space"
affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n"
bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n"
-optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n"
\ No newline at end of file
+optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n"
+
+cube_operators:
+ - target: aic_mac_ratio
+ bound: mac
+ threshold_value: 0.8
+ - target: aic_mte2_ratio
+ bound: mte2
+ threshold_value: 0.95
+
+fa_operators:
+ - target: aic_mte2_ratio
+ bound: mac
+ threshold_value: 0.8
+ - target: aic_fixpipe_ratio
+ bound: fixpipe
+ threshold_value: 0.75
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold_value: 0.75
+
+vector_operators:
+ - target: total
+ threshold_value: 0.9
+ - target: aiv_vec_ratio
+ bound: vec
+ threshold_value: 0.7
+ - target: aiv_mte2_ratio
+ bound: mte2
+ threshold_value: 0.7
+ - target: aiv_mte3_ratio
+ bound: mte3
+ threshold_value: 0.7
\ No newline at end of file
--
Gitee
From af7d34f13a7d02c230582b0c132a85605b8ca5df Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 11:27:38 +0800
Subject: [PATCH 63/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=A7=84=E5=88=99?=
=?UTF-8?q?=E8=AE=BE=E7=BD=AE=EF=BC=8C=E5=B0=86=E9=98=88=E5=80=BC=E6=94=BE?=
=?UTF-8?q?=E5=85=A5=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 102 ++++++++++++------
.../advisor/rules/cn/aicore_performance.yaml | 18 ++--
.../advisor/rules/en/aicore_performance.yaml | 19 ++--
3 files changed, 89 insertions(+), 50 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 445b5f761..00e35c84d 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -16,7 +16,6 @@ import logging
import os
from functools import reduce
-from debug.accuracy_tools.msprobe.core.common.utils import convert_tuple
from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
from profiler.advisor.result.result import OptimizeResult
@@ -75,6 +74,9 @@ class AICorePerformanceChecker:
self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion")
self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion")
self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion")
+ self._OPERATOR_RULES = {"cube_operators": self.aicore_rules.get("cube_operators"),
+ "fa_operators": self.aicore_rules.get("fa_operators"),
+ "vector_operators": self.aicore_rules.get("vector_operators")}
def data_filter(self, profiling_dataset: ProfilingDataset):
if not self.check_task_list(profiling_dataset):
@@ -207,14 +209,22 @@ class AICorePerformanceChecker:
continue
bound = ""
optimization = 0.
- if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
- bound = "mac_and_mte2_bound"
- elif aic_mac_ratio >= 0.8:
- bound = "mac_bound"
- elif aic_mte2_ratio >= 0.95:
- bound = "mte2_bound"
+ aic_mac_ratio_rule, aic_mte2_ratio_rule = None, None
+ for operator_rule in self._OPERATOR_RULES["cube_operators"]:
+ if operator_rule["target"] == "aic_mac_ratio":
+ aic_mac_ratio_rule = operator_rule
+ elif operator_rule["target"] == "aic_mte2_ratio":
+ aic_mte2_ratio_rule = operator_rule
+ if (aic_mac_ratio >= aic_mac_ratio_rule["threshold"]
+ and aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]):
+ bound = aic_mac_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound"
+ elif aic_mac_ratio >= aic_mte2_ratio_rule["threshold"]:
+ bound = aic_mac_ratio_rule["bound"]
+ elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]:
+ bound = aic_mte2_ratio_rule["bound"]
else:
- optimization = round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)
+ optimization = max(aic_mac_ratio["threshold"] - aic_mac_ratio,
+ aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio)
if bound:
bound_queue.append({
"op_name": op,
@@ -227,7 +237,7 @@ class AICorePerformanceChecker:
"op_name": op,
"shape": shape.split("-")[0],
"dtype": dtype,
- "optimization": optimization})
+ "optimization": round(optimization * 100, 2)})
return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5],
sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5],
sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]]
@@ -300,14 +310,22 @@ class AICorePerformanceChecker:
aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length)
if aic_mte2_ratio is None or aic_fixpipe_ratio is None:
continue
- if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75:
- bound = "mte2_and_fixpipe_bound"
- elif aic_mte2_ratio >= 0.8:
- bound = "mte2_bound"
- elif aiv_vec_ratio >= 0.75:
- bound = "vec_bound"
+ aic_fixpipe_ratio_rule, aic_mte2_ratio_rule = None, None
+ for rule in self._OPERATOR_RULES["fa_operators"]:
+ if rule["target"] == "aic_fixpipe_ratio":
+ aic_fixpipe_ratio_rule = rule
+ elif rule["target"] == "aic_mte2_ratio":
+ aic_mte2_ratio_rule = rule
+ if (aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"] and
+ aic_fixpipe_ratio >= aic_fixpipe_ratio_rule["threshold"]):
+ bound = aic_fixpipe_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound"
+ elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]:
+ bound = aic_mte2_ratio_rule["bound"]
+ elif aic_fixpipe_ratio >= aic_fixpipe_ratio_rule["threshold"]:
+ bound = aic_fixpipe_ratio_rule["bound"]
else:
- optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio)
+ optimization = max(aic_fixpipe_ratio_rule["threshold"] - aic_fixpipe_ratio,
+ aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio)
else:
for operator in fa_list:
if (operator.op_name == op and
@@ -323,14 +341,22 @@ class AICorePerformanceChecker:
aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length)
if aiv_vec_ratio is None or aic_mte2_ratio is None:
continue
- if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75:
- bound = "mte2_and_vec_bound"
- elif aic_mte2_ratio >= 0.8:
- bound = "mte2_bound"
- elif aiv_vec_ratio >= 0.75:
- bound = "vec_bound"
+ aiv_vec_ratio_rule, aic_mte2_ratio_rule = None, None
+ for rule in self._OPERATOR_RULES["fa_operators"]:
+ if rule["target"] == "aiv_vec_ratio":
+ aiv_vec_ratio_rule = rule
+ elif rule["target"] == "aic_mte2_ratio":
+ aic_mte2_ratio_rule = rule
+ if (aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]
+ and aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]):
+ bound = aic_mte2_ratio_rule["bound"] + "_and_" + aiv_vec_ratio_rule["bound"] + "_bound"
+ elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]:
+ bound = aic_mte2_ratio_rule["bound"]
+ elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]:
+ bound = aiv_vec_ratio_rule["bound"]
else:
- optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio)
+ optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio,
+ aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio)
if bound:
bound_queue.append({
"op_name": op,
@@ -419,16 +445,28 @@ class AICorePerformanceChecker:
aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio, length)
if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None:
continue
- if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9:
- bound = "vec_mte2_mte3_bound"
- elif aiv_mte2_ratio >= 0.7:
- bound = "mte2_bound"
- elif aiv_mte3_ratio >= 0.7:
- bound = "mte3_bound"
- elif aiv_vec_ratio >= 0.7:
- bound = "vec_bound"
+ aiv_vec_ratio_rule, aiv_mte2_ratio_rule, aiv_mte3_ratio_rule, total_rule = None, None, None, None
+ for operator_rule in self._OPERATOR_RULES["vector_operators"]:
+ if operator_rule["target"] == "aiv_vec_ratio":
+ aiv_vec_ratio_rule = operator_rule
+ elif operator_rule["target"] == "aic_mte2_ratio":
+ aiv_mte2_ratio_rule = operator_rule
+ elif operator_rule["target"] == "aic_mte3_ratio":
+ aiv_mte3_ratio_rule = operator_rule
+ elif operator_rule["target"] == "total":
+ total_rule = operator_rule
+ if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= total_rule["threshold"]:
+ bound = total_rule["bound"]
+ elif aiv_mte2_ratio >= aiv_mte2_ratio_rule["threshold"]:
+ bound = aiv_mte2_ratio_rule["bound"]
+ elif aiv_mte3_ratio >= aiv_mte3_ratio_rule["threshold"]:
+ bound = aiv_mte3_ratio_rule["bound"]
+ elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]:
+ bound = aiv_vec_ratio_rule["bound"]
else:
- optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio)
+ optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio,
+ aiv_mte2_ratio_rule["threshold"] - aiv_mte2_ratio,
+ aiv_mte3_ratio_rule["threshold"] - aiv_mte3_ratio)
if bound:
bound_queue.append({
"op_name": op_name,
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index 382de6db5..f6fd914ac 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -17,31 +17,31 @@ optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似
cube_operators:
- target: aic_mac_ratio
bound: mac
- threshold_value: 0.8
+ threshold: 0.8
- target: aic_mte2_ratio
bound: mte2
- threshold_value: 0.95
+ threshold: 0.95
fa_operators:
- target: aic_mte2_ratio
bound: mac
- threshold_value: 0.8
+ threshold: 0.8
- target: aic_fixpipe_ratio
bound: fixpipe
- threshold_value: 0.75
+ threshold: 0.75
- target: aiv_vec_ratio
bound: vec
- threshold_value: 0.75
+ threshold: 0.75
vector_operators:
- target: total
- threshold_value: 0.9
+ threshold: 0.9
- target: aiv_vec_ratio
bound: vec
- threshold_value: 0.7
+ threshold: 0.7
- target: aiv_mte2_ratio
bound: mte2
- threshold_value: 0.7
+ threshold: 0.7
- target: aiv_mte3_ratio
bound: mte3
- threshold_value: 0.7
\ No newline at end of file
+ threshold: 0.7
\ No newline at end of file
diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml
index cae3700b2..b1e5e4701 100644
--- a/profiler/advisor/rules/en/aicore_performance.yaml
+++ b/profiler/advisor/rules/en/aicore_performance.yaml
@@ -17,31 +17,32 @@ optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect the
cube_operators:
- target: aic_mac_ratio
bound: mac
- threshold_value: 0.8
+ threshold: 0.8
- target: aic_mte2_ratio
bound: mte2
- threshold_value: 0.95
+ threshold: 0.95
fa_operators:
- target: aic_mte2_ratio
bound: mac
- threshold_value: 0.8
+ threshold: 0.8
- target: aic_fixpipe_ratio
bound: fixpipe
- threshold_value: 0.75
+ threshold: 0.75
- target: aiv_vec_ratio
bound: vec
- threshold_value: 0.75
+ threshold: 0.75
vector_operators:
- target: total
- threshold_value: 0.9
+ bound: vec_mte2_mte3
+ threshold: 0.9
- target: aiv_vec_ratio
bound: vec
- threshold_value: 0.7
+ threshold: 0.7
- target: aiv_mte2_ratio
bound: mte2
- threshold_value: 0.7
+ threshold: 0.7
- target: aiv_mte3_ratio
bound: mte3
- threshold_value: 0.7
\ No newline at end of file
+ threshold: 0.7
\ No newline at end of file
--
Gitee
From f32c30bda3489fa385fdf246da4ffc2a2dd7b289 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 11:40:36 +0800
Subject: [PATCH 64/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 00e35c84d..4d51da450 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -176,7 +176,7 @@ class AICorePerformanceChecker:
for shape in cube_dict[op]:
dtype = None
shape_duration = 0.
- affinity_flag = self.check_affinity(shape)
+ affinity_flag = self._check_cube_inner_axis(shape)
if not affinity_flag:
for operator in operator_list:
if (operator.op_name == op and
@@ -279,7 +279,7 @@ class AICorePerformanceChecker:
# 不亲和算子筛选
for op in fa_dict:
for shape in fa_dict[op]:
- affinity_flag, dtype, shape_duration, suggestion = self.method_name(fa_list, op, shape)
+ affinity_flag, dtype, shape_duration, suggestion = self._check_fa_inner_axis(fa_list, op, shape)
if affinity_flag:
# 不亲和算子 计算耗时,加入affinity_queue
affinity_queue.append({
--
Gitee
From 89a5eeecb71f391249c5075bfaa2f2f32fdb1aba Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 11:47:26 +0800
Subject: [PATCH 65/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 4d51da450..eaf6340cd 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -223,7 +223,7 @@ class AICorePerformanceChecker:
elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]:
bound = aic_mte2_ratio_rule["bound"]
else:
- optimization = max(aic_mac_ratio["threshold"] - aic_mac_ratio,
+ optimization = max(aic_mac_ratio_rule["threshold"] - aic_mac_ratio,
aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio)
if bound:
bound_queue.append({
--
Gitee
From f5d71f751ebe5fdd67431e213f3ba6e67c088b46 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 11:49:26 +0800
Subject: [PATCH 66/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
profiler/advisor/rules/cn/aicore_performance.yaml | 1 +
1 file changed, 1 insertion(+)
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index f6fd914ac..3f60747b2 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -35,6 +35,7 @@ fa_operators:
vector_operators:
- target: total
+ bound: vec_mte2_mte3
threshold: 0.9
- target: aiv_vec_ratio
bound: vec
--
Gitee
From ffe03877b86e44b239817c663162b58ed2c27148 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Wed, 12 Feb 2025 11:59:39 +0800
Subject: [PATCH 67/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index eaf6340cd..b58f734c6 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -449,9 +449,9 @@ class AICorePerformanceChecker:
for operator_rule in self._OPERATOR_RULES["vector_operators"]:
if operator_rule["target"] == "aiv_vec_ratio":
aiv_vec_ratio_rule = operator_rule
- elif operator_rule["target"] == "aic_mte2_ratio":
+ elif operator_rule["target"] == "aiv_mte2_ratio":
aiv_mte2_ratio_rule = operator_rule
- elif operator_rule["target"] == "aic_mte3_ratio":
+ elif operator_rule["target"] == "aiv_mte3_ratio":
aiv_mte3_ratio_rule = operator_rule
elif operator_rule["target"] == "total":
total_rule = operator_rule
--
Gitee
From f6c52443206feb83f8b2744afbdcdd124e947778 Mon Sep 17 00:00:00 2001
From: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
Date: Wed, 12 Feb 2025 06:28:49 +0000
Subject: [PATCH 68/72] update
profiler/test/ut/advisor/compute_advice/data/kernel_details.csv.
Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com>
---
.../ut/advisor/compute_advice/data/kernel_details.csv | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
index 020178358..f22cb8008 100644
--- a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
+++ b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv
@@ -15,10 +15,10 @@ Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Tim
19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339
19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377
19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788
-19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.78,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141
-19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.76,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294
-19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.779,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143
-19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.783,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238
+19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.525,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141
+19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.513,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294
+19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.523,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143
+19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.531,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238
19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788
19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0
19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0
@@ -27,4 +27,4 @@ Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Tim
19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0
19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 ",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0
19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 ",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0
-19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0
\ No newline at end of file
+19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0
--
Gitee
From 97ecae67dc91b58b00c8647b677873edade33b89 Mon Sep 17 00:00:00 2001
From: kiritorl
Date: Wed, 12 Feb 2025 22:49:58 +0800
Subject: [PATCH 69/72] =?UTF-8?q?=E6=8F=90=E5=8F=96make=5Frecord=E6=96=B9?=
=?UTF-8?q?=E6=B3=95=E4=B8=AD=E7=9A=84=E5=8A=9F=E8=83=BD=E9=A1=B9=EF=BC=8C?=
=?UTF-8?q?=E7=BC=A9=E5=87=8F=E6=96=B9=E6=B3=95=E5=86=85=E9=95=BF=E5=BA=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance_checker.py | 102 +++++++-----------
1 file changed, 37 insertions(+), 65 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index b58f734c6..20fac2d92 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -494,6 +494,39 @@ class AICorePerformanceChecker:
vector_list.extend([operator])
return vector_list
+ def draw_record(self, op_type: str, result: OptimizeResult):
+ suggestion_keys = ['opti', 'bound', 'affinity']
+ desc = dict.fromkeys(suggestion_keys, "")
+ problem_map = {
+ 'cube': self._CUBE_PROBLEM,
+ 'fa': self._FA_PROBLEM,
+ 'vector': self._VECTOR_PROBLEM
+ }
+ optimization_item = OptimizeItem(problem_map[op_type], self.desc, [self.suggestion])
+ result.add(OptimizeRecord(optimization_item))
+ headers = [
+ "Type",
+ "Description and Suggestion",
+ ]
+ result.add_detail(problem_map[op_type], headers=headers)
+ for opti_issue in self.result[op_type][0]:
+ opti_sugg = self._OPTI_SUGGESTION.format(**opti_issue)
+ desc["opti"] += opti_sugg
+ if desc["opti"]:
+ result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, desc["opti"]])
+ for bound_issue in self.result[op_type][1]:
+ bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue)
+ desc["bound"] += bound_sugg
+ if desc["bound"]:
+ result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]])
+ if op_type == "vector": # vector 类型没有亲和性建议
+ return
+ for affinity_issue in self.result[op_type][2]:
+ affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue)
+ desc["affinity"] += affinity_sugg
+ if desc["affinity"]:
+ result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]])
+
def make_record(self, result: OptimizeResult):
"""
make record for what and how to optimize
@@ -501,76 +534,15 @@ class AICorePerformanceChecker:
if not self.ai_core_performance_issues:
return self.ai_core_performance_issues
- suggestion_keys = ['opti', 'bound', 'affinity']
- cube_desc = dict.fromkeys(suggestion_keys, "")
- fa_desc = dict.fromkeys(suggestion_keys, "")
- vector_desc = dict.fromkeys(suggestion_keys, "")
if any(self.result["cube"]):
- optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion])
- result.add(OptimizeRecord(optimization_item))
- headers = [
- "Type",
- "Description and Suggestion",
- ]
- result.add_detail(self._CUBE_PROBLEM, headers=headers)
- for cube_opti_issue in self.result["cube"][0]:
- opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue)
- cube_desc["opti"] += opti_sugg
- if cube_desc["opti"]:
- result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]])
- for cube_bound_issue in self.result["cube"][1]:
- bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue)
- cube_desc["bound"] += bound_sugg
- if cube_desc["bound"]:
- result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]])
- for cube_affinity_issue in self.result["cube"][2]:
- affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue)
- cube_desc["affinity"] += affinity_sugg
- if cube_desc["affinity"]:
- result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]])
+ self.draw_record("cube", result)
if any(self.result["fa"]):
- optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion])
- result.add(OptimizeRecord(optimization_item))
- headers = [
- "Type",
- "Description and Suggestion",
- ]
- result.add_detail(self._FA_PROBLEM, headers=headers)
- for fa_opti_issue in self.result["fa"][0]:
- opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue)
- fa_desc["opti"] += opti_sugg
- if fa_desc["opti"]:
- result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]])
- for fa_bound_issue in self.result["fa"][1]:
- bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue)
- fa_desc["bound"] += bound_sugg
- if fa_desc["bound"]:
- result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]])
- for fa_affinity_issue in self.result["fa"][2]:
- affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue)
- fa_desc["affinity"] += affinity_sugg
- if fa_desc["affinity"]:
- result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]])
+ self.draw_record("fa", result)
if any(self.result["vector"]):
- optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion])
- result.add(OptimizeRecord(optimization_item))
- headers = [
- "Type",
- "Description and Suggestion",
- ]
- result.add_detail(self._VECTOR_PROBLEM, headers=headers)
- for vector_opti_issue in self.result["vector"][0]:
- opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue)
- vector_desc["opti"] += opti_sugg
- if vector_desc["opti"]:
- result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]])
- for vector_bound_issue in self.result["vector"][1]:
- bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue)
- vector_desc["bound"] += bound_sugg
- if vector_desc["bound"]:
- result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]])
+ self.draw_record("vector", result)
+
return True
def make_render(self, html_render, add_render_list=True, **kwargs):
--
Gitee
From 7e5e5936afb2e1d972e1896ab047cebff1b6cc64 Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Thu, 13 Feb 2025 15:47:42 +0800
Subject: [PATCH 70/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index 20fac2d92..e947b140d 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -518,14 +518,14 @@ class AICorePerformanceChecker:
bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue)
desc["bound"] += bound_sugg
if desc["bound"]:
- result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]])
+ result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]])
if op_type == "vector": # vector 类型没有亲和性建议
return
for affinity_issue in self.result[op_type][2]:
affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue)
desc["affinity"] += affinity_sugg
if desc["affinity"]:
- result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]])
+ result.add_detail(self._VECTOR_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]])
def make_record(self, result: OptimizeResult):
"""
--
Gitee
From 863b681c903a3299c024a7ffc5ec0b6ebab65dbd Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Fri, 14 Feb 2025 10:22:52 +0800
Subject: [PATCH 71/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../ai_core_performance/ai_core_performance_checker.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index e947b140d..e3c3defc6 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -513,19 +513,19 @@ class AICorePerformanceChecker:
opti_sugg = self._OPTI_SUGGESTION.format(**opti_issue)
desc["opti"] += opti_sugg
if desc["opti"]:
- result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, desc["opti"]])
+ result.add_detail(problem_map[op_type], detail=[self._OPTI_DESC, desc["opti"]])
for bound_issue in self.result[op_type][1]:
bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue)
desc["bound"] += bound_sugg
if desc["bound"]:
- result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]])
+ result.add_detail(problem_map[op_type], detail=[self._BOUND_DESC, desc["bound"]])
if op_type == "vector": # vector 类型没有亲和性建议
return
for affinity_issue in self.result[op_type][2]:
affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue)
desc["affinity"] += affinity_sugg
if desc["affinity"]:
- result.add_detail(self._VECTOR_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]])
+ result.add_detail(problem_map[op_type], detail=[self._AFFINITY_DESC, desc["affinity"]])
def make_record(self, result: OptimizeResult):
"""
--
Gitee
From e418fac51c15fbd29832ce6658d1e1efb4d027ff Mon Sep 17 00:00:00 2001
From: xubanxia <904691018@qq.com>
Date: Fri, 14 Feb 2025 15:51:10 +0800
Subject: [PATCH 72/72] =?UTF-8?q?=E7=9B=AE=E5=BD=95=E8=BF=81=E7=A7=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../computation/ai_core_performance/__init__.py | 0
.../ai_core_performance_analyzer.py | 12 ++++++------
.../ai_core_performance_checker.py | 0
.../msprof_analyze/advisor/interface/interface.py | 2 +-
.../test_ai_core_performance_advice.py | 4 ++--
5 files changed, 9 insertions(+), 9 deletions(-)
rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/__init__.py (100%)
rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py (78%)
rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py (100%)
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py
similarity index 100%
rename from profiler/advisor/analyzer/computation/ai_core_performance/__init__.py
rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
similarity index 78%
rename from profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
index 89b6be779..a648fb074 100644
--- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
+++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py
@@ -14,13 +14,13 @@
# limitations under the License.
import logging
-from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer
-from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \
+from profiler.msprof_analyze.advisor.analyzer.base_analyzer import BaseAnalyzer
+from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \
AICorePerformanceChecker
-from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
-from profiler.advisor.result.result import OptimizeResult
-from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor
-from profiler.advisor.display.html.render import HTMLRender
+from profiler.msprof_analyze.advisor.dataset.profiling.profiling_dataset import ProfilingDataset
+from profiler.msprof_analyze.advisor.result.result import OptimizeResult
+from profiler.msprof_analyze.advisor.display.html.priority_background_color import PriorityBackgroundColor
+from profiler.msprof_analyze.advisor.display.html.render import HTMLRender
logger = logging.getLogger()
diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
similarity index 100%
rename from profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
diff --git a/profiler/msprof_analyze/advisor/interface/interface.py b/profiler/msprof_analyze/advisor/interface/interface.py
index 30d9d0eef..cce2de625 100644
--- a/profiler/msprof_analyze/advisor/interface/interface.py
+++ b/profiler/msprof_analyze/advisor/interface/interface.py
@@ -44,7 +44,7 @@ from msprof_analyze.advisor.analyzer.schedule.gc.gc_analyzer import GcAnalyzer
from msprof_analyze.advisor.analyzer.schedule.conjectured_gc.conjectured_gc_analyzer import ConjecturedGcAnalyzer
from msprof_analyze.advisor.analyzer.comparison.comparison_analyzer import ComparisonAnalyzer
from msprof_analyze.advisor.analyzer.schedule.fusible_ops.fusible_operator_analyzer import FusibleOperatorAnalyzer
-from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \
+from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \
AICorePerformanceAnalyzer
logger = logging.getLogger()
diff --git a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
index 61ae35d13..e45f6ea3b 100644
--- a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
+++ b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py
@@ -4,8 +4,8 @@ import shutil
import stat
import unittest
-from profiler.advisor.interface.interface import Interface
-from profiler.advisor.common.analyzer_scopes import SupportedScopes
+from profiler.msprof_analyze.advisor.interface.interface import Interface
+from profiler.msprof_analyze.advisor.common.analyzer_scopes import SupportedScopes
class TestAICorePerformanceAdvice(unittest.TestCase):
--
Gitee