From 05c57de7c77a8b77e900edaacdd39f491334552a Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 10 Jan 2025 17:10:34 +0800 Subject: [PATCH 01/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/analyzer/analyzer_controller.py | 33 ++-- profiler/advisor/analyzer/base_analyzer.py | 8 +- .../ai_core_performance/__init__.py | 0 .../ai_core_performance_analyzer.py | 57 +++++++ .../ai_core_performance_checker.py | 141 ++++++++++++++++++ .../computation/profiling_analyzer.py | 5 + profiler/advisor/common/analyzer_scopes.py | 1 + profiler/advisor/interface/interface.py | 5 +- profiler/cli/analyze_cli.py | 6 + profiler/cli/entrance.py | 7 + 10 files changed, 240 insertions(+), 23 deletions(-) create mode 100644 profiler/advisor/analyzer/computation/ai_core_performance/__init__.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py index 1a5a28b63..e8a62c69d 100644 --- a/profiler/advisor/analyzer/analyzer_controller.py +++ b/profiler/advisor/analyzer/analyzer_controller.py @@ -186,7 +186,6 @@ class AnalyzerController: return True - @staticmethod def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data, headers, dimension, get_max=False): @@ -256,10 +255,10 @@ class AnalyzerController: return dimensions, AsyncParams.user_total_params def do_analysis(self, dimensions, **kwargs): - pid = os.getpid() + pid = os.getpid() # 获取当前进程的pid resp = {"id": pid} - self.args_manager = AdditionalArgsManager() - self.args_manager.init(kwargs) + self.args_manager = AdditionalArgsManager() # 初始化参数管理器 + self.args_manager.init(kwargs) # 初始化参数管理器 output_path = kwargs.get("output_path") AnalyzerController._set_analysis_process_priority(pid) @@ -278,9 +277,9 @@ class AnalyzerController: PathManager.make_dir_safety(output_path) Config().set_config("_work_path", output_path) - Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") + Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") # 设置日志路径 - self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) + self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) # 执行分析 except Exception as e: self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.INNER_ERROR_STATUS_CODE, status=AsyncAnalysisStatus.FAILED, error_msg=str(e)) @@ -612,8 +611,8 @@ class AnalyzerController: return job_list def _do_analysis(self, dimensions, pid=0, async_resp=None, **kwargs): - self.dimensions = dimensions - self.kwargs = kwargs + self.dimensions = dimensions # 设置分析维度 + self.kwargs = kwargs # 设置分析参数 result_list = [] profiling_path = PathManager.get_realpath(self.kwargs.get("profiling_path")) benchmark_profiling_path = self.kwargs.get("benchmark_profiling_path") @@ -622,7 +621,7 @@ class AnalyzerController: benchmark_profiling_path = PathManager.get_realpath(benchmark_profiling_path) PathManager.check_path_owner_consistent([benchmark_profiling_path]) - if not self._check_profiling_path_valid(profiling_path): + if not self._check_profiling_path_valid(profiling_path): # 检查profiling路径是否有效 error_msg = f"Got invalid argument '-d/--profiling_path' {profiling_path}, skip analysis" 
self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE, @@ -630,8 +629,8 @@ class AnalyzerController: logger.error(error_msg) return - - if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path): + if benchmark_profiling_path and not self._check_profiling_path_valid( + benchmark_profiling_path): # 检查benchmark_profiling路径是否有效 error_msg = (f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, " f"skip analysis") self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, @@ -640,7 +639,7 @@ class AnalyzerController: logger.error(error_msg) return - self._is_cluster = self._is_cluster_profiling(profiling_path) + self._is_cluster = self._is_cluster_profiling(profiling_path) # 判断是否是集群profiling if benchmark_profiling_path: # 构建benchmark profiling的map,用于根据rank获取profiling路径,否则无法进行比对 is_benchmark_cluster = self._is_cluster_profiling(benchmark_profiling_path) @@ -655,16 +654,16 @@ class AnalyzerController: return if not self._is_cluster: - job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) + job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) # 单卡分析 else: self.slow_rank_analyzer = SlowRankAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) self.slow_link_analyzer = SlowLinkAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) - job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) + job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) # 集群分析 - for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): + for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): # dimension: 分析维度,scope: 分析器 result_list.append( - interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, - **kwargs) + # 获取分析结果 + interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, **kwargs) ) for result in result_list[::-1]: diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index 0391eb88a..adf82ab8a 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -105,7 +105,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def get_priority(self, max_mem_op_dur): pass - def identify_profiling_type(self, profiling_type_list): + def identify_profiling_type(self, profiling_type_list): # 确定分析类型 profiling_type = None if self.collection_path.endswith(ASCEND_MS): profiling_type = [elem for elem in profiling_type_list if Constant.MINDSPORE in elem][0] @@ -134,7 +134,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): profiling_type = profiling_type_list[0] return profiling_type - def identify_profiling_version(self): + def identify_profiling_version(self): # 确定分析版本 profiling_version = "" if Constant.MINDSPORE in self.profiling_type: ascend_dirs = [] @@ -166,7 +166,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): self.__class__.__name__, self.kwargs.get(Constant.TORCH_VERSION), profiling_version) return profiling_version - def init_dataset_list(self) -> None: + def init_dataset_list(self) -> None: # 初始化数据集列表 dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) @@ -184,7 +184,7 @@ class BaseAnalyzer(VersionControl, 
metaclass=ABCMeta): self.dataset_list[key] = [] self.dataset_list[key].append(dataset) - def get_priority_by_time_ratio(self, dur, step_dur): + def get_priority_by_time_ratio(self, dur, step_dur): # 根据时间比例确定优先级 time_ratio = safe_division(dur, step_dur) if time_ratio >= self.ANALYZER_HIGH_PRIORITY_TIME_RATIO: return PriorityBackgroundColor.high diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py b/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py new file mode 100644 index 000000000..f6f7e4f43 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker +from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset +from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICorePerformanceAnalyzer(BaseAnalyzer): + dataset_cls_list = [ComputationAnalysisDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = ComputationAnalysisDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + self.html = None + info = DeviceInfoParser(collection_path) + info.parse_data() + + @BaseAnalyzer.check_data((ComputationAnalysisDataset.get_key(),)) + def optimize(self, **kwargs): + if not Config().get_config("aic_frequency"): + logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") + return self.result + + add_render_list = kwargs.get("add_render_list", True) + ai_core_freq_checker = AICoreFreqChecker() + ai_core_freq_checker.check_ai_core_freq(self.dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) + ai_core_freq_checker.make_record(self.result) + self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), + rank=kwargs.get("rank")) + return self.result + + def get_priority(self, max_mem_op_dur=None): + return PriorityBackgroundColor.high \ No newline at end of file diff --git 
a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py new file mode 100644 index 000000000..5a94d131b --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -0,0 +1,141 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.utils.utils import convert_to_float +from profiler.prof_common.additional_args_manager import AdditionalArgsManager + +logger = logging.getLogger() + + +class AICorePerformanceChecker: + """ + operator performance checker + """ + # DECREASE_FREQ_RATIO = 0.05 + # SHOW_TOPK_OPS = 10 + # TOTAL_DURATION_INDEX = 2 + # DECREASE_FREQ_RATIO_INDEX = 3 + _ITEMS = [ + "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", + "output_data_types", "output_formats" + ] + + def __init__(self): + + self.ai_core_freq_issues = False + self.desc = "" + self.suggestions = "" + self.decrease_freq_ops = [] + self.headers = [] + self.op_freq = None + self.rank = None + self.stage = None + + def check_ai_core_freq(self, event_dataset: ComputationAnalysisDataset, rank=None, stage=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + logger.debug("Skip slow ai core frequency checker, " + "because no ai core frequency were recorded in trace_view.json") + return + + self.rank = rank + self.stage = stage + self.op_freq = event_dataset.op_freq + for op_name, op_info in self.op_freq.items(): + freq_list = op_info.get("freq_list", []) + if not freq_list: + continue + + op_count = op_info.get("count", 0) + op_total_duration = round(op_info.get("dur", 0), 2) + max_freq = convert_to_float(Config().get_config("aic_frequency")) + + if max_freq == 0: + raise ValueError("max_freq cannot be zero.") + decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) + if decrease_freq_ratio >= Config().get_config("frequency_threshold"): + self.ai_core_freq_issues = True + self.decrease_freq_ops.append([op_name, op_count, op_total_duration, + f"{round(decrease_freq_ratio, 4):.2%}", + round(sum(freq_list) / len(freq_list), 2), + max(freq_list), min(freq_list)]) + + if self.decrease_freq_ops: + # 按算子总耗时和降频比率 降序排列 + self.decrease_freq_ops.sort( + key=lambda x: (x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), reverse=True) + if not self.ai_core_freq_issues: + return + + def make_record(self, 
result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.ai_core_freq_issues: + return self.ai_core_freq_issues + + prompt_class = BasePrompt.get_prompt_class(self.__class__.__name__) + + problem = prompt_class.PROBLEM + if self.rank is not None: + problem += prompt_class.RANK_ID.format(self.rank) + + self.desc = prompt_class.DESCRIPTION.format(len(self.decrease_freq_ops), self.DECREASE_FREQ_RATIO) + if self.rank: + self.desc = prompt_class.RANK_DESCRIPTION.format(self.rank) + self.desc.lower() + + optimization_item = OptimizeItem(problem, self.desc, [prompt_class.SUGGESTION]) + result.add(OptimizeRecord(optimization_item)) + + self.headers = [ + "Operator name", + "Count", + "Total duration(us)", + "AI CORE frequency decreased ratio", + "Average frequency", + "Max frequency", + "Min frequency", + ] + result.add_detail(problem, headers=self.headers) + + for row in self.decrease_freq_ops: + result.add_detail(problem, detail=row) + return True + + def make_render(self, html_render, add_render_list=True, **kwargs): + if not self.ai_core_freq_issues: + return self.ai_core_freq_issues + + priority = kwargs.get("priority") + if self.SHOW_TOPK_OPS: + self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." + return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_frequency.html", + desc=self.desc, + suggestion=self.suggestions, + headers=self.headers, + data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + add_render_list=add_render_list, + priority_background_color=priority, + rank=kwargs.get("rank")) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index ccf671139..04f889854 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -116,3 +116,8 @@ class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = AicpuChecker(self.cann_version) + +class AicpuPerformanceAnalyzer(ProfilingAnalyzer): + def __init__(self, collection_path, **kwargs) -> None: + super().__init__(collection_path, **kwargs) + pass diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 2cad1a3ce..5dd8d6d35 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -40,3 +40,4 @@ class SupportedScopes: GC_ANALYSIS = "gc_analysis" CONJECTURED_GC_ANALYSIS = "conjectured_analysis" COMPARISON = "comparison" + OPERATOR_PERFORMANCE_ANALYSIS = "operator_performance_analysis" diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 7b9cb00fd..fee7203c3 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -25,7 +25,7 @@ sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os. 
from profiler.advisor.utils.utils import Timer from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.profiling_analyzer import AicpuAnalyzer, BlockDimAnalyzer, \ - DynamicShapeAnalyzer, OperatorBoundAnalyzer + DynamicShapeAnalyzer, OperatorBoundAnalyzer, AicpuPerformanceAnalyzer from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.analyzer.graph_fusion.graph_fusion_analyzer import FusionOPAnalyzer from profiler.advisor.common.analyzer_scopes import SupportedScopes @@ -76,7 +76,8 @@ class Interface: SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer, SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, SupportedScopes.GRAPH: FusionOPAnalyzer, - SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer + SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer, + SupportedScopes.OPERATOR_PERFORMANCE_ANALYSIS: AicpuPerformanceAnalyzer }), COMMUNICATION: OrderedDict({SupportedScopes.PACKET: PacketAnalyzer, SupportedScopes.COMMUNICATION_RETRANSMISSION_DETECTION: RDMARetransmissionAnalyzer, diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 9453de6ff..80441c274 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -145,6 +145,12 @@ def analyze_schedule(**kwargs) -> None: required=False, default="cn", help="Language of the profiling advisor.") +@click.option("-p", + "--performance", + metavar="", + required=False, + default=False, + help="Indicates whether to analyze operator performance.") @debug_option def analyze_computation(**kwargs) -> None: try: diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index c6d72837b..079792ea9 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -66,3 +66,10 @@ msprof_analyze_cli.add_command(compare_cli, name="compare") msprof_analyze_cli.add_command(cluster_cli, name="cluster") msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") +if __name__ == "__main__": + msprof_analyze_cli.main( + [ + "analyze","all","-d", + r"D:\da","-l","cn" + ] + ) -- Gitee From 53ee3c6032d3e228e5fd5d69acb63bb53c275446 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 13 Jan 2025 16:35:28 +0800 Subject: [PATCH 02/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 5 +- profiler/advisor/common/analyzer_scopes.py | 2 +- profiler/advisor/interface/interface.py | 6 +- .../advisor/rules/cn/aicore_performance.yaml | 109 ++++++++++++++++++ profiler/cli/entrance.py | 2 +- 5 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 profiler/advisor/rules/cn/aicore_performance.yaml diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index f6f7e4f43..6992845af 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -15,6 +15,7 @@ import logging from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import 
AICoreFreqChecker from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor @@ -27,11 +28,11 @@ logger = logging.getLogger() class AICorePerformanceAnalyzer(BaseAnalyzer): - dataset_cls_list = [ComputationAnalysisDataset] + dataset_cls_list = [ProfilingDataset] def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: super().__init__(collection_path, n_processes, **kwargs) - key = ComputationAnalysisDataset.get_key() + key = ProfilingDataset.get_key() self.dataset = self.get_first_data_by_key(self.dataset_list, key) self.result = OptimizeResult() self.html_render = HTMLRender() diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 5dd8d6d35..40a8d99bc 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -40,4 +40,4 @@ class SupportedScopes: GC_ANALYSIS = "gc_analysis" CONJECTURED_GC_ANALYSIS = "conjectured_analysis" COMPARISON = "comparison" - OPERATOR_PERFORMANCE_ANALYSIS = "operator_performance_analysis" + AICORE_PERFORMANCE_ANALYSIS = "ai_core_performance_analysis" diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index fee7203c3..ebcf56806 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -25,7 +25,7 @@ sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os. from profiler.advisor.utils.utils import Timer from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.profiling_analyzer import AicpuAnalyzer, BlockDimAnalyzer, \ - DynamicShapeAnalyzer, OperatorBoundAnalyzer, AicpuPerformanceAnalyzer + DynamicShapeAnalyzer, OperatorBoundAnalyzer from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.analyzer.graph_fusion.graph_fusion_analyzer import FusionOPAnalyzer from profiler.advisor.common.analyzer_scopes import SupportedScopes @@ -47,6 +47,8 @@ from profiler.advisor.analyzer.communication.alignment.byte_alignment_analyzer i from profiler.advisor.analyzer.schedule.gc.gc_analyzer import GcAnalyzer from profiler.advisor.analyzer.schedule.conjectured_gc.conjectured_gc_analyzer import ConjecturedGcAnalyzer from profiler.advisor.analyzer.comparison.comparison_analyzer import ComparisonAnalyzer +from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \ + AICorePerformanceAnalyzer logger = logging.getLogger() @@ -77,7 +79,7 @@ class Interface: SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, SupportedScopes.GRAPH: FusionOPAnalyzer, SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer, - SupportedScopes.OPERATOR_PERFORMANCE_ANALYSIS: AicpuPerformanceAnalyzer + SupportedScopes.AICORE_PERFORMANCE_ANALYSIS: AICorePerformanceAnalyzer }), COMMUNICATION: OrderedDict({SupportedScopes.PACKET: PacketAnalyzer, SupportedScopes.COMMUNICATION_RETRANSMISSION_DETECTION: RDMARetransmissionAnalyzer, diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml new file mode 100644 index 000000000..7eef1598a --- /dev/null +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -0,0 +1,109 @@ +problem: "AICPU算子" +description: "一些算子和任务执行时间超过了{}us,比如:\n" +suggestion: "修改代码避免使用aicpu类算子" +double_suggestion: "尝试将double类型的算子转换成float,比如{}" +DataTypeSuggestion: &DataTypeSuggestion "数据类型{}在{}算子中可能会造成AICpu问题, 如果可以,尝试转换成{}。" 
+AICPU_DOC_URL: &AICPU_DOC_URL "https://gitee.com/ascend/mstt/blob/master/profiler/advisor/doc/Samples%20of%20AI%20CPU%20Operator%20Replacement.md" + +CommonChecker: + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensoraequal, equal, nonzero, mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ tensorequal ] + input: [ float, float32, float16, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ equal ] + input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ nonzero ] + input: [ float16, bool, dt_bf16 ] + output: [ int64 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ tensorequal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ equal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggestion + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + suggestion: *DataTypeSuggestion + +ExampleGuideChecker: + - IndexPutChecker: + op_type: [index] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换index算子。" + + - NonzeroChecker: + op_type: [ indexput, indexputv2 ] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换indexput算子。" + + - CastChecker: + op_type: [ argmin ] + url: *AICPU_DOC_URL + suggestion: "请参考链接更新cann-tookit包到7.0.RC1及以上的版本。" + + - CastChecker: + op_type: [ nonzero ] + url: *AICPU_DOC_URL + 
suggestion: "请参考链接修改源码,尝试用等价的算子替换nonzero算子。" + + diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 079792ea9..89ac8187d 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -70,6 +70,6 @@ if __name__ == "__main__": msprof_analyze_cli.main( [ "analyze","all","-d", - r"D:\da","-l","cn" + r"D:\data\file","-l","cn" ] ) -- Gitee From 762118c65ce74c7159ac8f4192b24035b30add0b Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 13 Jan 2025 16:37:15 +0800 Subject: [PATCH 03/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/analyzer/computation/profiling_analyzer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index 04f889854..bbea136f0 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -115,9 +115,4 @@ class OperatorBoundAnalyzer(ProfilingAnalyzer): class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) - -class AicpuPerformanceAnalyzer(ProfilingAnalyzer): - def __init__(self, collection_path, **kwargs) -> None: - super().__init__(collection_path, **kwargs) - pass + self.checker = AicpuChecker(self.cann_version) \ No newline at end of file -- Gitee From a9b229d83dbbf6f0f95300b8e34431b576c0e1b0 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 14 Jan 2025 10:14:58 +0800 Subject: [PATCH 04/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 15 +++-- .../ai_core_performance_checker.py | 16 ++++- .../html/templates/ai_core_performance.html | 62 +++++++++++++++++++ 3 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 profiler/advisor/display/html/templates/ai_core_performance.html diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 6992845af..b68386bdf 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -15,6 +15,8 @@ import logging from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \ + AICorePerformanceChecker from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker @@ -32,27 +34,24 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: super().__init__(collection_path, n_processes, **kwargs) - key = ProfilingDataset.get_key() - self.dataset = self.get_first_data_by_key(self.dataset_list, key) + profiling_key = ProfilingDataset.get_key() + self.profiling_dataset = self.get_first_data_by_key(self.dataset_list, profiling_key) self.result = 
OptimizeResult() self.html_render = HTMLRender() self.html = None - info = DeviceInfoParser(collection_path) - info.parse_data() - @BaseAnalyzer.check_data((ComputationAnalysisDataset.get_key(),)) def optimize(self, **kwargs): if not Config().get_config("aic_frequency"): logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") return self.result add_render_list = kwargs.get("add_render_list", True) - ai_core_freq_checker = AICoreFreqChecker() - ai_core_freq_checker.check_ai_core_freq(self.dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) + ai_core_freq_checker = AICorePerformanceChecker() + ai_core_freq_checker.check_ai_core_freq(self.profiling_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), rank=kwargs.get("rank")) return self.result def get_priority(self, max_mem_op_dur=None): - return PriorityBackgroundColor.high \ No newline at end of file + return PriorityBackgroundColor.high # todo 未知内容 \ No newline at end of file diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5a94d131b..4d595d7d1 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -15,6 +15,7 @@ import logging from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord @@ -48,19 +49,28 @@ class AICorePerformanceChecker: self.op_freq = None self.rank = None self.stage = None + self.cube_dict = {} + self.fa_dict = {} + self.vector_dict = {} - def check_ai_core_freq(self, event_dataset: ComputationAnalysisDataset, rank=None, stage=None): + + + def data_filter(self, profiling_dataset: ProfilingDataset): + self.cude_dict = {} + self.fa_dict = {} + self.vector_dict = {} + + def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): """ :Param event_dataset: dataset of timeline event """ - if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + if not hasattr(profiling_dataset, "op_freq") or not getattr(profiling_dataset, "op_freq"): logger.debug("Skip slow ai core frequency checker, " "because no ai core frequency were recorded in trace_view.json") return self.rank = rank self.stage = stage - self.op_freq = event_dataset.op_freq for op_name, op_info in self.op_freq.items(): freq_list = op_info.get("freq_list", []) if not freq_list: diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html new file mode 100644 index 000000000..a009f073d --- /dev/null +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -0,0 +1,62 @@ +{% if data|length > 0 %} +
+<div class="content">
+    <details open>
+        <summary>AICORE Performance Analysis</summary>
+        <div>
+            MatMul.
+            <table>
+                <tr>
+                    {% for header in headers %}
+                    <th>{{ header }}</th>
+                    {% endfor %}
+                </tr>
+                {% for row in data %}
+                <tr>
+                    {% for element in row %}
+                    <td>{{ element|safe }}</td>
+                    {% endfor %}
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+        <div>
+            FlashAttention
+            <table>
+                <tr>
+                    {% for header in headers %}
+                    <th>{{ header }}</th>
+                    {% endfor %}
+                </tr>
+                {% for row in data %}
+                <tr>
+                    {% for element in row %}
+                    <td>{{ element|safe }}</td>
+                    {% endfor %}
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+        <div>
+            Vector
+            <table>
+                <tr>
+                    {% for header in headers %}
+                    <th>{{ header }}</th>
+                    {% endfor %}
+                </tr>
+                {% for row in data %}
+                <tr>
+                    {% for element in row %}
+                    <td>{{ element|safe }}</td>
+                    {% endfor %}
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+    </details>
+</div>
+{% endif %} \ No newline at end of file -- Gitee From f4963e92bd98b26046cb66198a9b27e78b6e34ba Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 14 Jan 2025 11:04:11 +0800 Subject: [PATCH 05/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A2=B3=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_analyzer.py | 1 + .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index b68386bdf..801cf1f7c 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -47,6 +47,7 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): add_render_list = kwargs.get("add_render_list", True) ai_core_freq_checker = AICorePerformanceChecker() + ai_core_freq_checker.data_filter(self.profiling_dataset) ai_core_freq_checker.check_ai_core_freq(self.profiling_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 4d595d7d1..eefa57303 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -56,9 +56,9 @@ class AICorePerformanceChecker: def data_filter(self, profiling_dataset: ProfilingDataset): - self.cude_dict = {} - self.fa_dict = {} - self.vector_dict = {} + profiling_key = profiling_dataset.get_key() + for item in profiling_key: + print(item) def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): """ -- Gitee From bf527424ec0c51da19e34ada36e77a1c2878325b Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 14 Jan 2025 17:52:49 +0800 Subject: [PATCH 06/72] =?UTF-8?q?data=5Ffilter=E4=BF=AE=E6=94=B9=EF=BC=9A?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=9D=E6=AD=A5=E7=9A=84=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 10 ++--- .../ai_core_performance_checker.py | 44 ++++++++++++++----- profiler/cli/analyze_cli.py | 6 --- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 801cf1f7c..c7884f15a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -41,18 +41,16 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): self.html = None def optimize(self, **kwargs): - if not Config().get_config("aic_frequency"): - logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") - return self.result - 
add_render_list = kwargs.get("add_render_list", True) ai_core_freq_checker = AICorePerformanceChecker() ai_core_freq_checker.data_filter(self.profiling_dataset) - ai_core_freq_checker.check_ai_core_freq(self.profiling_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage")) + if not ai_core_freq_checker.ai_core_performance_issues: + return self.result + ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), rank=kwargs.get("rank")) return self.result def get_priority(self, max_mem_op_dur=None): - return PriorityBackgroundColor.high # todo 未知内容 \ No newline at end of file + return PriorityBackgroundColor.high # html 底色设置 \ No newline at end of file diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index eefa57303..5d1c0acf4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -38,10 +38,12 @@ class AICorePerformanceChecker: "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", "output_data_types", "output_formats" ] + _CHECKER = "AICorePerformanceChecker" + CUBE_OPERATOR_MEMORY_SIZE = 52428800 def __init__(self): - self.ai_core_freq_issues = False + self.ai_core_performance_issues = False self.desc = "" self.suggestions = "" self.decrease_freq_ops = [] @@ -49,16 +51,29 @@ class AICorePerformanceChecker: self.op_freq = None self.rank = None self.stage = None - self.cube_dict = {} - self.fa_dict = {} - self.vector_dict = {} - - + self.cube_list = [] + self.fa_list = [] + self.vector_list = [] def data_filter(self, profiling_dataset: ProfilingDataset): - profiling_key = profiling_dataset.get_key() - for item in profiling_key: - print(item) + if not self.check_task_dict(profiling_dataset): + return + operator_list = profiling_dataset.op_summary.op_list + total_duration = sum(operator.task_duration / 1000 for operator in operator_list + if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) + centi_of_total_duration = total_duration / 100 + for operator in operator_list: + if operator.task_type == "AI_CORE" and "matmul" in operator.op_type.lower(): + mm = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) + mm += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) + if mm >= self.CUBE_OPERATOR_MEMORY_SIZE: + self.cube_list.append(operator) + elif operator.op_type == "FlashAttentionScore" and "varlen" in operator.op_name.lower(): + self.fa_list.append(operator) + elif operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"] and operator.task_duration > centi_of_total_duration: + self.vector_list.append(operator) + if any([self.cube_list, self.fa_list, self.vector_list]): + self.ai_core_performance_issues = True def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): """ @@ -69,8 +84,6 @@ class AICorePerformanceChecker: "because no ai core frequency were recorded in trace_view.json") return - self.rank = rank - self.stage = stage for op_name, op_info in self.op_freq.items(): freq_list = op_info.get("freq_list", []) if not freq_list: @@ -149,3 +162,12 @@ class AICorePerformanceChecker: 
add_render_list=add_render_list, priority_background_color=priority, rank=kwargs.get("rank")) + + def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool: + if not hasattr(profiling_dataset, "op_summary"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + return False + if not hasattr(profiling_dataset.op_summary, "task_dict"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + return False + return True diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 80441c274..9453de6ff 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -145,12 +145,6 @@ def analyze_schedule(**kwargs) -> None: required=False, default="cn", help="Language of the profiling advisor.") -@click.option("-p", - "--performance", - metavar="", - required=False, - default=False, - help="Indicates whether to analyze operator performance.") @debug_option def analyze_computation(**kwargs) -> None: try: -- Gitee From 5e900c853b17dbdba43b07e54c55a9653edfd29c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 15 Jan 2025 15:06:36 +0800 Subject: [PATCH 07/72] =?UTF-8?q?data=5Ffilter=E4=BF=AE=E6=94=B9=EF=BC=9A?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=9D=E6=AD=A5=E7=9A=84=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 6 +- .../ai_core_performance_checker.py | 74 +++++++++---------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index c7884f15a..eb46b8549 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -46,9 +46,11 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): ai_core_freq_checker.data_filter(self.profiling_dataset) if not ai_core_freq_checker.ai_core_performance_issues: return self.result - ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) + ai_core_freq_checker.check_ai_core_performance() ai_core_freq_checker.make_record(self.result) - self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), + self.html = ai_core_freq_checker.make_render(self.html_render, + add_render_list, + priority=self.get_priority(), rank=kwargs.get("rank")) return self.result diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5d1c0acf4..b9107d522 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +from queue import PriorityQueue from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset @@ -59,14 +60,14 @@ class AICorePerformanceChecker: if not self.check_task_dict(profiling_dataset): return operator_list = profiling_dataset.op_summary.op_list - total_duration = sum(operator.task_duration / 1000 for operator in operator_list - if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) - centi_of_total_duration = total_duration / 100 + centi_of_total_duration = sum(operator.task_duration / 1000 for operator in operator_list + if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) / 100 for operator in operator_list: if operator.task_type == "AI_CORE" and "matmul" in operator.op_type.lower(): - mm = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) - mm += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) - if mm >= self.CUBE_OPERATOR_MEMORY_SIZE: + memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) + memory += int(operator.output_shapes[1:-1].split(",")[0]) * int( + operator.output_shapes[1:-1].split(",")[1]) + if memory >= self.CUBE_OPERATOR_MEMORY_SIZE: self.cube_list.append(operator) elif operator.op_type == "FlashAttentionScore" and "varlen" in operator.op_name.lower(): self.fa_list.append(operator) @@ -75,40 +76,35 @@ class AICorePerformanceChecker: if any([self.cube_list, self.fa_list, self.vector_list]): self.ai_core_performance_issues = True - def check_ai_core_performance(self, profiling_dataset: ProfilingDataset): + def check_ai_core_performance(self): """ - :Param event_dataset: dataset of timeline event + :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if not hasattr(profiling_dataset, "op_freq") or not getattr(profiling_dataset, "op_freq"): - logger.debug("Skip slow ai core frequency checker, " - "because no ai core frequency were recorded in trace_view.json") - return + if self.cube_list: + self.check_cube_operator() + if self.fa_list: + self.check_fa_operator() + if self.vector_list: + self.check_vector_operator() + + + + def check_cube_operator(self): + cube_list = self.cube_list + performance_queue = PriorityQueue() + bound_queue = PriorityQueue() + affinity_queue = PriorityQueue() + # for operator in cube_list: + + pass + + def check_fa_operator(self): + pass + + def check_vector_operator(self): + pass + - for op_name, op_info in self.op_freq.items(): - freq_list = op_info.get("freq_list", []) - if not freq_list: - continue - - op_count = op_info.get("count", 0) - op_total_duration = round(op_info.get("dur", 0), 2) - max_freq = convert_to_float(Config().get_config("aic_frequency")) - - if max_freq == 0: - raise ValueError("max_freq cannot be zero.") - decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) - if decrease_freq_ratio >= Config().get_config("frequency_threshold"): - self.ai_core_freq_issues = True - self.decrease_freq_ops.append([op_name, op_count, op_total_duration, - f"{round(decrease_freq_ratio, 4):.2%}", - round(sum(freq_list) / len(freq_list), 2), - max(freq_list), min(freq_list)]) - - if self.decrease_freq_ops: - # 按算子总耗时和降频比率 降序排列 - self.decrease_freq_ops.sort( - key=lambda x: (x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), reverse=True) - if not self.ai_core_freq_issues: - return def make_record(self, result: OptimizeResult): """ 
@@ -154,7 +150,7 @@ class AICorePerformanceChecker: self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." return html_render.render_template(key="computation", template_dir="templates", - template_name="ai_core_frequency.html", + template_name="ai_core_performance.html", desc=self.desc, suggestion=self.suggestions, headers=self.headers, @@ -167,7 +163,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict"): + if not hasattr(profiling_dataset.op_summary, "task_dict") or hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False return True -- Gitee From 1281640a8d300e9bc3298f9bc1cedfe133e64449 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 15 Jan 2025 17:41:37 +0800 Subject: [PATCH 08/72] =?UTF-8?q?data=5Ffilter=E4=BF=AE=E6=94=B9=EF=BC=9A?= =?UTF-8?q?=E9=87=8D=E5=86=99=E7=AE=97=E5=AD=90=E8=BF=87=E6=BB=A4=EF=BC=8C?= =?UTF-8?q?=E6=8C=89shap=E6=88=96type=E5=88=86=E7=BB=84=E5=AD=98=E5=82=A8?= =?UTF-8?q?=E5=88=B0=E5=AD=97=E5=85=B8=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 68 ++++++++++++------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b9107d522..6716d21a3 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -40,7 +40,7 @@ class AICorePerformanceChecker: "output_data_types", "output_formats" ] _CHECKER = "AICorePerformanceChecker" - CUBE_OPERATOR_MEMORY_SIZE = 52428800 + CUBE_OPERATOR_MEMORY_SIZE_MB = 100 def __init__(self): @@ -52,30 +52,51 @@ class AICorePerformanceChecker: self.op_freq = None self.rank = None self.stage = None - self.cube_list = [] - self.fa_list = [] - self.vector_list = [] + self.cube_dict = {} + self.fa_dict = {} + self.vector_dict = {} def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_dict(profiling_dataset): return operator_list = profiling_dataset.op_summary.op_list - centi_of_total_duration = sum(operator.task_duration / 1000 for operator in operator_list - if operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"]) / 100 - for operator in operator_list: - if operator.task_type == "AI_CORE" and "matmul" in operator.op_type.lower(): - memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in operator.input_shapes) - memory += int(operator.output_shapes[1:-1].split(",")[0]) * int( - operator.output_shapes[1:-1].split(",")[1]) - if memory >= self.CUBE_OPERATOR_MEMORY_SIZE: - self.cube_list.append(operator) - elif operator.op_type == "FlashAttentionScore" and "varlen" in operator.op_name.lower(): - self.fa_list.append(operator) - elif operator.op_type in ["AI_VECTOR_CORE", "MIX_AIV"] and operator.task_duration > centi_of_total_duration: - self.vector_list.append(operator) - if any([self.cube_list, self.fa_list, self.vector_list]): + total_duration = sum(float(operator.task_duration) for operator in operator_list) + 
cube_memory_dict = {} + vector_type_dict = {} + # filter cube operator and fa operator + for op in operator_list: + shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower(): + cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0) + cube_memory_dict[op.op_name][shapes] += self.memory_size(op) + elif op.op_type == "FlashAttentionScore" and "varlen" in op.op_name.lower(): + self.fa_dict.setdefault(op.op_name, set()).add(shapes) + elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: + vector_type_dict.setdefault(op.op_type, set()).add(op) + + # filter cube operator + for op_name in cube_memory_dict: + for shapes in cube_memory_dict[op_name]: + if cube_memory_dict[op_name][shapes] >= self.CUBE_OPERATOR_MEMORY_SIZE_MB: + self.cube_dict.setdefault(op_name, set()).add(shapes) + + # filter vector operator + for op_type in vector_type_dict: + duration_group_by_time = sum(float(op.task_duration) for op in vector_type_dict[op_type]) + if (duration_group_by_time / total_duration) >= 0.01 or duration_group_by_time >= 1000000: + for op in vector_type_dict[op_type]: + shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + self.vector_dict.setdefault(op.op_name, set()).add(shapes) + + if any([self.cube_dict, self.fa_dict, self.vector_dict]): self.ai_core_performance_issues = True + def memory_size(self, operator): + input_shapes = operator.input_shapes[1:-1].split(";") + memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) + memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) + return memory * 2 / 1024 / 1024 + def check_ai_core_performance(self): """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv @@ -87,13 +108,16 @@ class AICorePerformanceChecker: if self.vector_list: self.check_vector_operator() - - def check_cube_operator(self): - cube_list = self.cube_list + cube_dict = self.cube_dict performance_queue = PriorityQueue() bound_queue = PriorityQueue() affinity_queue = PriorityQueue() + for name in cube_dict: + cube_list = cube_dict[name] + for shape in cube_list: + pass + # for operator in cube_list: pass @@ -104,8 +128,6 @@ class AICorePerformanceChecker: def check_vector_operator(self): pass - - def make_record(self, result: OptimizeResult): """ make record for what and how to optimize -- Gitee From f9959be733173dd6eb26e982d37f8b9f1c4182c7 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 16 Jan 2025 16:40:28 +0800 Subject: [PATCH 09/72] =?UTF-8?q?=E7=AE=97=E5=AD=90=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BF=AE=E6=94=B9=EF=BC=8C=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?Cube=E7=AE=97=E5=AD=90=E5=92=8CFa=E7=AE=97=E5=AD=90=E7=9A=84?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 6 +- .../ai_core_performance_checker.py | 238 +++++++++++++++--- 2 files changed, 198 insertions(+), 46 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index eb46b8549..76189af1a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -19,12 +19,8 @@ from 
profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performan AICorePerformanceChecker from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset -from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser -from profiler.advisor.config.config import Config logger = logging.getLogger() @@ -46,7 +42,7 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): ai_core_freq_checker.data_filter(self.profiling_dataset) if not ai_core_freq_checker.ai_core_performance_issues: return self.result - ai_core_freq_checker.check_ai_core_performance() + ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) ai_core_freq_checker.make_record(self.result) self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 6716d21a3..f8cccbb72 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,16 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from queue import PriorityQueue -from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.utils.utils import convert_to_float -from profiler.prof_common.additional_args_manager import AdditionalArgsManager +from queue import PriorityQueue logger = logging.getLogger() @@ -31,14 +27,6 @@ class AICorePerformanceChecker: """ operator performance checker """ - # DECREASE_FREQ_RATIO = 0.05 - # SHOW_TOPK_OPS = 10 - # TOTAL_DURATION_INDEX = 2 - # DECREASE_FREQ_RATIO_INDEX = 3 - _ITEMS = [ - "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", - "output_data_types", "output_formats" - ] _CHECKER = "AICorePerformanceChecker" CUBE_OPERATOR_MEMORY_SIZE_MB = 100 @@ -47,14 +35,12 @@ class AICorePerformanceChecker: self.ai_core_performance_issues = False self.desc = "" self.suggestions = "" - self.decrease_freq_ops = [] - self.headers = [] - self.op_freq = None - self.rank = None - self.stage = None self.cube_dict = {} + self.cube_list = [] self.fa_dict = {} + self.fa_list = [] self.vector_dict = {} + self.vector_list = [] def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_dict(profiling_dataset): @@ -69,8 +55,12 @@ class AICorePerformanceChecker: if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower(): cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0) cube_memory_dict[op.op_name][shapes] += 
self.memory_size(op) - elif op.op_type == "FlashAttentionScore" and "varlen" in op.op_name.lower(): + elif op.op_type == "FlashAttentionScore": self.fa_dict.setdefault(op.op_name, set()).add(shapes) + self.fa_list.append(op) + elif op.op_type == "FlashAttentionScoreGrad": + self.fa_dict.setdefault(op.op_name, set()).add(shapes + "-grad") + self.fa_list.append(op) elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: vector_type_dict.setdefault(op.op_type, set()).add(op) @@ -91,41 +81,207 @@ class AICorePerformanceChecker: if any([self.cube_dict, self.fa_dict, self.vector_dict]): self.ai_core_performance_issues = True - def memory_size(self, operator): + @staticmethod + def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) return memory * 2 / 1024 / 1024 - def check_ai_core_performance(self): + def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if self.cube_list: - self.check_cube_operator() - if self.fa_list: - self.check_fa_operator() - if self.vector_list: - self.check_vector_operator() - - def check_cube_operator(self): + self.result = dict() + if self.cube_dict: + self.result["cube"] = self.check_cube_operator(promoting_dataset) + if self.fa_dict: + self.result["fa"] = self.check_fa_operator(promoting_dataset) + if self.vector_dict: + self.result["vector"] = self.check_vector_operator(promoting_dataset) + + def check_cube_operator(self, profiling_dataset: ProfilingDataset): + # todo 未处理ND、NZ格式 cube_dict = self.cube_dict - performance_queue = PriorityQueue() - bound_queue = PriorityQueue() - affinity_queue = PriorityQueue() - for name in cube_dict: - cube_list = cube_dict[name] - for shape in cube_list: - pass + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + operator_list = [] + for op in profiling_dataset.op_summary.op_list: + if (op.op_name in cube_dict and + op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]): + operator_list.append(op) + for op in cube_dict: + shap_list = [] + for shape in cube_dict[op]: + dtype = None + shape_duration = 0. 
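For orientation, a minimal standalone sketch of the shape-based memory estimate used by the cube filter above; the shape strings are hypothetical and follow the quoted kernel_details.csv format the checker assumes (two bytes per element, i.e. fp16/bf16), with 100 MB as the CUBE_OPERATOR_MEMORY_SIZE_MB threshold:

    def estimate_matmul_memory_mb(input_shapes: str, output_shapes: str) -> float:
        # Strip the surrounding quotes: '"1024,4096;4096,8192"' -> '1024,4096;4096,8192'
        in_shapes = input_shapes[1:-1].split(";")
        elements = sum(int(s.split(",")[0]) * int(s.split(",")[1]) for s in in_shapes)
        out_dims = output_shapes[1:-1].split(",")
        elements += int(out_dims[0]) * int(out_dims[1])
        # 2 bytes per element, converted to MB
        return elements * 2 / 1024 / 1024

    # Hypothetical MatMul: (1024, 4096) x (4096, 8192) -> (1024, 8192)
    print(estimate_matmul_memory_mb('"1024,4096;4096,8192"', '"1024,8192"'))  # 88.0 MB, below the 100 MB threshold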
+ # 判断输入shape内轴是否为256的倍数 + affinity_flag = (int(shape.split("-")[0].split(";")[0].split(",")[1]) + + int(shape.split("-")[0].split(";")[1].split(",")[0])) % 256 != 0 + if affinity_flag: + for operator in operator_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + dtype = operator.input_data_types + shape_duration += float(operator.task_duration) + affinity_queue.append( + {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration}) + continue + else: + for operator in operator_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shap_list.append(operator) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + aic_mac_ratio = sum(operator.aic_mac_ratio for operator in shap_list) / len(shap_list) + aic_mte2_ratio = sum(operator.aic_mte2_ratio for operator in shap_list) / len(shap_list) + if (aic_mac_ratio >= 0.8) and aic_mte2_ratio >= 0.95: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_and_mte2_bound", + "duration": shape_duration}) + elif aic_mac_ratio >= 0.8: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_bound", + "duration": shape_duration}) + elif aic_mte2_ratio >= 0.95: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mte2_bound", + "duration": shape_duration}) + else: + optimization_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - # for operator in cube_list: + def check_fa_operator(self, profiling_dataset: ProfilingDataset): + fa_list = self.fa_list + fa_dict = self.fa_dict + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + # 不亲和算子筛选 + for op in fa_dict: + for shape in fa_dict[op]: + affinity_flag = False + shape_duration = 0. 
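The cube classification above reduces to two pipeline-utilisation thresholds; a condensed sketch follows, assuming the ratios passed in are per-shape averages taken over every instance of one op_name/shape pair:

    def classify_cube_shape(aic_mac_ratio: float, aic_mte2_ratio: float) -> dict:
        # Thresholds mirror the checker: mac-bound at >= 0.8, mte2-bound at >= 0.95.
        if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95:
            return {"bound": "mac_and_mte2_bound"}
        if aic_mac_ratio >= 0.8:
            return {"bound": "mac_bound"}
        if aic_mte2_ratio >= 0.95:
            return {"bound": "mte2_bound"}
        # Otherwise the larger gap to either threshold is reported as optimization headroom.
        return {"optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}

    print(classify_cube_shape(0.55, 0.97))  # {'bound': 'mte2_bound'}
    print(classify_cube_shape(0.55, 0.60))  # optimization headroom of roughly 0.35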
+ dtype = None + suggestion = "" + if "varlen" in op.lower(): + # 处理变长算子 如果不亲和则affinity_flag为False + if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + else: + # 处理定长算子 如果不亲和则affinity_flag为False + head_dim = 0 + seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) + input_first_tensor = shape.split("-")[0].split(";")[0].split(",") + if len(input_first_tensor) == 3: + head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) + else: + head_dim = int(input_first_tensor[3]) + if head_dim % 128 != 0 and seq_len % 128 != 0: + affinity_flag = True + suggestion = "D和S均不能被128整除" + elif head_dim % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + elif seq_len % 128 != 0: + affinity_flag = True + suggestion = "S不能被128整除" + if affinity_flag: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types - pass + if affinity_flag: + # 不亲和算子 计算耗时,加入affinity_queue + affinity_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "suggestion": suggestion, + "duration": shape_duration}) + continue + else: + # 处理bound算子和优化算子 + aiv_vec_ratio = 0. + aic_fixpipe_ratio = 0. + aic_mte2_ratio = 0. + bound = "" + optimization = 0. + if len(shape.split("-")) > 2: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[ + 1:-1] + "-grad" == shape): + aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: + bound = "mte2_and_fixpipe_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" + else: + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + else: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aic_vec_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: + bound = "mte2_and_vec_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" + else: + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + if bound: + bound_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": bound, + "duration": shape_duration}) + else: + optimization_queue.append( + {"op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": optimization}) - def check_fa_operator(self): - pass + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - def check_vector_operator(self): + def check_vector_operator(self, profiling_dataset: ProfilingDataset): pass def make_record(self, result: OptimizeResult): -- 
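The fixed-length FlashAttention check above comes down to a 128-alignment test on head_dim (D) and sequence length (S); an illustrative sketch with hypothetical values:

    def fa_affinity_hint(head_dim: int, seq_len: int) -> str:
        # Rule of thumb from the checker: both D (head_dim) and S (seq_len) should be multiples of 128.
        if head_dim % 128 != 0 and seq_len % 128 != 0:
            return "D和S均不能被128整除"
        if head_dim % 128 != 0:
            return "D不能被128整除"
        if seq_len % 128 != 0:
            return "S不能被128整除"
        return ""  # aligned; not flagged as an affinity issue

    print(fa_affinity_hint(80, 2048))   # 'D不能被128整除'
    print(fa_affinity_hint(128, 4096))  # '' (treated as affine)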
Gitee From 705dbad637d1e8706e46ee1aad5626ccf26ed00d Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 16 Jan 2025 17:18:44 +0800 Subject: [PATCH 10/72] =?UTF-8?q?=E7=AE=97=E5=AD=90=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BF=AE=E6=94=B9=EF=BC=8C=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?Cube=E7=AE=97=E5=AD=90=E3=80=81Fa=E7=AE=97=E5=AD=90=E5=92=8CVec?= =?UTF-8?q?tor=E7=AE=97=E5=AD=90=E7=9A=84=E6=80=A7=E8=83=BD=E5=88=A4?= =?UTF-8?q?=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index f8cccbb72..ea13cbaa7 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -18,7 +18,6 @@ from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDatase from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult -from queue import PriorityQueue logger = logging.getLogger() @@ -282,6 +281,59 @@ class AICorePerformanceChecker: sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] def check_vector_operator(self, profiling_dataset: ProfilingDataset): + vector_dict = self.vector_dict + vector_list = [] + optimization_queue = [] + bound_queue = [] + for op_name in vector_dict: + for shape in vector_dict[op_name]: + for operator in profiling_dataset.op_summary.op_list: + if (operator.op_name == op_name and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + vector_list.append(operator) + for op_name in vector_dict: + for shape in vector_dict[op_name]: + aiv_vec_ratio = 0. + aiv_met2_ratio = 0. + aiv_met3_ratio = 0. + bound = "" + shape_duration = 0. + optimization = 0. 
+ dtype = "" + for operator in vector_list: + if (operator.op_name == op_name and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aiv_met2_ratio += float(operator.aiv_met2_ratio) + aiv_met3_ratio += float(operator.aiv_met3_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + if aiv_vec_ratio + aiv_met2_ratio + aiv_met3_ratio >= 0.9: + bound = "vec_met2_met3_bound" + elif aiv_met2_ratio >= 0.7: + bound = "met2_bound" + elif aiv_met3_ratio >= 0.7: + bound = "met3_bound" + elif aiv_vec_ratio >= 0.7: + bound = "vec_bound" + else: + optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_met2_ratio, 0.7 - aiv_met3_ratio) + if bound: + bound_queue.append( + {"op_name": op_name, + "shape": shape, + "bound": bound, + "dtype": dtype, + "duration": shape_duration}) + else: + optimization_queue.append( + {"op_name": op_name, + "shape": shape, + "dtype": dtype, + "optimization": optimization}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] + pass def make_record(self, result: OptimizeResult): -- Gitee From b54252163619871b550c9bb098092e87cb68df17 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 17 Jan 2025 14:27:45 +0800 Subject: [PATCH 11/72] =?UTF-8?q?checker=E4=BB=A3=E7=A0=81=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index ea13cbaa7..883891b13 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -134,8 +134,8 @@ class AICorePerformanceChecker: shap_list.append(operator) shape_duration += float(operator.task_duration) dtype = operator.input_data_types - aic_mac_ratio = sum(operator.aic_mac_ratio for operator in shap_list) / len(shap_list) - aic_mte2_ratio = sum(operator.aic_mte2_ratio for operator in shap_list) / len(shap_list) + aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) + aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) if (aic_mac_ratio >= 0.8) and aic_mte2_ratio >= 0.95: bound_queue.append( {"op_name": op, @@ -251,7 +251,7 @@ class AICorePerformanceChecker: for operator in fa_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aic_vec_ratio) + aiv_vec_ratio += float(operator.aiv_vec_ratio) aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: @@ -294,8 +294,8 @@ class AICorePerformanceChecker: for op_name in vector_dict: for shape in vector_dict[op_name]: aiv_vec_ratio = 0. - aiv_met2_ratio = 0. - aiv_met3_ratio = 0. + aiv_mte2_ratio = 0. + aiv_mte3_ratio = 0. bound = "" shape_duration = 0. optimization = 0. 
@@ -304,20 +304,20 @@ class AICorePerformanceChecker: if (operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): aiv_vec_ratio += float(operator.aiv_vec_ratio) - aiv_met2_ratio += float(operator.aiv_met2_ratio) - aiv_met3_ratio += float(operator.aiv_met3_ratio) + aiv_mte2_ratio += float(operator.aiv_mte2_ratio) + aiv_mte3_ratio += float(operator.aiv_mte3_ratio) shape_duration += float(operator.task_duration) dtype = operator.input_data_types - if aiv_vec_ratio + aiv_met2_ratio + aiv_met3_ratio >= 0.9: - bound = "vec_met2_met3_bound" - elif aiv_met2_ratio >= 0.7: - bound = "met2_bound" - elif aiv_met3_ratio >= 0.7: - bound = "met3_bound" + if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: + bound = "vec_mte2_mte3_bound" + elif aiv_mte2_ratio >= 0.7: + bound = "mte2_bound" + elif aiv_mte3_ratio >= 0.7: + bound = "mte3_bound" elif aiv_vec_ratio >= 0.7: bound = "vec_bound" else: - optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_met2_ratio, 0.7 - aiv_met3_ratio) + optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) if bound: bound_queue.append( {"op_name": op_name, @@ -393,7 +393,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or hasattr(profiling_dataset.op_summary, "op_list"): + if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False return True -- Gitee From 0a70bd67befc94102d84f3f9efbc546018307c3c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 17 Jan 2025 16:09:44 +0800 Subject: [PATCH 12/72] =?UTF-8?q?checker=E4=BB=A3=E7=A0=81=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 155 +++++++++--------- 1 file changed, 75 insertions(+), 80 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 883891b13..97952f229 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -105,13 +105,10 @@ class AICorePerformanceChecker: optimization_queue = [] bound_queue = [] affinity_queue = [] - operator_list = [] - for op in profiling_dataset.op_summary.op_list: - if (op.op_name in cube_dict and - op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]): - operator_list.append(op) + operator_list = [op for op in profiling_dataset.op_summary.op_list + if op.op_name in cube_dict + and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] for op in cube_dict: - shap_list = [] for shape in cube_dict[op]: dtype = None shape_duration = 0. 
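The list comprehension above keys operators by an "<input_shapes>-<output_shapes>" string built from the quoted CSV fields; a small self-contained sketch of that selection, with made-up op names and shapes:

    def shape_key(input_shapes: str, output_shapes: str) -> str:
        # Same key the checker builds: strip the outer quotes and join inputs/outputs with "-".
        return input_shapes[1:-1] + "-" + output_shapes[1:-1]

    op_summary = [
        {"op_name": "MatMulV2", "input_shapes": '"4096,1024;1024,1024"', "output_shapes": '"4096,1024"'},
        {"op_name": "Add",      "input_shapes": '"4096,1024;4096,1024"', "output_shapes": '"4096,1024"'},
    ]
    cube_dict = {"MatMulV2": {"4096,1024;1024,1024-4096,1024"}}

    operator_list = [op for op in op_summary
                     if op["op_name"] in cube_dict
                     and shape_key(op["input_shapes"], op["output_shapes"]) in cube_dict[op["op_name"]]]
    print([op["op_name"] for op in operator_list])  # ['MatMulV2']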
@@ -124,45 +121,47 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): dtype = operator.input_data_types shape_duration += float(operator.task_duration) - affinity_queue.append( - {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration}) + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "duration": shape_duration}) continue else: - for operator in operator_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - shap_list.append(operator) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types + shap_list = [operator for operator in operator_list if + operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] + shape_duration = sum(float(operator.task_duration) for operator in shap_list) + dtype = shap_list[0].input_data_types if shap_list else None aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) - if (aic_mac_ratio >= 0.8) and aic_mte2_ratio >= 0.95: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_and_mte2_bound", - "duration": shape_duration}) + if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_and_mte2_bound", + "duration": shape_duration}) elif aic_mac_ratio >= 0.8: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_bound", - "duration": shape_duration}) + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_bound", + "duration": shape_duration}) elif aic_mte2_ratio >= 0.95: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mte2_bound", - "duration": shape_duration}) + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mte2_bound", + "duration": shape_duration}) else: - optimization_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + optimization_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] @@ -211,34 +210,33 @@ class AICorePerformanceChecker: if affinity_flag: for operator in fa_list: if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] == shape): shape_duration += float(operator.task_duration) dtype = operator.input_data_types if affinity_flag: # 不亲和算子 计算耗时,加入affinity_queue - affinity_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "suggestion": suggestion, - "duration": shape_duration}) + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "suggestion": suggestion, + "duration": 
shape_duration}) continue else: # 处理bound算子和优化算子 - aiv_vec_ratio = 0. - aic_fixpipe_ratio = 0. - aic_mte2_ratio = 0. + aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. bound = "" - optimization = 0. if len(shape.split("-")) > 2: for operator in fa_list: if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[ - 1:-1] + "-grad" == shape): + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] + "-grad" == shape): aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) + dtype = operator.input_data_types if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: bound = "mte2_and_fixpipe_bound" elif aic_mte2_ratio >= 0.8: @@ -263,18 +261,18 @@ class AICorePerformanceChecker: else: optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) if bound: - bound_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": bound, - "duration": shape_duration}) + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": bound, + "duration": shape_duration}) else: - optimization_queue.append( - {"op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": optimization}) + optimization_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": optimization}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], @@ -285,21 +283,17 @@ class AICorePerformanceChecker: vector_list = [] optimization_queue = [] bound_queue = [] + vector_list.extend( + operator for op_name in vector_dict + for shape in vector_dict[op_name] + for operator in profiling_dataset.op_summary.op_list + if operator.op_name == op_name + and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape + ) for op_name in vector_dict: for shape in vector_dict[op_name]: - for operator in profiling_dataset.op_summary.op_list: - if (operator.op_name == op_name and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - vector_list.append(operator) - for op_name in vector_dict: - for shape in vector_dict[op_name]: - aiv_vec_ratio = 0. - aiv_mte2_ratio = 0. - aiv_mte3_ratio = 0. - bound = "" - shape_duration = 0. - optimization = 0. - dtype = "" + aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. 
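Each check finally keeps only the five most significant entries per queue via the sorted(...)[:5] returns; a tiny sketch of that selection with hypothetical entries:

    optimization_queue = [
        {"op_name": "MatMulV2",    "optimization": 0.12},
        {"op_name": "BatchMatMul", "optimization": 0.31},
        {"op_name": "MatMulV3",    "optimization": 0.05},
    ]
    top_entries = sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5]
    print([entry["op_name"] for entry in top_entries])  # ['BatchMatMul', 'MatMulV2', 'MatMulV3']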
+ bound, dtype = "", "" for operator in vector_list: if (operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): @@ -319,15 +313,15 @@ class AICorePerformanceChecker: else: optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) if bound: - bound_queue.append( - {"op_name": op_name, + bound_queue.append({ + "op_name": op_name, "shape": shape, "bound": bound, "dtype": dtype, "duration": shape_duration}) else: - optimization_queue.append( - {"op_name": op_name, + optimization_queue.append({ + "op_name": op_name, "shape": shape, "dtype": dtype, "optimization": optimization}) @@ -393,7 +387,8 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, "op_list"): + if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, + "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False return True -- Gitee From 4b7fc2b0ab497f2bbcda77062dae086d54c7fd36 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 17 Jan 2025 17:26:01 +0800 Subject: [PATCH 13/72] =?UTF-8?q?checker=E4=BB=A3=E7=A0=81=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 97952f229..26c3994db 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -31,11 +31,11 @@ class AICorePerformanceChecker: def __init__(self): + self.result = dict() self.ai_core_performance_issues = False self.desc = "" self.suggestions = "" self.cube_dict = {} - self.cube_list = [] self.fa_dict = {} self.fa_list = [] self.vector_dict = {} @@ -91,7 +91,6 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - self.result = dict() if self.cube_dict: self.result["cube"] = self.check_cube_operator(promoting_dataset) if self.fa_dict: -- Gitee From b8444dea415f1543534ffd099dfc4fb2293ca17a Mon Sep 17 00:00:00 2001 From: kiritorl Date: Sat, 18 Jan 2025 18:16:55 +0800 Subject: [PATCH 14/72] =?UTF-8?q?=E9=80=82=E9=85=8D=E8=A1=A8=E6=A0=BC?= =?UTF-8?q?=E5=92=8Chtml=E9=A1=B5=E9=9D=A2=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_analyzer.py | 12 +- .../ai_core_performance_checker.py | 132 +++++++++++----- .../html/templates/ai_core_performance.html | 141 +++++++++++++----- .../advisor/rules/cn/aicore_performance.yaml | 115 +------------- .../advisor/rules/en/aicore_performance.yaml | 6 + profiler/cli/entrance.py | 4 +- 6 files changed, 224 insertions(+), 186 deletions(-) create mode 100644 profiler/advisor/rules/en/aicore_performance.yaml diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py 
b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 76189af1a..03b0a8c6e 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -38,13 +38,13 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): def optimize(self, **kwargs): add_render_list = kwargs.get("add_render_list", True) - ai_core_freq_checker = AICorePerformanceChecker() - ai_core_freq_checker.data_filter(self.profiling_dataset) - if not ai_core_freq_checker.ai_core_performance_issues: + ai_core_perf_checker = AICorePerformanceChecker() + ai_core_perf_checker.data_filter(self.profiling_dataset) + if not ai_core_perf_checker.ai_core_performance_issues: return self.result - ai_core_freq_checker.check_ai_core_performance(self.profiling_dataset) - ai_core_freq_checker.make_record(self.result) - self.html = ai_core_freq_checker.make_render(self.html_render, + ai_core_perf_checker.check_ai_core_performance(self.profiling_dataset) + ai_core_perf_checker.make_record(self.result) + self.html = ai_core_perf_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(), rank=kwargs.get("rank")) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 883891b13..8b0c9c224 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,11 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +import os from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult +from profiler.prof_common.additional_args_manager import AdditionalArgsManager +from profiler.prof_common.file_manager import FileManager logger = logging.getLogger() @@ -40,6 +43,27 @@ class AICorePerformanceChecker: self.fa_list = [] self.vector_dict = {} self.vector_list = [] + self.load_aicore_perf_rules() + + def load_aicore_perf_rules(self): + language = AdditionalArgsManager().language + rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))), + "rules", + language, + "aicore_performance.yaml" + ) + + if not os.path.exists(rule_path): + logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) + + self.aicore_rules = FileManager.read_yaml_file(rule_path) + self._PROBLEM = self.aicore_rules.get("problem") + self.desc = self.aicore_rules.get("description") + self.suggestion = self.aicore_rules.get("suggestion") + self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion") + self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion") + self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion") def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_dict(profiling_dataset): @@ -83,6 +107,9 @@ class AICorePerformanceChecker: @staticmethod def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") + # todo batchmatmul + if len(input_shapes) > 2: + return 1 memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) return memory * 2 / 1024 / 1024 @@ -106,6 +133,7 @@ class AICorePerformanceChecker: bound_queue = [] affinity_queue = [] operator_list = [] + suggestion = "内轴无法被256整除" for op in profiling_dataset.op_summary.op_list: if (op.op_name in cube_dict and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]): @@ -125,7 +153,7 @@ class AICorePerformanceChecker: dtype = operator.input_data_types shape_duration += float(operator.task_duration) affinity_queue.append( - {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration}) + {"op_name": op, "shape": shape.split("-")[0], "dtype": dtype, "duration": shape_duration, "suggestion": suggestion}) continue else: for operator in operator_list: @@ -340,51 +368,85 @@ class AICorePerformanceChecker: """ make record for what and how to optimize """ - if not self.ai_core_freq_issues: - return self.ai_core_freq_issues - - prompt_class = BasePrompt.get_prompt_class(self.__class__.__name__) - - problem = prompt_class.PROBLEM - if self.rank is not None: - problem += prompt_class.RANK_ID.format(self.rank) - - self.desc = prompt_class.DESCRIPTION.format(len(self.decrease_freq_ops), self.DECREASE_FREQ_RATIO) - if self.rank: - self.desc = prompt_class.RANK_DESCRIPTION.format(self.rank) + self.desc.lower() + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues - optimization_item = OptimizeItem(problem, self.desc, [prompt_class.SUGGESTION]) - result.add(OptimizeRecord(optimization_item)) + cube_problem = "Cube算子性能分析" + fa_problem = "FA算子性能分析" + vector_problem = 
"Vector算子性能分析" + sugg_keys = ['opti', 'bound', 'affinity'] + cube_desc = dict.fromkeys(sugg_keys, "") + fa_desc = dict.fromkeys(sugg_keys, "") + vector_desc = dict.fromkeys(sugg_keys, "") + if self.result["cube"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(cube_problem, headers=headers) + for cube_opti_issue in self.result["cube"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) + cube_desc["opti"] += opti_sugg + result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) + for cube_bound_issue in self.result["cube"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) + cube_desc["bound"] += bound_sugg + result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) + for cube_affinity_issue in self.result["cube"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) + cube_desc["affinity"] += affinity_sugg + result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) - self.headers = [ - "Operator name", - "Count", - "Total duration(us)", - "AI CORE frequency decreased ratio", - "Average frequency", - "Max frequency", - "Min frequency", - ] - result.add_detail(problem, headers=self.headers) + if self.result["fa"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(fa_problem, headers=headers) + for fa_opti_issue in self.result["fa"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) + fa_desc["opti"] += opti_sugg + result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) + for fa_bound_issue in self.result["fa"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) + fa_desc["bound"] += bound_sugg + result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) + for fa_affinity_issue in self.result["fa"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) + fa_desc["affinity"] += affinity_sugg + result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) - for row in self.decrease_freq_ops: - result.add_detail(problem, detail=row) + if self.result["vector"]: + optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(vector_problem, headers=headers) + for vector_opti_issue in self.result["vector"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) + vector_desc["opti"] += opti_sugg + result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) + for vector_bound_issue in self.result["vector"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) + vector_desc["bound"] += bound_sugg + result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) return True def make_render(self, html_render, add_render_list=True, **kwargs): - if not self.ai_core_freq_issues: - return self.ai_core_freq_issues + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues priority = kwargs.get("priority") - if self.SHOW_TOPK_OPS: - self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." 
return html_render.render_template(key="computation", template_dir="templates", template_name="ai_core_performance.html", - desc=self.desc, - suggestion=self.suggestions, - headers=self.headers, - data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + format_result=self.result, add_render_list=add_render_list, priority_background_color=priority, rank=kwargs.get("rank")) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index a009f073d..7feb3e768 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -1,62 +1,135 @@ -{% if data|length > 0 %} +{% if format_result|length > 0 %}
[ai_core_performance.html, new template body (markup abridged): the heading "AI CORE Performance Analysis" replaces "AICORE Performance Analysis", and three conditional sections on format_result.cube, format_result.fa and format_result.vector replace the old MatMul / FlashAttention / Vector tables that looped over headers and data. Each section prints a lead line ("MatMul算子相关分析,参考如下:", "FA算子相关分析,参考如下:", "Vector算子相关分析,参考如下:") followed by a two-column table with headers 类别 and 描述及建议 and rows 性能优化算子集合, bound算子集合 and 不亲和算子集合 (the Vector section has no 不亲和 row). Each cell is accumulated in a Jinja2 namespace variable, one "<op_name>算子 shape: <shape> dtype: <dtype>" entry per item together with its 参考性能优化空间, bound类型 or 不亲和类型, entries joined with <br>.]
{% endif %} \ No newline at end of file diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index 7eef1598a..60d813e1d 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -1,109 +1,6 @@ -problem: "AICPU算子" -description: "一些算子和任务执行时间超过了{}us,比如:\n" -suggestion: "修改代码避免使用aicpu类算子" -double_suggestion: "尝试将double类型的算子转换成float,比如{}" -DataTypeSuggestion: &DataTypeSuggestion "数据类型{}在{}算子中可能会造成AICpu问题, 如果可以,尝试转换成{}。" -AICPU_DOC_URL: &AICPU_DOC_URL "https://gitee.com/ascend/mstt/blob/master/profiler/advisor/doc/Samples%20of%20AI%20CPU%20Operator%20Replacement.md" - -CommonChecker: - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ __ALL__ ] - ignore_type: [ cast, tensoraequal, equal, nonzero, mul ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ cast ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ tensorequal ] - input: [ float, float32, float16, bool, int32, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ equal ] - input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ nonzero ] - input: [ float16, bool, dt_bf16 ] - output: [ int64 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [7.0.RC1] - op_type: [ mul ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ __ALL__ ] - ignore_type: [ cast, tensorequal, equal, nonzero, mul ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] - output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ cast ] - input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ tensorequal ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ equal ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] - output: [ bool ] - suggestion: *DataTypeSuggestion - - - DataTypeChecker: - cann_version: [8.0.RC1, 7.0.0] - op_type: [ mul ] - input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] - output: [ float, float32, 
float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] - suggestion: *DataTypeSuggestion - -ExampleGuideChecker: - - IndexPutChecker: - op_type: [index] - url: *AICPU_DOC_URL - suggestion: "请参考链接修改源码,尝试用等价的算子替换index算子。" - - - NonzeroChecker: - op_type: [ indexput, indexputv2 ] - url: *AICPU_DOC_URL - suggestion: "请参考链接修改源码,尝试用等价的算子替换indexput算子。" - - - CastChecker: - op_type: [ argmin ] - url: *AICPU_DOC_URL - suggestion: "请参考链接更新cann-tookit包到7.0.RC1及以上的版本。" - - - CastChecker: - op_type: [ nonzero ] - url: *AICPU_DOC_URL - suggestion: "请参考链接修改源码,尝试用等价的算子替换nonzero算子。" - - +problem: "AICORE算子" +description: "提供一些AICORE算子的参考瓶颈" +suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子" +affinity_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 有不亲和特征: {suggestion}\n" +bound_suggestion: "{op_name}算子 shape{shape} dtype{dtype} bound类型为: {bound} bound\n" +optimization_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 疑似有性能优化空间,参考性能优化空间{optimization}\n" \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml new file mode 100644 index 000000000..247022214 --- /dev/null +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -0,0 +1,6 @@ +problem: "AICORE Operator" +description: "Provide some reference bottlenecks for the AICORE operator" +suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space" +affinity_suggestion: "{op_name} Op shape{shape} dtype{dtype} with disaffection characteristics: {suggestion}\n" +bound_suggestion: "{op_name} Op shape{shape} dtype{dtype} bound type: {bound} bound\n" +optimization_suggestion: "{op_name} Op shape{shape} dtype{dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n" \ No newline at end of file diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 89ac8187d..fa7d2421f 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -69,7 +69,7 @@ msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") if __name__ == "__main__": msprof_analyze_cli.main( [ - "analyze","all","-d", - r"D:\data\file","-l","cn" + "advisor","computation","-d", + r"E:\B站\910b-33f-cpsp4-add_contiguous\train-2184159-master-0_1058382_20240910063706363_ascend_pt","-l","cn" ] ) -- Gitee From 7aa2873f0b52469eae7dfdaa3b88b6b8d091981d Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 10:28:55 +0800 Subject: [PATCH 15/72] =?UTF-8?q?ND=20NZ=E6=A0=BC=E5=BC=8F=E8=B0=83?= =?UTF-8?q?=E6=95=B4=EF=BC=88=E5=86=85=E8=BD=B4=E8=AE=A1=E7=AE=97=E4=B8=8E?= =?UTF-8?q?=E5=86=85=E5=AD=98=E8=AE=A1=E7=AE=97=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e7267fdaf..6a6c2bbc7 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -107,11 +107,16 @@ class AICorePerformanceChecker: @staticmethod def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") - # todo batchmatmul - if len(input_shapes) > 2: - return 1 
- memory = sum(int(shape.split(",")[0]) * int(shape.split(",")[1]) for shape in input_shapes) - memory += int(operator.output_shapes[1:-1].split(",")[0]) * int(operator.output_shapes[1:-1].split(",")[1]) + memory = 0 + if len(input_shapes.split(",")) == 4: + memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3]) + for shape in (shapes.split(",") for shapes in input_shapes)) + output_shape = operator.output_shapes[1:-1].split(",") + memory += (int(output_shape[0]) * int(output_shape[1]) * int(output_shape[2]) * int(output_shape[3])) + else: + memory += sum(int(shape[0]) * int(shape[1]) for shape in (shapes.split(",") for shapes in input_shapes)) + output_shape = operator.output_shapes[1:-1].split(",") + memory += (int(output_shape[0]) * int(output_shape[1])) return memory * 2 / 1024 / 1024 def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): @@ -126,7 +131,6 @@ class AICorePerformanceChecker: self.result["vector"] = self.check_vector_operator(promoting_dataset) def check_cube_operator(self, profiling_dataset: ProfilingDataset): - # todo 未处理ND、NZ格式 cube_dict = self.cube_dict optimization_queue = [] bound_queue = [] @@ -140,9 +144,22 @@ class AICorePerformanceChecker: dtype = None shape_duration = 0. # 判断输入shape内轴是否为256的倍数 - affinity_flag = (int(shape.split("-")[0].split(";")[0].split(",")[1]) + - int(shape.split("-")[0].split(";")[1].split(",")[0])) % 256 != 0 - if affinity_flag: + if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: + # NZ格式 + shapes = shape.split("-")[0].split(";") + b = shapes[0].split(",")[1] + c = shapes[0].split(",")[2] + + f = shapes[1].split(",")[1] + g = shapes[1].split(",")[2] + affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) + else: + # ND格式 + shapes = shape.split("-")[0].split(";") + l = shapes[0].split(",")[1] + k = shapes[1].split(",")[1] + affinity_flag = (l % 256 == 0) and (k % 256 == 0) + if not affinity_flag: for operator in operator_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): @@ -342,17 +359,17 @@ class AICorePerformanceChecker: optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) if bound: bound_queue.append({ - "op_name": op_name, - "shape": shape, - "bound": bound, - "dtype": dtype, - "duration": shape_duration}) + "op_name": op_name, + "shape": shape, + "bound": bound, + "dtype": dtype, + "duration": shape_duration}) else: optimization_queue.append({ - "op_name": op_name, - "shape": shape, - "dtype": dtype, - "optimization": optimization}) + "op_name": op_name, + "shape": shape, + "dtype": dtype, + "optimization": optimization}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] -- Gitee From b6dd0cc690cda6e263b5ca63146aea1bcf3abcbb Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 11:15:33 +0800 Subject: [PATCH 16/72] =?UTF-8?q?ND=20NZ=E9=94=99=E8=AF=AF=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 6a6c2bbc7..cde048844 100644 --- 
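With this adjustment the estimate walks either four NZ axes or two ND axes per tensor; a standalone sketch of the revised calculation with hypothetical shape strings (two bytes per element):

    def estimate_memory_mb(input_shapes: str, output_shapes: str) -> float:
        in_shapes = input_shapes[1:-1].split(";")
        out_dims = output_shapes[1:-1].split(",")
        if len(in_shapes[0].split(",")) == 4:   # NZ-style 4-D shapes
            elements = sum(int(a) * int(b) * int(c) * int(d)
                           for a, b, c, d in (s.split(",") for s in in_shapes))
            elements += int(out_dims[0]) * int(out_dims[1]) * int(out_dims[2]) * int(out_dims[3])
        else:                                   # ND-style 2-D shapes
            elements = sum(int(a) * int(b) for a, b in (s.split(",") for s in in_shapes))
            elements += int(out_dims[0]) * int(out_dims[1])
        return elements * 2 / 1024 / 1024

    # Hypothetical NZ MatMul: two 4-D inputs and one 4-D output.
    print(estimate_memory_mb('"64,64,16,16;64,512,16,16"', '"64,512,16,16"'))  # 34.0 MB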
a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -108,7 +108,7 @@ class AICorePerformanceChecker: def memory_size(operator): input_shapes = operator.input_shapes[1:-1].split(";") memory = 0 - if len(input_shapes.split(",")) == 4: + if len(input_shapes[0].split(",")) == 4: memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3]) for shape in (shapes.split(",") for shapes in input_shapes)) output_shape = operator.output_shapes[1:-1].split(",") @@ -147,17 +147,17 @@ class AICorePerformanceChecker: if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: # NZ格式 shapes = shape.split("-")[0].split(";") - b = shapes[0].split(",")[1] - c = shapes[0].split(",")[2] + b = int(shapes[0].split(",")[1]) + c = int(shapes[0].split(",")[2]) - f = shapes[1].split(",")[1] - g = shapes[1].split(",")[2] + f = int(shapes[1].split(",")[1]) + g = int(shapes[1].split(",")[2]) affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) else: # ND格式 shapes = shape.split("-")[0].split(";") - l = shapes[0].split(",")[1] - k = shapes[1].split(",")[1] + l = int(shapes[0].split(",")[1]) + k = int(shapes[1].split(",")[1]) affinity_flag = (l % 256 == 0) and (k % 256 == 0) if not affinity_flag: for operator in operator_list: -- Gitee From 2711f076df26d09d553d34467c1c839c3fb6b22f Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 14:09:09 +0800 Subject: [PATCH 17/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 658 +++++++++--------- 1 file changed, 330 insertions(+), 328 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index cde048844..5514753ff 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -123,351 +123,353 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if self.cube_dict: - self.result["cube"] = self.check_cube_operator(promoting_dataset) - if self.fa_dict: - self.result["fa"] = self.check_fa_operator(promoting_dataset) - if self.vector_dict: - self.result["vector"] = self.check_vector_operator(promoting_dataset) - - def check_cube_operator(self, profiling_dataset: ProfilingDataset): - cube_dict = self.cube_dict - optimization_queue = [] - bound_queue = [] - affinity_queue = [] - operator_list = [op for op in profiling_dataset.op_summary.op_list - if op.op_name in cube_dict - and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] - suggestion = "内轴无法被256整除" - for op in cube_dict: - for shape in cube_dict[op]: - dtype = None - shape_duration = 0. 
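After the int() fixes above, the cube affinity test is a 256-alignment check on the inner axes of both input tensors; an illustrative sketch over hypothetical ND and NZ shape keys:

    def cube_inner_axes_affine(shape_key: str) -> bool:
        # shape_key is "<input_shapes>-<output_shapes>"; inputs are separated by ";".
        inputs = shape_key.split("-")[0].split(";")
        if len(inputs[0].split(",")) == 4:      # NZ: products of the two inner axes per input
            b, c = int(inputs[0].split(",")[1]), int(inputs[0].split(",")[2])
            f, g = int(inputs[1].split(",")[1]), int(inputs[1].split(",")[2])
            return (b * c % 256 == 0) and (f * g % 256 == 0)
        # ND: the second axis of each input must itself be a multiple of 256
        k0, k1 = int(inputs[0].split(",")[1]), int(inputs[1].split(",")[1])
        return (k0 % 256 == 0) and (k1 % 256 == 0)

    print(cube_inner_axes_affine("4096,1024;1024,512-4096,512"))   # True: 1024 and 512 are multiples of 256
    print(cube_inner_axes_affine("4096,1000;1000,512-4096,512"))   # False: 1000 is not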
- # 判断输入shape内轴是否为256的倍数 - if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: - # NZ格式 - shapes = shape.split("-")[0].split(";") - b = int(shapes[0].split(",")[1]) - c = int(shapes[0].split(",")[2]) - - f = int(shapes[1].split(",")[1]) - g = int(shapes[1].split(",")[2]) - affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) - else: - # ND格式 - shapes = shape.split("-")[0].split(";") - l = int(shapes[0].split(",")[1]) - k = int(shapes[1].split(",")[1]) - affinity_flag = (l % 256 == 0) and (k % 256 == 0) - if not affinity_flag: - for operator in operator_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - dtype = operator.input_data_types - shape_duration += float(operator.task_duration) - affinity_queue.append({ + self.result["cube"] = self.check_cube_operator(promoting_dataset) + self.result["fa"] = self.check_fa_operator(promoting_dataset) + self.result["vector"] = self.check_vector_operator(promoting_dataset) + + +def check_cube_operator(self, profiling_dataset: ProfilingDataset): + cube_dict = self.cube_dict + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + operator_list = [op for op in profiling_dataset.op_summary.op_list + if op.op_name in cube_dict + and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] + suggestion = "内轴无法被256整除" + for op in cube_dict: + for shape in cube_dict[op]: + dtype = None + shape_duration = 0. + # 判断输入shape内轴是否为256的倍数 + if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: + # NZ格式 + shapes = shape.split("-")[0].split(";") + b = int(shapes[0].split(",")[1]) + c = int(shapes[0].split(",")[2]) + + f = int(shapes[1].split(",")[1]) + g = int(shapes[1].split(",")[2]) + affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) + else: + # ND格式 + shapes = shape.split("-")[0].split(";") + l = int(shapes[0].split(",")[1]) + k = int(shapes[1].split(",")[1]) + affinity_flag = (l % 256 == 0) and (k % 256 == 0) + if not affinity_flag: + for operator in operator_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + dtype = operator.input_data_types + shape_duration += float(operator.task_duration) + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "duration": shape_duration, + "suggestion": suggestion}) + continue + else: + shap_list = [operator for operator in operator_list if + operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] + shape_duration = sum(float(operator.task_duration) for operator in shap_list) + dtype = shap_list[0].input_data_types if shap_list else None + aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) + aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) + if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: + bound_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "duration": shape_duration, - "suggestion": suggestion}) - continue - else: - shap_list = [operator for operator in operator_list if - operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] - shape_duration = sum(float(operator.task_duration) for operator in shap_list) - dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) - 
aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) - if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_and_mte2_bound", - "duration": shape_duration}) - elif aic_mac_ratio >= 0.8: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_bound", - "duration": shape_duration}) - elif aic_mte2_ratio >= 0.95: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mte2_bound", - "duration": shape_duration}) - else: - optimization_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) - return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], - sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], - sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - - def check_fa_operator(self, profiling_dataset: ProfilingDataset): - fa_list = self.fa_list - fa_dict = self.fa_dict - optimization_queue = [] - bound_queue = [] - affinity_queue = [] - # 不亲和算子筛选 - for op in fa_dict: - for shape in fa_dict[op]: - affinity_flag = False - shape_duration = 0. - dtype = None - suggestion = "" - if "varlen" in op.lower(): - # 处理变长算子 如果不亲和则affinity_flag为False - if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: - affinity_flag = True - suggestion = "D不能被128整除" - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - else: - # 处理定长算子 如果不亲和则affinity_flag为False - head_dim = 0 - seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) - input_first_tensor = shape.split("-")[0].split(";")[0].split(",") - if len(input_first_tensor) == 3: - head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) - else: - head_dim = int(input_first_tensor[3]) - if head_dim % 128 != 0 and seq_len % 128 != 0: - affinity_flag = True - suggestion = "D和S均不能被128整除" - elif head_dim % 128 != 0: - affinity_flag = True - suggestion = "D不能被128整除" - elif seq_len % 128 != 0: - affinity_flag = True - suggestion = "S不能被128整除" - if affinity_flag: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + - operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - - if affinity_flag: - # 不亲和算子 计算耗时,加入affinity_queue - affinity_queue.append({ + "bound": "mac_and_mte2_bound", + "duration": shape_duration}) + elif aic_mac_ratio >= 0.8: + bound_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "bound": "mac_bound", + "duration": shape_duration}) + elif aic_mte2_ratio >= 0.95: + bound_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "suggestion": suggestion, + "bound": "mte2_bound", "duration": shape_duration}) - continue else: - # 处理bound算子和优化算子 - aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. 
- bound = "" - if len(shape.split("-")) > 2: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + - operator.output_shapes[1:-1] + "-grad" == shape): - aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: - bound = "mte2_and_fixpipe_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" - else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) - else: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: - bound = "mte2_and_vec_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" - else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) - if bound: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": bound, - "duration": shape_duration}) + optimization_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + + +def check_fa_operator(self, profiling_dataset: ProfilingDataset): + fa_list = self.fa_list + fa_dict = self.fa_dict + optimization_queue = [] + bound_queue = [] + affinity_queue = [] + # 不亲和算子筛选 + for op in fa_dict: + for shape in fa_dict[op]: + affinity_flag = False + shape_duration = 0. 
+ dtype = None + suggestion = "" + if "varlen" in op.lower(): + # 处理变长算子 如果不亲和则affinity_flag为False + if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + else: + # 处理定长算子 如果不亲和则affinity_flag为False + head_dim = 0 + seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) + input_first_tensor = shape.split("-")[0].split(";")[0].split(",") + if len(input_first_tensor) == 3: + head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) + else: + head_dim = int(input_first_tensor[3]) + if head_dim % 128 != 0 and seq_len % 128 != 0: + affinity_flag = True + suggestion = "D和S均不能被128整除" + elif head_dim % 128 != 0: + affinity_flag = True + suggestion = "D不能被128整除" + elif seq_len % 128 != 0: + affinity_flag = True + suggestion = "S不能被128整除" + if affinity_flag: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + + if affinity_flag: + # 不亲和算子 计算耗时,加入affinity_queue + affinity_queue.append({ + "op_name": op, + "shape": shape.split("-")[0], + "dtype": dtype, + "suggestion": suggestion, + "duration": shape_duration}) + continue + else: + # 处理bound算子和优化算子 + aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. + bound = "" + if len(shape.split("-")) > 2: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] + "-grad" == shape): + aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: + bound = "mte2_and_fixpipe_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" else: - optimization_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "optimization": optimization}) - - return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], - sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], - sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - - def check_vector_operator(self, profiling_dataset: ProfilingDataset): - vector_dict = self.vector_dict - vector_list = [] - optimization_queue = [] - bound_queue = [] - vector_list.extend( - operator for op_name in vector_dict - for shape in vector_dict[op_name] - for operator in profiling_dataset.op_summary.op_list - if operator.op_name == op_name - and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape - ) - for op_name in vector_dict: - for shape in vector_dict[op_name]: - aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. 
- bound, dtype = "", "" - for operator in vector_list: - if (operator.op_name == op_name and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aiv_mte2_ratio += float(operator.aiv_mte2_ratio) - aiv_mte3_ratio += float(operator.aiv_mte3_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: - bound = "vec_mte2_mte3_bound" - elif aiv_mte2_ratio >= 0.7: - bound = "mte2_bound" - elif aiv_mte3_ratio >= 0.7: - bound = "mte3_bound" - elif aiv_vec_ratio >= 0.7: - bound = "vec_bound" + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) else: - optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: + bound = "mte2_and_vec_bound" + elif aic_mte2_ratio >= 0.8: + bound = "mte2_bound" + elif aiv_vec_ratio >= 0.75: + bound = "vec_bound" + else: + optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) if bound: bound_queue.append({ - "op_name": op_name, - "shape": shape, - "bound": bound, + "op_name": op, + "shape": shape.split("-")[0], "dtype": dtype, + "bound": bound, "duration": shape_duration}) else: optimization_queue.append({ - "op_name": op_name, - "shape": shape, + "op_name": op, + "shape": shape.split("-")[0], "dtype": dtype, "optimization": optimization}) - return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], - sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] - pass + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], + sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] - def make_record(self, result: OptimizeResult): - """ - make record for what and how to optimize - """ - if not self.ai_core_performance_issues: - return self.ai_core_performance_issues - - cube_problem = "Cube算子性能分析" - fa_problem = "FA算子性能分析" - vector_problem = "Vector算子性能分析" - sugg_keys = ['opti', 'bound', 'affinity'] - cube_desc = dict.fromkeys(sugg_keys, "") - fa_desc = dict.fromkeys(sugg_keys, "") - vector_desc = dict.fromkeys(sugg_keys, "") - if self.result["cube"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(cube_problem, headers=headers) - for cube_opti_issue in self.result["cube"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) - cube_desc["opti"] += opti_sugg - result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) - for cube_bound_issue in self.result["cube"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) - cube_desc["bound"] += bound_sugg - result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) - for cube_affinity_issue in self.result["cube"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) - cube_desc["affinity"] += affinity_sugg - result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) - - if 
self.result["fa"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(fa_problem, headers=headers) - for fa_opti_issue in self.result["fa"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) - fa_desc["opti"] += opti_sugg - result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) - for fa_bound_issue in self.result["fa"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) - fa_desc["bound"] += bound_sugg - result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) - for fa_affinity_issue in self.result["fa"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) - fa_desc["affinity"] += affinity_sugg - result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) - - if self.result["vector"]: - optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(vector_problem, headers=headers) - for vector_opti_issue in self.result["vector"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) - vector_desc["opti"] += opti_sugg - result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) - for vector_bound_issue in self.result["vector"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) - vector_desc["bound"] += bound_sugg - result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) - return True - - def make_render(self, html_render, add_render_list=True, **kwargs): - if not self.ai_core_performance_issues: - return self.ai_core_performance_issues - - priority = kwargs.get("priority") - return html_render.render_template(key="computation", - template_dir="templates", - template_name="ai_core_performance.html", - format_result=self.result, - add_render_list=add_render_list, - priority_background_color=priority, - rank=kwargs.get("rank")) - - def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool: - if not hasattr(profiling_dataset, "op_summary"): - logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") - return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, - "op_list"): - logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") - return False - return True + +def check_vector_operator(self, profiling_dataset: ProfilingDataset): + vector_dict = self.vector_dict + vector_list = [] + optimization_queue = [] + bound_queue = [] + vector_list.extend( + operator for op_name in vector_dict + for shape in vector_dict[op_name] + for operator in profiling_dataset.op_summary.op_list + if operator.op_name == op_name + and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape + ) + for op_name in vector_dict: + for shape in vector_dict[op_name]: + aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. 
+ bound, dtype = "", "" + for operator in vector_list: + if (operator.op_name == op_name and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aiv_mte2_ratio += float(operator.aiv_mte2_ratio) + aiv_mte3_ratio += float(operator.aiv_mte3_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: + bound = "vec_mte2_mte3_bound" + elif aiv_mte2_ratio >= 0.7: + bound = "mte2_bound" + elif aiv_mte3_ratio >= 0.7: + bound = "mte3_bound" + elif aiv_vec_ratio >= 0.7: + bound = "vec_bound" + else: + optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) + if bound: + bound_queue.append({ + "op_name": op_name, + "shape": shape, + "bound": bound, + "dtype": dtype, + "duration": shape_duration}) + else: + optimization_queue.append({ + "op_name": op_name, + "shape": shape, + "dtype": dtype, + "optimization": optimization}) + return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], + sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] + + pass + + +def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues + + cube_problem = "Cube算子性能分析" + fa_problem = "FA算子性能分析" + vector_problem = "Vector算子性能分析" + sugg_keys = ['opti', 'bound', 'affinity'] + cube_desc = dict.fromkeys(sugg_keys, "") + fa_desc = dict.fromkeys(sugg_keys, "") + vector_desc = dict.fromkeys(sugg_keys, "") + if self.result["cube"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(cube_problem, headers=headers) + for cube_opti_issue in self.result["cube"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) + cube_desc["opti"] += opti_sugg + result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) + for cube_bound_issue in self.result["cube"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) + cube_desc["bound"] += bound_sugg + result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) + for cube_affinity_issue in self.result["cube"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) + cube_desc["affinity"] += affinity_sugg + result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) + + if self.result["fa"]: + optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(fa_problem, headers=headers) + for fa_opti_issue in self.result["fa"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) + fa_desc["opti"] += opti_sugg + result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) + for fa_bound_issue in self.result["fa"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) + fa_desc["bound"] += bound_sugg + result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) + for fa_affinity_issue in self.result["fa"][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) + fa_desc["affinity"] += affinity_sugg + result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) + + if self.result["vector"]: + 
optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(vector_problem, headers=headers) + for vector_opti_issue in self.result["vector"][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) + vector_desc["opti"] += opti_sugg + result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) + for vector_bound_issue in self.result["vector"][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) + vector_desc["bound"] += bound_sugg + result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) + return True + + +def make_render(self, html_render, add_render_list=True, **kwargs): + if not self.ai_core_performance_issues: + return self.ai_core_performance_issues + + priority = kwargs.get("priority") + return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_performance.html", + format_result=self.result, + add_render_list=add_render_list, + priority_background_color=priority, + rank=kwargs.get("rank")) + + +def check_task_dict(self, profiling_dataset: ProfilingDataset) -> bool: + if not hasattr(profiling_dataset, "op_summary"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + return False + if not hasattr(profiling_dataset.op_summary, "op_list"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") + return False + return True -- Gitee From d0e72de18d3164e9019156b7880ef451a0d55bc0 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Mon, 20 Jan 2025 15:16:02 +0800 Subject: [PATCH 18/72] =?UTF-8?q?=E9=80=82=E9=85=8D=E8=8B=B1=E6=96=87?= =?UTF-8?q?=E7=89=88=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 54 ++++++----- .../html/templates/ai_core_performance.html | 90 ++++++++++++------- .../advisor/rules/cn/aicore_performance.yaml | 17 +++- .../advisor/rules/en/aicore_performance.yaml | 17 +++- profiler/cli/entrance.py | 4 +- 5 files changed, 116 insertions(+), 66 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e7267fdaf..e6c6e382b 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -57,9 +57,19 @@ class AICorePerformanceChecker: if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) + self.language = language self.aicore_rules = FileManager.read_yaml_file(rule_path) - self._PROBLEM = self.aicore_rules.get("problem") + self._CUBE_PROBLEM = self.aicore_rules.get("cube_problem") + self._FA_PROBLEM = self.aicore_rules.get("fa_problem") + self._VECTOR_PROBLEM = self.aicore_rules.get("vector_problem") self.desc = self.aicore_rules.get("description") + self._BOUND_DESC = self.aicore_rules.get("bound_description") + self._OPTI_DESC = self.aicore_rules.get("optimization_description") + self._AFFINITY_DESC = self.aicore_rules.get("affinity_description") + self._CUBE_AFFINITY_DESC = self.aicore_rules.get("cube_affinity_desc") + self._FA_AFFINITY_DESC_TYPE1 = self.aicore_rules.get("fa_affinity_desc_type1") 
+ self._FA_AFFINITY_DESC_TYPE2 = self.aicore_rules.get("fa_affinity_desc_type2") + self._FA_AFFINITY_DESC_TYPE3 = self.aicore_rules.get("fa_affinity_desc_type3") self.suggestion = self.aicore_rules.get("suggestion") self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion") self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion") @@ -134,7 +144,7 @@ class AICorePerformanceChecker: operator_list = [op for op in profiling_dataset.op_summary.op_list if op.op_name in cube_dict and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] - suggestion = "内轴无法被256整除" + suggestion = self._CUBE_AFFINITY_DESC for op in cube_dict: for shape in cube_dict[op]: dtype = None @@ -211,7 +221,7 @@ class AICorePerformanceChecker: # 处理变长算子 如果不亲和则affinity_flag为False if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: affinity_flag = True - suggestion = "D不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE1 for operator in fa_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): @@ -228,13 +238,13 @@ class AICorePerformanceChecker: head_dim = int(input_first_tensor[3]) if head_dim % 128 != 0 and seq_len % 128 != 0: affinity_flag = True - suggestion = "D和S均不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE3 elif head_dim % 128 != 0: affinity_flag = True - suggestion = "D不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE1 elif seq_len % 128 != 0: affinity_flag = True - suggestion = "S不能被128整除" + suggestion = self._FA_AFFINITY_DESC_TYPE2 if affinity_flag: for operator in fa_list: if (operator.op_name == op and @@ -365,71 +375,68 @@ class AICorePerformanceChecker: if not self.ai_core_performance_issues: return self.ai_core_performance_issues - cube_problem = "Cube算子性能分析" - fa_problem = "FA算子性能分析" - vector_problem = "Vector算子性能分析" sugg_keys = ['opti', 'bound', 'affinity'] cube_desc = dict.fromkeys(sugg_keys, "") fa_desc = dict.fromkeys(sugg_keys, "") vector_desc = dict.fromkeys(sugg_keys, "") if self.result["cube"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ "Type", "Description and Suggestion", ] - result.add_detail(cube_problem, headers=headers) + result.add_detail(self._CUBE_PROBLEM, headers=headers) for cube_opti_issue in self.result["cube"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) cube_desc["opti"] += opti_sugg - result.add_detail(cube_problem, detail=["性能优化算子集合", cube_desc["opti"]]) + result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) for cube_bound_issue in self.result["cube"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) cube_desc["bound"] += bound_sugg - result.add_detail(cube_problem, detail=["bound算子集合", cube_desc["bound"]]) + result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) for cube_affinity_issue in self.result["cube"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) cube_desc["affinity"] += affinity_sugg - result.add_detail(cube_problem, detail=["不亲和算子集合", cube_desc["affinity"]]) + result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) if self.result["fa"]: - optimization_item = OptimizeItem(cube_problem, self.desc, [self.suggestion]) + optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) 
result.add(OptimizeRecord(optimization_item)) headers = [ "Type", "Description and Suggestion", ] - result.add_detail(fa_problem, headers=headers) + result.add_detail(self._FA_PROBLEM, headers=headers) for fa_opti_issue in self.result["fa"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) fa_desc["opti"] += opti_sugg - result.add_detail(fa_problem, detail=["性能优化算子集合", fa_desc["opti"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) for fa_bound_issue in self.result["fa"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) fa_desc["bound"] += bound_sugg - result.add_detail(fa_problem, detail=["bound算子集合", fa_desc["bound"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) for fa_affinity_issue in self.result["fa"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) fa_desc["affinity"] += affinity_sugg - result.add_detail(fa_problem, detail=["不亲和算子集合", fa_desc["affinity"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) if self.result["vector"]: - optimization_item = OptimizeItem(vector_problem, self.desc, [self.suggestion]) + optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ "Type", "Description and Suggestion", ] - result.add_detail(vector_problem, headers=headers) + result.add_detail(self._VECTOR_PROBLEM, headers=headers) for vector_opti_issue in self.result["vector"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) vector_desc["opti"] += opti_sugg - result.add_detail(vector_problem, detail=["性能优化算子集合", vector_desc["opti"]]) + result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) for vector_bound_issue in self.result["vector"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) vector_desc["bound"] += bound_sugg - result.add_detail(vector_problem, detail=["bound算子集合", vector_desc["bound"]]) + result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) return True def make_render(self, html_render, add_render_list=True, **kwargs): @@ -441,6 +448,7 @@ class AICorePerformanceChecker: template_dir="templates", template_name="ai_core_performance.html", format_result=self.result, + language=self.language, add_render_list=add_render_list, priority_background_color=priority, rank=kwargs.get("rank")) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index 7feb3e768..48e62ad6c 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -2,132 +2,156 @@

AI CORE Performance Analysis

+ {% if language == "cn" %} + {% set title_ns = namespace(type='类别', desc='描述及建议', opti_set='性能优化算子集合', bound_set='bound算子集合', affinity_set='不亲和算子集合', + opti_refer=' 参考性能优化空间: ', bound_refer=' bound类型为: ', affinity_refer=' 不亲和类型为: ', title_desc='算子相关分析,参考如下: ') %} + {% else %} + {% set title_ns = namespace(type='Type', desc='Description and Suggestion', opti_set='set of performance optimization operators', + bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ', + bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %} + {% endif %} {% if format_result.cube is not none %} - MatMul算子相关分析,参考如下: + MatMul{{ title_ns.title_desc }}
- - + + {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.cube[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% endif %} {% endfor %} + {% if opti_ns.total_opti|length > 0 %} - + + {% endif %} {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.cube[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} + {% if bound_ns.total_bound|length > 0 %} - + + {% endif %} {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.cube[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} + {% if affinity_ns.total_affinity|length > 0 %} - + + {% endif %}
类别描述及建议{{ title_ns.type }}{{ title_ns.desc }}
性能优化算子集合{{ title_ns.opti_set }} {{ opti_ns.total_opti | safe }}
bound算子集合{{ title_ns.bound_set }} {{ bound_ns.total_bound | safe }}
bound算子集合{{ title_ns.affinity_set }} {{ affinity_ns.total_affinity | safe }}
{% endif %} {% if format_result.fa is not none %} - FA算子相关分析,参考如下: + FA{{ title_ns.title_desc }}
- - + + {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.fa[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% endif %} {% endfor %} + {% if opti_ns.total_opti|length > 0 %} - + + {% endif %} {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.fa[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} + {% if bound_ns.total_bound|length > 0 %} - + + {% endif %} {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.fa[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "算子 shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ "不亲和类型为: " ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} + {% if affinity_ns.total_affinity|length > 0 %} - + + {% endif %}
类别描述及建议{{ title_ns.type }}{{ title_ns.desc }}
性能优化算子集合{{ title_ns.opti_set }} {{ opti_ns.total_opti | safe }}
bound算子集合{{ title_ns.bound_set }} {{ bound_ns.total_bound | safe }}
不亲和算子集合{{ title_ns.affinity_set }} {{ affinity_ns.total_affinity | safe }}
{% endif %} {% if format_result.cube is not none %} - Vector算子相关分析,参考如下: + Vector{{ title_ns.title_desc }}
- - + + {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.vector[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "算子 shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ "参考性能优化空间: " ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} {% endif %} {% endfor %} + {% if opti_ns.total_opti|length > 0 %} - + + {% endif %} {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.vector[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "算子 shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ "bound类型为: " ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} + {% if bound_ns.total_bound|length > 0 %} - + + {% endif %}
类别描述及建议{{ title_ns.type }}{{ title_ns.desc }}
性能优化算子集合{{ title_ns.opti_set }} {{ opti_ns.total_opti | safe }}
bound算子集合{{ title_ns.bound_set }} {{ bound_ns.total_bound | safe }}
{% endif %}
diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml
index 60d813e1d..f00f0a4b7 100644
--- a/profiler/advisor/rules/cn/aicore_performance.yaml
+++ b/profiler/advisor/rules/cn/aicore_performance.yaml
@@ -1,6 +1,15 @@
-problem: "AICORE算子"
+cube_problem: "Cube算子性能分析"
+fa_problem: "FA算子性能分析"
+vector_problem: "Vector算子性能分析"
 description: "提供一些AICORE算子的参考瓶颈"
+bound_description: "bound算子集合"
+optimization_description: "性能优化算子集合"
+affinity_description: "不亲和算子集合"
+cube_affinity_desc: "内轴无法被256整除"
+fa_affinity_desc_type1: "D不能被128整除"
+fa_affinity_desc_type2: "S不能被128整除"
+fa_affinity_desc_type3: "D和S均不能被128整除"
 suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子"
-affinity_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 有不亲和特征: {suggestion}\n"
-bound_suggestion: "{op_name}算子 shape{shape} dtype{dtype} bound类型为: {bound} bound\n"
-optimization_suggestion: "{op_name}算子 shape{shape} dtype{dtype} 疑似有性能优化空间,参考性能优化空间{optimization}\n"
\ No newline at end of file
+affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n"
+bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n"
+optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}\n"
\ No newline at end of file
diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml
index 247022214..28f52f1ed 100644
--- a/profiler/advisor/rules/en/aicore_performance.yaml
+++ b/profiler/advisor/rules/en/aicore_performance.yaml
@@ -1,6 +1,15 @@
-problem: "AICORE Operator"
+cube_problem: "Cube operator performance analysis"
+fa_problem: "FA operator performance analysis"
+vector_problem: "Vector operator performance analysis"
 description: "Provide some reference bottlenecks for the AICORE operator"
+bound_description: "set of bound operators"
+optimization_description: "set of performance optimization operators"
+affinity_description: "set of unaffine operators"
+cube_affinity_desc: "The inner axis is not divisible by 256"
+fa_affinity_desc_type1: "D is not divisible by 128"
+fa_affinity_desc_type2: "S is not divisible by 128"
+fa_affinity_desc_type3: "Neither D nor S is divisible by 128"
 suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space"
-affinity_suggestion: "{op_name} Op shape{shape} dtype{dtype} with disaffection characteristics: {suggestion}\n"
-bound_suggestion: "{op_name} Op shape{shape} dtype{dtype} bound type: {bound} bound\n"
-optimization_suggestion: "{op_name} Op shape{shape} dtype{dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
+affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n"
+bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n"
+optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n"
\ No newline at end of file
diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py
index fa7d2421f..89ac8187d 100644
--- a/profiler/cli/entrance.py
+++ b/profiler/cli/entrance.py
@@ -69,7 +69,7 @@ msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion")
 if __name__ == "__main__":
     msprof_analyze_cli.main(
         [
-            "advisor","computation","-d",
-            
r"E:\B站\910b-33f-cpsp4-add_contiguous\train-2184159-master-0_1058382_20240910063706363_ascend_pt","-l","cn" + "analyze","all","-d", + r"D:\data\file","-l","cn" ] ) -- Gitee From 295e6ee731585815c0a49adb9bcfc470377cd315 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 15:36:34 +0800 Subject: [PATCH 19/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 2baa00d19..5b8e14550 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -133,12 +133,9 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - if self.cube_dict: - self.result["cube"] = self.check_cube_operator(promoting_dataset) - if self.fa_dict: - self.result["fa"] = self.check_fa_operator(promoting_dataset) - if self.vector_dict: - self.result["vector"] = self.check_vector_operator(promoting_dataset) + self.result["cube"] = self.check_cube_operator(promoting_dataset) + self.result["fa"] = self.check_fa_operator(promoting_dataset) + self.result["vector"] = self.check_vector_operator(promoting_dataset) def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict -- Gitee From 2313499b7ad30c325f21acc62db3090c74c56d91 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:18:06 +0800 Subject: [PATCH 20/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5b8e14550..bbf8b3b1a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -116,17 +116,19 @@ class AICorePerformanceChecker: @staticmethod def memory_size(operator): - input_shapes = operator.input_shapes[1:-1].split(";") memory = 0 - if len(input_shapes[0].split(",")) == 4: - memory = sum(int(shape[0]) * int(shape[1]) * int(shape[2]) * int(shape[3]) - for shape in (shapes.split(",") for shapes in input_shapes)) - output_shape = operator.output_shapes[1:-1].split(",") - memory += (int(output_shape[0]) * int(output_shape[1]) * int(output_shape[2]) * int(output_shape[3])) - else: - memory += sum(int(shape[0]) * int(shape[1]) for shape in (shapes.split(",") for shapes in input_shapes)) - output_shape = operator.output_shapes[1:-1].split(",") - memory += (int(output_shape[0]) * int(output_shape[1])) + input_shapes = operator.input_shapes[1:-1].split(";") + for shapes in input_shapes: + start = 1 + for shape in shapes.split(","): + start *= int(shape) + memory += start + + output_shape = operator.output_shapes[1:-1].split(",") + start = 1 + for 
shapes in output_shape: + start *= int(shapes) + memory += int(start) return memory * 2 / 1024 / 1024 def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): @@ -471,8 +473,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not hasattr(profiling_dataset.op_summary, "task_dict") or not hasattr(profiling_dataset.op_summary, - "op_list"): - logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") + if not not hasattr(profiling_dataset.op_summary, "op_list"): + logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False return True -- Gitee From 078be76380bf4ff0db578c7e5e52fd8180aa769f Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:22:19 +0800 Subject: [PATCH 21/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index bbf8b3b1a..8bdc92c80 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -473,7 +473,7 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False - if not not hasattr(profiling_dataset.op_summary, "op_list"): + if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False return True -- Gitee From 4718bb7517d7ca0b898817b127b52cf39b2b02dc Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:25:56 +0800 Subject: [PATCH 22/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 03b0a8c6e..89b6be779 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -51,4 +51,4 @@ class AICorePerformanceAnalyzer(BaseAnalyzer): return self.result def get_priority(self, max_mem_op_dur=None): - return PriorityBackgroundColor.high # html 底色设置 \ No newline at end of file + return PriorityBackgroundColor.low \ No newline at end of file -- Gitee From 4dee540ee5d220afa742c46be7da04f862d45605 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 16:50:43 +0800 Subject: [PATCH 23/72] =?UTF-8?q?=E5=86=85=E5=AD=98=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
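Note (illustrative, not part of the change): the sketch below mirrors the estimate that memory_size() arrives at with this patch — the element counts of every input tensor and the output tensor are summed, a single-value (bias) entry is counted twice, and the total is converted to MB assuming 2-byte elements, as in the existing `* 2 / 1024 / 1024` conversion. The helper name and the example shape strings are made up for illustration only.

    # Illustrative sketch only: helper name and example shapes are hypothetical.
    def estimate_memory_mb(input_shapes: str, output_shape: str) -> float:
        elements = 0
        for shape in input_shapes.split(";"):
            if "," not in shape:
                # a single-value entry is treated as a bias and counted twice
                elements += int(shape) * 2
                continue
            count = 1
            for dim in shape.split(","):
                count *= int(dim)
            elements += count
        count = 1
        for dim in output_shape.split(","):
            count *= int(dim)
        elements += count
        return elements * 2 / 1024 / 1024  # 2 bytes per element, converted to MB

    # e.g. two matmul inputs plus a bias vector, and one 2-D output
    print(estimate_memory_mb("64,128;128,256;256", "64,256"))
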
.../ai_core_performance/ai_core_performance_checker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 8bdc92c80..647ef0c7f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -119,6 +119,10 @@ class AICorePerformanceChecker: memory = 0 input_shapes = operator.input_shapes[1:-1].split(";") for shapes in input_shapes: + if not "," in shapes: + # 多的一维是 bias ,预先乘2 + memory += int (shapes) * 2 + continue start = 1 for shape in shapes.split(","): start *= int(shape) -- Gitee From 61390688f533d13b6f53abc6448243d66e809b54 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 17:07:52 +0800 Subject: [PATCH 24/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 647ef0c7f..f70a3c815 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -16,7 +16,6 @@ import logging import os from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult from profiler.prof_common.additional_args_manager import AdditionalArgsManager @@ -119,7 +118,7 @@ class AICorePerformanceChecker: memory = 0 input_shapes = operator.input_shapes[1:-1].split(";") for shapes in input_shapes: - if not "," in shapes: + if not "," in shapes and shapes != "": # 多的一维是 bias ,预先乘2 memory += int (shapes) * 2 continue @@ -184,7 +183,6 @@ class AICorePerformanceChecker: "dtype": dtype, "duration": shape_duration, "suggestion": suggestion}) - continue else: shap_list = [operator for operator in operator_list if operator.op_name == op and @@ -281,7 +279,6 @@ class AICorePerformanceChecker: "dtype": dtype, "suggestion": suggestion, "duration": shape_duration}) - continue else: # 处理bound算子和优化算子 aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. 
-- Gitee From 4fe9ffd3d761152213dc933dbf79986aef8a0744 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Mon, 20 Jan 2025 18:26:17 +0800 Subject: [PATCH 25/72] =?UTF-8?q?=E6=B8=85=E7=90=86=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/analyzer/analyzer_controller.py | 33 ++++++++++--------- profiler/advisor/analyzer/base_analyzer.py | 8 ++--- .../ai_core_performance_checker.py | 10 +++--- .../computation/profiling_analyzer.py | 2 +- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py index e8a62c69d..1a5a28b63 100644 --- a/profiler/advisor/analyzer/analyzer_controller.py +++ b/profiler/advisor/analyzer/analyzer_controller.py @@ -186,6 +186,7 @@ class AnalyzerController: return True + @staticmethod def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data, headers, dimension, get_max=False): @@ -255,10 +256,10 @@ class AnalyzerController: return dimensions, AsyncParams.user_total_params def do_analysis(self, dimensions, **kwargs): - pid = os.getpid() # 获取当前进程的pid + pid = os.getpid() resp = {"id": pid} - self.args_manager = AdditionalArgsManager() # 初始化参数管理器 - self.args_manager.init(kwargs) # 初始化参数管理器 + self.args_manager = AdditionalArgsManager() + self.args_manager.init(kwargs) output_path = kwargs.get("output_path") AnalyzerController._set_analysis_process_priority(pid) @@ -277,9 +278,9 @@ class AnalyzerController: PathManager.make_dir_safety(output_path) Config().set_config("_work_path", output_path) - Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") # 设置日志路径 + Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") - self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) # 执行分析 + self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs) except Exception as e: self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.INNER_ERROR_STATUS_CODE, status=AsyncAnalysisStatus.FAILED, error_msg=str(e)) @@ -611,8 +612,8 @@ class AnalyzerController: return job_list def _do_analysis(self, dimensions, pid=0, async_resp=None, **kwargs): - self.dimensions = dimensions # 设置分析维度 - self.kwargs = kwargs # 设置分析参数 + self.dimensions = dimensions + self.kwargs = kwargs result_list = [] profiling_path = PathManager.get_realpath(self.kwargs.get("profiling_path")) benchmark_profiling_path = self.kwargs.get("benchmark_profiling_path") @@ -621,7 +622,7 @@ class AnalyzerController: benchmark_profiling_path = PathManager.get_realpath(benchmark_profiling_path) PathManager.check_path_owner_consistent([benchmark_profiling_path]) - if not self._check_profiling_path_valid(profiling_path): # 检查profiling路径是否有效 + if not self._check_profiling_path_valid(profiling_path): error_msg = f"Got invalid argument '-d/--profiling_path' {profiling_path}, skip analysis" self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE, @@ -629,8 +630,8 @@ class AnalyzerController: logger.error(error_msg) return - if benchmark_profiling_path and not self._check_profiling_path_valid( - benchmark_profiling_path): # 检查benchmark_profiling路径是否有效 + + if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path): error_msg = (f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, 
" f"skip analysis") self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg, @@ -639,7 +640,7 @@ class AnalyzerController: logger.error(error_msg) return - self._is_cluster = self._is_cluster_profiling(profiling_path) # 判断是否是集群profiling + self._is_cluster = self._is_cluster_profiling(profiling_path) if benchmark_profiling_path: # 构建benchmark profiling的map,用于根据rank获取profiling路径,否则无法进行比对 is_benchmark_cluster = self._is_cluster_profiling(benchmark_profiling_path) @@ -654,16 +655,16 @@ class AnalyzerController: return if not self._is_cluster: - job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) # 单卡分析 + job_list = self.single_rank_analysis(profiling_path, benchmark_profiling_path) else: self.slow_rank_analyzer = SlowRankAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) self.slow_link_analyzer = SlowLinkAnalyzer(profiling_path, output_path=self.kwargs.get("output_path")) - job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) # 集群分析 + job_list = self.do_cluster_analysis(profiling_path, benchmark_profiling_path) - for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): # dimension: 分析维度,scope: 分析器 + for i, (dimension, scope, interface, kwargs) in enumerate(job_list[::-1]): result_list.append( - # 获取分析结果 - interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, **kwargs) + interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False, + **kwargs) ) for result in result_list[::-1]: diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index adf82ab8a..0391eb88a 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -105,7 +105,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def get_priority(self, max_mem_op_dur): pass - def identify_profiling_type(self, profiling_type_list): # 确定分析类型 + def identify_profiling_type(self, profiling_type_list): profiling_type = None if self.collection_path.endswith(ASCEND_MS): profiling_type = [elem for elem in profiling_type_list if Constant.MINDSPORE in elem][0] @@ -134,7 +134,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): profiling_type = profiling_type_list[0] return profiling_type - def identify_profiling_version(self): # 确定分析版本 + def identify_profiling_version(self): profiling_version = "" if Constant.MINDSPORE in self.profiling_type: ascend_dirs = [] @@ -166,7 +166,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): self.__class__.__name__, self.kwargs.get(Constant.TORCH_VERSION), profiling_version) return profiling_version - def init_dataset_list(self) -> None: # 初始化数据集列表 + def init_dataset_list(self) -> None: dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) @@ -184,7 +184,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): self.dataset_list[key] = [] self.dataset_list[key].append(dataset) - def get_priority_by_time_ratio(self, dur, step_dur): # 根据时间比例确定优先级 + def get_priority_by_time_ratio(self, dur, step_dur): time_ratio = safe_division(dur, step_dur) if time_ratio >= self.ANALYZER_HIGH_PRIORITY_TIME_RATIO: return PriorityBackgroundColor.high diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 
f70a3c815..1784c9ce3 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -36,12 +36,10 @@ class AICorePerformanceChecker: self.result = dict() self.ai_core_performance_issues = False self.desc = "" - self.suggestions = "" self.cube_dict = {} self.fa_dict = {} self.fa_list = [] self.vector_dict = {} - self.vector_list = [] self.load_aicore_perf_rules() def load_aicore_perf_rules(self): @@ -392,10 +390,10 @@ class AICorePerformanceChecker: if not self.ai_core_performance_issues: return self.ai_core_performance_issues - sugg_keys = ['opti', 'bound', 'affinity'] - cube_desc = dict.fromkeys(sugg_keys, "") - fa_desc = dict.fromkeys(sugg_keys, "") - vector_desc = dict.fromkeys(sugg_keys, "") + suggestion_keys = ['opti', 'bound', 'affinity'] + cube_desc = dict.fromkeys(suggestion_keys, "") + fa_desc = dict.fromkeys(suggestion_keys, "") + vector_desc = dict.fromkeys(suggestion_keys, "") if self.result["cube"]: optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index bbea136f0..ccf671139 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -115,4 +115,4 @@ class OperatorBoundAnalyzer(ProfilingAnalyzer): class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) \ No newline at end of file + self.checker = AicpuChecker(self.cann_version) -- Gitee From 657d436b30cc81f319f54e0a2dac783ea75b6762 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 14:47:23 +0800 Subject: [PATCH 26/72] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=87=87=E9=9B=86=E7=AD=89=E7=BA=A7=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 1784c9ce3..e2ca19405 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -73,7 +73,7 @@ class AICorePerformanceChecker: self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion") def data_filter(self, profiling_dataset: ProfilingDataset): - if not self.check_task_dict(profiling_dataset): + if not self.check_task_list(profiling_dataset): return operator_list = profiling_dataset.op_summary.op_list total_duration = sum(float(operator.task_duration) for operator in operator_list) @@ -118,7 +118,7 @@ class AICorePerformanceChecker: for shapes in input_shapes: if not "," in shapes and shapes != "": # 多的一维是 bias ,预先乘2 - memory += int (shapes) * 2 + memory += int(shapes) * 2 continue start = 1 for shape in shapes.split(","): @@ -468,11 +468,16 @@ class AICorePerformanceChecker: priority_background_color=priority, rank=kwargs.get("rank")) - def check_task_dict(self, 
profiling_dataset: ProfilingDataset) -> bool: + def check_task_list(self, profiling_dataset: ProfilingDataset) -> bool: if not hasattr(profiling_dataset, "op_summary"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op summary") return False if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False + if (not hasattr(profiling_dataset.op_summary, "input_shapes") or + not hasattr(profiling_dataset.op_summary, "input_data_types")): + logger.warning("Skip %s checker because of not containing input datas, " + "Please use L1 and above", self._CHECKER) + return False return True -- Gitee From e8d3759a197baaaa79a6b8143bded9e8a45db95f Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 15:14:54 +0800 Subject: [PATCH 27/72] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=87=87=E9=9B=86=E7=AD=89=E7=BA=A7=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e2ca19405..47a90e98a 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -477,7 +477,6 @@ class AICorePerformanceChecker: return False if (not hasattr(profiling_dataset.op_summary, "input_shapes") or not hasattr(profiling_dataset.op_summary, "input_data_types")): - logger.warning("Skip %s checker because of not containing input datas, " - "Please use L1 and above", self._CHECKER) + logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True -- Gitee From edd6abcbf81abaad4ce119478eba7987e3ddfe42 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:07:21 +0800 Subject: [PATCH 28/72] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=9D=87=E5=80=BC?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 47a90e98a..c9f6e039f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -281,6 +281,7 @@ class AICorePerformanceChecker: # 处理bound算子和优化算子 aiv_vec_ratio, aic_fixpipe_ratio, aic_mte2_ratio, optimization = 0., 0., 0., 0. 
bound = "" + length = 0 if len(shape.split("-")) > 2: for operator in fa_list: if (operator.op_name == op and @@ -290,6 +291,9 @@ class AICorePerformanceChecker: aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) dtype = operator.input_data_types + length += 1 + aic_fixpipe_ratio = aic_fixpipe_ratio / length + aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: bound = "mte2_and_fixpipe_bound" elif aic_mte2_ratio >= 0.8: @@ -305,6 +309,9 @@ class AICorePerformanceChecker: aiv_vec_ratio += float(operator.aiv_vec_ratio) aic_mte2_ratio += float(operator.aic_mte2_ratio) shape_duration += float(operator.task_duration) + length += 1 + aiv_vec_ratio = aiv_vec_ratio / length + aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: bound = "mte2_and_vec_bound" elif aic_mte2_ratio >= 0.8: @@ -346,6 +353,7 @@ class AICorePerformanceChecker: for op_name in vector_dict: for shape in vector_dict[op_name]: aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. + length = 0 bound, dtype = "", "" for operator in vector_list: if (operator.op_name == op_name and @@ -355,6 +363,11 @@ class AICorePerformanceChecker: aiv_mte3_ratio += float(operator.aiv_mte3_ratio) shape_duration += float(operator.task_duration) dtype = operator.input_data_types + length += 1 + # todo 取平均值 + aiv_vec_ratio = aiv_vec_ratio / length + aiv_mte2_ratio = aiv_mte2_ratio / length + aiv_mte2_ratio = aiv_mte2_ratio / length if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: bound = "vec_mte2_mte3_bound" elif aiv_mte2_ratio >= 0.7: -- Gitee From f9cfeeae8589c37cc2e491051a5bea1163eefe26 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:07:45 +0800 Subject: [PATCH 29/72] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=9D=87=E5=80=BC?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index c9f6e039f..0517083c3 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -364,7 +364,6 @@ class AICorePerformanceChecker: shape_duration += float(operator.task_duration) dtype = operator.input_data_types length += 1 - # todo 取平均值 aiv_vec_ratio = aiv_vec_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length -- Gitee From c5359dd7e27a7be5f7e61f677d0a0d1bb4ccb1b7 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:12:01 +0800 Subject: [PATCH 30/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 0517083c3..1c341e4fb 100644 --- 
a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -487,8 +487,8 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False - if (not hasattr(profiling_dataset.op_summary, "input_shapes") or - not hasattr(profiling_dataset.op_summary, "input_data_types")): + if (not hasattr(profiling_dataset.op_summary.op_list, "input_shapes") or + not hasattr(profiling_dataset.op_summary.op_list, "input_data_types")): logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True -- Gitee From 1ceadac5a761201d7396940d0fdb600ac169306c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Tue, 21 Jan 2025 16:37:08 +0800 Subject: [PATCH 31/72] =?UTF-8?q?Checker=E5=BC=82=E5=B8=B8=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 1c341e4fb..6057df7aa 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -136,9 +136,28 @@ class AICorePerformanceChecker: """ :Param profiling_dataset: dataset of operator performance from kernel_details.csv """ - self.result["cube"] = self.check_cube_operator(promoting_dataset) - self.result["fa"] = self.check_fa_operator(promoting_dataset) - self.result["vector"] = self.check_vector_operator(promoting_dataset) + try: + self.result["cube"] = self.check_cube_operator(promoting_dataset) + except (IndexError, ValueError, AttributeError): + logger.error("Failed to check ai core performance, cube operator incorrect shapes value.") + self.result["cube"] = [] + + try: + self.result["fa"] = self.check_fa_operator(promoting_dataset) + except (IndexError, ValueError, AttributeError): + logger.error("Failed to check ai core performance, fa operator incorrect shapes value.") + self.result["fa"] = [] + + try: + self.result["vector"] = self.check_vector_operator(promoting_dataset) + except (IndexError, ValueError, AttributeError): + logger.error("Failed to check ai core performance, vector operator incorrect shapes value.") + self.result["vector"] = [] + + if not any([self.result["cube"], self.result["fa"], self.result["vector"]]): + self.ai_core_performance_issues = False + + def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict -- Gitee From d815739679f562ea492e75733b85aae4dd3cdddc Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 10:28:38 +0800 Subject: [PATCH 32/72] UT --- .../test_ai_core_performance_advice.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py new file mode 100644 index 000000000..4782ee635 
--- /dev/null +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -0,0 +1,93 @@ +import csv +import os +import shutil +import stat + +import unittest +from profiler.advisor.interface.interface import Interface +from profiler.advisor.common.analyzer_scopes import SupportedScopes +from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset + + +class TestAICorePerformanceAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT" + interface = None + err_interface = None + + def tearDown(self): + if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): + shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) + self.clear_htmls() + + def setUp(self): + if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): + shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) + if not os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): + os.makedirs(TestAICorePerformanceAdvice.TMP_DIR) + if not os.path.exists(TestAICorePerformanceAdvice.OUTPUT_DIR): + os.makedirs(TestAICorePerformanceAdvice.OUTPUT_DIR) + self.clear_htmls() + + @classmethod + def clear_htmls(cls): + current_path = os.path.dirname(os.path.abspath(__file__)) + for filename in os.listdir(current_path): + # 检查文件是否以“att”开头 + if filename.startswith("att"): + # 构建文件的完整路径 + file_path = os.path.join(current_path, filename) + # 删除文件 + os.remove(file_path) + + + @classmethod + def create_kernel_details(cls): + # create csv files + csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core', + 'Start Time(us)', + 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types', + 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID', + 'aicore_time(us)', + 'aic_total_cycles', 'aic_mac_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops', + 'aic_vector_fops', + 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio', + 'aiv_vec_int32_ratio', + 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops'] + csv_row1 = [1, 4294967295, 1265, 16, 'MatMul56', 'MatMul', 'AI_CORE', "172317\t", 21.2, 261.56, 9, + 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 0, 0, 0, 0, 0, 0, + 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] + csv_row2 = [1, 4294967295, 1265, 16, 'Add2', 'Add', 'AI_VECTOR_CORE', "183317\t", 1.5, 261.56, 9, + 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 0, 0, 0, 0, 0, 0, + 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] + csv_row3 = [1, 4294967295, 1265, 16, 'MatMul57', 'MatMul', 'AI_CORE', "189233\t", 3.14, 261.56, 9, 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 0, 0, 0, 0, 0, 0, + 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] + csv_row4 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "189933\t", 3.14, 261.56, 9, 0, + '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', + 2.3, 28888, 0.2, 0.1, 0.1, 0.7, + 0, 0, 0, 0, 0, 0, 0, 0] + + with os.fdopen(os.open(f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w', newline='') as fp: + csv_writer = csv.writer(fp) + csv_writer.writerow(csv_header) + csv_writer.writerow(csv_row1) + csv_writer.writerow(csv_row2) + csv_writer.writerow(csv_row3) + csv_writer.writerow(csv_row4) + + def test_ai_core_performance_data(self): + self.create_kernel_details() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = 
Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(2, len(result.data.get("带宽分析", []))) + self.assertEqual(1, len(result.data.get("带宽分析", []).get('data'))) + result.clear() \ No newline at end of file -- Gitee From 2dabcd38c699653dccb9cd571911df4df4532535 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 10:51:15 +0800 Subject: [PATCH 33/72] =?UTF-8?q?=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 6057df7aa..ef360ee12 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -506,8 +506,8 @@ class AICorePerformanceChecker: if not hasattr(profiling_dataset.op_summary, "op_list"): logger.warning("Skip %s checker because of not containing %s", self._CHECKER, "op_list") return False - if (not hasattr(profiling_dataset.op_summary.op_list, "input_shapes") or - not hasattr(profiling_dataset.op_summary.op_list, "input_data_types")): + if (not hasattr(profiling_dataset.op_summary.op_list[0], "input_shapes") or + not hasattr(profiling_dataset.op_summary.op_list[0], "input_data_types")): logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True -- Gitee From 118abaff82f2f10c610347f552c0e648bea51f1a Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 11:39:21 +0800 Subject: [PATCH 34/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../compute_advice/data/kernel_details.csv | 0 .../test_ai_core_performance_advice.py | 96 ++++++++++--------- 2 files changed, 49 insertions(+), 47 deletions(-) create mode 100644 profiler/test/ut/advisor/compute_advice/data/kernel_details.csv diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv new file mode 100644 index 000000000..e69de29bb diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 4782ee635..aef4e6ed1 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -6,7 +6,6 @@ import stat import unittest from profiler.advisor.interface.interface import Interface from profiler.advisor.common.analyzer_scopes import SupportedScopes -from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisDataset class TestAICorePerformanceAdvice(unittest.TestCase): @@ -33,61 +32,64 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def clear_htmls(cls): current_path = os.path.dirname(os.path.abspath(__file__)) for filename in os.listdir(current_path): - # 检查文件是否以“att”开头 - if filename.startswith("att"): + # 检查文件是否以“mstt”开头 + if filename.startswith("mstt"): # 
构建文件的完整路径 file_path = os.path.join(current_path, filename) # 删除文件 os.remove(file_path) - @classmethod - def create_kernel_details(cls): - # create csv files - csv_header = ['Step Id', 'Model ID', 'Task ID', 'Stream ID', 'Name', 'Type', 'Accelerator Core', - 'Start Time(us)', - 'Duration(us)', 'Wait Time(us)', 'Block Dim', 'Mix Block Dim', 'Input Shapes', 'Input Data Types', - 'Input Formats', 'Output Shapes', 'Output Data Types', 'Output Formats', 'Context ID', - 'aicore_time(us)', - 'aic_total_cycles', 'aic_mac_ratio', 'aic_mac_int8_ratio', 'aic_cube_fops', - 'aic_vector_fops', - 'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio', - 'aiv_vec_int32_ratio', - 'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops'] - csv_row1 = [1, 4294967295, 1265, 16, 'MatMul56', 'MatMul', 'AI_CORE', "172317\t", 21.2, 261.56, 9, - 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 0, 0, 0, 0, 0, 0, - 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] - csv_row2 = [1, 4294967295, 1265, 16, 'Add2', 'Add', 'AI_VECTOR_CORE', "183317\t", 1.5, 261.56, 9, - 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 0, 0, 0, 0, 0, 0, - 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] - csv_row3 = [1, 4294967295, 1265, 16, 'MatMul57', 'MatMul', 'AI_CORE', "189233\t", 3.14, 261.56, 9, 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 0, 0, 0, 0, 0, 0, - 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856] - csv_row4 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "189933\t", 3.14, 261.56, 9, 0, - '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A', - 2.3, 28888, 0.2, 0.1, 0.1, 0.7, - 0, 0, 0, 0, 0, 0, 0, 0] + def copy_kernel_details(cls,path): + # Define source and destination paths + source_csv_path = f"./data/{path}" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + + # Check if source CSV file exists + if not os.path.exists(source_csv_path): + raise FileNotFoundError(f"test data file not found:{source_csv_path}") + + # Ensure the output directory exists + if not os.path.exists(TestAICorePerformanceAdvice.OUTPUT_DIR): + os.makedirs(TestAICorePerformanceAdvice.OUTPUT_DIR) + + # Copy the CSV file from source to destination + shutil.copyfile(source_csv_path, destination_csv_path) + + def test_ai_core_performance_total(self): + file_path = "kernel_details.csv" + self.copy_kernel_details(file_path) + interface = Interface(profiling_path=self.TMP_DIR) + dimension = Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + # TODO 测试结果验证 + result.clear() + + def test_ai_core_performance_cube_operator(self): + self.copy_kernel_details() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + # TODO 测试结果验证 + result.clear() + + def test_ai_core_performance_fa_operator(self): + self.copy_kernel_details() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = Interface.COMMUNICATION + scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + # TODO 测试结果验证 + result.clear() - with 
os.fdopen(os.open(f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv", - os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w', newline='') as fp: - csv_writer = csv.writer(fp) - csv_writer.writerow(csv_header) - csv_writer.writerow(csv_row1) - csv_writer.writerow(csv_row2) - csv_writer.writerow(csv_row3) - csv_writer.writerow(csv_row4) - def test_ai_core_performance_data(self): - self.create_kernel_details() + def test_ai_core_performance_vector_operator(self): + self.copy_kernel_details() interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMMUNICATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - self.assertEqual(2, len(result.data.get("带宽分析", []))) - self.assertEqual(1, len(result.data.get("带宽分析", []).get('data'))) - result.clear() \ No newline at end of file + # TODO 测试结果验证 + result.clear() -- Gitee From 1eeb72ea1f7999ae8bf75be3ff6c40b1757d74e5 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 14:27:25 +0800 Subject: [PATCH 35/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index aef4e6ed1..ff372d2d5 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -58,27 +58,32 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def test_ai_core_performance_total(self): file_path = "kernel_details.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() def test_ai_core_performance_cube_operator(self): - self.copy_kernel_details() + file_path = "kernel_details_cube.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() def test_ai_core_performance_fa_operator(self): - self.copy_kernel_details() + file_path = "kernel_details_fa.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 @@ -86,9 +91,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def 
test_ai_core_performance_vector_operator(self): - self.copy_kernel_details() + file_path = "kernel_details_vector.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMMUNICATION + dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 -- Gitee From 776aaf9db5bccc21d7a7940f55d6bc8483bf21d7 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 14:40:10 +0800 Subject: [PATCH 36/72] =?UTF-8?q?UT=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index ff372d2d5..ac0ba3807 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -17,7 +17,6 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def tearDown(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) - self.clear_htmls() def setUp(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): @@ -58,7 +57,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def test_ai_core_performance_total(self): file_path = "kernel_details.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -66,10 +65,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() def test_ai_core_performance_cube_operator(self): file_path = "kernel_details_cube.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -77,10 +77,11 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() def test_ai_core_performance_fa_operator(self): file_path = "kernel_details_fa.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -88,11 +89,12 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() def 
test_ai_core_performance_vector_operator(self): file_path = "kernel_details_vector.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION @@ -100,3 +102,4 @@ class TestAICorePerformanceAdvice(unittest.TestCase): result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 result.clear() + self.clear_htmls() -- Gitee From b58e60ab5c76abf6624944253db4859c48aa06b9 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 14:52:22 +0800 Subject: [PATCH 37/72] =?UTF-8?q?UT=20=E9=97=AE=E9=A2=98=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/compute_advice/test_ai_core_performance_advice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index ac0ba3807..7849391f1 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -42,7 +42,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def copy_kernel_details(cls,path): # Define source and destination paths source_csv_path = f"./data/{path}" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_detail.csv" + destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" # Check if source CSV file exists if not os.path.exists(source_csv_path): -- Gitee From 0abef7750282ad08013f1cde031cefe9fb996994 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 15:08:42 +0800 Subject: [PATCH 38/72] =?UTF-8?q?checker=20=E6=8A=A5=E9=94=99=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index ef360ee12..c28675e9f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -138,27 +138,25 @@ class AICorePerformanceChecker: """ try: self.result["cube"] = self.check_cube_operator(promoting_dataset) - except (IndexError, ValueError, AttributeError): - logger.error("Failed to check ai core performance, cube operator incorrect shapes value.") + except (IndexError, ValueError, AttributeError) as e: + logger.error(f"Failed to check ai core performance, {e}.") self.result["cube"] = [] try: self.result["fa"] = self.check_fa_operator(promoting_dataset) - except (IndexError, ValueError, AttributeError): - logger.error("Failed to check ai core performance, fa operator incorrect shapes value.") + except (IndexError, ValueError, AttributeError) as e: + logger.error(f"Failed to check ai core performance, {e}.") self.result["fa"] = [] try: self.result["vector"] = 
self.check_vector_operator(promoting_dataset) - except (IndexError, ValueError, AttributeError): - logger.error("Failed to check ai core performance, vector operator incorrect shapes value.") + except (IndexError, ValueError, AttributeError) as e: + logger.error(f"Failed to check ai core performance, {e}.") self.result["vector"] = [] if not any([self.result["cube"], self.result["fa"], self.result["vector"]]): self.ai_core_performance_issues = False - - def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict optimization_queue = [] -- Gitee From 6db14fa52598cd74eb7a8e0bb4ca31365b191d4c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 15:35:47 +0800 Subject: [PATCH 39/72] =?UTF-8?q?UT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 42 +------------------ 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 7849391f1..6ff49cad1 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -17,6 +17,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def tearDown(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): shutil.rmtree(TestAICorePerformanceAdvice.TMP_DIR) + self.clear_htmls() def setUp(self): if os.path.exists(TestAICorePerformanceAdvice.TMP_DIR): @@ -57,49 +58,10 @@ class TestAICorePerformanceAdvice(unittest.TestCase): def test_ai_core_performance_total(self): file_path = "kernel_details.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" - self.copy_kernel_details(file_path) - interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMPUTATION - scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS - result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 - result.clear() - self.clear_htmls() - - def test_ai_core_performance_cube_operator(self): - file_path = "kernel_details_cube.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" self.copy_kernel_details(file_path) interface = Interface(profiling_path=self.TMP_DIR) dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) # TODO 测试结果验证 - result.clear() - self.clear_htmls() - - def test_ai_core_performance_fa_operator(self): - file_path = "kernel_details_fa.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" - self.copy_kernel_details(file_path) - interface = Interface(profiling_path=self.TMP_DIR) - dimension = Interface.COMPUTATION - scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS - result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 - result.clear() - self.clear_htmls() - - - def test_ai_core_performance_vector_operator(self): - file_path = "kernel_details_vector.csv" - destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" - self.copy_kernel_details(file_path) - interface = Interface(profiling_path=self.TMP_DIR) - 
dimension = Interface.COMPUTATION - scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS - result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 - result.clear() - self.clear_htmls() + result.clear() \ No newline at end of file -- Gitee From 5a23b7934c3bf5579aa0a7bc9032f7722374c022 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 22 Jan 2025 15:37:09 +0800 Subject: [PATCH 40/72] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E6=B2=A1=E6=9C=89?= =?UTF-8?q?=E5=BB=BA=E8=AE=AE=E9=A1=B9=E6=97=B6=E7=9A=84=E7=A9=BA=E8=A1=A8?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 24 ++++++++++++------- .../html/templates/ai_core_performance.html | 6 ++--- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index c28675e9f..a0e3fcd2b 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -434,15 +434,18 @@ class AICorePerformanceChecker: for cube_opti_issue in self.result["cube"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) cube_desc["opti"] += opti_sugg - result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) + if cube_desc["opti"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) for cube_bound_issue in self.result["cube"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) cube_desc["bound"] += bound_sugg - result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) + if cube_desc["bound"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) for cube_affinity_issue in self.result["cube"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) cube_desc["affinity"] += affinity_sugg - result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) + if cube_desc["affinity"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) if self.result["fa"]: optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) @@ -455,15 +458,18 @@ class AICorePerformanceChecker: for fa_opti_issue in self.result["fa"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) fa_desc["opti"] += opti_sugg - result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) + if fa_desc["opti"]: + result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) for fa_bound_issue in self.result["fa"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) fa_desc["bound"] += bound_sugg - result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) + if fa_desc["bound"]: + result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) for fa_affinity_issue in self.result["fa"][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) fa_desc["affinity"] += affinity_sugg - result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) + if fa_desc["affinity"]: + result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) if 
self.result["vector"]: optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) @@ -476,11 +482,13 @@ class AICorePerformanceChecker: for vector_opti_issue in self.result["vector"][0]: opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) vector_desc["opti"] += opti_sugg - result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) + if vector_desc["opti"]: + result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) for vector_bound_issue in self.result["vector"][1]: bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) vector_desc["bound"] += bound_sugg - result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) + if vector_desc["bound"]: + result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) return True def make_render(self, html_render, add_render_list=True, **kwargs): diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index 48e62ad6c..5bf133550 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -10,7 +10,7 @@ bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ', bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %} {% endif %} - {% if format_result.cube is not none %} + {% if format_result.cube|length > 0 %} MatMul{{ title_ns.title_desc }}
@@ -63,7 +63,7 @@
{% endif %} - {% if format_result.fa is not none %} + {% if format_result.fa|length > 0 %} FA{{ title_ns.title_desc }}
@@ -116,7 +116,7 @@
{% endif %} - {% if format_result.cube is not none %} + {% if format_result.vector|length > 0 %} Vector{{ title_ns.title_desc }}
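The guard rewrite in PATCH 40 above works because the per-category results (format_result.cube, format_result.fa, format_result.vector) appear to be empty lists rather than None when nothing is found, so the old "is not none" test still rendered a heading plus an empty table. The sketch below is not taken from the patches; it is a minimal standalone illustration of the difference, assuming only the jinja2 package, with illustrative variable names and made-up data:

from jinja2 import Template

# Old guard: an empty list is still "not none", so the section renders anyway.
old_guard = Template("{% if rows is not none %}SECTION{% endif %}")
# New guard: a zero-length list fails the check, so the empty section is skipped.
new_guard = Template("{% if rows|length > 0 %}SECTION{% endif %}")

print(old_guard.render(rows=[]))                         # prints SECTION
print(new_guard.render(rows=[]))                         # prints an empty string
print(new_guard.render(rows=[{"op_name": "MatMul56"}]))  # prints SECTION again

The checker-side half of the same patch applies the same idea in Python: result.add_detail is only called once the accumulated suggestion string for that category is non-empty.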
-- Gitee From 5bc247663f11862050123ffe82c527d4de3fd55f Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 22 Jan 2025 15:52:36 +0800 Subject: [PATCH 41/72] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dhtml=E7=A9=BA=E8=A1=A8?= =?UTF-8?q?=E7=9A=84=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../advisor/display/html/templates/ai_core_performance.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index 5bf133550..d5ab1a3fa 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -10,7 +10,7 @@ bound_set='set of bound operators', affinity_set='set of unaffine operators', opti_refer=' refer to Performance Optimization Space: ', bound_refer=' bound type: ', affinity_refer=' type of disaffinity: ', title_desc=' Operator related analysis, referenced below: ') %} {% endif %} - {% if format_result.cube|length > 0 %} + {% if format_result.cube[0]|length + format_result.cube[1]|length + format_result.cube[2]|length > 0 %} MatMul{{ title_ns.title_desc }}
@@ -63,7 +63,7 @@
{% endif %} - {% if format_result.fa|length > 0 %} + {% if format_result.fa[0]|length + format_result.fa[1]|length + format_result.fa[2]|length > 0 %} FA{{ title_ns.title_desc }}
@@ -116,7 +116,7 @@
{% endif %} - {% if format_result.vector|length > 0 %} + {% if format_result.vector[0]|length + format_result.vector[1]|length > 0 %} Vector{{ title_ns.title_desc }}
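PATCH 41 then tightens the same guards because each category result is itself a list of sub-queues built by the checker (optimization, bound and, for Cube and FA, affinity), so the outer list from the previous guard always has a positive length even when every sub-queue is empty; summing the sub-queue lengths renders a section only when it has at least one concrete suggestion. Again a small hypothetical sketch, assuming only jinja2 and using invented data:

from jinja2 import Template

# Mirrors the template condition from the patch, with a placeholder body.
guard = Template("{% if queues[0]|length + queues[1]|length + queues[2]|length > 0 %}SECTION{% endif %}")

all_empty = [[], [], []]                                            # no optimization, bound or affinity items
one_hit = [[{"op_name": "MatMul57", "optimization": 12.5}], [], []]

print(len(all_empty))                   # 3, so a plain "|length > 0" check would still render
print(guard.render(queues=all_empty))   # prints an empty string
print(guard.render(queues=one_hit))     # prints SECTION

The Vector section in the patch sums only queues[0] and queues[1], since its checker builds two sub-queues rather than three.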
-- Gitee From 1aefa6d6f6083327266c1563621b10f40e5ff839 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 16:01:41 +0800 Subject: [PATCH 42/72] =?UTF-8?q?checker=20optimization=20=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index c28675e9f..5e562349d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -232,7 +232,7 @@ class AICorePerformanceChecker: "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio)}) + "optimization": round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] @@ -349,7 +349,7 @@ class AICorePerformanceChecker: "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": optimization}) + "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], @@ -406,7 +406,7 @@ class AICorePerformanceChecker: "op_name": op_name, "shape": shape, "dtype": dtype, - "optimization": optimization}) + "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] -- Gitee From 71c43f4f0208e155affa522ba2b57b9348ef28a6 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 22 Jan 2025 16:07:34 +0800 Subject: [PATCH 43/72] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E7=A9=BA=E9=97=B4=E6=94=B9=E4=B8=BA=E7=99=BE=E5=88=86=E6=AF=94?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../html/templates/ai_core_performance.html | 32 +++++++++---------- .../advisor/rules/cn/aicore_performance.yaml | 2 +- .../advisor/rules/en/aicore_performance.yaml | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/profiler/advisor/display/html/templates/ai_core_performance.html b/profiler/advisor/display/html/templates/ai_core_performance.html index d5ab1a3fa..77e5e0cb5 100644 --- a/profiler/advisor/display/html/templates/ai_core_performance.html +++ b/profiler/advisor/display/html/templates/ai_core_performance.html @@ -21,9 +21,9 @@ {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.cube[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% endif %} {% endfor %} {% if opti_ns.total_opti|length > 0 %} @@ -35,9 +35,9 @@ {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.cube[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} {% if bound_ns.total_bound|length > 0 %} @@ -49,9 +49,9 @@ {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.cube[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} {% if affinity_ns.total_affinity|length > 0 %} @@ -74,9 +74,9 @@ {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.fa[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% endif %} {% endfor %} {% if opti_ns.total_opti|length > 0 %} @@ -88,9 +88,9 @@ {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.fa[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} {% if bound_ns.total_bound|length > 0 %} @@ -102,9 +102,9 @@ {% set affinity_ns = namespace(total_affinity='') %} {% for affinity in format_result.fa[2] %} {% if not loop.first %} - {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity_ns.total_affinity ~ "
" ~ affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% else %} - {% set affinity_ns.total_affinity = affinity.op_name ~ "operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} + {% set affinity_ns.total_affinity = affinity.op_name ~ " operator shape: " ~ affinity.shape ~ " dtype: " ~ affinity.dtype ~ title_ns.affinity_refer ~ affinity.suggestion %} {% endif %} {% endfor %} {% if affinity_ns.total_affinity|length > 0 %} @@ -127,9 +127,9 @@ {% set opti_ns = namespace(total_opti='') %} {% for opti in format_result.vector[0] %} {% if not loop.first %} - {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti_ns.total_opti ~ "
" ~ opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% else %} - {% set opti_ns.total_opti = opti.op_name ~ "operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization %} + {% set opti_ns.total_opti = opti.op_name ~ " operator shape: " ~ opti.shape ~ " dtype: " ~ opti.dtype ~ title_ns.opti_refer ~ opti.optimization ~ "%" %} {% endif %} {% endfor %} {% if opti_ns.total_opti|length > 0 %} @@ -141,9 +141,9 @@ {% set bound_ns = namespace(total_bound='') %} {% for bound in format_result.vector[1] %} {% if not loop.first %} - {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound_ns.total_bound ~ "
" ~ bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% else %} - {% set bound_ns.total_bound = bound.op_name ~ "operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} + {% set bound_ns.total_bound = bound.op_name ~ " operator shape: " ~ bound.shape ~ " dtype: " ~ bound.dtype ~ title_ns.bound_refer ~ bound.bound %} {% endif %} {% endfor %} {% if bound_ns.total_bound|length > 0 %} diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index f00f0a4b7..8d44aaab2 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -12,4 +12,4 @@ fa_affinity_desc_type3: "D和S均不能被128整除" suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子" affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n" bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n" -optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}\n" \ No newline at end of file +optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n" \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml index 28f52f1ed..e85a919ab 100644 --- a/profiler/advisor/rules/en/aicore_performance.yaml +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -12,4 +12,4 @@ fa_affinity_desc_type3: "Neither D nor S is not divisible by 128" suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space" affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n" bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n" -optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}\n" \ No newline at end of file +optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n" \ No newline at end of file -- Gitee From 2f5acce4e8ab4e9402d4924a706fd46465a2c491 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 16:55:30 +0800 Subject: [PATCH 44/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=8EUT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 2 +- .../advisor/compute_advice/test_ai_core_performance_advice.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 270c43d41..0ef45f52c 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -252,7 +252,7 @@ class AICorePerformanceChecker: suggestion = "" if "varlen" in op.lower(): # 处理变长算子 如果不亲和则affinity_flag为False - if shape.split("-")[0].split(";")[0].split(",")[2] % 128 != 0: + if 
int(shape.split("-")[0].split(";")[0].split(",")[2]) % 128 != 0: affinity_flag = True suggestion = self._FA_AFFINITY_DESC_TYPE1 for operator in fa_list: diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 6ff49cad1..922d4b4c0 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -63,5 +63,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - # TODO 测试结果验证 + self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0])) + self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1])) + self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2])) result.clear() \ No newline at end of file -- Gitee From d98ef93f50206a0fd3142d49c32eee92a380c394 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 17:26:53 +0800 Subject: [PATCH 45/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=8EUT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- .../compute_advice/test_ai_core_performance_advice.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 0ef45f52c..09bf91349 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -397,14 +397,14 @@ class AICorePerformanceChecker: if bound: bound_queue.append({ "op_name": op_name, - "shape": shape, + "shape": shape.split("-")[0], "bound": bound, "dtype": dtype, "duration": shape_duration}) else: optimization_queue.append({ "op_name": op_name, - "shape": shape, + "shape": shape.split("-")[0], "dtype": dtype, "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 922d4b4c0..40fa81837 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -66,4 +66,9 @@ class TestAICorePerformanceAdvice(unittest.TestCase): self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0])) self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1])) self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2])) + self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[0])) + self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[1])) + self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[2])) + self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[0])) + self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[1])) result.clear() \ No newline at end of file -- Gitee From 
a8e535fddd557ddac6f1cd7cd01aa7fa06b1cd92 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 22 Jan 2025 17:31:09 +0800 Subject: [PATCH 46/72] =?UTF-8?q?UT=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ai_core_performance_advice.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 40fa81837..61ae35d13 100644 --- a/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -40,7 +40,7 @@ class TestAICorePerformanceAdvice(unittest.TestCase): os.remove(file_path) @classmethod - def copy_kernel_details(cls,path): + def copy_kernel_details(cls, path): # Define source and destination paths source_csv_path = f"./data/{path}" destination_csv_path = f"{TestAICorePerformanceAdvice.OUTPUT_DIR}/kernel_details.csv" @@ -63,12 +63,12 @@ class TestAICorePerformanceAdvice(unittest.TestCase): dimension = Interface.COMPUTATION scope = SupportedScopes.AICORE_PERFORMANCE_ANALYSIS result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) - self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[0])) - self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[1])) - self.assertLess(1,len(result.data.get("Cube算子性能分析").get("data")[2])) - self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[0])) - self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[1])) - self.assertLess(1,len(result.data.get("Fa算子性能分析").get("data")[2])) - self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[0])) - self.assertLess(1,len(result.data.get("Vector算子性能分析").get("data")[1])) - result.clear() \ No newline at end of file + self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[0])) + self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[1])) + self.assertLess(1, len(result.data.get("Cube算子性能分析").get("data")[2])) + self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[0])) + self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[1])) + self.assertLess(1, len(result.data.get("FA算子性能分析").get("data")[2])) + self.assertLess(1, len(result.data.get("Vector算子性能分析").get("data")[0])) + self.assertLess(1, len(result.data.get("Vector算子性能分析").get("data")[1])) + result.clear() -- Gitee From 6440ccc765205eb29310a30cfb1e817831306736 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 23 Jan 2025 11:21:09 +0800 Subject: [PATCH 47/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 09bf91349..3175168e4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -75,10 +75,12 @@ class AICorePerformanceChecker: def data_filter(self, profiling_dataset: ProfilingDataset): if not 
self.check_task_list(profiling_dataset): return + operator_list = profiling_dataset.op_summary.op_list total_duration = sum(float(operator.task_duration) for operator in operator_list) cube_memory_dict = {} vector_type_dict = {} + # filter cube operator and fa operator for op in operator_list: shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] @@ -204,8 +206,10 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] shape_duration = sum(float(operator.task_duration) for operator in shap_list) dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list) / len(shap_list) - aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list) / len(shap_list) + aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list + if operator.aic_mac_ratio != "N/A") / len(shap_list) + aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list + if operator.aic_mac_ratio != "N/A") / len(shap_list) if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: bound_queue.append({ "op_name": op, @@ -304,11 +308,14 @@ class AICorePerformanceChecker: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] + "-grad" == shape): - aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - length += 1 + try: + aic_fixpipe_ratio += float(operator.aic_fixpipe_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + length += 1 + except ValueError: + continue aic_fixpipe_ratio = aic_fixpipe_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: @@ -323,10 +330,13 @@ class AICorePerformanceChecker: for operator in fa_list: if (operator.op_name == op and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aic_mte2_ratio += float(operator.aic_mte2_ratio) - shape_duration += float(operator.task_duration) - length += 1 + try: + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + shape_duration += float(operator.task_duration) + length += 1 + except ValueError: + continue aiv_vec_ratio = aiv_vec_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: @@ -375,12 +385,15 @@ class AICorePerformanceChecker: for operator in vector_list: if (operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - aiv_vec_ratio += float(operator.aiv_vec_ratio) - aiv_mte2_ratio += float(operator.aiv_mte2_ratio) - aiv_mte3_ratio += float(operator.aiv_mte3_ratio) - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - length += 1 + try: + aiv_vec_ratio += float(operator.aiv_vec_ratio) + aiv_mte2_ratio += float(operator.aiv_mte2_ratio) + aiv_mte3_ratio += float(operator.aiv_mte3_ratio) + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + length += 1 + except ValueError: + continue aiv_vec_ratio = aiv_vec_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length -- Gitee From 43932f3a4f6f6362d44185f8b90eb76c0f521f02 Mon Sep 17 
00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 23 Jan 2025 15:50:59 +0800 Subject: [PATCH 48/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 3175168e4..8964e93c4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -141,19 +141,19 @@ class AICorePerformanceChecker: try: self.result["cube"] = self.check_cube_operator(promoting_dataset) except (IndexError, ValueError, AttributeError) as e: - logger.error(f"Failed to check ai core performance, {e}.") + logger.error(f"Failed to check ai core performance cube operator, {e}.") self.result["cube"] = [] try: self.result["fa"] = self.check_fa_operator(promoting_dataset) except (IndexError, ValueError, AttributeError) as e: - logger.error(f"Failed to check ai core performance, {e}.") + logger.error(f"Failed to check ai core performance fa operator, {e}.") self.result["fa"] = [] try: self.result["vector"] = self.check_vector_operator(promoting_dataset) except (IndexError, ValueError, AttributeError) as e: - logger.error(f"Failed to check ai core performance, {e}.") + logger.error(f"Failed to check ai core performance vector operator, {e}.") self.result["vector"] = [] if not any([self.result["cube"], self.result["fa"], self.result["vector"]]): @@ -206,10 +206,19 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] shape_duration = sum(float(operator.task_duration) for operator in shap_list) dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio = sum(float(operator.aic_mac_ratio) for operator in shap_list - if operator.aic_mac_ratio != "N/A") / len(shap_list) - aic_mte2_ratio = sum(float(operator.aic_mte2_ratio) for operator in shap_list - if operator.aic_mac_ratio != "N/A") / len(shap_list) + aic_mac_ratio, aic_mte2_ratio = 0., 0. 
+ length = 0 + for operator in shap_list: + try: + aic_mac_ratio += float(operator.aic_mac_ratio) + aic_mte2_ratio += float(operator.aic_mte2_ratio) + length += 1 + except ValueError: + continue + if length == 0: + continue + aic_mac_ratio = aic_mac_ratio / length + aic_mte2_ratio = aic_mte2_ratio / length if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: bound_queue.append({ "op_name": op, @@ -316,6 +325,8 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue + if length == 0: + continue aic_fixpipe_ratio = aic_fixpipe_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: @@ -337,6 +348,8 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue + if length == 0: + continue aiv_vec_ratio = aiv_vec_ratio / length aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: @@ -394,6 +407,8 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue + if length == 0: + continue aiv_vec_ratio = aiv_vec_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length aiv_mte2_ratio = aiv_mte2_ratio / length -- Gitee From 1d1394fe9e2d48e36ea882112f8112ec5ed8cf41 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 5 Feb 2025 17:14:35 +0800 Subject: [PATCH 49/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=89=93=E5=B1=8F?= =?UTF-8?q?=E5=92=8C=E8=A1=A8=E6=A0=BC=E9=87=8C=E6=98=BE=E7=A4=BA=E7=A9=BA?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 270c43d41..32bd1da84 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -423,7 +423,7 @@ class AICorePerformanceChecker: cube_desc = dict.fromkeys(suggestion_keys, "") fa_desc = dict.fromkeys(suggestion_keys, "") vector_desc = dict.fromkeys(suggestion_keys, "") - if self.result["cube"]: + if any(self.result["cube"]): optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ @@ -447,7 +447,7 @@ class AICorePerformanceChecker: if cube_desc["affinity"]: result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) - if self.result["fa"]: + if any(self.result["fa"]): optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ @@ -471,7 +471,7 @@ class AICorePerformanceChecker: if fa_desc["affinity"]: result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) - if self.result["vector"]: + if any(self.result["vector"]): optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) result.add(OptimizeRecord(optimization_item)) headers = [ -- Gitee From e2c29e98910cd5d97a78d77df07a684033f8ed27 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 14:46:57 +0800 Subject: [PATCH 50/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../ai_core_performance/ai_core_performance_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 90d7daa09..3316cf43d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -276,7 +276,7 @@ class AICorePerformanceChecker: else: # 处理定长算子 如果不亲和则affinity_flag为False head_dim = 0 - seq_len = int(shape.split("-")[1].split(";")[1].split(",")[2]) + seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2]) input_first_tensor = shape.split("-")[0].split(";")[0].split(",") if len(input_first_tensor) == 3: head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) -- Gitee From a22dbe4b438ace270e8e84a25f4e0a039fe7d717 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 14:55:54 +0800 Subject: [PATCH 51/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 3316cf43d..9fd7f9a7f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -219,33 +219,29 @@ class AICorePerformanceChecker: continue aic_mac_ratio = aic_mac_ratio / length aic_mte2_ratio = aic_mte2_ratio / length + bound = "" + optimization = 0. 
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_and_mte2_bound", - "duration": shape_duration}) + bound = "mac_and_mte2_bound" elif aic_mac_ratio >= 0.8: - bound_queue.append({ - "op_name": op, - "shape": shape.split("-")[0], - "dtype": dtype, - "bound": "mac_bound", - "duration": shape_duration}) + bound = "mac_bound" elif aic_mte2_ratio >= 0.95: + bound = "mte2_bound" + else: + optimization = round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2) + if bound: bound_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "bound": "mte2_bound", + "bound": bound, "duration": shape_duration}) else: optimization_queue.append({ "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2)}) + "optimization": optimization}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] -- Gitee From b455912d39ba8251100b4a6eeb06fcc2dcd1aa36 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 15:02:29 +0800 Subject: [PATCH 52/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 9fd7f9a7f..5aecaaf03 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -1,7 +1,6 @@ # Copyright (c) 2024, Huawei Technologies Co., Ltd. # All rights reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # @@ -434,8 +433,6 @@ class AICorePerformanceChecker: return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] - pass - def make_record(self, result: OptimizeResult): """ make record for what and how to optimize -- Gitee From d7d4c7c2d66f8018a4dcf96ecd4a3c89b15a9d6c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 16:34:24 +0800 Subject: [PATCH 53/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 5aecaaf03..b1d14ef58 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -13,6 +13,7 @@ # limitations under the License. 
import logging import os +from functools import reduce from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord @@ -80,20 +81,26 @@ class AICorePerformanceChecker: cube_memory_dict = {} vector_type_dict = {} - # filter cube operator and fa operator for op in operator_list: shapes = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + # preliminary filter cube operator if op.task_type == "AI_CORE" and "matmul" in op.op_type.lower(): cube_memory_dict.setdefault(op.op_name, {}).setdefault(shapes, 0) cube_memory_dict[op.op_name][shapes] += self.memory_size(op) - elif op.op_type == "FlashAttentionScore": + continue + + # preliminary filter vector operator + if op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: + vector_type_dict.setdefault(op.op_type, set()).add(op) + continue + + # filter fa operator + if op.op_type == "FlashAttentionScore": self.fa_dict.setdefault(op.op_name, set()).add(shapes) self.fa_list.append(op) elif op.op_type == "FlashAttentionScoreGrad": self.fa_dict.setdefault(op.op_name, set()).add(shapes + "-grad") self.fa_list.append(op) - elif op.task_type in ["AI_VECTOR_CORE", "MIX_AIV"]: - vector_type_dict.setdefault(op.op_type, set()).add(op) # filter cube operator for op_name in cube_memory_dict: @@ -116,21 +123,15 @@ class AICorePerformanceChecker: def memory_size(operator): memory = 0 input_shapes = operator.input_shapes[1:-1].split(";") + output_shapes = operator.output_shapes[1:-1] for shapes in input_shapes: if not "," in shapes and shapes != "": # 多的一维是 bias ,预先乘2 memory += int(shapes) * 2 continue - start = 1 - for shape in shapes.split(","): - start *= int(shape) - memory += start - - output_shape = operator.output_shapes[1:-1].split(",") - start = 1 - for shapes in output_shape: - start *= int(shapes) - memory += int(start) + memory += reduce(lambda x, y: x*y, map(int, shapes.split(","))) + memory += reduce(lambda x, y: x * y, map(int, output_shapes.split(","))) + return memory * 2 / 1024 / 1024 def check_ai_core_performance(self, promoting_dataset: ProfilingDataset): -- Gitee From 4a6625cc7f8a4c132439d7efbf1d0af71bc066d1 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 6 Feb 2025 16:35:08 +0800 Subject: [PATCH 54/72] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b1d14ef58..a3622ebdf 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, Huawei Technologies Co., Ltd. # All rights reserved. # +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -- Gitee From 3f0b15adb831e6d468f738a318417796a71b1c73 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:01:59 +0000 Subject: [PATCH 55/72] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20pr?= =?UTF-8?q?ofiler/cli/entrance.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/cli/entrance.py | 75 ---------------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 profiler/cli/entrance.py diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py deleted file mode 100644 index 89ac8187d..000000000 --- a/profiler/cli/entrance.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -import click - -from profiler.cli.analyze_cli import analyze_cli -from profiler.cli.complete_cli import auto_complete_cli -from profiler.cli.compare_cli import compare_cli -from profiler.cli.cluster_cli import cluster_cli -from profiler.advisor.version import print_version_callback, cli_version - -logger = logging.getLogger() -CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], - max_content_width=160) - -COMMAND_PRIORITY = { - "advisor": 1, - "compare": 2, - "cluster": 3, - "auto-completion": 4 -} - - -class SpecialHelpOrder(click.Group): - - def __init__(self, *args, **kwargs): - super(SpecialHelpOrder, self).__init__(*args, **kwargs) - - def list_commands_for_help(self, ctx): - """ - reorder the list of commands when listing the help - """ - commands = super(SpecialHelpOrder, self).list_commands(ctx) - return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')), - command) for command in commands)] - - def get_help(self, ctx): - self.list_commands = self.list_commands_for_help - return super(SpecialHelpOrder, self).get_help(ctx) - - -@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder) -@click.option('--version', '-V', '-v', is_flag=True, - callback=print_version_callback, expose_value=False, - is_eager=True, help=cli_version()) -def msprof_analyze_cli(**kwargs): - pass - - -msprof_analyze_cli.add_command(analyze_cli, name="advisor") -msprof_analyze_cli.add_command(compare_cli, name="compare") -msprof_analyze_cli.add_command(cluster_cli, name="cluster") -msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") - -if __name__ == "__main__": - msprof_analyze_cli.main( - [ - "analyze","all","-d", - r"D:\data\file","-l","cn" - ] - ) -- Gitee From 353723271a2e967a4d9e4495db343b9b4aef2cc0 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:07:41 +0000 Subject: [PATCH 56/72] =?UTF-8?q?Revert=20"=E5=88=A0=E9=99=A4=E6=96=87?= =?UTF-8?q?=E4=BB=B6=20profiler/cli/entrance.py"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit This reverts commit 3f0b15adb831e6d468f738a318417796a71b1c73. --- profiler/cli/entrance.py | 75 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 profiler/cli/entrance.py diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py new file mode 100644 index 000000000..89ac8187d --- /dev/null +++ b/profiler/cli/entrance.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import click + +from profiler.cli.analyze_cli import analyze_cli +from profiler.cli.complete_cli import auto_complete_cli +from profiler.cli.compare_cli import compare_cli +from profiler.cli.cluster_cli import cluster_cli +from profiler.advisor.version import print_version_callback, cli_version + +logger = logging.getLogger() +CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], + max_content_width=160) + +COMMAND_PRIORITY = { + "advisor": 1, + "compare": 2, + "cluster": 3, + "auto-completion": 4 +} + + +class SpecialHelpOrder(click.Group): + + def __init__(self, *args, **kwargs): + super(SpecialHelpOrder, self).__init__(*args, **kwargs) + + def list_commands_for_help(self, ctx): + """ + reorder the list of commands when listing the help + """ + commands = super(SpecialHelpOrder, self).list_commands(ctx) + return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')), + command) for command in commands)] + + def get_help(self, ctx): + self.list_commands = self.list_commands_for_help + return super(SpecialHelpOrder, self).get_help(ctx) + + +@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder) +@click.option('--version', '-V', '-v', is_flag=True, + callback=print_version_callback, expose_value=False, + is_eager=True, help=cli_version()) +def msprof_analyze_cli(**kwargs): + pass + + +msprof_analyze_cli.add_command(analyze_cli, name="advisor") +msprof_analyze_cli.add_command(compare_cli, name="compare") +msprof_analyze_cli.add_command(cluster_cli, name="cluster") +msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") + +if __name__ == "__main__": + msprof_analyze_cli.main( + [ + "analyze","all","-d", + r"D:\data\file","-l","cn" + ] + ) -- Gitee From 3b444579f0280889b0d77c7a3e2e018401115e04 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:08:19 +0000 Subject: [PATCH 57/72] update profiler/cli/entrance.py. 
Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com> --- profiler/cli/entrance.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 89ac8187d..503cf9ea6 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -64,12 +64,4 @@ def msprof_analyze_cli(**kwargs): msprof_analyze_cli.add_command(analyze_cli, name="advisor") msprof_analyze_cli.add_command(compare_cli, name="compare") msprof_analyze_cli.add_command(cluster_cli, name="cluster") -msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") - -if __name__ == "__main__": - msprof_analyze_cli.main( - [ - "analyze","all","-d", - r"D:\data\file","-l","cn" - ] - ) +msprof_analyze_cli.add_command(auto_complete_cli, name="auto-completion") \ No newline at end of file -- Gitee From 7308fe6d140b9b0fa099381aeb5265c6712d1e62 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Tue, 11 Feb 2025 03:11:03 +0000 Subject: [PATCH 58/72] update profiler/test/ut/advisor/compute_advice/data/kernel_details.csv. Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com> --- .../compute_advice/data/kernel_details.csv | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv index e69de29bb..020178358 100644 --- a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv +++ b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv @@ -0,0 +1,30 @@ +Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Time(us),Duration(us),Wait Time(us),Block Dim,Mix Block Dim,HF32 Eligible,Input Shapes,Input Data Types,Input Formats,Output Shapes,Output Data Types,Output Formats,Context ID,aicore_time(us),aic_total_cycles,aic_mac_time(us),aic_mac_ratio,aic_scalar_time(us),aic_scalar_ratio,aic_mte1_time(us),aic_mte1_ratio,aic_mte2_time(us),aic_mte2_ratio,aic_fixpipe_time(us),aic_fixpipe_ratio,aic_icache_miss_rate,aiv_time(us),aiv_total_cycles,aiv_vec_time(us),aiv_vec_ratio,aiv_scalar_time(us),aiv_scalar_ratio,aiv_mte2_time(us),aiv_mte2_ratio,aiv_mte3_time(us),aiv_mte3_ratio,aiv_icache_miss_rate,cube_utilization(%) +19,4294967295,61653,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971558972.912 ",185.504,1.087,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.87,5295467,151.425,0.824,88.03,0.479,119.148,0.648,177.314,0.964,5.736,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,79.295 +19,4294967295,61669,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971560588.764 ",501.17,2.2,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,478.701,17233251,356.349,0.744,118.087,0.247,296.009,0.618,452.112,0.944,35.833,0.075,0.001,0,0,0,0,0,0,0,0,0,0,0,95.517 +19,4294967295,61694,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971565213.257 ",186.823,1.178,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,183.728,5291376,151.502,0.825,87.902,0.478,118.519,0.645,177.654,0.967,5.773,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.675 +19,4294967295,61710,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971566843.489 
",516.991,2.33,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,491.775,17703905,356.249,0.724,118.59,0.241,295.046,0.6,463.696,0.943,37.671,0.077,0.001,0,0,0,0,0,0,0,0,0,0,0,95.123 +19,4294967295,61735,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971571596.404 ",187.724,0.766,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,184.904,5325221,151.489,0.819,87.893,0.475,118.63,0.642,178.815,0.967,5.77,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.798 +19,4294967295,61751,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971573223.437 ",514.87,2.15,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,486.931,17529512,356.117,0.731,118.847,0.244,295.529,0.607,457.002,0.939,37.938,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,94.574 +19,4294967295,61776,2,aclnnMatmul_MatMulCommon_MatMulV2,MatMulV2,dynamic,AI_CORE,"1736413971577931.851 ",190.544,1.367,16,0,NO,"""81920,4096;8192,512""",DT_BF16;DT_BF16,ND;ND,"""4096,512""",DT_BF16,ND,N/A,187.073,5387702,151.741,0.811,87.935,0.47,117.467,0.628,181.043,0.968,5.803,0.031,0.001,0,0,0,0,0,0,0,0,0,0,0,78.543 +19,4294967295,61792,2,aclnnMatmul_MatMulV3Common_MatMulV3,MatMulV3,dynamic,AI_CORE,"1736413971579566.403 ",504.071,2.28,20,0,NO,"""81920,1536;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,485.542,17479517,356.283,0.734,117.755,0.243,296.421,0.61,455.064,0.937,37.75,0.078,0.001,0,0,0,0,0,0,0,0,0,0,0,96.324 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,13792,2,aclnnMatmul_MatMulV3Common_MatMulV5,MatMulV3,dynamic,AI_CORE,"1736413974248200.543 ",521.31,2.22,20,0,NO,"""8192,15365;8192,4096""",DT_BF16;DT_BF16,ND;ND,"""1536,4096""",DT_BF16,ND,N/A,499.234,17972434,356.364,0.714,117.639,0.236,295.58,0.592,471.784,0.945,35.825,0.072,0.001,0,0,0,0,0,0,0,0,0,0,0,95.765 +19,4294967295,60679,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971411629.128 ",410.188,1.53,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,366.147,13181275,129.055,0.352,352.275,0.962,108.364,0.296,172.86,0.872,216.141,0.59,0.003,365.782,26336326,228.687,0.625,137.979,0.377,118.603,0.324,71.448,0.195,0.013,89.263 
+19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339 +19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377 +19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 +19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.78,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141 +19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.76,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294 +19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 
",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.779,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143 +19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.783,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238 +19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 +19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0 +19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0 +19,4294967295,60711,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971416743.250 ",27.021,1.246,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.304,1749862,5.986,0.246,1.258,0.052,20.424,0.84,3.23,0.133,0.027,0 +19,4294967295,60718,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971419318.962 ",25.08,0.984,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,22.47,1617840,5.989,0.267,2.009,0.089,18.809,0.837,3.191,0.142,0.024,0 +19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0 +19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 ",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0 +19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 
",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0 +19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0 \ No newline at end of file -- Gitee From 3fd8aae7004492e4215e3a78d4fa12570dd193f4 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 09:13:14 +0800 Subject: [PATCH 59/72] =?UTF-8?q?Cube=E4=B8=8Efa=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E5=86=85=E8=BD=B4=E5=88=A4=E6=96=AD=E6=96=B9=E6=B3=95=E6=8A=BD?= =?UTF-8?q?=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 123 ++++++++++-------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index a3622ebdf..598c3690f 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -31,6 +31,8 @@ class AICorePerformanceChecker: """ _CHECKER = "AICorePerformanceChecker" CUBE_OPERATOR_MEMORY_SIZE_MB = 100 + INNER_AXIS_256 = 256 + INNER_AXIS_128 = 128 def __init__(self): @@ -130,7 +132,7 @@ class AICorePerformanceChecker: # 多的一维是 bias ,预先乘2 memory += int(shapes) * 2 continue - memory += reduce(lambda x, y: x*y, map(int, shapes.split(","))) + memory += reduce(lambda x, y: x * y, map(int, shapes.split(","))) memory += reduce(lambda x, y: x * y, map(int, output_shapes.split(","))) return memory * 2 / 1024 / 1024 @@ -173,22 +175,7 @@ class AICorePerformanceChecker: for shape in cube_dict[op]: dtype = None shape_duration = 0. 
- # 判断输入shape内轴是否为256的倍数 - if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: - # NZ格式 - shapes = shape.split("-")[0].split(";") - b = int(shapes[0].split(",")[1]) - c = int(shapes[0].split(",")[2]) - - f = int(shapes[1].split(",")[1]) - g = int(shapes[1].split(",")[2]) - affinity_flag = (b * c % 256 == 0) and (f * g % 256 == 0) - else: - # ND格式 - shapes = shape.split("-")[0].split(";") - l = int(shapes[0].split(",")[1]) - k = int(shapes[1].split(",")[1]) - affinity_flag = (l % 256 == 0) and (k % 256 == 0) + affinity_flag = self.check_affinity(shape) if not affinity_flag: for operator in operator_list: if (operator.op_name == op and @@ -247,6 +234,24 @@ class AICorePerformanceChecker: sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + def _check_cube_inner_axis(self, shape): + # 判断输入shape内轴是否为256的倍数 + if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: + # NZ格式 + shapes = shape.split("-")[0].split(";") + b = int(shapes[0].split(",")[1]) + c = int(shapes[0].split(",")[2]) + + f = int(shapes[1].split(",")[1]) + g = int(shapes[1].split(",")[2]) + return (b * c % self.INNER_AXIS_256 == 0) and (f * g % self.INNER_AXIS_256 == 0) + else: + # ND格式 + shapes = shape.split("-")[0].split(";") + l = int(shapes[0].split(",")[1]) + k = int(shapes[1].split(",")[1]) + return (l % self.INNER_AXIS_256 == 0) and (k % self.INNER_AXIS_256 == 0) + def check_fa_operator(self, profiling_dataset: ProfilingDataset): fa_list = self.fa_list fa_dict = self.fa_dict @@ -256,46 +261,7 @@ class AICorePerformanceChecker: # 不亲和算子筛选 for op in fa_dict: for shape in fa_dict[op]: - affinity_flag = False - shape_duration = 0. - dtype = None - suggestion = "" - if "varlen" in op.lower(): - # 处理变长算子 如果不亲和则affinity_flag为False - if int(shape.split("-")[0].split(";")[0].split(",")[2]) % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE1 - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - else: - # 处理定长算子 如果不亲和则affinity_flag为False - head_dim = 0 - seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2]) - input_first_tensor = shape.split("-")[0].split(";")[0].split(",") - if len(input_first_tensor) == 3: - head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) - else: - head_dim = int(input_first_tensor[3]) - if head_dim % 128 != 0 and seq_len % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE3 - elif head_dim % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE1 - elif seq_len % 128 != 0: - affinity_flag = True - suggestion = self._FA_AFFINITY_DESC_TYPE2 - if affinity_flag: - for operator in fa_list: - if (operator.op_name == op and - operator.input_shapes[1:-1] + "-" + - operator.output_shapes[1:-1] == shape): - shape_duration += float(operator.task_duration) - dtype = operator.input_data_types - + affinity_flag, dtype, shape_duration, suggestion = self.method_name(fa_list, op, shape) if affinity_flag: # 不亲和算子 计算耗时,加入affinity_queue affinity_queue.append({ @@ -375,6 +341,49 @@ class AICorePerformanceChecker: sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + def _check_fa_inner_axis(self, fa_list, op, shape): + shape_duration = 0. 
+ affinity_flag = False + dtype = None + suggestion = "" + if "varlen" in op.lower(): + # 处理变长算子 如果不亲和则affinity_flag为False + inner_axis = int(shape.split("-")[0].split(";")[0].split(",")[2]) + if inner_axis % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE1 + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + else: + # 处理定长算子 如果不亲和则affinity_flag为False + head_dim = 0 + seq_len = int(shape.split("-")[1].split(";")[0].split(",")[2]) + input_first_tensor = shape.split("-")[0].split(";")[0].split(",") + if len(input_first_tensor) == 3: + head_dim = int(input_first_tensor[2]) / int(shape.split("-")[1].split(";")[0].split(",")[1]) + else: + head_dim = int(input_first_tensor[3]) + if head_dim % self.INNER_AXIS_128 != 0 and seq_len % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE3 + elif head_dim % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE1 + elif seq_len % self.INNER_AXIS_128 != 0: + affinity_flag = True + suggestion = self._FA_AFFINITY_DESC_TYPE2 + if affinity_flag: + for operator in fa_list: + if (operator.op_name == op and + operator.input_shapes[1:-1] + "-" + + operator.output_shapes[1:-1] == shape): + shape_duration += float(operator.task_duration) + dtype = operator.input_data_types + return affinity_flag, dtype, shape_duration, suggestion + def check_vector_operator(self, profiling_dataset: ProfilingDataset): vector_dict = self.vector_dict vector_list = [] -- Gitee From 1902efa711e28f6a24c2600253f90fd841e33aea Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 09:24:18 +0800 Subject: [PATCH 60/72] =?UTF-8?q?Cube=E4=B8=8Evector=E7=AE=97=E5=AD=90?= =?UTF-8?q?=E6=89=A9=E5=B1=95=E5=B9=B6=E6=8F=90=E5=8F=96=E6=94=B6=E9=9B=86?= =?UTF-8?q?=E7=AE=97=E5=AD=90=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 598c3690f..baaa5e949 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -164,13 +164,11 @@ class AICorePerformanceChecker: def check_cube_operator(self, profiling_dataset: ProfilingDataset): cube_dict = self.cube_dict + suggestion = self._CUBE_AFFINITY_DESC optimization_queue = [] bound_queue = [] affinity_queue = [] - operator_list = [op for op in profiling_dataset.op_summary.op_list - if op.op_name in cube_dict - and op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] in cube_dict[op.op_name]] - suggestion = self._CUBE_AFFINITY_DESC + operator_list = self._get_operator_list(cube_dict, profiling_dataset) for op in cube_dict: for shape in cube_dict[op]: dtype = None @@ -234,6 +232,16 @@ class AICorePerformanceChecker: sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] + @staticmethod + def _get_operator_list(cube_dict, profiling_dataset): + operator_list = [] + 
for op in profiling_dataset.op_summary.op_list: + if op.op_name in cube_dict: + key = op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1] + if key in cube_dict[op.op_name]: + operator_list.append(op) + return operator_list + def _check_cube_inner_axis(self, shape): # 判断输入shape内轴是否为256的倍数 if (len(shape.split("-")[0].split(";")[0].split(","))) == 4: @@ -386,16 +394,9 @@ class AICorePerformanceChecker: def check_vector_operator(self, profiling_dataset: ProfilingDataset): vector_dict = self.vector_dict - vector_list = [] optimization_queue = [] bound_queue = [] - vector_list.extend( - operator for op_name in vector_dict - for shape in vector_dict[op_name] - for operator in profiling_dataset.op_summary.op_list - if operator.op_name == op_name - and operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape - ) + vector_list = self._get_vector_list(profiling_dataset, vector_dict) for op_name in vector_dict: for shape in vector_dict[op_name]: aiv_vec_ratio, aiv_mte2_ratio, aiv_mte3_ratio, shape_duration, optimization = 0., 0., 0., 0., 0. @@ -444,6 +445,17 @@ class AICorePerformanceChecker: return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5]] + @staticmethod + def _get_vector_list(profiling_dataset, vector_dict): + vector_list = [] + for op_name in vector_dict: + for shape in vector_dict[op_name]: + for operator in profiling_dataset.op_summary.op_list: + if operator.op_name == op_name and operator.input_shapes[1:-1] + "-" + operator.output_shapes[ + 1:-1] == shape: + vector_list.extend([operator]) + return vector_list + def make_record(self, result: OptimizeResult): """ make record for what and how to optimize -- Gitee From cec50141aaa01bc64497f6ec0cf3a9ee58c5475c Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 09:41:30 +0800 Subject: [PATCH 61/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=99=A4=E9=9B=B6?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index baaa5e949..dcd87f1c4 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -192,8 +192,7 @@ class AICorePerformanceChecker: operator.input_shapes[1:-1] + "-" + operator.output_shapes[1:-1] == shape] shape_duration = sum(float(operator.task_duration) for operator in shap_list) dtype = shap_list[0].input_data_types if shap_list else None - aic_mac_ratio, aic_mte2_ratio = 0., 0. - length = 0 + aic_mac_ratio, aic_mte2_ratio, length = 0., 0., 0 for operator in shap_list: try: aic_mac_ratio += float(operator.aic_mac_ratio) @@ -201,10 +200,10 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aic_mac_ratio = self.safe_divide(aic_mac_ratio, length) + aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) + if aic_mac_ratio is None or aic_mte2_ratio is None: continue - aic_mac_ratio = aic_mac_ratio / length - aic_mte2_ratio = aic_mte2_ratio / length bound = "" optimization = 0. 
if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: @@ -296,10 +295,10 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aic_fixpipe_ratio = self.safe_divide(aic_fixpipe_ratio, length) + aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) + if aic_mte2_ratio is None or aic_fixpipe_ratio is None: continue - aic_fixpipe_ratio = aic_fixpipe_ratio / length - aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: bound = "mte2_and_fixpipe_bound" elif aic_mte2_ratio >= 0.8: @@ -414,11 +413,11 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aiv_vec_ratio = self.safe_divide(aiv_vec_ratio,length) + aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio,length) + aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio,length) + if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None: continue - aiv_vec_ratio = aiv_vec_ratio / length - aiv_mte2_ratio = aiv_mte2_ratio / length - aiv_mte2_ratio = aiv_mte2_ratio / length if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: bound = "vec_mte2_mte3_bound" elif aiv_mte2_ratio >= 0.7: @@ -561,3 +560,10 @@ class AICorePerformanceChecker: logger.warning("Skip %s checker because of not containing input datas", self._CHECKER) return False return True + + @staticmethod + def safe_divide(numerator, denominator): + if denominator == 0: + logger.warning("Warning: Division by zero is not allowed.") + return None + return numerator / denominator -- Gitee From a601933b53899387cc8fb9bad30ea59669bafa52 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 10:34:16 +0800 Subject: [PATCH 62/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=99=A4=E9=9B=B6?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C=EF=BC=8C=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 13 +++---- .../advisor/rules/cn/aicore_performance.yaml | 34 ++++++++++++++++++- .../advisor/rules/en/aicore_performance.yaml | 34 ++++++++++++++++++- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index dcd87f1c4..445b5f761 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -16,6 +16,7 @@ import logging import os from functools import reduce +from debug.accuracy_tools.msprobe.core.common.utils import convert_tuple from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult @@ -318,10 +319,10 @@ class AICorePerformanceChecker: length += 1 except ValueError: continue - if length == 0: + aiv_vec_ratio = self.safe_divide(aiv_vec_ratio, length) + aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) + if aiv_vec_ratio is None or aic_mte2_ratio is None: continue - aiv_vec_ratio = aiv_vec_ratio / length - aic_mte2_ratio = aic_mte2_ratio / length if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: bound = "mte2_and_vec_bound" elif aic_mte2_ratio >= 0.8: @@ -413,9 +414,9 @@ class AICorePerformanceChecker: length += 1 except ValueError: 
continue - aiv_vec_ratio = self.safe_divide(aiv_vec_ratio,length) - aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio,length) - aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio,length) + aiv_vec_ratio = self.safe_divide(aiv_vec_ratio, length) + aiv_mte2_ratio = self.safe_divide(aiv_mte2_ratio, length) + aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio, length) if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None: continue if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index 8d44aaab2..382de6db5 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -12,4 +12,36 @@ fa_affinity_desc_type3: "D和S均不能被128整除" suggestion: "请根据亲和性、bound类型或优化空间尝试分析筛选出来的算子" affinity_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 有不亲和特征: {suggestion}\n" bound_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} bound类型为: {bound} bound\n" -optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n" \ No newline at end of file +optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似有性能优化空间,参考性能优化空间: {optimization}%\n" + +cube_operators: + - target: aic_mac_ratio + bound: mac + threshold_value: 0.8 + - target: aic_mte2_ratio + bound: mte2 + threshold_value: 0.95 + +fa_operators: + - target: aic_mte2_ratio + bound: mac + threshold_value: 0.8 + - target: aic_fixpipe_ratio + bound: fixpipe + threshold_value: 0.75 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.75 + +vector_operators: + - target: total + threshold_value: 0.9 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.7 + - target: aiv_mte2_ratio + bound: mte2 + threshold_value: 0.7 + - target: aiv_mte3_ratio + bound: mte3 + threshold_value: 0.7 \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml index e85a919ab..cae3700b2 100644 --- a/profiler/advisor/rules/en/aicore_performance.yaml +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -12,4 +12,36 @@ fa_affinity_desc_type3: "Neither D nor S is not divisible by 128" suggestion: "Please try to analyze the filtered operators based on affinity, bound type or optimization space" affinity_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} with disaffection characteristics: {suggestion}\n" bound_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} bound type: {bound} bound\n" -optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n" \ No newline at end of file +optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect there is room for performance optimization, refer to Performance Optimization Space: {optimization}%\n" + +cube_operators: + - target: aic_mac_ratio + bound: mac + threshold_value: 0.8 + - target: aic_mte2_ratio + bound: mte2 + threshold_value: 0.95 + +fa_operators: + - target: aic_mte2_ratio + bound: mac + threshold_value: 0.8 + - target: aic_fixpipe_ratio + bound: fixpipe + threshold_value: 0.75 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.75 + +vector_operators: + - target: total + threshold_value: 0.9 + - target: aiv_vec_ratio + bound: vec + threshold_value: 0.7 + - target: aiv_mte2_ratio + bound: mte2 + threshold_value: 0.7 + - target: 
aiv_mte3_ratio + bound: mte3 + threshold_value: 0.7 \ No newline at end of file -- Gitee From af7d34f13a7d02c230582b0c132a85605b8ca5df Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:27:38 +0800 Subject: [PATCH 63/72] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=A7=84=E5=88=99?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE=EF=BC=8C=E5=B0=86=E9=98=88=E5=80=BC=E6=94=BE?= =?UTF-8?q?=E5=85=A5=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 102 ++++++++++++------ .../advisor/rules/cn/aicore_performance.yaml | 18 ++-- .../advisor/rules/en/aicore_performance.yaml | 19 ++-- 3 files changed, 89 insertions(+), 50 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 445b5f761..00e35c84d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -16,7 +16,6 @@ import logging import os from functools import reduce -from debug.accuracy_tools.msprobe.core.common.utils import convert_tuple from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult @@ -75,6 +74,9 @@ class AICorePerformanceChecker: self._AFFINITY_SUGGESTION = self.aicore_rules.get("affinity_suggestion") self._BOUND_SUGGESTION = self.aicore_rules.get("bound_suggestion") self._OPTI_SUGGESTION = self.aicore_rules.get("optimization_suggestion") + self._OPERATOR_RULES = {"cube_operators": self.aicore_rules.get("cube_operators"), + "fa_operators": self.aicore_rules.get("fa_operators"), + "vector_operators": self.aicore_rules.get("vector_operators")} def data_filter(self, profiling_dataset: ProfilingDataset): if not self.check_task_list(profiling_dataset): @@ -207,14 +209,22 @@ class AICorePerformanceChecker: continue bound = "" optimization = 0. 
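# Editorial sketch, not a diff line: the hunk below swaps the hard-coded
# 0.8 / 0.95 cube thresholds for lookups into the rules loaded from
# aicore_performance.yaml. A stand-alone illustration of that lookup pattern
# follows; `pick_rule` and `classify_cube` are illustrative names, not the
# project's API.
def pick_rule(rules, target):
    # Return the first rule whose "target" matches, or None when absent.
    return next((rule for rule in rules if rule.get("target") == target), None)


cube_rules = [
    {"target": "aic_mac_ratio", "bound": "mac", "threshold": 0.8},
    {"target": "aic_mte2_ratio", "bound": "mte2", "threshold": 0.95},
]
mac_rule = pick_rule(cube_rules, "aic_mac_ratio")
mte2_rule = pick_rule(cube_rules, "aic_mte2_ratio")


def classify_cube(aic_mac_ratio, aic_mte2_ratio):
    # Bound when a metric clears its configured threshold; otherwise report the
    # largest remaining gap (in percent) as the optimization headroom.
    if aic_mac_ratio >= mac_rule["threshold"] and aic_mte2_ratio >= mte2_rule["threshold"]:
        return mac_rule["bound"] + "_and_" + mte2_rule["bound"] + "_bound", 0.0
    if aic_mac_ratio >= mac_rule["threshold"]:
        return mac_rule["bound"], 0.0
    if aic_mte2_ratio >= mte2_rule["threshold"]:
        return mte2_rule["bound"], 0.0
    gap = max(mac_rule["threshold"] - aic_mac_ratio, mte2_rule["threshold"] - aic_mte2_ratio)
    return "", round(gap * 100, 2)


assert classify_cube(0.85, 0.96)[0] == "mac_and_mte2_bound"
bound_label, headroom_pct = classify_cube(0.5, 0.5)
assert bound_label == "" and headroom_pct > 0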
- if aic_mac_ratio >= 0.8 and aic_mte2_ratio >= 0.95: - bound = "mac_and_mte2_bound" - elif aic_mac_ratio >= 0.8: - bound = "mac_bound" - elif aic_mte2_ratio >= 0.95: - bound = "mte2_bound" + aic_mac_ratio_rule, aic_mte2_ratio_rule = None, None + for operator_rule in self._OPERATOR_RULES["cube_operators"]: + if operator_rule["target"] == "aic_mac_ratio": + aic_mac_ratio_rule = operator_rule + elif operator_rule["target"] == "aic_mte2_ratio": + aic_mte2_ratio_rule = operator_rule + if (aic_mac_ratio >= aic_mac_ratio_rule["threshold"] + and aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]): + bound = aic_mac_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound" + elif aic_mac_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mac_ratio_rule["bound"] + elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mte2_ratio_rule["bound"] else: - optimization = round(max(0.8 - aic_mac_ratio, 0.95 - aic_mte2_ratio) * 100, 2) + optimization = max(aic_mac_ratio["threshold"] - aic_mac_ratio, + aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) if bound: bound_queue.append({ "op_name": op, @@ -227,7 +237,7 @@ class AICorePerformanceChecker: "op_name": op, "shape": shape.split("-")[0], "dtype": dtype, - "optimization": optimization}) + "optimization": round(optimization * 100, 2)}) return [sorted(optimization_queue, key=lambda x: x["optimization"], reverse=True)[:5], sorted(bound_queue, key=lambda x: x["duration"], reverse=True)[:5], sorted(affinity_queue, key=lambda x: x["duration"], reverse=True)[:5]] @@ -300,14 +310,22 @@ class AICorePerformanceChecker: aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) if aic_mte2_ratio is None or aic_fixpipe_ratio is None: continue - if aic_mte2_ratio >= 0.8 and aic_fixpipe_ratio >= 0.75: - bound = "mte2_and_fixpipe_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" + aic_fixpipe_ratio_rule, aic_mte2_ratio_rule = None, None + for rule in self._OPERATOR_RULES["fa_operators"]: + if rule["target"] == "aic_fixpipe_ratio": + aic_fixpipe_ratio_rule = rule + elif rule["target"] == "aic_mte2_ratio": + aic_mte2_ratio_rule = rule + if (aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"] and + aic_fixpipe_ratio >= aic_fixpipe_ratio_rule["threshold"]): + bound = aic_fixpipe_ratio_rule["bound"] + "_and_" + aic_mte2_ratio_rule["bound"] + "_bound" + elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mte2_ratio_rule["bound"] + elif aic_fixpipe_ratio >= aic_fixpipe_ratio_rule["threshold"]: + bound = aic_fixpipe_ratio_rule["bound"] else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + optimization = max(aic_fixpipe_ratio_rule["threshold"] - aic_fixpipe_ratio, + aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) else: for operator in fa_list: if (operator.op_name == op and @@ -323,14 +341,22 @@ class AICorePerformanceChecker: aic_mte2_ratio = self.safe_divide(aic_mte2_ratio, length) if aiv_vec_ratio is None or aic_mte2_ratio is None: continue - if aic_mte2_ratio >= 0.8 and aiv_vec_ratio >= 0.75: - bound = "mte2_and_vec_bound" - elif aic_mte2_ratio >= 0.8: - bound = "mte2_bound" - elif aiv_vec_ratio >= 0.75: - bound = "vec_bound" + aiv_vec_ratio_rule, aic_mte2_ratio_rule = None, None + for rule in self._OPERATOR_RULES["fa_operators"]: + if rule["target"] == "aiv_vec_ratio": + aiv_vec_ratio_rule = rule + elif rule["target"] == "aic_mte2_ratio": + aic_mte2_ratio_rule = rule + if (aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"] + and 
aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]): + bound = aic_mte2_ratio_rule["bound"] + "_and_" + aiv_vec_ratio_rule["bound"] + "_bound" + elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: + bound = aic_mte2_ratio_rule["bound"] + elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]: + bound = aiv_vec_ratio_rule["bound"] else: - optimization = max(0.8 - aic_mte2_ratio, 0.75 - aiv_vec_ratio) + optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio, + aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) if bound: bound_queue.append({ "op_name": op, @@ -419,16 +445,28 @@ class AICorePerformanceChecker: aiv_mte3_ratio = self.safe_divide(aiv_mte3_ratio, length) if aiv_vec_ratio is None or aiv_mte2_ratio is None or aiv_mte3_ratio is None: continue - if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= 0.9: - bound = "vec_mte2_mte3_bound" - elif aiv_mte2_ratio >= 0.7: - bound = "mte2_bound" - elif aiv_mte3_ratio >= 0.7: - bound = "mte3_bound" - elif aiv_vec_ratio >= 0.7: - bound = "vec_bound" + aiv_vec_ratio_rule, aiv_mte2_ratio_rule, aiv_mte3_ratio_rule, total_rule = None, None, None, None + for operator_rule in self._OPERATOR_RULES["vector_operators"]: + if operator_rule["target"] == "aiv_vec_ratio": + aiv_vec_ratio_rule = operator_rule + elif operator_rule["target"] == "aic_mte2_ratio": + aiv_mte2_ratio_rule = operator_rule + elif operator_rule["target"] == "aic_mte3_ratio": + aiv_mte3_ratio_rule = operator_rule + elif operator_rule["target"] == "total": + total_rule = operator_rule + if aiv_vec_ratio + aiv_mte2_ratio + aiv_mte3_ratio >= total_rule["threshold"]: + bound = total_rule["bound"] + elif aiv_mte2_ratio >= aiv_mte2_ratio_rule["threshold"]: + bound = aiv_mte2_ratio_rule["bound"] + elif aiv_mte3_ratio >= aiv_mte3_ratio_rule["threshold"]: + bound = aiv_mte3_ratio_rule["bound"] + elif aiv_vec_ratio >= aiv_vec_ratio_rule["threshold"]: + bound = aiv_vec_ratio_rule["bound"] else: - optimization = max(0.7 - aiv_vec_ratio, 0.7 - aiv_mte2_ratio, 0.7 - aiv_mte3_ratio) + optimization = max(aiv_vec_ratio_rule["threshold"] - aiv_vec_ratio, + aiv_mte2_ratio_rule["threshold"] - aiv_mte2_ratio, + aiv_mte3_ratio_rule["threshold"] - aiv_mte3_ratio) if bound: bound_queue.append({ "op_name": op_name, diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index 382de6db5..f6fd914ac 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -17,31 +17,31 @@ optimization_suggestion: "{op_name}算子 shape: {shape} dtype: {dtype} 疑似 cube_operators: - target: aic_mac_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_mte2_ratio bound: mte2 - threshold_value: 0.95 + threshold: 0.95 fa_operators: - target: aic_mte2_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_fixpipe_ratio bound: fixpipe - threshold_value: 0.75 + threshold: 0.75 - target: aiv_vec_ratio bound: vec - threshold_value: 0.75 + threshold: 0.75 vector_operators: - target: total - threshold_value: 0.9 + threshold: 0.9 - target: aiv_vec_ratio bound: vec - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte2_ratio bound: mte2 - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte3_ratio bound: mte3 - threshold_value: 0.7 \ No newline at end of file + threshold: 0.7 \ No newline at end of file diff --git a/profiler/advisor/rules/en/aicore_performance.yaml b/profiler/advisor/rules/en/aicore_performance.yaml index cae3700b2..b1e5e4701 100644 --- 
a/profiler/advisor/rules/en/aicore_performance.yaml +++ b/profiler/advisor/rules/en/aicore_performance.yaml @@ -17,31 +17,32 @@ optimization_suggestion: "{op_name} Op shape: {shape} dtype: {dtype} suspect the cube_operators: - target: aic_mac_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_mte2_ratio bound: mte2 - threshold_value: 0.95 + threshold: 0.95 fa_operators: - target: aic_mte2_ratio bound: mac - threshold_value: 0.8 + threshold: 0.8 - target: aic_fixpipe_ratio bound: fixpipe - threshold_value: 0.75 + threshold: 0.75 - target: aiv_vec_ratio bound: vec - threshold_value: 0.75 + threshold: 0.75 vector_operators: - target: total - threshold_value: 0.9 + bound: vec_mte2_mte3 + threshold: 0.9 - target: aiv_vec_ratio bound: vec - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte2_ratio bound: mte2 - threshold_value: 0.7 + threshold: 0.7 - target: aiv_mte3_ratio bound: mte3 - threshold_value: 0.7 \ No newline at end of file + threshold: 0.7 \ No newline at end of file -- Gitee From f32c30bda3489fa385fdf246da4ffc2a2dd7b289 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:40:36 +0800 Subject: [PATCH 64/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 00e35c84d..4d51da450 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -176,7 +176,7 @@ class AICorePerformanceChecker: for shape in cube_dict[op]: dtype = None shape_duration = 0. 
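# Editorial sketch, not a diff line: the `shape` key checked below is built as
# op.input_shapes[1:-1] + "-" + op.output_shapes[1:-1], with ";" separating
# tensors and "," separating axes. A minimal parse of that convention, taking
# the 256 alignment and "last axis is the inner axis" as assumptions:
CUBE_INNER_AXIS_ALIGN = 256


def inner_axis_misaligned(shape_key):
    # Inspect the last axis of the first input tensor in the key.
    first_input_tensor = shape_key.split("-")[0].split(";")[0]
    inner_axis = int(first_input_tensor.split(",")[-1])
    return inner_axis % CUBE_INNER_AXIS_ALIGN != 0


assert not inner_axis_misaligned("4096,2,512-4096,2,512")   # 512 is a multiple of 256
assert inner_axis_misaligned("4096,2,500-4096,2,512")       # 500 is not, so flag it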
- affinity_flag = self.check_affinity(shape) + affinity_flag = self._check_cube_inner_axis(shape) if not affinity_flag: for operator in operator_list: if (operator.op_name == op and @@ -279,7 +279,7 @@ class AICorePerformanceChecker: # 不亲和算子筛选 for op in fa_dict: for shape in fa_dict[op]: - affinity_flag, dtype, shape_duration, suggestion = self.method_name(fa_list, op, shape) + affinity_flag, dtype, shape_duration, suggestion = self._check_fa_inner_axis(fa_list, op, shape) if affinity_flag: # 不亲和算子 计算耗时,加入affinity_queue affinity_queue.append({ -- Gitee From 89a5eeecb71f391249c5075bfaa2f2f32fdb1aba Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:47:26 +0800 Subject: [PATCH 65/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 4d51da450..eaf6340cd 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -223,7 +223,7 @@ class AICorePerformanceChecker: elif aic_mte2_ratio >= aic_mte2_ratio_rule["threshold"]: bound = aic_mte2_ratio_rule["bound"] else: - optimization = max(aic_mac_ratio["threshold"] - aic_mac_ratio, + optimization = max(aic_mac_ratio_rule["threshold"] - aic_mac_ratio, aic_mte2_ratio_rule["threshold"] - aic_mte2_ratio) if bound: bound_queue.append({ -- Gitee From f5d71f751ebe5fdd67431e213f3ba6e67c088b46 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:49:26 +0800 Subject: [PATCH 66/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/advisor/rules/cn/aicore_performance.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/profiler/advisor/rules/cn/aicore_performance.yaml b/profiler/advisor/rules/cn/aicore_performance.yaml index f6fd914ac..3f60747b2 100644 --- a/profiler/advisor/rules/cn/aicore_performance.yaml +++ b/profiler/advisor/rules/cn/aicore_performance.yaml @@ -35,6 +35,7 @@ fa_operators: vector_operators: - target: total + bound: vec_mte2_mte3 threshold: 0.9 - target: aiv_vec_ratio bound: vec -- Gitee From ffe03877b86e44b239817c663162b58ed2c27148 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Wed, 12 Feb 2025 11:59:39 +0800 Subject: [PATCH 67/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index eaf6340cd..b58f734c6 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -449,9 +449,9 @@ class AICorePerformanceChecker: for operator_rule in self._OPERATOR_RULES["vector_operators"]: if 
operator_rule["target"] == "aiv_vec_ratio": aiv_vec_ratio_rule = operator_rule - elif operator_rule["target"] == "aic_mte2_ratio": + elif operator_rule["target"] == "aiv_mte2_ratio": aiv_mte2_ratio_rule = operator_rule - elif operator_rule["target"] == "aic_mte3_ratio": + elif operator_rule["target"] == "aiv_mte3_ratio": aiv_mte3_ratio_rule = operator_rule elif operator_rule["target"] == "total": total_rule = operator_rule -- Gitee From f6c52443206feb83f8b2744afbdcdd124e947778 Mon Sep 17 00:00:00 2001 From: xubanxia <11655130+xubanxia@user.noreply.gitee.com> Date: Wed, 12 Feb 2025 06:28:49 +0000 Subject: [PATCH 68/72] update profiler/test/ut/advisor/compute_advice/data/kernel_details.csv. Signed-off-by: xubanxia <11655130+xubanxia@user.noreply.gitee.com> --- .../ut/advisor/compute_advice/data/kernel_details.csv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv index 020178358..f22cb8008 100644 --- a/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv +++ b/profiler/test/ut/advisor/compute_advice/data/kernel_details.csv @@ -15,10 +15,10 @@ Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Tim 19,4294967295,60707,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971415611.468 ",406.128,1.279,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,358.77,12915719,128.96,0.359,345.096,0.962,108.337,0.302,168.284,0.869,209.057,0.583,0.003,358.308,25798146,228.693,0.638,137.809,0.385,108.679,0.303,70.099,0.196,0.013,88.339 19,4294967295,60735,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971420248.800 ",407.008,0.84,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.702,12949284,128.975,0.359,346.306,0.963,108.43,0.301,166.899,0.864,209.018,0.581,0.003,359.274,25867705,228.693,0.637,138.438,0.385,107.723,0.3,70.146,0.195,0.013,88.377 19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,4,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.865,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 -19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 
",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.78,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141 -19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 ",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.76,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294 -19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.779,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143 -19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.783,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238 +19,4294967295,61655,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971559180.676 ",762.215,1.37,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,755.664,27203907,344.023,0.455,592.472,0.784,266.388,0.353,397.091,0.525,589.726,0.525,0.004,755.04,54362915,318.452,0.422,184.623,0.245,206.78,0.274,152.973,0.203,0.006,99.141 +19,4294967295,61696,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971565420.821 
",763.215,1.189,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,757.83,27281885,344.047,0.454,595.954,0.786,266.123,0.351,389.105,0.513,576.226,0.513,0.004,757.046,54507345,318.443,0.421,188.292,0.249,200.176,0.264,162.113,0.214,0.006,99.294 +19,4294967295,61737,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971571804.228 ",757.095,0.88,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.605,27021778,343.983,0.458,586.708,0.782,266.304,0.355,392.522,0.523,584.432,0.523,0.004,749.913,53993736,318.436,0.425,188.508,0.251,207.668,0.277,152.634,0.204,0.006,99.143 +19,4294967295,61778,2,aclnnFlashAttentionScoreGrad_FlashAttentionScoreGrad_FlashAttentionScoreGrad,FlashAttentionScoreGrad,dynamic,MIX_AIC,"1736413971578144.095 ",755.915,1.22,20,40,NO,"""4096,2,512;4096,2,512;4096,2,512;4096,2,512;4096,4096;2,4,4096,8;2,4,4096,8;;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;BOOL;FLOAT;FLOAT;DT_BF16;DT_BF16;INT64,NCL;NCL;NCL;NCL;ND;NCHW;NCHW;ND;NCL;ND,"""4096,2,512;4096,2,512;4096,2,512;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16,ND;ND;ND;ND,0,750.152,27005467,344.115,0.459,579.317,0.772,266.08,0.355,398.019,0.531,587.37,0.531,0.004,749.348,53953058,318.444,0.425,186.908,0.249,207.068,0.276,151.329,0.202,0.006,99.238 19,4294967295,60763,2,aclnnFlashAttentionScore_FlashAttentionScore_FlashAttentionScore_varlen,FlashAttentionScore,dynamic,MIX_AIC,"1736413971424592.447 ",405.228,1.35,20,40,NO,"""4096,2,511;4096,2,512;4096,2,512;;;;4096,4096;;;;;""",DT_BF16;DT_BF16;DT_BF16;DT_BF16;UINT8;DT_BF16;BOOL;INT64;INT64;INT64;INT64;INT64,NCL;NCL;NCL;ND;ND;ND;ND;ND;ND;ND;ND;ND,"""2,3,4096,8;2,4,4096,8;;4096,2,512""",FLOAT;FLOAT;DT_BF16;DT_BF16,ND;ND;ND;ND,0,359.793,12952532,128.923,0.358,345.768,0.961,108.411,0.301,167.379,0.465,208.79,0.58,0.003,359.294,25869164,228.691,0.637,138.411,0.385,107.868,0.3,70.163,0.195,0.013,88.788 19,4294967295,60683,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971412768.871 ",26.78,0.485,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,24.19,1741674,5.986,0.247,1.352,0.056,20.363,0.842,3.195,0.132,0.027,0 19,4294967295,60690,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413971414677.549 ",31.201,0.664,40,0,NO,"""512,2,4096;512,2,4096""",DT_BF16;DT_BF16,NCL;NCL,"""512,2,4096""",DT_BF16,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,28.617,2060443,5.986,0.209,1.444,0.05,25.005,0.874,3.336,0.117,0.026,0 @@ -27,4 +27,4 @@ Step Id,Model ID,Task ID,Stream ID,Name,Type,OP State,Accelerator Core,Start Tim 19,4294967295,13907,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268377.206 ",1.38,31.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.883,1589,0.027,0.03,0.265,0.3,0.18,0.204,0.108,0.123,0.182,0 19,4294967295,13910,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268502.128 
",1.46,17.48,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.948,1706,0.027,0.028,0.276,0.291,0.217,0.229,0.127,0.134,0.174,0 19,4294967295,13913,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268605.410 ",1.5,0.09,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0.96,1728,0.027,0.028,0.268,0.28,0.221,0.23,0.132,0.137,0.145,0 -19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0 \ No newline at end of file +19,4294967295,13916,2,aclnnAdd_AddAiCore_Add,Add,dynamic,AI_VECTOR_CORE,"1736413974268747.953 ",1.58,28.28,1,0,NO,""";""",FLOAT;FLOAT,ND;ND,"""""",FLOAT,ND,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,1.107,1993,0.027,0.024,0.426,0.384,0.201,0.181,0.118,0.106,0.162,0 -- Gitee From 97ecae67dc91b58b00c8647b677873edade33b89 Mon Sep 17 00:00:00 2001 From: kiritorl Date: Wed, 12 Feb 2025 22:49:58 +0800 Subject: [PATCH 69/72] =?UTF-8?q?=E6=8F=90=E5=8F=96make=5Frecord=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E4=B8=AD=E7=9A=84=E5=8A=9F=E8=83=BD=E9=A1=B9=EF=BC=8C?= =?UTF-8?q?=E7=BC=A9=E5=87=8F=E6=96=B9=E6=B3=95=E5=86=85=E9=95=BF=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance_checker.py | 102 +++++++----------- 1 file changed, 37 insertions(+), 65 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b58f734c6..20fac2d92 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -494,6 +494,39 @@ class AICorePerformanceChecker: vector_list.extend([operator]) return vector_list + def draw_record(self, op_type: str, result: OptimizeResult): + suggestion_keys = ['opti', 'bound', 'affinity'] + desc = dict.fromkeys(suggestion_keys, "") + problem_map = { + 'cube': self._CUBE_PROBLEM, + 'fa': self._FA_PROBLEM, + 'vector': self._VECTOR_PROBLEM + } + optimization_item = OptimizeItem(problem_map[op_type], self.desc, [self.suggestion]) + result.add(OptimizeRecord(optimization_item)) + headers = [ + "Type", + "Description and Suggestion", + ] + result.add_detail(problem_map[op_type], headers=headers) + for opti_issue in self.result[op_type][0]: + opti_sugg = self._OPTI_SUGGESTION.format(**opti_issue) + desc["opti"] += opti_sugg + if desc["opti"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, desc["opti"]]) + for bound_issue in self.result[op_type][1]: + bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue) + desc["bound"] += bound_sugg + if desc["bound"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) + if op_type == "vector": # vector 类型没有亲和性建议 + return + for affinity_issue in self.result[op_type][2]: + affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue) + desc["affinity"] += affinity_sugg + if desc["affinity"]: + result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) + def make_record(self, result: OptimizeResult): """ make record for what and how to optimize @@ -501,76 +534,15 @@ class AICorePerformanceChecker: if not self.ai_core_performance_issues: return 
self.ai_core_performance_issues - suggestion_keys = ['opti', 'bound', 'affinity'] - cube_desc = dict.fromkeys(suggestion_keys, "") - fa_desc = dict.fromkeys(suggestion_keys, "") - vector_desc = dict.fromkeys(suggestion_keys, "") if any(self.result["cube"]): - optimization_item = OptimizeItem(self._CUBE_PROBLEM, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(self._CUBE_PROBLEM, headers=headers) - for cube_opti_issue in self.result["cube"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**cube_opti_issue) - cube_desc["opti"] += opti_sugg - if cube_desc["opti"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, cube_desc["opti"]]) - for cube_bound_issue in self.result["cube"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**cube_bound_issue) - cube_desc["bound"] += bound_sugg - if cube_desc["bound"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, cube_desc["bound"]]) - for cube_affinity_issue in self.result["cube"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**cube_affinity_issue) - cube_desc["affinity"] += affinity_sugg - if cube_desc["affinity"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, cube_desc["affinity"]]) + self.draw_record("cube", result) if any(self.result["fa"]): - optimization_item = OptimizeItem(self._FA_PROBLEM, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(self._FA_PROBLEM, headers=headers) - for fa_opti_issue in self.result["fa"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**fa_opti_issue) - fa_desc["opti"] += opti_sugg - if fa_desc["opti"]: - result.add_detail(self._FA_PROBLEM, detail=[self._OPTI_DESC, fa_desc["opti"]]) - for fa_bound_issue in self.result["fa"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**fa_bound_issue) - fa_desc["bound"] += bound_sugg - if fa_desc["bound"]: - result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, fa_desc["bound"]]) - for fa_affinity_issue in self.result["fa"][2]: - affinity_sugg = self._AFFINITY_SUGGESTION.format(**fa_affinity_issue) - fa_desc["affinity"] += affinity_sugg - if fa_desc["affinity"]: - result.add_detail(self._FA_PROBLEM, detail=[self._AFFINITY_DESC, fa_desc["affinity"]]) + self.draw_record("fa", result) if any(self.result["vector"]): - optimization_item = OptimizeItem(self._VECTOR_PROBLEM, self.desc, [self.suggestion]) - result.add(OptimizeRecord(optimization_item)) - headers = [ - "Type", - "Description and Suggestion", - ] - result.add_detail(self._VECTOR_PROBLEM, headers=headers) - for vector_opti_issue in self.result["vector"][0]: - opti_sugg = self._OPTI_SUGGESTION.format(**vector_opti_issue) - vector_desc["opti"] += opti_sugg - if vector_desc["opti"]: - result.add_detail(self._VECTOR_PROBLEM, detail=[self._OPTI_DESC, vector_desc["opti"]]) - for vector_bound_issue in self.result["vector"][1]: - bound_sugg = self._BOUND_SUGGESTION.format(**vector_bound_issue) - vector_desc["bound"] += bound_sugg - if vector_desc["bound"]: - result.add_detail(self._VECTOR_PROBLEM, detail=[self._BOUND_DESC, vector_desc["bound"]]) + self.draw_record("vector", result) + return True def make_render(self, html_render, add_render_list=True, **kwargs): -- Gitee From 7e5e5936afb2e1d972e1896ab047cebff1b6cc64 Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Thu, 13 Feb 2025 15:47:42 +0800 Subject: [PATCH 70/72] 
=?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index 20fac2d92..e947b140d 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -518,14 +518,14 @@ class AICorePerformanceChecker: bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue) desc["bound"] += bound_sugg if desc["bound"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) + result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) if op_type == "vector": # vector 类型没有亲和性建议 return for affinity_issue in self.result[op_type][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue) desc["affinity"] += affinity_sugg if desc["affinity"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) + result.add_detail(self._VECTOR_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) def make_record(self, result: OptimizeResult): """ -- Gitee From 863b681c903a3299c024a7ffc5ec0b6ebab65dbd Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 14 Feb 2025 10:22:52 +0800 Subject: [PATCH 71/72] =?UTF-8?q?=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai_core_performance/ai_core_performance_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index e947b140d..e3c3defc6 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -513,19 +513,19 @@ class AICorePerformanceChecker: opti_sugg = self._OPTI_SUGGESTION.format(**opti_issue) desc["opti"] += opti_sugg if desc["opti"]: - result.add_detail(self._CUBE_PROBLEM, detail=[self._OPTI_DESC, desc["opti"]]) + result.add_detail(problem_map[op_type], detail=[self._OPTI_DESC, desc["opti"]]) for bound_issue in self.result[op_type][1]: bound_sugg = self._BOUND_SUGGESTION.format(**bound_issue) desc["bound"] += bound_sugg if desc["bound"]: - result.add_detail(self._FA_PROBLEM, detail=[self._BOUND_DESC, desc["bound"]]) + result.add_detail(problem_map[op_type], detail=[self._BOUND_DESC, desc["bound"]]) if op_type == "vector": # vector 类型没有亲和性建议 return for affinity_issue in self.result[op_type][2]: affinity_sugg = self._AFFINITY_SUGGESTION.format(**affinity_issue) desc["affinity"] += affinity_sugg if desc["affinity"]: - result.add_detail(self._VECTOR_PROBLEM, detail=[self._AFFINITY_DESC, desc["affinity"]]) + result.add_detail(problem_map[op_type], detail=[self._AFFINITY_DESC, desc["affinity"]]) def make_record(self, result: OptimizeResult): """ -- Gitee From e418fac51c15fbd29832ce6658d1e1efb4d027ff Mon Sep 17 00:00:00 2001 From: xubanxia <904691018@qq.com> Date: Fri, 14 Feb 2025 15:51:10 +0800 Subject: [PATCH 72/72] 
=?UTF-8?q?=E7=9B=AE=E5=BD=95=E8=BF=81=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../computation/ai_core_performance/__init__.py | 0 .../ai_core_performance_analyzer.py | 12 ++++++------ .../ai_core_performance_checker.py | 0 .../msprof_analyze/advisor/interface/interface.py | 2 +- .../test_ai_core_performance_advice.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/__init__.py (100%) rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py (78%) rename profiler/{ => msprof_analyze}/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py (100%) diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/__init__.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computation/ai_core_performance/__init__.py rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/__init__.py diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py similarity index 78% rename from profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py index 89b6be779..a648fb074 100644 --- a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py +++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_analyzer.py @@ -14,13 +14,13 @@ # limitations under the License. 
import logging -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \ +from profiler.msprof_analyze.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance.ai_core_performance_checker import \ AICorePerformanceChecker -from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor -from profiler.advisor.display.html.render import HTMLRender +from profiler.msprof_analyze.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.msprof_analyze.advisor.result.result import OptimizeResult +from profiler.msprof_analyze.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.msprof_analyze.advisor.display.html.render import HTMLRender logger = logging.getLogger() diff --git a/profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py similarity index 100% rename from profiler/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py rename to profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py diff --git a/profiler/msprof_analyze/advisor/interface/interface.py b/profiler/msprof_analyze/advisor/interface/interface.py index 30d9d0eef..cce2de625 100644 --- a/profiler/msprof_analyze/advisor/interface/interface.py +++ b/profiler/msprof_analyze/advisor/interface/interface.py @@ -44,7 +44,7 @@ from msprof_analyze.advisor.analyzer.schedule.gc.gc_analyzer import GcAnalyzer from msprof_analyze.advisor.analyzer.schedule.conjectured_gc.conjectured_gc_analyzer import ConjecturedGcAnalyzer from msprof_analyze.advisor.analyzer.comparison.comparison_analyzer import ComparisonAnalyzer from msprof_analyze.advisor.analyzer.schedule.fusible_ops.fusible_operator_analyzer import FusibleOperatorAnalyzer -from profiler.advisor.analyzer.computation.ai_core_performance.ai_core_performance_analyzer import \ +from profiler.msprof_analyze.advisor.analyzer.computation.ai_core_performance import \ AICorePerformanceAnalyzer logger = logging.getLogger() diff --git a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py index 61ae35d13..e45f6ea3b 100644 --- a/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py +++ b/profiler/msprof_analyze/test/ut/advisor/compute_advice/test_ai_core_performance_advice.py @@ -4,8 +4,8 @@ import shutil import stat import unittest -from profiler.advisor.interface.interface import Interface -from profiler.advisor.common.analyzer_scopes import SupportedScopes +from profiler.msprof_analyze.advisor.interface.interface import Interface +from profiler.msprof_analyze.advisor.common.analyzer_scopes import SupportedScopes class TestAICorePerformanceAdvice(unittest.TestCase): -- Gitee
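Across the patches above the checker's thresholds move out of the Python source and into rules/cn/aicore_performance.yaml and rules/en/aicore_performance.yaml, where each entry under cube_operators, fa_operators and vector_operators names a target metric, a bound label and a threshold. As a rough sketch of how such a file can be consumed (PyYAML is assumed to be available; the loader name and the example path are illustrative, not the project's actual API), the snippet below indexes every section by its target field so a caller can fetch a threshold or bound label by metric name:

import yaml  # assumption: PyYAML is available, as the YAML rule files imply

RULE_SECTIONS = ("cube_operators", "fa_operators", "vector_operators")


def load_aicore_rules(path):
    # Parse the rule file and index each section's entries by their "target"
    # metric so thresholds and bound labels can be looked up directly.
    with open(path, encoding="utf-8") as rule_file:
        raw_rules = yaml.safe_load(rule_file)
    return {
        section: {rule["target"]: rule for rule in raw_rules.get(section, [])}
        for section in RULE_SECTIONS
    }


# Hypothetical usage against the Chinese rule file shown above:
#   rules = load_aicore_rules("profiler/advisor/rules/cn/aicore_performance.yaml")
#   rules["vector_operators"]["aiv_mte2_ratio"]["threshold"]   # 0.7
#   rules["vector_operators"]["total"]["bound"]                # "vec_mte2_mte3"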