diff --git a/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py b/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py index 80a83e58698b90c658ebc59d6b4491cfb2c5f0ca..bf23de569b8d033623a5dc0f8aa1391180bab387 100644 --- a/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py +++ b/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py @@ -19,30 +19,10 @@ from profiler.prof_common.constant import Constant from compare_interface.comparison_interface import ComparisonInterface +from profiler.prof_common.additional_args_manager import AdditionalArgsManager + class OverallSummaryAdvice(AdviceBase): - advice_map = { - "Computing Time": "if you want more detailed advice please use msprof-analyze advisor computation.", - "Uncovered Communication Time": "if you want more detailed advice, please use msprof-analyze advisor schedule.", - "Free Time": "if you want more detailed advice please use msprof-analyze advisor schedule." 
-    }
-    time_name_map = {
-        "Computing Time": "computing",
-        "Uncovered Communication Time": "communication",
-        "Free Time": "free",
-        'Cube Time(Num)': 'Cube Time',
-        'Vector Time(Num)': 'Vector Time',
-        'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)',
-        'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)',
-        'Other Time': "Other Computing Time",
-        'SDMA Time(Num)': 'SDMA Time'
-    }
-    performance_time_dict = {
-        "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)',
-                           'Flash Attention Time(Backward)(Num)', 'Other Time'],
-        "Uncovered Communication Time(Wait Time)": [],
-        "Free Time": ['SDMA Time(Num)']
-    }
 
     def __init__(self, collection_path: str, kwargs: dict):
         super().__init__(collection_path)
@@ -56,6 +36,19 @@ class OverallSummaryAdvice(AdviceBase):
         self._base_data = []
         self._comparison_data = []
 
+        self._init_prompt_by_language()
+
+    def _init_prompt_by_language(self):
+        language = AdditionalArgsManager().language
+        if language == "en":
+            from profiler.advisor.display.prompt.en.overall_summary_advice_prompt import OverallSummaryAdvicePrompt
+        else:
+            from profiler.advisor.display.prompt.cn.overall_summary_advice_prompt import OverallSummaryAdvicePrompt
+
+        self.advice_map = OverallSummaryAdvicePrompt.ADVICE_MAP
+        self.time_name_map = OverallSummaryAdvicePrompt.TIME_NAME_MAP
+        self.performance_time_dict = OverallSummaryAdvicePrompt.PERFORMANCE_TIME_DICT
+
     @staticmethod
     def split_duration_and_num(time_value: str) -> tuple:
         split_data = time_value.split("s")  # time value example: 0.229s(1756)
diff --git a/profiler/advisor/analyzer/cluster/Communication_retransmission_checker.py b/profiler/advisor/analyzer/cluster/Communication_retransmission_checker.py
index c9cd81f21b6472c69c8b24e9a1f49ad3416a7baa..ce5128bccf352eae7eada6e420309b8e59a2d653 100644
--- a/profiler/advisor/analyzer/cluster/Communication_retransmission_checker.py
+++
b/profiler/advisor/analyzer/cluster/Communication_retransmission_checker.py @@ -17,11 +17,12 @@ import os from typing import Dict, List from collections import defaultdict from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.prof_common.additional_args_manager import AdditionalArgsManager -from profiler.prof_common.file_manager import FileManager +from profiler.cluster_analyse.common_func.file_manager import FileManager from profiler.advisor.dataset.cluster.hccl_collection import HcclInfo +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -100,11 +101,10 @@ class CommunicationRetransmissionChecker: """ make record for what and how to optimize """ - optimization_item = OptimizeItem("Communication retransmission analysis", self.desc, self.suggestions) + optimization_item = OptimizeItem(self.problem, self.desc, self.suggestions) result.add(OptimizeRecord(optimization_item)) - sub_table_name = \ - "Comm Retransmission Analysis" if not self.stage else f"Stage-{self.stage}: Comm Retransmission Analysis" + sub_table_name = BasePrompt.get_sub_table_name(self.problem, self.stage) result.add_detail(sub_table_name, headers=self.headers) for row in self.abnormal_rdma_list: @@ -130,7 +130,8 @@ class CommunicationRetransmissionChecker: ) syncbn_rule = FileManager.read_yaml_file(syncbn_rule_path) - self.desc = syncbn_rule.get("problem") + self.problem = syncbn_rule.get("problem") + self.desc = syncbn_rule.get("description") self.min_retransmission_time = syncbn_rule.get("min_retransmission_time") self.solutions = syncbn_rule.get("solutions") diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyzer.py b/profiler/advisor/analyzer/cluster/slow_link_analyzer.py index 
a49ba83774601a7c2b1aba8db2f1aac225cd7774..2f131509d0403d8a7ce75b23cd04e27d6af73740 100644 --- a/profiler/advisor/analyzer/cluster/slow_link_analyzer.py +++ b/profiler/advisor/analyzer/cluster/slow_link_analyzer.py @@ -23,6 +23,7 @@ from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataset from profiler.advisor.utils.utils import safe_index_value, convert_to_int +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -86,11 +87,19 @@ class SlowLinkAnalyzer(BaseAnalyzer): logger.info("The slow link (identified bottleneck) cannot provide a bottleneck \ because the analysis data is missing bandwidth information.") return - self.bottelneck += f'{link_type}: \n' \ - f' The average is {avg_bw}, \n' \ - f' while the maximum is {round(max(data_list), 3)}GB/s \n' \ - f' and the minimum is {round(min(data_list), 3)}GB/s. \n' \ - f' the difference is {round(max(data_list) - min(data_list), 3)}GB/s. \n' + language = AdditionalArgsManager().language + if language == "en": + self.bottelneck += f'{link_type}: \n' \ + f' The average is {avg_bw}, \n' \ + f' while the maximum is {round(max(data_list), 3)}GB/s \n' \ + f' and the minimum is {round(min(data_list), 3)}GB/s. \n' \ + f' the difference is {round(max(data_list) - min(data_list), 3)}GB/s. 
\n' + else: + self.bottelneck += f'{link_type}: \n' \ + f' 平均值是 {avg_bw}, \n' \ + f' 但最大值是 {round(max(data_list), 3)}GB/s ,\n' \ + f' 最小值是 {round(min(data_list), 3)}GB/s。\n' \ + f' 差距为 {round(max(data_list) - min(data_list), 3)}GB/s。 \n' def format_details(self): if not self.rank_bw_dict: diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py b/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py index 7b2311157ae7207b4415b6c386854e39e61ba03d..d96956f320dc19f6b99985546e59b40f783016ae 100644 --- a/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py @@ -21,6 +21,7 @@ from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataset from profiler.advisor.utils.utils import safe_index_value, safe_division, convert_to_int, safe_index, convert_to_float +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -30,6 +31,7 @@ class SlowRankAnalyzer(BaseAnalyzer): RANK = "rank" RATIO_THRESHOLD = 0.05 BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] + BOTTLENECK_LIST_CN = ['计算', '通信', "空闲"] dataset_cls_list = [ClusterStepTraceTimeDataset] COMPUTE = "compute(us)" FREE = "free(us)" @@ -80,16 +82,26 @@ class SlowRankAnalyzer(BaseAnalyzer): self.produce_bottleneck(self.step_trace_dict, i, mean_total_time) if not self.bottelneck: - self.bottelneck = "There is no slow rank issues" + language = AdditionalArgsManager().language + if language == "en": + self.bottelneck = "There is no slow rank issues" + else: + self.bottelneck = "没有慢节点问题" def produce_bottleneck(self, step_dict: dict, produce_type: int, mean_total_time: float): data_list = [data_tuple[produce_type] for rank_id, data_tuple in step_dict.items()] max_ratio = self.compute_max_gap_ratio(data_list, mean_total_time) if max_ratio > 
self.RATIO_THRESHOLD: - self.bottelneck += f'{self.BOTTLENECK_LIST[produce_type]} \n' \ - f' has some issues in the cluster, \n' \ - f' because the max difference of {self.BOTTLENECK_LIST[produce_type]} time \n' \ - f' has reached {round(max_ratio * mean_total_time / 1000, 3)}ms. \n' + language = AdditionalArgsManager().language + if language == "en": + self.bottelneck += f'{self.BOTTLENECK_LIST[produce_type]} \n' \ + f' has some issues in the cluster, \n' \ + f' because the max difference of {self.BOTTLENECK_LIST[produce_type]} time \n' \ + f' has reached {round(max_ratio * mean_total_time / 1000, 3)}ms. \n' + else: + self.bottelneck += f'集群中的{self.BOTTLENECK_LIST_CN[produce_type]}有问题, \n' \ + f'因为{self.BOTTLENECK_LIST_CN[produce_type]}时间的最大差距已经达到 \n' \ + f'{round(max_ratio * mean_total_time / 1000, 3)}ms。 \n' def make_record(self): """ diff --git a/profiler/advisor/analyzer/communication/alignment/byte_alignment_checker.py b/profiler/advisor/analyzer/communication/alignment/byte_alignment_checker.py index 9dcd5dda136bf3f2169f4379908840cfabd40841..b99e91df9c1e432a7ba1d2485dfda059d4c4aa56 100644 --- a/profiler/advisor/analyzer/communication/alignment/byte_alignment_checker.py +++ b/profiler/advisor/analyzer/communication/alignment/byte_alignment_checker.py @@ -18,6 +18,7 @@ from typing import List from profiler.advisor.dataset.communication.hccl_detail_dataset import HcclDetailDataset from profiler.advisor.dataset.profiling.info_collection import HcclTask from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.prof_common.additional_args_manager import AdditionalArgsManager @@ -77,12 +78,12 @@ class ByteAlignmentChecker: """ make record for what and how to optimize """ - optimization_item = OptimizeItem("byte alignment 
analysis", self.desc, self.suggestions) + optimization_item = OptimizeItem(self.problem, self.desc, self.suggestions) result.add(OptimizeRecord(optimization_item)) - sub_table_name = "Byte Alignment Analysis" if not self.stage else f"Stage-{self.stage}: " \ - f"Byte Alignment Analysis" + sub_table_name = BasePrompt.get_sub_table_name(self.problem, self.stage) result.add_detail(sub_table_name, headers=self.headers) + for hccl_op in self.abnormal_ops: result.add_detail(sub_table_name, detail=hccl_op) @@ -135,7 +136,8 @@ class ByteAlignmentChecker: ) byte_alignment_rule = FileManager.read_yaml_file(rule_path) - self.desc = byte_alignment_rule.get("problem") + self.problem = byte_alignment_rule.get("problem") + self.desc = byte_alignment_rule.get("description") self.min_size = byte_alignment_rule.get("min_size", self._MIN_SIZE) self.topk = byte_alignment_rule.get("top_num", 3) self.solutions = byte_alignment_rule.get("solutions") @@ -143,7 +145,7 @@ class ByteAlignmentChecker: raise RuntimeError("The configuration file of the byte alignment analyzer is abnormal. 
Please check.") for solution in self.solutions: for key, val in solution.items(): - self.suggestions.append(f"{key}, {val.get('desc')}") + self.suggestions.append(f"{val.get('desc')}") def _get_priority(self): if safe_division(self.abnormal_ops_dur, self.total_ops_dur) < self._LOW_PRIORITY: diff --git a/profiler/advisor/analyzer/communication/contention/bandwidth_contention_checker.py b/profiler/advisor/analyzer/communication/contention/bandwidth_contention_checker.py index b665ba2931b9af61c47551a76fc28bd95f7e9ea5..18d458737be46ef88b3d2937b1a863940bd85fb3 100644 --- a/profiler/advisor/analyzer/communication/contention/bandwidth_contention_checker.py +++ b/profiler/advisor/analyzer/communication/contention/bandwidth_contention_checker.py @@ -17,6 +17,7 @@ import os from typing import List from profiler.advisor.dataset.communication.communication_dataset import CommunicationDataset from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.prof_common.additional_args_manager import AdditionalArgsManager @@ -24,6 +25,7 @@ from profiler.prof_common.file_manager import FileManager from profiler.advisor.utils.utils import convert_to_float from profiler.advisor.dataset.cluster.hccl_collection import HcclInfo from profiler.advisor.dataset.profiling.info_collection import OpInfo +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -136,12 +138,12 @@ class BandwidthContentionChecker: """ make record for what and how to optimize """ - optimization_item = OptimizeItem("bandwidth contention analysis", self.desc, self.suggestions) + optimization_item = OptimizeItem(self.problem, self.desc, self.suggestions) result.add(OptimizeRecord(optimization_item)) - sub_table_name = "Bandwidth Contention 
Analysis" if not self.stage else f"Stage-{self.stage}: " \ - f"Bandwidth Contention Analysis" + sub_table_name = BasePrompt.get_sub_table_name(self.problem, self.stage) result.add_detail(sub_table_name, headers=self.headers) + for hccl_op in self.abnormal_sdma_list: result.add_detail(sub_table_name, detail=[hccl_op.name, round(hccl_op.dur, 4), round(hccl_op.bandwidth, 2)]) @@ -167,7 +169,8 @@ class BandwidthContentionChecker: ) contention_rule = FileManager.read_yaml_file(contention_rule_path) - self.desc = contention_rule.get("problem") + self.problem = contention_rule.get("problem") + self.desc = contention_rule.get("description") self.threshold = contention_rule.get("threshold", 0) * contention_rule.get("sdma_baseline", 0) self.contention_topk = contention_rule.get("top_num", 3) self.solutions = contention_rule.get("solutions") @@ -175,4 +178,4 @@ class BandwidthContentionChecker: raise RuntimeError("The configuration file of the bandwidth contention analyzer is abnormal. Please check.") for solution in self.solutions: for key, val in solution.items(): - self.suggestions.append(f"{key}, {val.get('desc')}") + self.suggestions.append(f"{val.get('desc')}") diff --git a/profiler/advisor/analyzer/communication/packet/packet_checker.py b/profiler/advisor/analyzer/communication/packet/packet_checker.py index 02a29c6200db0a2c627d90fb653d3fcb9b874103..c53a0f9d0b193b4bd753d958538f6e8b98a5bc0a 100644 --- a/profiler/advisor/analyzer/communication/packet/packet_checker.py +++ b/profiler/advisor/analyzer/communication/packet/packet_checker.py @@ -15,11 +15,13 @@ import logging import os from profiler.advisor.dataset.communication.communication_dataset import CommunicationDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.prof_common.additional_args_manager import AdditionalArgsManager from 
profiler.prof_common.file_manager import FileManager from profiler.advisor.utils.utils import convert_to_float +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -110,10 +112,11 @@ class PacketChecker: """ make record for what and how to optimize """ - optimization_item = OptimizeItem("Packet analysis", self.desc, self.suggestions) + optimization_item = OptimizeItem(self.problem, self.desc, self.suggestions) result.add(OptimizeRecord(optimization_item)) - sub_table_name = "Packet Analysis" if not self.stage else f"Stage-{self.stage}: Packet Analysis" + sub_table_name = BasePrompt.get_sub_table_name(self.problem, self.stage) + result.add_detail(sub_table_name, headers=self.headers) result.add_detail(sub_table_name, detail=self.small_packet_detail) @@ -138,7 +141,8 @@ class PacketChecker: ) syncbn_rule = FileManager.read_yaml_file(syncbn_rule_path) - self.desc = syncbn_rule.get("problem") + self.problem = syncbn_rule.get("problem") + self.desc = syncbn_rule.get("description") self.sdma_desc = syncbn_rule.get("sdma_problem") self.rdma_desc = syncbn_rule.get("rdma_problem") self.min_sdma_size = convert_to_float(syncbn_rule.get("min_sdma_size")) diff --git a/profiler/advisor/analyzer/communication/retransmission/communication_retransmission_checker.py b/profiler/advisor/analyzer/communication/retransmission/communication_retransmission_checker.py index 441dec5980b21d09c25f5ce8813d48ae64877554..8220ac67bc78cd4f15854dfdc636cf3139808fda 100644 --- a/profiler/advisor/analyzer/communication/retransmission/communication_retransmission_checker.py +++ b/profiler/advisor/analyzer/communication/retransmission/communication_retransmission_checker.py @@ -17,6 +17,7 @@ import os from typing import Dict, List from collections import defaultdict from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt from 
profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.prof_common.additional_args_manager import AdditionalArgsManager @@ -103,11 +104,10 @@ class CommunicationRetransmissionChecker: """ make record for what and how to optimize """ - optimization_item = OptimizeItem("Communication retransmission analysis", self.desc, self.suggestions) + optimization_item = OptimizeItem(self.problem, self.desc, self.suggestions) result.add(OptimizeRecord(optimization_item)) - sub_table_name = \ - "Comm Retransmission Analysis" if not self.stage else f"Stage-{self.stage}: Comm Retransmission Analysis" + sub_table_name = BasePrompt.get_sub_table_name(self.problem, self.stage) result.add_detail(sub_table_name, headers=self.headers) for row in self.abnormal_rdma_list: @@ -134,7 +134,8 @@ class CommunicationRetransmissionChecker: ) syncbn_rule = FileManager.read_yaml_file(syncbn_rule_path) - self.desc = syncbn_rule.get("problem") + self.problem = syncbn_rule.get("problem") + self.desc = syncbn_rule.get("description") self.min_retransmission_time = syncbn_rule.get("min_retransmission_time") self.solutions = syncbn_rule.get("solutions") diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py index f42b9514782e977c56a0f7776627beddbdebcd60..e5e00142fb7b6f4116e4382aefe55256007b5dca 100644 --- a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py @@ -19,6 +19,7 @@ from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.config.config import Config from profiler.advisor.utils.utils import convert_to_float +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() 
@@ -79,12 +80,6 @@ class AICoreFreqChecker:
         if not self.ai_core_freq_issues:
             return
 
-        self.desc = (f"{len(self.decrease_freq_ops)} operators are found during frequency reduction, and the reduction "
-                     f"ratio is larger than {self.DECREASE_FREQ_RATIO}.")
-        if self.rank:
-            self.desc = f"For rank {self.rank}, " + self.desc.lower()
-        self.suggestions = "Please check the temperature or max power of your machine."
-
     def make_record(self, result: OptimizeResult):
         """
         make record for what and how to optimize
@@ -92,11 +87,21 @@ class AICoreFreqChecker:
         if not self.ai_core_freq_issues:
             return self.ai_core_freq_issues
 
-        sheet_name = "AI Core Frequency"
+        language = AdditionalArgsManager().language
+        if language == "en":
+            from profiler.advisor.display.prompt.en.ai_core_freq_prompt import AICoreFreqPrompt
+        else:
+            from profiler.advisor.display.prompt.cn.ai_core_freq_prompt import AICoreFreqPrompt
+
+        problem = AICoreFreqPrompt.PROBLEM
         if self.rank is not None:
-            sheet_name = f"rank {self.rank} AI Core Frequency".capitalize()
+            problem += AICoreFreqPrompt.RANK_ID.format(self.rank)
+
+        self.desc = AICoreFreqPrompt.DESCRIPTION.format(len(self.decrease_freq_ops), self.DECREASE_FREQ_RATIO)
+        if self.rank is not None:
+            self.desc = AICoreFreqPrompt.RANK_DESCRIPTION.format(self.rank) + self.desc.lower()
 
-        optimization_item = OptimizeItem(sheet_name, self.desc, [self.suggestions])
+        optimization_item = OptimizeItem(problem, self.desc, [AICoreFreqPrompt.SUGGESTION])
         result.add(OptimizeRecord(optimization_item))
 
         self.headers = [
@@ -108,10 +113,10 @@ class AICoreFreqChecker:
             "Max frequency",
             "Min frequency",
         ]
-        result.add_detail(sheet_name, headers=self.headers)
+        result.add_detail(problem, headers=self.headers)
 
         for row in self.decrease_freq_ops:
-            result.add_detail(sheet_name, detail=row)
+            result.add_detail(problem, detail=row)
         return True
 
     def make_render(self, html_render, add_render_list=True, **kwargs):
diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py
b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py index e5cf1058e6f8dcf40f0cabfb3d393b8ec844c1b7..752c5b38ce352dbbca4d610fcae0fa1c9b88783d 100644 --- a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -28,10 +28,7 @@ from profiler.prof_common.constant import Constant class AicpuChecker(OperatorChecker): _CHECKER = "aicpu operator" - _PROBLEM = "AICPU operator" _MIN_TASK_DURATION = 20 - _description = f"Some operators and task duration exceed {_MIN_TASK_DURATION} us, such as :\n" - _SUGGESTION: List[str] = ["Modify code to avoid aicpu operator"] STACK_INFO_ITEMS = "stack_info" SUGGESTION_INFO_ITEMS = "suggestions" _ITEMS = [ @@ -46,10 +43,11 @@ class AicpuChecker(OperatorChecker): self.load_aicpu_rules() self.total_task_duration = 0.0 self.aicpu_task_duration = 0.0 + self.double_suggestion = None def load_aicpu_rules(self): language = AdditionalArgsManager().language - rule_path = "rules/aicpu_rules.yaml" + rule_path = "rules/" + language + "/aicpu_rules.yaml" if not os.path.isabs(rule_path): rule_path = os.path.join(os.path.dirname(__file__), "../../../", rule_path) @@ -58,6 +56,10 @@ class AicpuChecker(OperatorChecker): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) self.aicpu_rules = FileManager.read_yaml_file(rule_path) + self._PROBLEM = self.aicpu_rules.get("problem") + self._description = self.aicpu_rules.get("description").format(self._MIN_TASK_DURATION) + self._SUGGESTION = [self.aicpu_rules.get("suggestion")] + self.double_suggestion = self.aicpu_rules.get("double_suggestion") self.filter_aicpu_rules(self.aicpu_rules) for checker_name, check_rule in self.aicpu_rules.items(): if not isinstance(check_rule, (list, dict,)): @@ -155,8 +157,7 @@ class AicpuChecker(OperatorChecker): and op.op_name not in double_type_ai_cpu_operator): double_type_ai_cpu_operator.append(op.op_name) if bool(double_type_ai_cpu_operator): - 
self._SUGGESTION.append("Try to convert double type operator to float, such as {}".format( - ",".join(double_type_ai_cpu_operator))) + self._SUGGESTION.append(self.double_suggestion.format(",".join(double_type_ai_cpu_operator))) return True def make_render(self, html_render, record, add_render_list=True, **kwargs): diff --git a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py index 18b46b5176d4c02e0f0362b5de4b237338211da4..6b83ef88e951829858bf034057b5ddf064dbd1cd 100644 --- a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py +++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py @@ -19,6 +19,7 @@ from profiler.advisor.analyzer.computation.operator_checker import OperatorCheck from profiler.prof_common.constant import Constant from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -26,15 +27,29 @@ logger = logging.getLogger() class BlockDimChecker(OperatorChecker): _SUGGESTION: List[str] = [] _CHECKER = "block dim" - _PROBLEM = "block dim" _aicore_num = 0 _aiv_num = 0 - _description = "some operator does not make full use of {} ai core" _ITEMS = [ "op_name", "op_type", "task_type", "task_duration", "income", "block_dim", "mix_block_dim", "input_shapes", "input_data_types", "input_formats", "output_shapes", "output_data_types", "output_formats" ] + def __init__(self, cann_version): + super(BlockDimChecker, self).__init__(cann_version=cann_version) + self._init_prompt_by_language() + + def _init_prompt_by_language(self): + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.block_dim_prompt import BlockDimPrompt + else: + from profiler.advisor.display.prompt.cn.block_dim_prompt import BlockDimPrompt + + 
self._PROBLEM = BlockDimPrompt.PROBLEM + self._description = BlockDimPrompt.DESCRIPTION + self.aiv_num_desc = BlockDimPrompt.AIV_NUM_DESCRIPTION + self.top_duration_op_desc = BlockDimPrompt.TOP_DURATION_OP_DESCRIPTION + def pre_check(self, profiling_data) -> bool: return not self.is_dynamic_shape(profiling_data) @@ -82,11 +97,11 @@ class BlockDimChecker(OperatorChecker): self._aiv_num = int(Config().get_config("aiv_num")) except ValueError as e: logger.warning("get aiv_num failed, please check info.json: %s", e) + self._description = self._description.format(self._aicore_num) if self._aiv_num: - self._description += f" or {self._aiv_num} ai vector core" - self._description += f";\n Top-{OperatorChecker._MAX_TUNE_OP_NUM} operator of " \ - "task duration are as follows:\n" + self._description += self.aiv_num_desc.format(self._aiv_num) + self._description += self.top_duration_op_desc.format(OperatorChecker._MAX_TUNE_OP_NUM) return True def _check_operator(self, op_info) -> bool: diff --git a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py index 5e2ee2251620bc5884339d833da971471fb00154..4aeb7a43286ee72d3f9ed80fc84716cc54cc8179 100644 --- a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py +++ b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py @@ -16,6 +16,7 @@ import logging from typing import List from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.prof_common.additional_args_manager import AdditionalArgsManager from profiler.prof_common.constant import Constant from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset @@ -27,17 +28,27 @@ logger = logging.getLogger() class OperatorBoundChecker(OperatorChecker): _MIN_TASK_DURATION = 20 # min task duration 20us _CHECKER = "operator no bound" - _PROBLEM = "operator no 
bound" _SUGGESTION: List[str] = [] - _description = ( - f"There is no mte, cube, vector, scalar ratio is more than {to_percent(Config().operator_bound_ratio)};\n" + - f"Top task duration operators need to be tuned are as follows: \n") _ITEMS = [ "op_name", "op_type", "task_type", "task_duration", "vec_ratio", "mac_ratio", "scalar_ratio", "mte1_ratio", "mte2_ratio", "mte3_ratio", "block_dim", "input_shapes", "input_data_types", "input_formats", "output_shapes", "output_data_types", "output_formats" ] + def __init__(self, cann_version) -> None: + super().__init__(cann_version=cann_version) + self._init_prompt_by_language() + + def _init_prompt_by_language(self): + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.operator_bound_prompt import OperatorBoundPrompt + else: + from profiler.advisor.display.prompt.cn.operator_bound_prompt import OperatorBoundPrompt + + self._PROBLEM = OperatorBoundPrompt.PROBLEM + self._description = OperatorBoundPrompt.DESCRIPTION.format(to_percent(Config().operator_bound_ratio)) + def pre_check(self, profiling_data) -> bool: return not self.is_dynamic_shape(profiling_data) diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py index 4ca563e7fd99523723553b7e7c1a2b8d4b4cac3b..d0702489c33a8731db6c3ecbb53487d294a15f9e 100644 --- a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py +++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py @@ -14,31 +14,42 @@ # limitations under the License. 
import copy import logging +import os from typing import List from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.config.config import Config from profiler.advisor.dataset.profiling.info_collection import OpInfo from profiler.advisor.result.item import OptimizeItem, StatisticsItem, OptimizeRecord +from profiler.prof_common.additional_args_manager import AdditionalArgsManager +from profiler.prof_common.file_manager import FileManager logger = logging.getLogger() class DynamicShapeChecker(OperatorChecker): - ENABLE_COMPILED_SUGGESTION = "1. Please try to set environment by execute `export HOST_CACHE_CAPACITY=20`.\n." \ - "2. Please place the following code at the entrance of the python script to disable jit compile.\n " \ - "Code: `torch_npu.npu.set_compile_mode(jit_compile=False);\n " \ - "torch_npu.npu.config.allow_internal_format = False`.\n" - _SUGGESTION: List[str] = [ENABLE_COMPILED_SUGGESTION] _CHECKER = "dynamic shape operator" - _PROBLEM = "Dynamic shape operator" - _description = f"Found all operators are dynamic shape" _op_list: List[OpInfo] = [] _tune_op_list: List[str] = [] # record op name to be tuned, and save to tune_ops_file.cfg _op_views: List = [] def __init__(self, cann_version) -> None: super().__init__(cann_version=cann_version) + self._init_prompt_by_language() + + def _init_prompt_by_language(self): + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.dynamic_shape_prompt import DynamicShapePrompt + else: + from profiler.advisor.display.prompt.cn.dynamic_shape_prompt import DynamicShapePrompt + + self.rank_id = DynamicShapePrompt.RANK_ID + self._PROBLEM = DynamicShapePrompt.PROBLEM + self._description = DynamicShapePrompt.DESCRIPTION + self.enable_compiled_suggestion = DynamicShapePrompt.ENABLE_COMPILED_SUGGESTION + self._SUGGESTION = [DynamicShapePrompt.ENABLE_COMPILED_SUGGESTION] + self.release_suggestion = 
DynamicShapePrompt.RELEASE_SUGGESTION def check(self, profiling_data) -> bool: return self.is_dynamic_shape(profiling_data) @@ -47,9 +58,8 @@ class DynamicShapeChecker(OperatorChecker): """ make record for what and how to optimize """ - if rank is not None: - self._PROBLEM = f"rank {rank} ".capitalize() + self._PROBLEM.lower() + self._PROBLEM = self.rank_id.format(rank) + self._PROBLEM.lower() optimization_item = OptimizeItem( self._PROBLEM, self._description, @@ -69,9 +79,8 @@ class DynamicShapeChecker(OperatorChecker): release_suggestion_list = [] for suggestion in optimization_item.suggestion: release_suggestion = copy.deepcopy(suggestion) - if release_suggestion == DynamicShapeChecker.ENABLE_COMPILED_SUGGESTION: - release_suggestion += \ - f"for details please refer to link : LINK" + if release_suggestion == self.enable_compiled_suggestion: + release_suggestion += self.release_suggestion.format(Config().enable_compiled_tune_url) release_suggestion_list.append(release_suggestion.replace('\n', '
')) format_result = {"record": record.__dict__, "suggestion": '
'.join(release_suggestion_list)} return format_result diff --git a/profiler/advisor/analyzer/computation/operator_checker.py b/profiler/advisor/analyzer/computation/operator_checker.py index a58fc0d895622374a3f68e0fc07a4ef34bace36d..e70b13b18523b09334ed52868d037d3d26b47a45 100644 --- a/profiler/advisor/analyzer/computation/operator_checker.py +++ b/profiler/advisor/analyzer/computation/operator_checker.py @@ -25,6 +25,7 @@ from profiler.advisor.dataset.profiling.info_collection import OpInfo from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.result.item import OptimizeItem, StatisticsItem, OptimizeRecord from profiler.advisor.utils.utils import safe_division, convert_to_float +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -43,19 +44,27 @@ class OperatorChecker(VersionControl): _SUGGESTION: List[str] = [] SKIP_CHECK_MSG = "Skip %s checker because of not containing %s" _tune_op_info_list: List[OpInfo] = [] - PyTorch_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE, such as:\n" \ - f"'aoe --job_type=2 --model_path=$user_dump_path " \ - f"--tune_ops_file={Config().tune_ops_file}'\n" - MSLite_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE in mindspore lite framework, such as:\n" \ - f"converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \ - f"--modelFile=$user_model.onnx --outputFile=user_model " \ - f"--configFile=./config.txt\n" def __init__(self, cann_version: str): self.cann_version = cann_version self._op_list: List[OpInfo] = [] self._tune_op_list: List[str] = [] + self._init_prompt_by_language() + + def _init_prompt_by_language(self): + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.operator_prompt import OperatorPrompt + else: + from profiler.advisor.display.prompt.cn.operator_prompt import OperatorPrompt + + self.rank_id = OperatorPrompt.RANK_ID + 
self.pytorch_op_tune_suggestion = OperatorPrompt.PYTORCH_OPERATOR_TUNE_SUGGESTION + self.mslite_op_tune_suggestion = OperatorPrompt.MSLITE_OPERATOR_TUNE_SUGGESTION + self.pytorch_release_suggestion = OperatorPrompt.PYTORCH_RELEASE_SUGGESTION + self.mslite_release_suggestion = OperatorPrompt.MSLITE_RELEASE_SUGGESTION + @staticmethod def get_ratio(op_info: OpInfo, attr: str) -> float: if not op_info.has_attr(attr): @@ -116,9 +125,8 @@ class OperatorChecker(VersionControl): :param profiling_data: profiling data :return: optimize record """ - if rank is not None: - self._PROBLEM = f"rank {rank} ".capitalize() + self._PROBLEM.lower() + self._PROBLEM = self.rank_id.format(rank) + self._PROBLEM.lower() task_duration_list = [float(op_info.get_attr("task_duration")) for op_info in self._op_list @@ -127,11 +135,13 @@ class OperatorChecker(VersionControl): total_task_duration = profiling_data.op_summary.get_total_task_duration() count = len(task_duration_list) statistics_item = StatisticsItem(total_task_duration, total_cost_time, count, self.get_incomes()) + optimization_item = OptimizeItem( self._PROBLEM, self._get_description(self._description, self.get_op_type_list(self._op_list)[:self._MAX_TUNE_OP_NUM]), self._SUGGESTION ) + return OptimizeRecord(optimization_item, statistics_item) def _get_description(self, description, op_type_list=None): @@ -196,17 +206,12 @@ class OperatorChecker(VersionControl): release_suggestion_list = [] for suggestion in optimization_item.suggestion: release_suggestion = copy.deepcopy(suggestion) - if release_suggestion == OperatorChecker.PyTorch_OPERATOR_TUNE_SUGGESTION: - release_suggestion += \ - (f"for details please refer to link : LINK") - elif release_suggestion == OperatorChecker.MSLite_OPERATOR_TUNE_SUGGESTION: - release_suggestion += \ - (f"\nThe config file for MSLite AOE usage is as follows:\n" \ - f"[ascend_context]\n" \ - f"aoe_mode=\"operator tuning\"\n" \ - f"--tune_ops_file={Config().tune_ops_file}\n" - f"\nFor details please 
refer to link : LINK") + if release_suggestion == self.pytorch_op_tune_suggestion: + release_suggestion += (self.pytorch_release_suggestion.format(Config().pytorch_aoe_operator_tune_url)) + elif release_suggestion == self.mslite_op_tune_suggestion: + release_suggestion += (self.mslite_release_suggestion.format( + Config().tune_ops_file, Config().mslite_infer_aoe_operator_tune_url)) + release_suggestion_list.append(release_suggestion.replace('\n', '
')) format_result = { "record": record.__dict__, @@ -321,10 +326,11 @@ class OperatorChecker(VersionControl): return details def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: - if profiling_data.prof_type == EnumParamsParser().profiling_type.ascend_pytorch_profiler: - self._SUGGESTION.append(self.PyTorch_OPERATOR_TUNE_SUGGESTION) - elif profiling_data.prof_type == EnumParamsParser.profiling_type.mslite: - self._SUGGESTION.append(self.MSLite_OPERATOR_TUNE_SUGGESTION) + language = AdditionalArgsManager().language + if profiling_data.PROF_TYPE == EnumParamsParser().profiling_type.ascend_pytorch_profiler: + self._SUGGESTION.append(self.pytorch_op_tune_suggestion) + elif profiling_data.PROF_TYPE == EnumParamsParser().profiling_type.mslite: + self._SUGGESTION.append(self.mslite_op_tune_suggestion) def _check_data(self, profiling_data): return True diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py index 2cfde931a6116db41f1ed3bec2f17f64cd88ddeb..7c9218c4af0694c308d23fb3ca9eb2f69e253ff8 100644 --- a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py @@ -23,6 +23,7 @@ from profiler.advisor.common.graph.graph import Graph from profiler.advisor.common.graph.graph_parser import QueryGraphParser from profiler.advisor.dataset.graph_dataset import GraphDataset from profiler.advisor.common.graph.graph_match import find_isomorphisms +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -180,11 +181,18 @@ class GraphFusionRules: if not self.candidates: return + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.graph_fusion_prompt import GraphFusionPrompt + else: + from profiler.advisor.display.prompt.cn.graph_fusion_prompt import GraphFusionPrompt + optimization_item = 
OptimizeItem( - "fusion issue", - f"Found {len(self.candidates)} fusion issues", - ["Check fusion issues detail in mstt_advisor*.html"] + GraphFusionPrompt.PRIBLEM, + GraphFusionPrompt.DESCRIPTION.format(len(self.candidates)), + [GraphFusionPrompt.SUGGESTION] ) + total_time = 0.0 for candidate in self.task_duration_list: for duration in candidate: diff --git a/profiler/advisor/analyzer/overall/environment_variable_checker.py b/profiler/advisor/analyzer/overall/environment_variable_checker.py index 47cf3a5d25a5e4b730a0fe0d2defa473e186812b..f3940e53e6c16d1f12accce10b80c01fe42b2b2e 100644 --- a/profiler/advisor/analyzer/overall/environment_variable_checker.py +++ b/profiler/advisor/analyzer/overall/environment_variable_checker.py @@ -22,6 +22,7 @@ from profiler.advisor.result.item import OptimizeRecord from profiler.advisor.common.analyzer_scopes import SupportedScopes from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.utils.utils import convert_to_int +from profiler.prof_common.additional_args_manager import AdditionalArgsManager class EnvironmentVariabelChecker: @@ -82,18 +83,22 @@ class EnvironmentVariabelChecker: def make_record(self, result: OptimizeResult): if not self.env_suggest_csv: return - desc = f"Describe and suggest the optimal environment variable settings" - suggestion = "Please set the optimal environment variable" + + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.environment_variable_prompt import EnvironmentVariablePrompt + else: + from profiler.advisor.display.prompt.cn.environment_variable_prompt import EnvironmentVariablePrompt optimization_item = OptimizeItem( - SupportedScopes.ENVIRONMENT_VARIABLE_ANALYSIS, - desc, - [suggestion] + EnvironmentVariablePrompt.PRIBLEM, + EnvironmentVariablePrompt.DESCRIPTION, + [EnvironmentVariablePrompt.SUGGESTION] ) result.add(OptimizeRecord(optimization_item)) - 
result.add_detail(SupportedScopes.ENVIRONMENT_VARIABLE_ANALYSIS, headers=self.HEADERS) + result.add_detail(EnvironmentVariablePrompt.PRIBLEM, headers=self.HEADERS) for env_suggest in self.env_suggest_csv: - result.add_detail(SupportedScopes.ENVIRONMENT_VARIABLE_ANALYSIS, detail=env_suggest) + result.add_detail(EnvironmentVariablePrompt.PRIBLEM, detail=env_suggest) def make_render(self, html_render: HTMLRender): if not self.env_suggest_html: diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index 143eb854be2f90e9b0325ace537cc3961b54d747..3c69526941e617e23b847f5991abdce6d6d4ef2a 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -21,41 +21,10 @@ from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface from profiler.prof_common.constant import Constant +from profiler.prof_common.additional_args_manager import AdditionalArgsManager + class OverallSummaryAnalyzer(BaseAnalyzer): - OVERALL_SUMMARY_ANALYZER = "overall summary" - advice_map = { - "Computing Time": "if you want more detailed advice please go to mstt_advisor_*.html", - "Uncovered Communication Time": "if you want more detailed advice please go to mstt_advisor_*.html", - "Free Time": "if you want more detailed advice please go to mstt_advisor_*.html" - } - time_name_map = { - "Computing Time": "computing", - "Uncovered Communication Time": "communication", - "Free Time": "free", - 'Cube Time(Num)': 'Cube Time', - 'Vector Time(Num)': 'Vector Time', - 'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)', - 'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)', - 'Other Time': "Other Computing Time", - 'SDMA Time(Num)': 'SDMA Time' - } 
- performance_time_dict = { - "Computing Time": "computing_time_ms", - " -- Flash Attention": "fa_time_ms", - " -- Conv": "conv_time_ms", - " -- Matmul": "matmul_time_ms", - " -- Vector": "vector_time_ms", - " -- SDMA(Tensor Move)": "tensor_move_time_ms", - " -- Other Cube": "other_cube_time_ms", - "Uncovered Communication Time": "uncovered_communication_time_ms", - " -- Wait": "wait_time_ms", - " -- Transmit": "transmit_time_ms", - "Free Time": "free_time_ms", - " -- SDMA": "sdma_time_ms", - " -- Free": "free_ms", - "E2E Time": "e2e_time_ms" - } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): profile_path = get_profile_path(collection_path) @@ -73,6 +42,20 @@ class OverallSummaryAnalyzer(BaseAnalyzer): self.bottleneck_str = "" self.over_summary_analysis = {} + self._init_prompt_by_language() + + def _init_prompt_by_language(self): + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.overall_summary_analyzer_prompt import OverallSummaryAnalyzePrompt + else: + from profiler.advisor.display.prompt.cn.overall_summary_analyzer_prompt import OverallSummaryAnalyzePrompt + + self.over_summary_analyzer = OverallSummaryAnalyzePrompt.OVERALL_SUMMARY_ANALYZER + self.advice_map = OverallSummaryAnalyzePrompt.ADVICE_MAP + self.time_name_map = OverallSummaryAnalyzePrompt.TIME_NAME_MAP + self.performance_time_dict = OverallSummaryAnalyzePrompt.PERFORMANCE_TIME_DICT + @staticmethod def calculate_ratio(dividend, divisor): if not divisor: @@ -81,11 +64,19 @@ class OverallSummaryAnalyzer(BaseAnalyzer): @staticmethod def get_time_category_dict(overall_dict: dict): - time_category_dict = { - "Computing Time": round(overall_dict.get('computing_time_ms', 0.0), 3), - "Uncovered Communication Time": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), - "Free Time": round(overall_dict.get('free_time_ms', 0.0), 3) - } + language = AdditionalArgsManager().language + if language == 
"en": + time_category_dict = { + "Computing Time": round(overall_dict.get('computing_time_ms', 0.0), 3), + "Uncovered Communication Time": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), + "Free Time": round(overall_dict.get('free_time_ms', 0.0), 3) + } + else: + time_category_dict = { + "计算时长": round(overall_dict.get('computing_time_ms', 0.0), 3), + "未被掩盖的通信时长": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), + "空闲时长": round(overall_dict.get('free_time_ms', 0.0), 3) + } return time_category_dict def path_check(self): @@ -111,14 +102,25 @@ class OverallSummaryAnalyzer(BaseAnalyzer): if not overall_data: return e2e_time = round(sum([data for data in overall_data.values()]), 3) - overall_bottleneck = f"The Model E2E Time is {e2e_time}ms.\n" + + language = AdditionalArgsManager().language + if language == "en": + overall_bottleneck = f"The Model E2E Time is {e2e_time}ms.\n" + else: + overall_bottleneck = f"模型E2E的时间是{e2e_time}ms。\n" comparison_bottleneck = "" for time_type, time_value in overall_data.items(): # add overall bottleneck - overall_bottleneck += f" -- {time_type} is {time_value}ms\n" + if language == "en": + overall_bottleneck += f" -- {time_type} is {time_value}ms\n" + else: + overall_bottleneck += f" -- {time_type}是{time_value}ms\n" if time_type == "Free Time" and self._is_minimal_profiling and self.calculate_ratio(time_value, e2e_time) > 0.1: - overall_bottleneck += "percentage of free time exceed the threshold 10%." + if language == "en": + overall_bottleneck += "percentage of free time exceed the threshold 10%." 
+ else: + overall_bottleneck += "空闲时间的百分比超过了阈值的10%。" if not self._has_benchmark_profiling: continue # add comparison bottleneck @@ -127,7 +129,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): ).get(time_type) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) - comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" + if language == "en": + comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" + else: + comparison_bottleneck += f"{time_type}超过了基线{ratio}。\n" self.cur_bottleneck["overall_data"] = overall_bottleneck if comparison_bottleneck: self.cur_bottleneck["comparison_result"] = comparison_bottleneck @@ -202,18 +207,18 @@ class OverallSummaryAnalyzer(BaseAnalyzer): if not self.bottleneck_str and not self.cur_advices: return optimization_item = OptimizeItem( - OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + self.over_summary_analyzer, self.bottleneck_str, self.cur_advices ) self.result.add(OptimizeRecord(optimization_item)) self.result.add_detail( - OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + self.over_summary_analyzer, headers=self.over_summary_analysis["headers"] ) for data in self.over_summary_analysis["data"]: - self.result.add_detail(OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, detail=data) + self.result.add_detail(self.over_summary_analyzer, detail=data) def make_render(self): if not self.bottleneck_str and not self.cur_advices: @@ -226,7 +231,7 @@ class OverallSummaryAnalyzer(BaseAnalyzer): "details": [self.over_summary_analysis] } self.html_render.render_template(key="overall", - title=OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + title="Overall Summary", template_dir="templates", template_name="cluster_analysis.html", cann_version=self.cann_version, diff --git a/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py b/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py index 
a722ee0e1eb7433d48a311e0b4d8d56c3df12f02..3ee132d30dbf83d2ca06da850e3d4a8e6eed9897 100644 --- a/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py +++ b/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py @@ -23,6 +23,7 @@ from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -80,17 +81,24 @@ class OpDispatchAnalyzer(BaseAnalyzer): """ if not self._op_compile or len(self._issues_record) <= 0: return - desc = f"Found {self._op_compile.total_count} operator compile issues." - suggestion = ("Please place the following code at the entrance of the python script to disable jit compile. " \ - "Code: `torch_npu.npu.set_compile_mode(jit_compile=False); " - "torch_npu.npu.config.allow_internal_format = False`") - self.optimization_item.append(OptimizeItem("Operator dispatch", desc, [suggestion])) + + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.timeline_op_dispatch_prompt import TimelineOpDispatchPrompt + else: + from profiler.advisor.display.prompt.cn.timeline_op_dispatch_prompt import TimelineOpDispatchPrompt + + self.optimization_item.append(OptimizeItem( + TimelineOpDispatchPrompt.PRIBLEM, + TimelineOpDispatchPrompt.DESCRIPTION.format(self._op_compile.total_count), + [TimelineOpDispatchPrompt.SUGGESTION])) for optimization in self.optimization_item: result.add(OptimizeRecord(optimization)) + record_title = ["Issues", "op name", "counts", "total time"] - result.add_detail('operator dispatch', headers=record_title) + result.add_detail(TimelineOpDispatchPrompt.PRIBLEM, headers=record_title) for op_info in self._issues_record: - 
result.add_detail('operator dispatch', detail=op_info) + result.add_detail(TimelineOpDispatchPrompt.PRIBLEM, detail=op_info) def make_render(self, html_render, **kwargs): issues = [] diff --git a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py index 098fab15316a9bf541c0e68e8d3838d52723d5a7..3a2ad9c7ab10f3b295eadb33e2c99cb2fff8fc50 100644 --- a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py +++ b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py @@ -29,6 +29,7 @@ from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.utils.utils import format_timeline_result from profiler.advisor.common.timeline.fusion_ops_db import init_timeline_ops_db from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -94,36 +95,35 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer): """ if not self.matched_op_stacks: return + language = AdditionalArgsManager().language + if language == "en": + from profiler.advisor.display.prompt.en.fusion_ops_prompt import FusionOpsPrompt + else: + from profiler.advisor.display.prompt.cn.fusion_ops_prompt import FusionOpsPrompt - desc = f"Found {len(format_timeline_result(self.matched_op_stacks))} apis to be replaced" \ - f" based on the runtime env cann-{self.cann_version} and torch-{self.profiling_version}" - suggestion = "Please replace training api according to sub table 'Affinity training api'" + desc = FusionOpsPrompt.DESCRIPTION.format(self.cann_version, self.torch_version, + len(format_timeline_result(self.matched_op_stacks))) + suggestion = FusionOpsPrompt.SUGGESTION if self.empty_stacks: - desc += ", but with no stack" - suggestion = Constant.TIMELINE_EMPTY_STACKS_PROMPT.format( + desc += FusionOpsPrompt.EMPTY_STACK_DESCRIPTION + 
suggestion = FusionOpsPrompt.EMPTY_STACKS_SUGGESTION.format( timeline_profiling_doc_url=Config().timeline_with_stack_doc_url ) - sheet_name = "Affinity apis" - optimization_item = OptimizeItem( - sheet_name, - desc, - [suggestion] - ) + optimization_item = OptimizeItem(FusionOpsPrompt.PROBLEM, desc, [suggestion]) self.result.add(OptimizeRecord(optimization_item)) - record_title = ["Affinity API", "Code stacks", "Stack called counts"] - self.result.add_detail(sheet_name, headers=record_title) + self.result.add_detail(FusionOpsPrompt.PROBLEM, headers=record_title) for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items(): if not stacks_info: detail = [api_name, "null", "null"] - self.result.add_detail(sheet_name, detail=detail) + self.result.add_detail(FusionOpsPrompt.PROBLEM, detail=detail) else: for stack in stacks_info: detail = [api_name, *stack] - self.result.add_detail(sheet_name, detail=detail) + self.result.add_detail(FusionOpsPrompt.PROBLEM, detail=detail) def make_render(self, **kwargs): rank = kwargs.get("rank") diff --git a/profiler/advisor/analyzer/schedule/fusion_ops/timeline_api_stack_checker.py b/profiler/advisor/analyzer/schedule/fusion_ops/timeline_api_stack_checker.py index 72c302e1efc8c9912a55637028c0d2e73211ba71..17c494dbdc8688430bd696efe003b4287573621f 100644 --- a/profiler/advisor/analyzer/schedule/fusion_ops/timeline_api_stack_checker.py +++ b/profiler/advisor/analyzer/schedule/fusion_ops/timeline_api_stack_checker.py @@ -21,6 +21,7 @@ from profiler.advisor.dataset.timeline_event_dataset import ComputationAnalysisD from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.utils.utils import get_analyze_processes, ParallelJob +from profiler.prof_common.additional_args_manager import AdditionalArgsManager logger = logging.getLogger() @@ -90,17 +91,33 @@ class OpStackFinder: if not self._stack_record: return - desc = f"Found 
{len(self._stack_record)} called stacks for" - if self.op_name and self.task_type: - desc += f" operators with name '{self.op_name}' with task type '{self.task_type}'" - elif self.op_name and not self.task_type: - desc += f" operators with name '{self.op_name}'" - elif self.task_type and not self.op_name: - desc += f" operators with task type '{self.task_type}'" + language = AdditionalArgsManager().language + if language == "en": + desc = f"Found {len(self._stack_record)} called stacks for" + if self.op_name and self.task_type: + desc += f" operators with name '{self.op_name}' with task type '{self.task_type}'" + elif self.op_name and not self.task_type: + desc += f" operators with name '{self.op_name}'" + elif self.task_type and not self.op_name: + desc += f" operators with task type '{self.task_type}'" + else: + desc += " all operators" + + suggestion = f"Please use command 'ma-advisor analyze profiling' to analyze operators" else: - desc += " all operators" + desc = f"发现以下{len(self._stack_record)}个算子的调用堆栈," + if self.op_name and self.task_type: + desc += f"任务类型为'{self.task_type}'的'{self.op_name}'算子" + elif self.op_name and not self.task_type: + desc += f"'{self.op_name}'算子" + elif self.task_type and not self.op_name: + desc += f"算子类型为'{self.task_type}'" + else: + desc += "包括全部算子" + + suggestion = f"请用命令'ma-advisor analyze profiling'分析算子" + - suggestion = f"Please use command 'ma-advisor analyze profiling' to analyze operators" optimization_item = OptimizeItem( "Operator stacks", desc, diff --git a/profiler/advisor/analyzer/schedule/gc/gc_checker.py b/profiler/advisor/analyzer/schedule/gc/gc_checker.py index bd45ddb43e129eff37b028656662d7f7d60cfdd5..eb7dea746a73dbbcc72863a5384fedd840a0d777 100644 --- a/profiler/advisor/analyzer/schedule/gc/gc_checker.py +++ b/profiler/advisor/analyzer/schedule/gc/gc_checker.py @@ -13,12 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +import math import os from profiler.advisor.dataset.timeline_event_dataset import ScheduleAnalysisDataset +from profiler.advisor.display.prompt.base_prompt import BasePrompt from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.utils.utils import convert_to_float, convert_to_int +from profiler.advisor.utils.utils import convert_to_float, convert_to_int, safe_division from profiler.prof_common.additional_args_manager import AdditionalArgsManager from profiler.prof_common.constant import Constant from profiler.prof_common.file_manager import FileManager @@ -34,6 +36,7 @@ class GcChecker: self.optimization_item = [] self.gc_issues = False self.gc_problem_with_count = "" + self.gc_problem_with_free = "" self.desc = "" self.suggestions = [] self.solutions = None @@ -52,8 +55,17 @@ class GcChecker: self.rank = rank self.stage = stage - # 当用户cann和pta版本不支持采集gc信息时,跳过该分析器 + # 当用户cann和pta版本不支持采集gc信息时,通过timeline中的free和cann层acl事件 综合判断是否可能存在free if not event_dataset.gc_events: + acl_events = getattr(event_dataset, "acl_events", []) + large_free_events = getattr(event_dataset, "large_free_events", []) + # 如果acl_events为空,则没有采集cann信息,不基于free+acl events进行gc分析 + if acl_events and large_free_events: + free_event = self.get_free_events_include_gc(large_free_events, acl_events) + if not free_event: + return + self.desc = self.gc_problem_with_free.format(free_duration_time=free_event.dur) + return for gc_event in event_dataset.gc_events: @@ -74,14 +86,15 @@ class GcChecker: if not self.gc_issues: return - self.optimization_item.append(OptimizeItem("GC", self.desc, self.suggestions)) + self.optimization_item.append(OptimizeItem(self.problem, self.desc, self.suggestions)) for optimization in self.optimization_item: result.add(OptimizeRecord(optimization)) - headers = self.headers + if self.rank is not None: - headers = ["Rank id"] + headers - sub_table_name = "GcAnalysis" if not 
self.stage else f"Stage-{self.stage}: GcAnalysis" - result.add_detail(sub_table_name, headers=headers) + self.headers = ["Rank id"] + self.headers + + sub_table_name = BasePrompt.get_sub_table_name(self.problem, self.stage) + result.add_detail(sub_table_name, headers=self.headers) for row in self.abnormal_gc_list: if self.rank is not None: @@ -97,7 +110,6 @@ class GcChecker: html_render.render_template(key="schedule", template_dir="templates", template_name="gc.html", - title="GC Analysis", desc=self.desc, solutions=self.solutions, headers=self.headers, @@ -106,6 +118,55 @@ class GcChecker: priority_background_color=priority, rank=rank) + def get_free_events_include_gc(self, large_free_events, acl_events): + free_event_index, acl_event_index = 0, 0 + free_include_acl_events = {} + + while free_event_index < len(large_free_events) and acl_event_index < len(acl_events): + free_event = large_free_events[free_event_index] + free_event_name = f"{Constant.FREE}-{free_event_index}" + free_event_start_time = convert_to_float(free_event.ts) + free_event_end_time = free_event_start_time + convert_to_float(free_event.dur) + if free_event_name not in free_include_acl_events: + free_include_acl_events[free_event_name] = {} + + while acl_event_index < len(acl_events): + acl_event = acl_events[acl_event_index] + acl_event_start_time = convert_to_float(acl_event.ts) + acl_event_end_time = acl_event_start_time + convert_to_float(acl_event.dur) + + if acl_event_end_time < free_event_start_time: + acl_event_index += 1 + continue + if acl_event_start_time > free_event_end_time: + break + + if "acl_event_count" not in free_include_acl_events[free_event_name]: + free_include_acl_events[free_event_name]["acl_event_count"] = 0 + free_include_acl_events[free_event_name]["acl_event_count"] += 1 + + if "acl_event_dur" not in free_include_acl_events[free_event_name]: + free_include_acl_events[free_event_name]["acl_event_dur"] = 0.0 + free_include_acl_events[free_event_name]["acl_event_dur"] 
+= convert_to_float(acl_event.dur) + + acl_event_index += 1 + + free_event_index += 1 + + # 按free持续时间降序排列,优先判断持续时间最长的free + event_indexs = range(len(large_free_events)) + for index, free_event in sorted(zip(event_indexs, large_free_events), key=lambda x: x[1].dur, reverse=True): + + free_event_name = f"{Constant.FREE}-{index}" + free_duration = convert_to_float(free_event.dur) + acl_event_dur = free_include_acl_events.get(free_event_name, {}).get("acl_event_dur", 0.0) + acl_event_count = free_include_acl_events.get(free_event_name, {}).get("acl_event_count", 0) + if safe_division(acl_event_dur, free_duration) < self.max_acl_event_time_ratio and safe_division( + acl_event_count, free_duration) < self.max_acl_event_num_ratio: + self.gc_issues = True + return free_event + return {} + def _init_rule(self): language = AdditionalArgsManager().language gc_rule_path = os.path.join( @@ -114,10 +175,17 @@ class GcChecker: language, "gc.yaml" ) + gc_rule = FileManager.read_yaml_file(gc_rule_path) + + self.problem = gc_rule.get("problem") self.gc_threshold = convert_to_float(gc_rule.get("gc_threshold", 0)) self.gc_topk_num = convert_to_int(gc_rule.get("top_num", 0)) self.gc_problem_with_count = gc_rule.get("gc_problem_with_count", "") + self.gc_problem_with_free = gc_rule.get("gc_problem_with_free", "") + self.max_acl_event_num_ratio = convert_to_float(gc_rule.get("max_acl_event_num_ratio")) + self.max_acl_event_time_ratio = convert_to_float(gc_rule.get("max_acl_event_time_ratio")) + self.solutions = gc_rule.get("solutions", []) for solution in self.solutions: for key, val in solution.items(): diff --git a/profiler/advisor/display/prompt/__init__.py b/profiler/advisor/display/prompt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/display/prompt/base_prompt.py b/profiler/advisor/display/prompt/base_prompt.py new file mode 100644 index 
0000000000000000000000000000000000000000..b3b4751ca836b5d2a315083fc2a292cd9800fa53 --- /dev/null +++ b/profiler/advisor/display/prompt/base_prompt.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from profiler.prof_common.additional_args_manager import AdditionalArgsManager + + +class BasePrompt: + @staticmethod + def get_sub_table_name(problem, stage): + language = AdditionalArgsManager().language + if language == "en": + sub_table_name = problem if not stage else f"Stage-{stage}: {problem}" + else: + sub_table_name = problem if not stage else f"阶段-{stage}:{problem}" + return sub_table_name diff --git a/profiler/advisor/display/prompt/cn/__init__.py b/profiler/advisor/display/prompt/cn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/display/prompt/cn/ai_core_freq_prompt.py b/profiler/advisor/display/prompt/cn/ai_core_freq_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..9a00926bf528f1ad02e638fb5d58fa5d2ad5c79a --- /dev/null +++ b/profiler/advisor/display/prompt/cn/ai_core_freq_prompt.py @@ -0,0 +1,21 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class AICoreFreqPrompt(object): + RANK_ID = "{}号卡" + PROBLEM = "AIcore频率" + DESCRIPTION = "在降频期间发现{}个算子,频率降低比例超过了{}。" + RANK_DESCRIPTION = "对于{}号卡," + SUGGESTION = "请检查您的机器温度或最大功率。" \ No newline at end of file diff --git a/profiler/advisor/display/prompt/cn/block_dim_prompt.py b/profiler/advisor/display/prompt/cn/block_dim_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..eb0c87ebb9fab78de0e0a0da1348e0c542b0860d --- /dev/null +++ b/profiler/advisor/display/prompt/cn/block_dim_prompt.py @@ -0,0 +1,21 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +class BlockDimPrompt(object): + PROBLEM = "AICore核数" + DESCRIPTION = "一些算子没有充分利用{}个AICore核" + AIV_NUM_DESCRIPTION = "或者{}个AIVector核" + TOP_DURATION_OP_DESCRIPTION = ";\n 任务耗时最长的{}个算子如下:" + diff --git a/profiler/advisor/display/prompt/cn/dynamic_shape_prompt.py b/profiler/advisor/display/prompt/cn/dynamic_shape_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..79c2e1ad208d1b16d3fcdede111d32d5cdead84b --- /dev/null +++ b/profiler/advisor/display/prompt/cn/dynamic_shape_prompt.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class DynamicShapePrompt(object): + RANK_ID = "{}号卡" + PROBLEM = "动态shape算子" + DESCRIPTION = f"找到所有是动态shape的算子" + ENABLE_COMPILED_SUGGESTION = "1. 尝试设置环境变量'export HOST_CACHE_CAPACITY=20'。\n" \ + "2. 在python脚本入口加入以下代码关闭在线编译:\n" \ + "'torch_npu.npu.set_compile_mode(jit_compile=False) \n " \ + "torch_npu.npu.config.allow_internal_format = False' \n" + RELEASE_SUGGESTION = "详细信息请参考:链接" \ No newline at end of file diff --git a/profiler/advisor/display/prompt/cn/environment_variable_prompt.py b/profiler/advisor/display/prompt/cn/environment_variable_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..5536af0d788211e9b06bf5355266e8046964d2fe --- /dev/null +++ b/profiler/advisor/display/prompt/cn/environment_variable_prompt.py @@ -0,0 +1,19 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. 
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class EnvironmentVariablePrompt(object):
+    PROBLEM = "环境变量分析"
+    DESCRIPTION = "描述并给出最优的环境变量配置建议"
+    SUGGESTION = "请设置最优的环境变量"
diff --git a/profiler/advisor/display/prompt/cn/fusion_ops_prompt.py b/profiler/advisor/display/prompt/cn/fusion_ops_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..0113cb58a03814c79103437cfecfc44f9a9c0f54
--- /dev/null
+++ b/profiler/advisor/display/prompt/cn/fusion_ops_prompt.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class FusionOpsPrompt(object):
+    PROBLEM = "亲和API接口"
+    DESCRIPTION = "目前运行环境版本为cann-{}和torch-{},发现有{}个api接口可以替换。"
+    SUGGESTION = "请根据子表'Affinity training api'替换训练api接口"
+    EMPTY_STACK_DESCRIPTION = ",但没有堆栈"
+    EMPTY_STACKS_SUGGESTION = "这些API接口没有代码堆栈。如果采集profiling时参数为'with_stack=False'," \
+                              "请参考{}设置'with_stack=True'。" \
+                              "另外,由于反向传播没有堆栈,请忽略以下亲和APIs。"
diff --git a/profiler/advisor/display/prompt/cn/graph_fusion_prompt.py b/profiler/advisor/display/prompt/cn/graph_fusion_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..4366a094705379eb6a7d86548ed9efbf048b405d
--- /dev/null
+++ b/profiler/advisor/display/prompt/cn/graph_fusion_prompt.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class GraphFusionPrompt(object):
+    PROBLEM = "融合问题"
+    DESCRIPTION = "发现 {} 个融合问题"
+    SUGGESTION = "在mstt_advisor*.html中查看融合问题的细节信息"
\ No newline at end of file
diff --git a/profiler/advisor/display/prompt/cn/operator_bound_prompt.py b/profiler/advisor/display/prompt/cn/operator_bound_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..76257e36a27fc966bd46a015775782a33964abad
--- /dev/null
+++ b/profiler/advisor/display/prompt/cn/operator_bound_prompt.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class OperatorBoundPrompt(object):
+    PROBLEM = "算子瓶颈"
+    DESCRIPTION = "mte,cube,vector,scalar比都没有超过 {},需要调整的任务执行时间最长的算子如下:\n"
diff --git a/profiler/advisor/display/prompt/cn/operator_prompt.py b/profiler/advisor/display/prompt/cn/operator_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..4eb09da55039aa0f61ab7f0c46926171032da4d7
--- /dev/null
+++ b/profiler/advisor/display/prompt/cn/operator_prompt.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class OperatorPrompt(object):
+    RANK_ID = "{}号卡"
+    PYTORCH_OPERATOR_TUNE_SUGGESTION = "通过AOE优化算子,使用样例如下:\n" \
+                                       "'aoe --job_type=2 --model_path=$user_dump_path " \
+                                       "--tune_ops_file={}'\n"
+    MSLITE_OPERATOR_TUNE_SUGGESTION = f"在MindSpore Lite 框架通过AOE优化算子,使用样例如下:\n" \
+                                      f"converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \
+                                      f"--modelFile=$user_model.onnx --outputFile=user_model " \
+                                      f"--configFile=./config.txt\n"
+    PYTORCH_RELEASE_SUGGESTION = "详细信息请参考:链接"
+    MSLITE_RELEASE_SUGGESTION = "\nMSLite AOE的配置文件如下usage:\n" \
+                                "[ascend_context]\n" \
+                                "aoe_mode=\"operator tuning\"\n" \
+                                "--tune_ops_file={}\n" \
+                                "\n详细信息请参考:链接"
\ No newline at end of file
diff --git a/profiler/advisor/display/prompt/cn/overall_summary_advice_prompt.py b/profiler/advisor/display/prompt/cn/overall_summary_advice_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f41edad0ab53c175d378b91feb23a9cac52a8dd
--- /dev/null
+++ b/profiler/advisor/display/prompt/cn/overall_summary_advice_prompt.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class OverallSummaryAdvicePrompt(object):
+    ADVICE_MAP = {
+        "计算时长": "如果你想了解更多详细建议请使用 msprof-analyze advisor computation.",
+        "未被掩盖的通信时长": "如果你想了解更多详细建议请使用 msprof-analyze advisor schedule.",
+        "空闲时长": "如果你想了解更多详细建议请使用 msprof-analyze advisor schedule."
+    }
+    TIME_NAME_MAP = {
+        "计算时长": "computing",
+        "未被掩盖的通信时长": "communication",
+        "空闲时长": "free",
+        'Cube算子时长(数量)': 'Cube Time',
+        'Vector算子时长(数量)': 'Vector Time',
+        'Flash Attention算子时长(前向)(数量)': 'Flash Attention Time(Forward)',
+        'Flash Attention算子时长(反向)(数量)': 'Flash Attention Time(Backward)',
+        '其它时长': "Other Computing Time",
+        'SDMA时长(数量)': 'SDMA Time'
+    }
+    PERFORMANCE_TIME_DICT = {
+        "计算时长": ['Cube算子时长(数量)', 'Vector算子时长(数量)', 'Flash Attention算子时长(前向)(数量)',
+                 'Flash Attention算子时长(反向)(数量)', '其它时长'],
+        "未被掩盖的通信时长(等待时长)": [],
+        "空闲时长": ['SDMA算子时长(数量)']
+    }
\ No newline at end of file
diff --git a/profiler/advisor/display/prompt/cn/overall_summary_analyzer_prompt.py b/profiler/advisor/display/prompt/cn/overall_summary_analyzer_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..7690e7541bd7ec90a6bd53fd83b9086f6e4ad930
--- /dev/null
+++ b/profiler/advisor/display/prompt/cn/overall_summary_analyzer_prompt.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +class OverallSummaryAnalyzePrompt(object): + OVERALL_SUMMARY_ANALYZER = "整网耗时分析" + ADVICE_MAP = { + "计算时长": "如果你想了解更多详细建议请看mstt_advisor_*.html", + "未被掩盖的通信时长": "如果你想了解更多详细建议请看mstt_advisor_*.html", + "空闲时长": "如果你想了解更多详细建议请看mstt_advisor_*.html" + } + TIME_NAME_MAP = { + "计算时长": "computing", + "未被掩盖的通信时长": "communication", + "空闲时长": "free", + 'Cube算子时长(数量)': 'Cube Time', + 'Vector算子时长(数量)': 'Vector Time', + 'Flash Attention算子时长(前向)(数量)': 'Flash Attention Time(Forward)', + 'Flash Attention算子时长(反向)(数量)': 'Flash Attention Time(Backward)', + '其它时长': "Other Computing Time", + 'SDMA时长(数量)': 'SDMA Time' + } + PERFORMANCE_TIME_DICT = { + "计算时长": "computing_time_ms", + " -- Flash Attention": "fa_time_ms", + " -- Conv": "conv_time_ms", + " -- Matmul": "matmul_time_ms", + " -- Vector": "vector_time_ms", + " -- SDMA(Tensor Move)": "tensor_move_time_ms", + " -- 其它Cube": "other_cube_time_ms", + "未被掩盖的通信时长": "uncovered_communication_time_ms", + " -- 等待时长": "wait_time_ms", + " -- 传输时长": "transmit_time_ms", + "空闲时长": "free_time_ms", + " -- SDMA": "sdma_time_ms", + " -- 空闲时长": "free_ms", + "E2E时长": "e2e_time_ms" + } diff --git a/profiler/advisor/display/prompt/cn/timeline_op_dispatch_prompt.py b/profiler/advisor/display/prompt/cn/timeline_op_dispatch_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..0933c291456be50b6837e24492ed0ee74f1027dc --- /dev/null +++ b/profiler/advisor/display/prompt/cn/timeline_op_dispatch_prompt.py @@ -0,0 +1,21 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class TimelineOpDispatchPrompt(object):
+    PROBLEM = "算子下发"
+    DESCRIPTION = "发现{}个算子编译问题。"
+    SUGGESTION = "请在python脚本入口添加以下代码关闭在线编译:\n" \
+                 "'torch_npu.npu.set_compile_mode(jit_compile=False) \n" \
+                 "torch_npu.npu.config.allow_internal_format = False' \n"
diff --git a/profiler/advisor/display/prompt/en/__init__.py b/profiler/advisor/display/prompt/en/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/profiler/advisor/display/prompt/en/ai_core_freq_prompt.py b/profiler/advisor/display/prompt/en/ai_core_freq_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..7737a372ad29d6ac35bfc1d61e75ce747395f8af
--- /dev/null
+++ b/profiler/advisor/display/prompt/en/ai_core_freq_prompt.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +class AICoreFreqPrompt(object): + RANK_ID = "RANK {} " + PROBLEM = "AI Core Frequency" + DESCRIPTION = "{} operators are found during frequency reduction, and the reduction " \ + "ratio is larger than {}." + RANK_DESCRIPTION = "For rank {}, " + SUGGESTION = "Please check the temperature or max power of your machine." \ No newline at end of file diff --git a/profiler/advisor/display/prompt/en/block_dim_prompt.py b/profiler/advisor/display/prompt/en/block_dim_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..410fcdd41cf569481bbc4c64750e9457bfd5191c --- /dev/null +++ b/profiler/advisor/display/prompt/en/block_dim_prompt.py @@ -0,0 +1,20 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class BlockDimPrompt(object): + PROBLEM = "block dim" + DESCRIPTION = "some operator does not make full use of {} ai core" + AIV_NUM_DESCRIPTION = " or {} ai vector core" + TOP_DURATION_OP_DESCRIPTION = ";\n Top-{} operator of task duration are as follows:\n" diff --git a/profiler/advisor/display/prompt/en/dynamic_shape_prompt.py b/profiler/advisor/display/prompt/en/dynamic_shape_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..b350f603f115eb9dad32c49b2d1659ec367df6fe --- /dev/null +++ b/profiler/advisor/display/prompt/en/dynamic_shape_prompt.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class DynamicShapePrompt(object): + RANK_ID = "RANK {} " + PROBLEM = "Dynamic Shape Operator" + DESCRIPTION = "Found all operators are dynamic shape" + ENABLE_COMPILED_SUGGESTION = "1. Please try to set environment by execute `export HOST_CACHE_CAPACITY=20`.\n." \ + "2. Please place the following code at the entrance of the python script to disable jit compile.\n " \ + "Code: `torch_npu.npu.set_compile_mode(jit_compile=False) \n " \ + "torch_npu.npu.config.allow_internal_format = False`.\n" + RELEASE_SUGGESTION = "for details please refer to link : LINK" diff --git a/profiler/advisor/display/prompt/en/environment_variable_prompt.py b/profiler/advisor/display/prompt/en/environment_variable_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..fb9f2d264d52c096c26677386a1c2a6764475511 --- /dev/null +++ b/profiler/advisor/display/prompt/en/environment_variable_prompt.py @@ -0,0 +1,19 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class EnvironmentVariablePrompt(object):
+    PROBLEM = "Environment Variable Analysis"
+    DESCRIPTION = "Describe and suggest the optimal environment variable settings"
+    SUGGESTION = "Please set the optimal environment variable"
diff --git a/profiler/advisor/display/prompt/en/fusion_ops_prompt.py b/profiler/advisor/display/prompt/en/fusion_ops_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..e02f2f29a6fd46658238ffc7428b61e1ddc80474
--- /dev/null
+++ b/profiler/advisor/display/prompt/en/fusion_ops_prompt.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class FusionOpsPrompt(object):
+    PROBLEM = "Affinity Apis"
+    DESCRIPTION = "On the runtime env cann-{} and torch-{}, found {} apis to be replaced"
+    SUGGESTION = "Please replace training api according to sub table 'Affinity training api'"
+    EMPTY_STACK_DESCRIPTION = ", but with no stack"
+    EMPTY_STACKS_SUGGESTION = "These APIs have no code stack. If parameter 'with_stack=False' while profiling, " \
+                              "please refer to {} to set 'with_stack=True'. " \
+                              "Otherwise, ignore following affinity APIs due to backward broadcast lack of stack."
diff --git a/profiler/advisor/display/prompt/en/graph_fusion_prompt.py b/profiler/advisor/display/prompt/en/graph_fusion_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a2696f5d2f283609b0d03119c6c4e08ca72cef6
--- /dev/null
+++ b/profiler/advisor/display/prompt/en/graph_fusion_prompt.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class GraphFusionPrompt(object):
+    PROBLEM = "Fusion Issue"
+    DESCRIPTION = "Found {} fusion issues"
+    SUGGESTION = "Check fusion issues detail in mstt_advisor*.html"
diff --git a/profiler/advisor/display/prompt/en/operator_bound_prompt.py b/profiler/advisor/display/prompt/en/operator_bound_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4f29f25d1be7adb2203ea08375ccafe423dbdbe
--- /dev/null
+++ b/profiler/advisor/display/prompt/en/operator_bound_prompt.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class OperatorBoundPrompt(object): + PROBLEM = "operator no bound" + DESCRIPTION = "There is no mte, cube, vector, scalar ratio is more than {},\n" \ + "Top task duration operators need to be tuned are as follows: \n" diff --git a/profiler/advisor/display/prompt/en/operator_prompt.py b/profiler/advisor/display/prompt/en/operator_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..0491cefa9294c18a5f7d485c185410e1bcb14717 --- /dev/null +++ b/profiler/advisor/display/prompt/en/operator_prompt.py @@ -0,0 +1,31 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +class OperatorPrompt(object): + RANK_ID = "RANK {} " + PYTORCH_OPERATOR_TUNE_SUGGESTION = "Optimize operator by AOE, such as:\n" \ + "'aoe --job_type=2 --model_path=$user_dump_path " \ + "--tune_ops_file={}'\n" + MSLITE_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE in mindspore lite framework, such as:\n" \ + f"converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \ + f"--modelFile=$user_model.onnx --outputFile=user_model " \ + f"--configFile=./config.txt\n" + PYTORCH_RELEASE_SUGGESTION = "for details please refer to link : LINK" + MSLITE_RELEASE_SUGGESTION = "\nThe config file for MSLite AOE usage is as follows:\n" \ + "[ascend_context]\n" \ + "aoe_mode=\"operator tuning\"\n" \ + "--tune_ops_file={}\n" \ + "\nFor details please refer to link : LINK" diff --git a/profiler/advisor/display/prompt/en/overall_summary_advice_prompt.py b/profiler/advisor/display/prompt/en/overall_summary_advice_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..05bbd6215596cd0e59a671db14ed580f7cf7c488 --- /dev/null +++ b/profiler/advisor/display/prompt/en/overall_summary_advice_prompt.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +class OverallSummaryAdvicePrompt(object): + ADVICE_MAP = { + "Computing Time": "if you want more detailed advice please use msprof-analyze advisor computation.", + "Uncovered Communication Time": "if you want more detailed advice, please use msprof-analyze advisor schedule.", + "Free Time": "if you want more detailed advice please use msprof-analyze advisor schedule." + } + TIME_NAME_MAP = { + "Computing Time": "computing", + "Uncovered Communication Time": "communication", + "Free Time": "free", + 'Cube Time(Num)': 'Cube Time', + 'Vector Time(Num)': 'Vector Time', + 'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)', + 'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)', + 'Other Time': "Other Computing Time", + 'SDMA Time(Num)': 'SDMA Time' + } + PERFORMANCE_TIME_DICT = { + "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)', + 'Flash Attention Time(Backward)(Num)', 'Other Time'], + "Uncovered Communication Time(Wait Time)": [], + "Free Time": ['SDMA Time(Num)'] + } \ No newline at end of file diff --git a/profiler/advisor/display/prompt/en/overall_summary_analyzer_prompt.py b/profiler/advisor/display/prompt/en/overall_summary_analyzer_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3f17261db07c19277383ddf793b0735307ab18 --- /dev/null +++ b/profiler/advisor/display/prompt/en/overall_summary_analyzer_prompt.py @@ -0,0 +1,49 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +class OverallSummaryAnalyzePrompt(object): + OVERALL_SUMMARY_ANALYZER = "Overall Summary" + ADVICE_MAP = { + "Computing Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Uncovered Communication Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Free Time": "if you want more detailed advice please go to mstt_advisor_*.html" + } + TIME_NAME_MAP = { + "Computing Time": "computing", + "Uncovered Communication Time": "communication", + "Free Time": "free", + 'Cube Time(Num)': 'Cube Time', + 'Vector Time(Num)': 'Vector Time', + 'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)', + 'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)', + 'Other Time': "Other Computing Time", + 'SDMA Time(Num)': 'SDMA Time' + } + PERFORMANCE_TIME_DICT = { + "Computing Time": "computing_time_ms", + " -- Flash Attention": "fa_time_ms", + " -- Conv": "conv_time_ms", + " -- Matmul": "matmul_time_ms", + " -- Vector": "vector_time_ms", + " -- SDMA(Tensor Move)": "tensor_move_time_ms", + " -- Other Cube": "other_cube_time_ms", + "Uncovered Communication Time": "uncovered_communication_time_ms", + " -- Wait": "wait_time_ms", + " -- Transmit": "transmit_time_ms", + "Free Time": "free_time_ms", + " -- SDMA": "sdma_time_ms", + " -- Free": "free_ms", + "E2E Time": "e2e_time_ms" + } diff --git a/profiler/advisor/display/prompt/en/timeline_op_dispatch_prompt.py b/profiler/advisor/display/prompt/en/timeline_op_dispatch_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..e52a22cb73ee2cde0a3226812eea82b2677be7a2 --- /dev/null +++ b/profiler/advisor/display/prompt/en/timeline_op_dispatch_prompt.py @@ -0,0 +1,22 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class TimelineOpDispatchPrompt(object): + PROBLEM = "Operator Dispatch" + DESCRIPTION = "Found {} operator compile issues." + SUGGESTION = "Please place the following code at the entrance of the python script to disable jit compile. \n" \ + "Code: `torch_npu.npu.set_compile_mode(jit_compile=False) \n" \ + "torch_npu.npu.config.allow_internal_format = False` \n" + diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index eb8e38c41083b9f73414639f68f3e6a78aca32c6..74b9358f626f130e563390ea9dbee2cad898eb1d 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -22,6 +22,7 @@ import click import xlsxwriter from prettytable import ALL, PrettyTable +from profiler.prof_common.additional_args_manager import AdditionalArgsManager from profiler.prof_common.constant import Constant from profiler.advisor.utils.utils import singleton, logger from profiler.advisor.config.config import Config @@ -152,7 +153,11 @@ class OptimizeResult: def add(self, overview_item): - sheet_name = "problems" + language = AdditionalArgsManager().language + if language == "en": + sheet_name = "problems" + else: + sheet_name = "问题综述" headers = overview_item.headers data = overview_item.data @@ -208,11 +213,19 @@ class TerminalResult: def __init__(self): self.width, _ = self.get_terminal_size() - if self.width is None: - self.table = PrettyTable(["No.", 
"Category", "Description", "Suggestion"]) + language = AdditionalArgsManager().language + if language == "en": + if self.width is None: + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"]) + else: + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"], + max_table_width=max(self.width - 20, 180)) else: - self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"], - max_table_width=max(self.width - 20, 180)) + if self.width is None: + self.table = PrettyTable(["No.", "类型", "描述", "建议"]) + else: + self.table = PrettyTable(["No.", "类型", "描述", "建议"], + max_table_width=max(self.width - 20, 180)) self.table.hrules = ALL self.result_list = [] diff --git a/profiler/advisor/rules/cn/aicpu_rules.yaml b/profiler/advisor/rules/cn/aicpu_rules.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f14fc044d47885cc49f12fac6507604c850706a3 --- /dev/null +++ b/profiler/advisor/rules/cn/aicpu_rules.yaml @@ -0,0 +1,107 @@ +problem: "AICPU算子" +description: "一些算子和任务执行时间超过了{}us,比如:\n" +suggestion: "修改代码避免使用aicpu类算子" +double_suggestion: "尝试将double类型的算子转换成float,比如{}" +DataTypeSuggeation: &DataTypeSuggeation "数据类型{}在{}算子中可能会造成AICpu问题, 如果可以,尝试转换成{}。" +AICPU_DOC_URL: &AICPU_DOC_URL "https://gitee.com/ascend/mstt/blob/master/profiler/advisor/doc/Samples%20of%20AI%20CPU%20Operator%20Replacement.md" + +CommonChecker: + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, 
uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ tensorequal ] + input: [ float, float32, float16, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ equal ] + input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ nonzero ] + input: [ float16, bool, dt_bf16 ] + output: [ int64 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ tensorequal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ equal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] + output: [ bool ] + 
suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.RC1, 7.0.0] + op_type: [ mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + suggestion: *DataTypeSuggeation + +ExampleGuideChecker: + - IndexPutChecker: + op_type: [index] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换index算子。" + + - NonzeroChecker: + op_type: [ indexput, indexputv2 ] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换indexput算子。" + + - CastChecker: + op_type: [ argmin ] + url: *AICPU_DOC_URL + suggestion: "请参考链接更新cann-tookit包到7.0.RC1及以上的版本。" + + - CastChecker: + op_type: [ nonzero ] + url: *AICPU_DOC_URL + suggestion: "请参考链接修改源码,尝试用等价的算子替换nonzero算子。" diff --git a/profiler/advisor/rules/cn/bandwidth_contention.yaml b/profiler/advisor/rules/cn/bandwidth_contention.yaml index 52a85b5abebbac3feeddfea4db50802ab8d9695c..b6c24f1acfa3cbde7aab494a31a565a7e32b1e07 100644 --- a/profiler/advisor/rules/cn/bandwidth_contention.yaml +++ b/profiler/advisor/rules/cn/bandwidth_contention.yaml @@ -1,4 +1,5 @@ -problem: "在执行计算和通信任务时,SDMA带宽低于 {threshold}GB/s。通常,并行计算和通信可以提高模型的运行效率。并发计算和通信任务可能会影响通信带宽。" +problem: "带宽分析" +description: "在执行计算和通信任务时,SDMA带宽低于 {threshold}GB/s。通常,并行计算和通信可以提高模型的运行效率。并发计算和通信任务可能会影响通信带宽。" sdma_baseline: 18 #M threshold: 0.8 top_num: 10 diff --git a/profiler/advisor/rules/cn/byte_alignment.yaml b/profiler/advisor/rules/cn/byte_alignment.yaml index a653d5632dee37cf2151254c1b5e5e829b0bd93e..6a4be8e9520f18f6b4f2f7d585f4c288e3aabc12 100644 --- a/profiler/advisor/rules/cn/byte_alignment.yaml +++ b/profiler/advisor/rules/cn/byte_alignment.yaml @@ -1,4 +1,5 @@ -problem: "{count}个通信算子的数据大小未对齐,这会降低通信性能。" +problem: "字节对齐分析" +description: "{count}个通信算子的数据大小未对齐,这会降低通信性能。" min_size: 512 # byte top_num: 5 solutions: diff --git a/profiler/advisor/rules/cn/environment_variable_info.yaml 
b/profiler/advisor/rules/cn/environment_variable_info.yaml index 976dbf449e247bc9965d985b4b4e8aa47387258c..eedaf83990e3a797936a8df187f21f499338e14d 100644 --- a/profiler/advisor/rules/cn/environment_variable_info.yaml +++ b/profiler/advisor/rules/cn/environment_variable_info.yaml @@ -1,7 +1,7 @@ ASCEND_GLOBAL_LOG_LEVEL: desc: "日志级别: 0-调试,1-信息,2-警告,3-错误。\n 默认为错误级别。" - suggest: "调试或信息级别可能会导致培训性能下降,\n + suggest: "调试或信息级别可能会导致训练性能下降,\n 建议通过执行命令'export ASCEND_GLOBAL_LOGLEVEL=3来设置错误级别。" HCCL_RDAM_TC: desc: "配置网络端口发送的RoCE数据包的DSCP值。\n @@ -18,7 +18,7 @@ HCCL_RDMA_SL: suggest_html: "请参考 链接" ACLNN_CACHE_LIMIT: desc: "缓存的aclnn算子的数量。" - suggest: "在aclnn和host耗时过长时,可以设置一个较大的数字,例如'export ACLNN_CACHE_LIMIT=100000'。" + suggest: "在aclnn和host耗时过长时,可以设置一个较大的数字,例如'export ACLNN_CACHE_LIMIT=100000'。" HOST_CACHE_CAPACITY: desc: "启用动态shape缓存。\n 默认值为0,表示数据缓存已禁用。\n @@ -40,8 +40,4 @@ ASCEND_LAUNCH_BLOCKING: desc: "是否在操作执行期间启用同步模式。\n 当设置为1时,强制算子同步运行,从而更容易调试和跟踪代码中的问题。\n 如果设置为0,则任务将以异步模式执行。" - suggest: "export ASCEND_LAUNCH_BLOCKING=1" -HCCL_ALGO: - desc: "用于配置集合通信Server间跨机通信算法,支持如下几种取值:ring, H-D_R, NHR, NHR_V1, NB, pipeline.\n - 当不设置此环境变量时,会根据产品形态、节点数以及数据量自动选择算法。" - suggest: "建议通过执行命令'unset HCCL_ALGO'取消此环境变量的设置" \ No newline at end of file + suggest: "export ASCEND_LAUNCH_BLOCKING=1" \ No newline at end of file diff --git a/profiler/advisor/rules/cn/gc.yaml b/profiler/advisor/rules/cn/gc.yaml index 2d44576bc91ea8bf8dc61ca592829d290ba8376e..295f9876616c872be18bb9f94568ac77e0363cfa 100644 --- a/profiler/advisor/rules/cn/gc.yaml +++ b/profiler/advisor/rules/cn/gc.yaml @@ -1,7 +1,12 @@ +problem: GC分析 gc_problem_with_count: "检测到异常垃圾收集(GC)事件{gc_count}次,总时间为{gc_total_time}毫秒。\n GC操作耗时且会阻塞整个过程。因此,模型训练过程中的某些步骤比其他步骤需要更长的时间。" +gc_problem_with_free: "由于torch_npu的版本较低,在分析时没有收集垃圾收集(GC)数据。但在{free_duration_time}微秒(us)的空闲时间内几乎没有主机任务,这可能是由Python的异常GC引起的。" gc_threshold: 1000 #us top_num: 10 +max_free_threshold: 200000 # us +max_acl_event_num_ratio: 0.0001 # max 10 events per 100 ms +max_acl_event_time_ratio: 0.01 
# total time of acl events no larger than 0.01 * free duration solutions: - 内存管理: desc: "实现有效的Python内存管理;不使用时及时释放内存,避免长期保留;避免对象之间的循环引用。" diff --git a/profiler/advisor/rules/cn/packet.yaml b/profiler/advisor/rules/cn/packet.yaml index 2b0fff4b72bd4156bed49e34fa6f35857f35d40d..621b0cb07d4ba153650f75275a33d1793f840e4a 100644 --- a/profiler/advisor/rules/cn/packet.yaml +++ b/profiler/advisor/rules/cn/packet.yaml @@ -1,4 +1,5 @@ -problem: "过小的通信数据包可能会导致host传递瓶颈。\n" +problem: "包分析" +description: "过小的通信数据包可能会导致host传递瓶颈。\n" sdma_problem: "在SDMA通信中,通信数据量的{abnormal_ratio}小于{min_size}MB,总时间为{abnormal_time}ms。\n" rdma_problem: "在RDMA通信中,通信数据量的{abnormal_ratio}小于{min_size}MB,总时间为{abnormal_time}ms。\n" min_sdma_size: 16 #M diff --git a/profiler/advisor/rules/cn/rdma_analysis.yaml b/profiler/advisor/rules/cn/rdma_analysis.yaml index c5a7bd14f2e73da914c5c764494780bede18dd28..1492b42350ad38da259f702642c94a09c0e65eb1 100644 --- a/profiler/advisor/rules/cn/rdma_analysis.yaml +++ b/profiler/advisor/rules/cn/rdma_analysis.yaml @@ -1,4 +1,5 @@ -problem: "发生RDMA通信重传。单次重传需要4秒以上。重传问题在{group_count}通信域中检测到。\n建议执行以下建议。" +problem: "通信重传分析" +description: "发生RDMA通信重传。单次重传需要4秒以上。重传问题在{group_count}通信域中检测到。\n建议执行以下建议。" min_retransmission_time: 4000 #ms solutions: - 检查RDMA传输时长: diff --git a/profiler/advisor/rules/aicpu_rules.yaml b/profiler/advisor/rules/en/aicpu_rules.yaml similarity index 95% rename from profiler/advisor/rules/aicpu_rules.yaml rename to profiler/advisor/rules/en/aicpu_rules.yaml index bdbb71fecc7df9ec0ab9f82fec2fb8decc1b001c..d1869a25efffc80fb3926a63304918fdc5859866 100644 --- a/profiler/advisor/rules/aicpu_rules.yaml +++ b/profiler/advisor/rules/en/aicpu_rules.yaml @@ -1,3 +1,7 @@ +problem: "AICPU operator" +description: "Some operators and task duration exceed {} us, such as :\n" +suggestion: "Modify code to avoid aicpu operator" +double_suggestion: "Try to convert double type operator to float, such as {}" DataTypeSuggeation: &DataTypeSuggeation "Data type {} in {} operator may 
cause AICPU issues, Try to convert to {} if possible." AICPU_DOC_URL: &AICPU_DOC_URL "https://gitee.com/ascend/mstt/blob/master/profiler/advisor/doc/Samples%20of%20AI%20CPU%20Operator%20Replacement.md" diff --git a/profiler/advisor/rules/en/bandwidth_contention.yaml b/profiler/advisor/rules/en/bandwidth_contention.yaml index 684ac22e96aa71d7437d8149130c7ec171a86d95..fd3f7557306fc886f636318cae8045049e689693 100644 --- a/profiler/advisor/rules/en/bandwidth_contention.yaml +++ b/profiler/advisor/rules/en/bandwidth_contention.yaml @@ -1,4 +1,5 @@ -problem: "The SDMA bandwidth is lower than {threshold} GB/s when computing and communication tasks are performed \n +problem: "Bandwidth Contention Analysis" +description: "The SDMA bandwidth is lower than {threshold} GB/s when computing and communication tasks are performed \n concurrently. Generally, parallel computing and communication improves the running efficiency of the model. \n Concurrent computing and communication tasks may affect the communication bandwidth." sdma_baseline: 18 #M diff --git a/profiler/advisor/rules/en/gc.yaml b/profiler/advisor/rules/en/gc.yaml index 219682c9a52d72ff980cf7ea87cb78f09bc38829..a3e50a6a7aab05fb7a6200b4a42c6b6879d3709b 100644 --- a/profiler/advisor/rules/en/gc.yaml +++ b/profiler/advisor/rules/en/gc.yaml @@ -1,3 +1,4 @@ +problem: GC Analysis gc_problem_with_count: "Abnormal garbage collection (GC) event is detected for {gc_count} times, and the total time is {gc_total_time} ms\n. The GC operation is time-consuming and blocks the entire process. As a result, some steps in the model training process take a longer time than other steps." 
gc_threshold: 1000 #us diff --git a/profiler/advisor/rules/en/packet.yaml b/profiler/advisor/rules/en/packet.yaml index d938098824ae9b385c44f4ceb2c405705c95b651..c74cd16fd9c7c913c4e2b036e2a37275ecd252e9 100644 --- a/profiler/advisor/rules/en/packet.yaml +++ b/profiler/advisor/rules/en/packet.yaml @@ -1,4 +1,5 @@ -problem: "Excessive small communication packets may cause host delivery bottlenecks.\n" +problem: "Packet analysis" +description: "Excessive small communication packets may cause host delivery bottlenecks.\n" sdma_problem: "In the SDMA communication, {abnormal_ratio} of the communication data volume is less than {min_size} MB, and the total time is {abnormal_time} ms.\n" rdma_problem: "In the RDMA communication, {abnormal_ratio} of the communication data volume is less than {min_size} MB, and the total time is {abnormal_time} ms." min_sdma_size: 16 #M diff --git a/profiler/advisor/rules/en/rdma_analysis.yaml b/profiler/advisor/rules/en/rdma_analysis.yaml index 6c6062775763089b04a29ac4e16f5f1c9e106ca0..a21f9fa98be09f9cc8f8f9da6821b641b1c99c4d 100644 --- a/profiler/advisor/rules/en/rdma_analysis.yaml +++ b/profiler/advisor/rules/en/rdma_analysis.yaml @@ -1,4 +1,5 @@ -problem: "RDMA communication retransmission occurs. A single retransmission takes more than 4s. Retransmission problems +problem: "Communication retransmission analysis" +description: "RDMA communication retransmission occurs. A single retransmission takes more than 4s. Retransmission problems are detected in {group_count} communication domains. 
\n Advised to perform the following suggestions" min_retransmission_time: 4000 #ms diff --git a/profiler/advisor/rules/timeline_fusion_ops.yaml b/profiler/advisor/rules/timeline_fusion_ops.yaml index 3337c938625ccd4b4ea77a0dafa9879222cf1bfe..46e02fef77785a3bf3da65899539c42ac05fadd4 100644 --- a/profiler/advisor/rules/timeline_fusion_ops.yaml +++ b/profiler/advisor/rules/timeline_fusion_ops.yaml @@ -45,18 +45,6 @@ "(slice|chunk)-mul-mul-sigmoid" ] - cann_version: 8.0.RC1 - torch_version: [1.11.0, 2.1.0] - unique_id: 3 - inherit_unique_id: 2 - operator_rules: - aten: - add: - torch_npu.npu_geglu: [ "(slice|chunk)-gelu-mul", "(slice|chunk)-mul-gelu" ] - torch_npu.npu_group_norm_silu: [ "group_norm-silu" ] - torch.addmm: [ "mul-mul-add" ] - torch_npu.npu_add_layer_norm: [ "add-layer_norm" ] - -- cann_version: 8.0.0 torch_version: [1.11.0, 2.1.0] unique_id: 3 inherit_unique_id: 2 diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index f800788592c37e0c13f8f626feae87e4914043ff..cc32f54951e991055d4931268298856a28e34957 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -48,7 +48,7 @@ def analyze_cli(**kwargs): help="Indicates whether to skip file size verification and owner verification") @click.option("-l", "--language", - type=str, + type=click.Choice(["cn", "en"]), required=False, default="en", help="Language of the profiling advisor.") @@ -87,7 +87,7 @@ def analyze_all(**kwargs) -> None: help="Indicates whether to skip file size verification and owner verification") @click.option("-l", "--language", - type=str, + type=click.Choice(["cn", "en"]), required=False, default="en", help="Language of the profiling advisor.") @@ -126,7 +126,7 @@ def analyze_schedule(**kwargs) -> None: help="Indicates whether to skip file size verification and owner verification") @click.option("-l", "--language", - type=str, + type=click.Choice(["cn", "en"]), required=False, default="en", help="Language of the profiling advisor.") diff --git 
a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 6387ac17f31d9243fd413f2a75701e030f7e6f62..c71f5f2d71340087208785fddc91dafa3d4256bd 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -284,10 +284,6 @@ class Constant(object): TIMELINE_ACL_TO_NPU_NO_STACK_CODE: "Incoming flow is 'acl_to_npu', without call stacks in profiling." } AFFINITY_TRAINING_API = "Affinity training api" - TIMELINE_EMPTY_STACKS_PROMPT = "These APIs have no code stack. If parameter 'with_stack=False' while profiling, " \ - "please refer to {timeline_profiling_doc_url} to set 'with_stack=True'. " \ - "Otherwise, ignore following affinity APIs due to backward broadcast lack of stack." - CLUSTER_ANALYSIS = "Cluster analysis" SLOW_RANK_TIME_RATIO_THRESHOLD = 0.05 @@ -385,4 +381,5 @@ class Constant(object): MINDSPORE_VERSION = "mindspore_version" PYTORCH = "pytorch" - MINDSPORE = "mindspore" \ No newline at end of file + MINDSPORE = "mindspore" +