diff --git a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py
index cafbafd8e28c162bc76edb2f77ebd0645fed552f..7594836f4151fa6328040483637ac53be5640314 100644
--- a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py
+++ b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py
@@ -42,7 +42,8 @@ class ComputeAdviceBase(AdviceBase):
         if not os.path.exists(self.collection_path):
             print("[ERROR] Path: {} is not exist.".format(self.collection_path))
             return False
-        if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"):
+        if os.path.isdir(self.collection_path) and \
+                (self.collection_path.endswith("ascend_pt") or self.collection_path.endswith("ascend_ms")):
             self.kernel_details_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT",
                                                     "kernel_details.csv")
             if not os.path.exists(self.kernel_details_path):
diff --git a/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py b/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py
index 2b8f470a1472910e1b7bf0e40cae6287957736b7..77d94f76b8d82c18f270aa96cdfcbfc33a1d6be4 100644
--- a/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py
+++ b/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py
@@ -55,7 +55,8 @@ class TimelineAdviceBase(AdviceBase):
         if not os.path.exists(self.collection_path):
             logger.error("Path: %s is not exist.", str(self.collection_path))
             return False
-        if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"):
+        if os.path.isdir(self.collection_path) and \
+                (self.collection_path.endswith("ascend_pt") or self.collection_path.endswith("ascend_ms")):
             self.trace_view_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", "trace_view.json")
             if not os.path.exists(self.trace_view_path):
                 logger.error("trace_view.json is not exist in the Path: %s."\
diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py
index cde7e2d413bd97de66cefc8a5b2ad83b791a16c2..13efebb0b5920f4f5643f124ef302a30d274abdc 100644
--- a/profiler/advisor/analyzer/analyzer_controller.py
+++ b/profiler/advisor/analyzer/analyzer_controller.py
@@ -40,6 +40,7 @@ from profiler.advisor.common.enum_params_parser import EnumParamsParser
 from profiler.advisor.utils.utils import Timer, safe_index_value, safe_division, safe_index, convert_to_int
 from profiler.advisor.interface.interface import Interface
 from profiler.cluster_analyse.cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor
+from profiler.cluster_analyse.cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor
 from profiler.prof_common.path_manager import PathManager
 from profiler.compare_tools.compare_backend.utils.constant import Constant as CompareConstant
 
@@ -186,28 +187,6 @@ class AnalyzerController:
 
         return True
 
-    @staticmethod
-    def _whether_include_mindspore_prof(profiling_path):
-        # MindSpore data is not supported yet; remove this restriction once it is supported
-        ASCEND_MS = "ascend_ms"
-
-        has_ascend_ms_dirs = False
-        for root, dirs, _ in os.walk(profiling_path):
-            if root.endswith(ASCEND_MS):
-                has_ascend_ms_dirs = True
-                break
-            for dir_name in dirs:
-                if dir_name.endswith(ASCEND_MS):
-                    has_ascend_ms_dirs = True
-                    break
-            if has_ascend_ms_dirs:
-                break
-
-        if has_ascend_ms_dirs:
-            logger.error("Advisor does not support data from MindSpore now, existing dirs end with 'ascend_ms'")
-            return True
-
-        return False
 
     @staticmethod
     def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data,
@@ -319,7 +298,8 @@ class AnalyzerController:
             dimensions: analysis dimension, normally set as Interface.all_dimension, support specific dimension
                 analysis such as ['computation'] or ['computation', 'schedule']
             cann_version: cann version of your runtime, inpact on the analysis of affinity api and AICPU operators
-            torch_version: torch version of your runtime, inpact on the analysis of affinity api
+            profiling_type: profiling type of your runtime
+            profiling_version: profiling version of your runtime, impact on the analysis of affinity api
             analysis_dimensions: can overwite dimensions.
             advisor_analyze_processes: number of processes to use while the training params pipeline parallel(pp) >1,
                 can reduce the time of analysis.
@@ -647,14 +627,6 @@ class AnalyzerController:
             logger.error(error_msg)
             return
 
-        # MindSpore data is not supported yet; remove this restriction once it is supported
-        if self._whether_include_mindspore_prof(profiling_path):
-            error_msg = f"Got *_ascend_ms dirs from {profiling_path}, skip analysis"
-            self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg,
-                                               status_code=AsyncAnalysisStatus.FAILED_STATUS_CODE,
-                                               status=AsyncAnalysisStatus.FAILED)
-            logger.error(error_msg)
-            return
 
         if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path):
             error_msg = (f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, "
@@ -841,7 +813,14 @@ class AnalyzerController:
             return False
         path_list = [os.path.join(profiling_path, dir_name) for dir_name in os.listdir(profiling_path)]
         ascend_pt_dirs = [path for path in path_list if os.path.isdir(path) and path.endswith("ascend_pt")]
-        data_processor = PytorchDataPreprocessor(ascend_pt_dirs)
+        ascend_ms_dirs = [path for path in path_list if os.path.isdir(path) and path.endswith("ascend_ms")]
+        if ascend_ms_dirs and ascend_pt_dirs:
+            logger.error("Cannot analyze PyTorch and MindSpore profiling data at the same time.")
+            return False
+        if ascend_ms_dirs and not ascend_pt_dirs:
+            data_processor = MindsporeDataPreprocessor(ascend_ms_dirs)
+        elif ascend_pt_dirs and not ascend_ms_dirs:
+            data_processor = PytorchDataPreprocessor(ascend_pt_dirs)
 
         self.cluster_local_data_map[profiling_path] = data_processor.get_data_map()
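
For orientation, the preprocessor dispatch introduced above boils down to the following self-contained sketch; the helper name and the sample directory are illustrative only and are not part of this patch or of the repo's API:

import os

def split_profiling_dirs(profiling_path):
    # Group rank directories by framework using the *_ascend_pt / *_ascend_ms suffix convention.
    path_list = [os.path.join(profiling_path, name) for name in os.listdir(profiling_path)]
    pt_dirs = [p for p in path_list if os.path.isdir(p) and p.endswith("ascend_pt")]
    ms_dirs = [p for p in path_list if os.path.isdir(p) and p.endswith("ascend_ms")]
    return pt_dirs, ms_dirs

cluster_dir = "./cluster_prof"  # hypothetical cluster collection directory
if os.path.isdir(cluster_dir):
    pt_dirs, ms_dirs = split_profiling_dirs(cluster_dir)
    if pt_dirs and ms_dirs:
        # Mirrors the new guard: mixed PyTorch and MindSpore data is rejected up front.
        raise ValueError("Cannot analyze PyTorch and MindSpore profiling data at the same time.")

Only one preprocessor is then constructed for the surviving directory list, as in the hunk above.
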
diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py
index 38b7ea0be68683c0a24f2faab59d2b311917f5d5..f348579342f48091c50daa0d8a12887625439040 100644
--- a/profiler/advisor/analyzer/base_analyzer.py
+++ b/profiler/advisor/analyzer/base_analyzer.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import os
 from functools import wraps
 from typing import Dict, List, Union
 from abc import abstractmethod, ABCMeta
@@ -25,10 +26,18 @@ from profiler.advisor.result.result import OptimizeResult
 from profiler.advisor.display.html.render import HTMLRender
 from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor
 from profiler.advisor.utils.utils import safe_division
+from profiler.cluster_analyse.common_func.file_manager import FileManager
 
 logger = logging.getLogger()
 
+ASCEND_PT = "ascend_pt"
+ASCEND_MS = "ascend_ms"
+PROFILER_INFO_HEAD = "profiler_info_"
+PROFILER_INFO_EXTENSION = ".json"
+MS_VERSION = "ms_version"
+
+
 class BaseAnalyzer(VersionControl, metaclass=ABCMeta):
     _SUPPORT_VERSIONS = EnumParamsParser().get_options(constant.CANN_VERSION)
     ANALYZER_HIGH_PRIORITY_TIME_RATIO = 0.05
@@ -38,11 +47,12 @@ def __init__(self, collection_path, n_processes: int = 1, **kwargs):
         self.n_processes = n_processes
+        self.kwargs = kwargs
+        self.collection_path = collection_path
         self.cann_version = kwargs.get(constant.CANN_VERSION, EnumParamsParser().get_default(constant.CANN_VERSION))
-        self.torch_version = kwargs.get(constant.TORCH_VERSION, EnumParamsParser().get_default(constant.TORCH_VERSION))
+        self.profiling_type = self.identify_profiling_type(EnumParamsParser().get_options(constant.PROFILING_TYPE))
+        self.profiling_version = self.identify_profiling_version()
         self.html_render = HTMLRender()
-        self.collection_path = collection_path
-        self.kwargs = kwargs
         self.dataset_list: Dict[str, List[Dataset]] = {}
         self.init_dataset_list()
         self.result = OptimizeResult()
@@ -94,6 +104,63 @@ def get_priority(self, max_mem_op_dur):
         pass
 
+    def identify_profiling_type(self, profiling_type_list):
+        profiling_type = None
+        if self.collection_path.endswith(ASCEND_MS):
+            profiling_type = [elem for elem in profiling_type_list if constant.MINDSPORE in elem][0]
+        elif self.collection_path.endswith(ASCEND_PT):
+            profiling_type = [elem for elem in profiling_type_list if constant.PYTORCH in elem][0]
+        else:
+            for _, dirs, __ in os.walk(self.collection_path):
+                is_found_type = False
+                for dir_name in dirs:
+                    if dir_name.endswith(ASCEND_MS):
+                        profiling_type = [elem for elem in profiling_type_list if constant.MINDSPORE in elem][0]
+                        is_found_type = True
+                        break
+                    elif dir_name.endswith(ASCEND_PT):
+                        profiling_type = [elem for elem in profiling_type_list if constant.PYTORCH in elem][0]
+                        is_found_type = True
+                        break
+                if is_found_type:
+                    break
+        if self.kwargs.get(constant.PROFILING_TYPE) and self.kwargs.get(constant.PROFILING_TYPE) != profiling_type:
+            logger.warning("%s The input profiling type %s is inconsistent with the actual profiling type %s.",
+                           self.__class__.__name__, self.kwargs.get(constant.PROFILING_TYPE), profiling_type)
+        if not profiling_type:
+            logger.warning("Unknown profiling type, using the default value pytorch.")
+            profiling_type = profiling_type_list[0]
+        return profiling_type
+
+    def identify_profiling_version(self):
+        profiling_version = ""
+        if constant.MINDSPORE in self.profiling_type:
+            ascend_dirs = []
+            if self.collection_path.endswith(ASCEND_MS):
+                ascend_dirs.append(self.collection_path)
+            else:
+                for root, dirs, _ in os.walk(self.collection_path):
+                    for dir_name in dirs:
+                        if dir_name.endswith(ASCEND_MS):
+                            ascend_dirs.append(os.path.join(root, dir_name))
+            if ascend_dirs:
+                ascend_dir = ascend_dirs[0]
+                for file_name in os.listdir(ascend_dir):
+                    if file_name.startswith(PROFILER_INFO_HEAD) and file_name.endswith(PROFILER_INFO_EXTENSION):
+                        file_path = os.path.join(ascend_dir, file_name)
+                        config = FileManager.read_json_file(file_path)
+                        profiling_version = config.get(MS_VERSION, "")
+                        break
+            if profiling_version and self.kwargs.get(constant.MINDSPORE_VERSION):
+                if profiling_version != self.kwargs.get(constant.MINDSPORE_VERSION):
+                    logger.warning("%s The input version %s is inconsistent with the actual version %s.",
+                                   self.__class__.__name__, self.kwargs.get(constant.MINDSPORE_VERSION), profiling_version)
+        elif constant.PYTORCH in self.profiling_type:
+            profiling_version = self.kwargs.get(constant.TORCH_VERSION, EnumParamsParser().get_default(constant.TORCH_VERSION))
+            if self.kwargs.get(constant.TORCH_VERSION) and profiling_version != self.kwargs.get(constant.TORCH_VERSION):
+                logger.warning("%s The input version %s is inconsistent with the actual version %s.",
+                               self.__class__.__name__, self.kwargs.get(constant.TORCH_VERSION), profiling_version)
+        return profiling_version
+
     def init_dataset_list(self) -> None:
         dataset_cls_list = self.dataset_cls_list
         if len(dataset_cls_list) == 0:
diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyzer.py b/profiler/advisor/analyzer/cluster/slow_link_analyzer.py
index 259e5eb0c4255afc97aad83210b72a14b7285888..6ca96077d459891bf93e20e1e745d1aa9096b18b 100644
--- a/profiler/advisor/analyzer/cluster/slow_link_analyzer.py
+++ b/profiler/advisor/analyzer/cluster/slow_link_analyzer.py
@@ -143,7 +143,8 @@ class SlowLinkAnalyzer(BaseAnalyzer):
                               template_dir="templates",
                               template_name="cluster_analysis.html",
                               cann_version=self.cann_version,
-                              torch_version=self.torch_version,
+                              profiling_type=self.profiling_type,
+                              profiling_version=self.profiling_version,
                               result=result_for_html)
 
     def get_global_step_rank(self, bindwidth_type):
diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py b/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py
index 165dec7db7f3a6aa2fbb88654cf4590da09abcf9..b0623a69b9a741c810b783a90784375aedce8152 100644
--- a/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py
+++ b/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py
@@ -134,7 +134,8 @@ class SlowRankAnalyzer(BaseAnalyzer):
                               template_dir="templates",
                               template_name="cluster_analysis.html",
                               cann_version=self.cann_version,
-                              torch_version=self.torch_version,
+                              profiling_type=self.profiling_type,
+                              profiling_version=self.profiling_version,
                               result=result_for_html)
 
     def get_global_step_rank(self, dimension):
diff --git a/profiler/advisor/analyzer/comparison/comparison_checker.py b/profiler/advisor/analyzer/comparison/comparison_checker.py
index ad4cb83d33c43614c90e198a6b35a2dc1f301782..0bced10e7ac7b7f246c1f0ce4e8dd50b5f22f54a 100644
--- a/profiler/advisor/analyzer/comparison/comparison_checker.py
+++ b/profiler/advisor/analyzer/comparison/comparison_checker.py
@@ -67,6 +67,9 @@ class ComparisonChecker:
         if compare_mode is None:
             return
         self.compare_mode = compare_mode
+        if ("Api" in compare_mode) and self.benchmark_profiling_path.endswith("ascend_ms"):
+            logger.warning("The current compare mode %s does not support MindSpore.", compare_mode)
+            return
         compare_interface = ComparisonInterface(self.profiling_path, self.benchmark_profiling_path,
                                                 self.step, self.benchmark_step)
         result = compare_interface.compare(self.compare_mode)
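
The profiling-type detection added to BaseAnalyzer above can be summarized with this standalone sketch; it uses plain strings instead of the repo's constants and the function name is illustrative, not part of the repo API:

import os

def detect_profiling_type(collection_path):
    # Decide by the directory suffix first, then by walking for *_ascend_ms / *_ascend_pt children.
    if collection_path.endswith("ascend_ms"):
        return "mindspore"
    if collection_path.endswith("ascend_pt"):
        return "pytorch"
    for _, dirs, _ in os.walk(collection_path):
        for dir_name in dirs:
            if dir_name.endswith("ascend_ms"):
                return "mindspore"
            if dir_name.endswith("ascend_pt"):
                return "pytorch"
    return "pytorch"  # default when nothing matches, as in the patch

print(detect_profiling_type("rank0_ascend_ms"))  # -> "mindspore"
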
diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py
index 6d525f303cc8c5971bda8a11d16d638ef3dcf2c3..dab4c7d95f4f58ec4c9d139a7f230857930f219a 100644
--- a/profiler/advisor/analyzer/computation/profiling_analyzer.py
+++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py
@@ -90,6 +90,9 @@ class ProfilingAnalyzer(BaseAnalyzer, ABC):
 
 class DynamicShapeAnalyzer(ProfilingAnalyzer):
     def __init__(self, collection_path, **kwargs) -> None:
         super().__init__(collection_path, **kwargs)
+        if collection_path.endswith("ascend_ms"):
+            logger.warning("Dynamic shape analyzer does not support MindSpore.")
+            return
         self.checker = DynamicShapeChecker(self.cann_version)
diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py
index 33dd081bc32d0fb7caccfba5f79b10d6d0cdf818..5b5c46ef47da83302f56e6e91784a24fffff3aea 100644
--- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py
+++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py
@@ -231,7 +231,8 @@ class OverallSummaryAnalyzer(BaseAnalyzer):
                               template_dir="templates",
                               template_name="cluster_analysis.html",
                               cann_version=self.cann_version,
-                              torch_version=self.torch_version,
+                              profiling_type=self.profiling_type,
+                              profiling_version=self.profiling_version,
                               result=result_for_html)
 
     def get_priority(self):
diff --git a/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py b/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py
index 126fe30176cf6ca0f1d7d3557c360f95af7b20be..fff0ecf3f5643ba38f46b6d7daca71b2d531033e 100644
--- a/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py
@@ -49,6 +49,9 @@ class OpDispatchAnalyzer(BaseAnalyzer):
         :param data: input datasets
         :return: result
         """
+        if "mindspore" in self.profiling_type:
+            logger.warning("The analyzer %s does not support MindSpore.", self.__class__.__name__)
+            return self.result
         self.get_op_compile_info(self.dataset)
         self.make_record(self.result)
         self.make_render(self.html_render, rank=kwargs.get('rank'))
diff --git a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py
index 7407823106ec6039605e87539e86e66f737e20f4..3ddd9cf0a7f695b1857c57def780275396e57f93 100644
--- a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py
@@ -56,7 +56,9 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer):
 
         for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]:
-            for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version),
+            for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version,
+                                                                           self.profiling_type,
+                                                                           self.profiling_version),
                                                       f"_{mode}_op_api_map").items(), leave=False, ncols=100,
                                               desc="Scanning timeline for affinity apis"):
                 for npu_api in npu_apis.split("/"):
@@ -131,7 +133,8 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer):
             template_dir="templates",
             template_name="affinity_api.html",
             cann_version=self.cann_version,
-            torch_version=self.torch_version,
+            profiling_type=self.profiling_type,
+            profiling_version=self.profiling_version,
             empty_stacks=self.empty_stacks,
             with_stack_doc_url=Config().timeline_with_stack_doc_url,
             api_doc_url=Config().timeline_api_doc_url,
diff --git a/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py b/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py
index b59a8fc2e2a25428e34cb51917462f9f6162bc46..8c77e93b333322fd04cd239caa4d8c49a20b7cc6 100644
--- a/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py
@@ -36,6 +36,9 @@ class GcAnalyzer(BaseAnalyzer):
 
     @BaseAnalyzer.check_data((ScheduleAnalysisDataset.get_key(),))
     def optimize(self, **kwargs):
+        if "mindspore" in self.profiling_type:
+            logger.warning("The analyzer %s does not support MindSpore.", self.__class__.__name__)
+            return self.result
         gc_checker = GcChecker()
         gc_checker.check_gc(self.timeline_event_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage"))
         gc_checker.make_record(self.result)
diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py
index dbdfbde0b19e07e85bc6be2d01ffae01ae49cf47..b70bc7400ea83368b7324d565d19723c3060af42 100644
--- a/profiler/advisor/common/constant.py
+++ b/profiler/advisor/common/constant.py
@@ -64,6 +64,9 @@ SLOW_RANK_TIME_RATIO_THRESHOLD = 0.05
 
 CANN_VERSION = "cann_version"
 TORCH_VERSION = "torch_version"
+MINDSPORE_VERSION = "mindspore_version"
+PYTORCH = "pytorch"
+MINDSPORE = "mindspore"
 PROFILING_TYPE = "profiling_type"
 ANALYSIS_DIMENSIONS = "analysis_dimensions"
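
The per-analyzer guards above all follow the same early-return shape; a minimal stand-in class, assuming only the new "mindspore" constant and not the repo's analyzer base classes, looks like this:

import logging

logger = logging.getLogger(__name__)

MINDSPORE = "mindspore"  # mirrors the new constant.MINDSPORE

class ExampleAnalyzer:
    """Stand-in for an advisor analyzer; only the guard pattern is shown."""

    def __init__(self, profiling_type):
        self.profiling_type = profiling_type
        self.result = []

    def optimize(self, **kwargs):
        # Same early-return shape as OpDispatchAnalyzer.optimize / GcAnalyzer.optimize above.
        if MINDSPORE in self.profiling_type:
            logger.warning("The analyzer %s does not support MindSpore.", self.__class__.__name__)
            return self.result
        # ... real analysis would populate self.result here ...
        return self.result

ExampleAnalyzer("mindspore").optimize()
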
diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py
index ad8b5981c72b12c213146b205d1f1d86dd408589..d4545a698065d6fc95d23fe40ca45138ecfae73e 100644
--- a/profiler/advisor/common/timeline/fusion_ops_db.py
+++ b/profiler/advisor/common/timeline/fusion_ops_db.py
@@ -29,10 +29,12 @@ logger = logging.getLogger()
 logger.setLevel(get_log_level())
 
 
-def init_timeline_ops_db(cann_version=None, torch_version=None):
+def init_timeline_ops_db(cann_version=None, profiling_type=None, profiling_version=None):
     logger.debug("init operators database")
 
-    return FusionOperatorDB(cann_version=cann_version, torch_version=torch_version)
+    return FusionOperatorDB(cann_version=cann_version,
+                            profiling_type=profiling_type,
+                            profiling_version=profiling_version)
 
 
 def get_timeline_fusion_ops_yaml_path():
@@ -65,11 +67,12 @@ class FusionOperatorDB:
 
-    def __init__(self, file_path=None, cann_version=None, torch_version=None):
+    def __init__(self, file_path=None, cann_version=None, profiling_type=None, profiling_version=None):
         self.timeline_fusion_ops_yaml_path = os.path.normpath(get_timeline_fusion_ops_yaml_path())
 
         self.cann_version = cann_version or EnumParamsParser().get_default(constant.CANN_VERSION)
-        self.torch_version = torch_version or EnumParamsParser().get_default(constant.TORCH_VERSION)
+        self.profiling_type = profiling_type or EnumParamsParser().get_default(constant.PROFILING_TYPE)
+        self.profiling_version = profiling_version or EnumParamsParser().get_default(constant.PROFILING_VERSION)
 
         self._supported_version_dict = {}
 
@@ -124,11 +127,13 @@ class FusionOperatorDB:
         self.fusion_operator = self.get_fusion_operator_with_unique_id(unique_id)
         self.regenerate_op_api_map_and_op_names()
 
-    def regenerate_timeline_op_rule_with_version(self, cann_version=None, torch_version=None):
+    def regenerate_timeline_op_rule_with_version(self, cann_version=None, profiling_type=None, profiling_version=None):
         cann_version = cann_version or self.cann_version
-        torch_version = torch_version or self.torch_version
+        profiling_type = profiling_type or self.profiling_type
+        profiling_version = profiling_version or self.profiling_version
         unique_id = self._get_unique_id_in_supported_version_dict(cann_version=cann_version,
-                                                                  torch_version=torch_version)
+                                                                  profiling_type=profiling_type,
+                                                                  profiling_version=profiling_version)
         self.regenerate_timeline_op_rule_with_unique_id(unique_id)
 
     def regenerate_op_api_map_and_op_names(self):
@@ -152,10 +157,12 @@ class FusionOperatorDB:
             if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None:
                 continue
             cann_version_list = rule_dic.get("cann_version")
+            profiling_type_list = rule_dic.get("profiling_type")
+            mindspore_version_list = rule_dic.get("mindspore_version")
             torch_version_list = rule_dic.get("torch_version")
-            if not cann_version_list or not torch_version_list:
+            if not cann_version_list or not torch_version_list or not mindspore_version_list:
                 continue
-            supported_version = [cann_version_list, torch_version_list]
+            supported_version = [cann_version_list, profiling_type_list, torch_version_list, mindspore_version_list]
 
             unique_id = rule_dic.get("unique_id")
             if unique_id < 0:
@@ -179,39 +186,53 @@ class FusionOperatorDB:
         is_version_supported = self._is_version_supported_in_supported_version_dict()
         if not is_version_supported:
             # If the rule database does not support the current version, log a warning
-            logger.warning("Unsupported versions: cann-%s and torch-%s, supported version list of ['cann', 'torch'] "
-                           "is %s", self.cann_version, self.torch_version, self._supported_version_dict.values())
+            logger.warning("Unsupported versions: cann-%s and profiling_type-%s, "
+                           "profiling_version-%s, supported version list of ['cann', 'torch', 'mindspore'] "
+                           "is %s", self.cann_version, self.profiling_type,
+                           self.profiling_version, self._supported_version_dict.values())
         return is_version_supported
 
-    def _is_version_supported_in_supported_version_dict(self, cann_version=None, torch_version=None):
+    def _is_version_supported_in_supported_version_dict(self, cann_version=None, profiling_type=None, profiling_version=None):
         """Check whether the current version exists in the supported-version dict of the rule database"""
         for _, supported_version in self._supported_version_dict.items():
-            if self._is_version_supported_in_versions(supported_version, cann_version, torch_version):
+            if self._is_version_supported_in_versions(supported_version, cann_version, profiling_type, profiling_version):
                 return True
         return False
 
-    def _get_unique_id_in_supported_version_dict(self, cann_version=None, torch_version=None) -> int:
+    def _get_unique_id_in_supported_version_dict(self, cann_version=None, profiling_type=None, profiling_version=None) -> int:
         """Check whether the current version exists in the supported-version dict of the rule database; verify support before calling"""
         for key_unique_id, supported_version in self._supported_version_dict.items():
-            if self._is_version_supported_in_versions(supported_version, cann_version, torch_version):
+            if self._is_version_supported_in_versions(supported_version, cann_version, profiling_type, profiling_version):
                 return key_unique_id
         return constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID
 
-    def _is_version_supported_in_versions(self, supported_version, cann_version=None, torch_version=None):
+    def _is_version_supported_in_versions(self, supported_version, cann_version=None, profiling_type=None, profiling_version=None):
         """Check whether the current cann version and torch version exist in an entry of the rule database's supported-version list"""
         cann_version_list = supported_version[0]
         if not isinstance(cann_version_list, list):
             cann_version_list = [cann_version_list]
 
-        torch_version_list = supported_version[1]
+        profiling_type_list = supported_version[1]
+        if not isinstance(profiling_type_list, list):
+            profiling_type_list = [profiling_type_list]
+
+        torch_version_list = supported_version[2]
         if not isinstance(torch_version_list, list):
             torch_version_list = [torch_version_list]
 
+        mindspore_version_list = supported_version[3]
+        if not isinstance(mindspore_version_list, list):
+            mindspore_version_list = [mindspore_version_list]
+
         cann_version = cann_version or self.cann_version
-        torch_version = torch_version or self.torch_version
+        profiling_type = profiling_type or self.profiling_type
+        profiling_version = profiling_version or self.profiling_version
 
-        if (cann_version in cann_version_list) and (torch_version in torch_version_list):
-            return True
+        if (cann_version in cann_version_list) and (profiling_type in profiling_type_list):
+            if ("pytorch" in profiling_type) and (profiling_version in torch_version_list):
+                return True
+            if ("mindspore" in profiling_type) and (profiling_version in mindspore_version_list):
+                return True
         return False
 
     def _parse_db(self):
diff --git a/profiler/advisor/config/enum_parameters.yaml b/profiler/advisor/config/enum_parameters.yaml
index b1a0548d480b8722609df40ae8e9331b5d3d34bd..678fe72b43c7f5b2fd66b3f38c3114cc9793cd50 100644
--- a/profiler/advisor/config/enum_parameters.yaml
+++ b/profiler/advisor/config/enum_parameters.yaml
@@ -6,7 +6,9 @@ arguments:
       - 7.0.RC1
       - 7.0.0
       - 8.0.RC1
-    default: 8.0.RC1
+      - 8.0.RC2
+      - 8.0.0
+    default: 8.0.0
 
   torch_version:
     type: str
@@ -14,7 +16,12 @@ arguments:
       - 1.11.0
       - 2.1.0
     default: 2.1.0
-
+
+  mindspore_version:
+    type: str
+    options:
+      - 2.3.0
+      - 2.4.0
+    default: 2.4.0
   analysis_dimensions:
     type: list
     options:
@@ -28,10 +35,11 @@ arguments:
   profiling_type:
     type: str
     options:
-      - ascend_pytorch_profiler
+      - pytorch
       - mslite
       - msprof
-    default: ascend_pytorch_profiler
+      - mindspore
+    default: pytorch
 
 envs:
   ADVISOR_ANALYZE_PROCESSES:
diff --git a/profiler/advisor/config/profiling_data_version_config.yaml b/profiler/advisor/config/profiling_data_version_config.yaml
index ed064e287583b699379b5707a21be301164c2b74..1c82a8a6bcfbd7ae3d09cab4ca1852b875b2e64c 100644
--- a/profiler/advisor/config/profiling_data_version_config.yaml
+++ b/profiler/advisor/config/profiling_data_version_config.yaml
@@ -1,4 +1,28 @@
 versions:
+  - version: 8.0.0
+    dirs_pattern:
+      ASCEND_PROFILER_OUTPUT: [ op_summary, msprof ]
+      ^PROF_\d{6}_\d{17}_\w+$:
+        mindstudio_profiler_output: [ op_summary, msprof ]
+    class_attr:
+      op_summary: OpSummary
+      msprof: Msprof
+    file_attr:
+      msprof: [trace_view.json, '^msprof_\d{14}\.json$']
+      op_summary: [ kernel_details.csv, '^op_summary_\d{14}\.csv$' ]
+
+  - version: 8.0.RC2
+    dirs_pattern:
+      ASCEND_PROFILER_OUTPUT: [ op_summary, msprof ]
+      ^PROF_\d{6}_\d{17}_\w+$:
+        mindstudio_profiler_output: [ op_summary, msprof ]
+    class_attr:
+      op_summary: OpSummary
+      msprof: Msprof
+    file_attr:
+      msprof: [trace_view.json, '^msprof_\d{14}\.json$']
+      op_summary: [ kernel_details.csv, '^op_summary_\d{14}\.csv$' ]
+
   - version: 8.0.RC1
     dirs_pattern:
       ASCEND_PROFILER_OUTPUT: [ op_summary, msprof ]
diff --git a/profiler/advisor/dataset/communication/communication_dataset.py b/profiler/advisor/dataset/communication/communication_dataset.py
index 01a72ef93044ad8f0afb5af4ee864a99c7865060..4844e9fef2d515eaf629949239159c0e8ef17732 100644
--- a/profiler/advisor/dataset/communication/communication_dataset.py
+++ b/profiler/advisor/dataset/communication/communication_dataset.py
@@ -30,7 +30,7 @@ class CommunicationDataset:
 
     def __init__(self, collection_path, data: dict, **kwargs) -> None:
         self.timeline_dir = collection_path
-        if not self.timeline_dir.endswith("ascend_pt"):
+        if not self.timeline_dir.endswith("ascend_pt") and not self.timeline_dir.endswith("ascend_ms"):
             return
         self.timeline_data_list = self.get_file_path_from_directory(
             self.timeline_dir,
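
The reworked rule matching in FusionOperatorDB keys the version check on the profiling type; a self-contained sketch of that lookup, with a hypothetical rule entry, is:

def is_rule_supported(rule, cann_version, profiling_type, profiling_version):
    # A rule entry carries [cann_list, profiling_type_list, torch_list, mindspore_list];
    # scalars are normalized to one-element lists, as in _is_version_supported_in_versions above.
    cann_list, type_list, torch_list, ms_list = (
        v if isinstance(v, list) else [v] for v in rule
    )
    if cann_version not in cann_list or profiling_type not in type_list:
        return False
    if "pytorch" in profiling_type:
        return profiling_version in torch_list
    if "mindspore" in profiling_type:
        return profiling_version in ms_list
    return False

# Hypothetical rule supporting cann 8.0.RC2/8.0.0 with torch 2.1.0 or mindspore 2.3.0/2.4.0.
rule = [["8.0.RC2", "8.0.0"], ["pytorch", "mindspore"], ["2.1.0"], ["2.3.0", "2.4.0"]]
assert is_rule_supported(rule, "8.0.0", "mindspore", "2.4.0")
assert not is_rule_supported(rule, "8.0.0", "pytorch", "1.11.0")
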
diff --git a/profiler/advisor/display/html/templates/affinity_api.html b/profiler/advisor/display/html/templates/affinity_api.html
index 7cd3d7ad33d0220c7aba055721eddf049161a0d8..b227afae9624cadb5ce753c80f1e8719040eacb4 100644
--- a/profiler/advisor/display/html/templates/affinity_api.html
+++ b/profiler/advisor/display/html/templates/affinity_api.html
@@ -8,14 +8,14 @@
             The analysis results of following affinity APIs are based on runtime env
             cann-{{ cann_version }} and
-            torch-{{ torch_version }}
+            {{ profiling_type }}-{{ profiling_version }}
 
         {% if empty_stacks %}
             Suggestion:
             These APIs have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to
-            Ascend PyTorch Profiler to set
+            Ascend Profiler to set
             'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack.
         {% endif %}
diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py
index 456ecef07ae6dd7a3f08fe252d87190567cee6f2..80c68ecb2b0df342a495a86b9855bca8891b8a46 100644
--- a/profiler/cli/analyze_cli.py
+++ b/profiler/cli/analyze_cli.py
@@ -37,17 +37,12 @@ def analyze_cli(**kwargs):
               default=EnumParamsParser().get_default(constant.CANN_VERSION),
               help='The CANN software version, which can be viewed by executing the following command: '
                    '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"')
-@click.option('--torch_version', '-tv', 'torch_version',
-              type=click.Choice(EnumParamsParser().get_options(constant.TORCH_VERSION), case_sensitive=False),
-              default=EnumParamsParser().get_default(constant.TORCH_VERSION),
-              help='The runtime torch version, which can be detected by exec command "pip show torch"')
 @click.option("-pt",
               "--profiling_type",
               metavar="",
-              default=EnumParamsParser().get_default(constant.PROFILING_TYPE),
               required=False,
               type=click.Choice(EnumParamsParser().get_options(constant.PROFILING_TYPE)),
-              help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof")
+              help="enter the profiling type, selectable range pytorch, mindspore, mslite, msprof")
 @click.option("--force", is_flag=True,
               help="Indicates whether to skip file size verification and owner verification")
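
With the torch_version option removed, the profiling type is the remaining framework switch on the CLI; a toy click command (assumed command and option names, requires the click package, not the repo's entry point) shows how the choice surfaces:

import click

@click.command()
@click.option("-pt", "--profiling_type", required=False,
              type=click.Choice(["pytorch", "mindspore", "mslite", "msprof"]),
              help="enter the profiling type, selectable range pytorch, mindspore, mslite, msprof")
def analyze(profiling_type):
    # Echo the selected framework; the real CLI forwards it to the analyzer controller.
    click.echo(f"profiling_type={profiling_type}")

if __name__ == "__main__":
    analyze()
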
diff --git a/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py b/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py
index b0a3be4f5eaccea70aa912bc85e68d70dbda3bde..3cf4bc4fa5b1a8c8ccf93c010fe5fe8e9843e513 100644
--- a/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py
+++ b/profiler/cluster_analyse/prof_bean/step_trace_time_bean.py
@@ -14,6 +14,10 @@
 # limitations under the License.
 
+import logging
+
+logger = logging.getLogger()
+
 
 class StepTraceTimeBean:
     STEP = "Step"
     COMPLEMENT_HEADER = ["Step", "Type", "Index"]
@@ -27,7 +31,11 @@ class StepTraceTimeBean:
         for field_name in self._data.keys():
             if field_name == self.STEP:
                 continue
-            row.append(float(self._data.get(field_name, )))
+            try:
+                row.append(float(self._data.get(field_name, )))
+            except Exception as e:
+                logger.warning(e)
+                row.append(0)
         return row
 
     @property
diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py
index 6293f0cea52b1a024f2de477839732028d539bf6..49c196dcbcc1de4c2de66c4bdd1a26d6f7b07973 100644
--- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py
+++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py
@@ -84,7 +84,7 @@ class KernelDetailsBean:
 
     @property
     def step_id(self) -> int:
-        return int(self._step_id) if self._step_id else Constant.VOID_STEP
+        return int(self._step_id) if self._step_id.isnumeric() else Constant.VOID_STEP
 
     def is_hide_op_pmu(self):
         if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys():
diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
index 515085ec8f06e50c6c2bfc3489022f3a43c41cf3..cd2aa0982b7a84b79bc66ed6c5b39438071bb3c3 100644
--- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
+++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
@@ -102,10 +102,6 @@ class NPUProfilingParser(BaseProfilingParser):
             return
         kernels_dict = {}
         for kernel in kernel_details:
-            if kernel.is_invalid():
-                continue
-            if self._step_id != Constant.VOID_STEP and kernel.step_id != self._step_id:
-                continue
             input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A'
             kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append(
                 [kernel.name, kernel.duration])
diff --git a/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py b/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py
index e243f4656813517c88fafbe9e3044e2537c9d0f7..ce6eeb3a39c6944037abeeb835bdad53f8e7efbd 100644
--- a/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py
+++ b/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py
@@ -50,6 +50,7 @@ class TestAdvisorCmdSingleAscendPtNoCompare(TestCase):
     def test_all_problems(self):
         category = [
             "overall summary",
+            "byte alignment analysis",
             "bandwidth contention analysis",
             "AICPU operator",
             "Dynamic shape operator",
@@ -58,13 +59,13 @@ class TestAdvisorCmdSingleAscendPtNoCompare(TestCase):
         ]
 
         #True presents the attr is nan
-        description_len = [6,3,2,1,1,1]
-        suggestion_len = [True,1,2,5,1,1]
-        problem_count = [True,True,2.0,1.0,True,True]
-        total_time = [True,True,57674709.54,True,True,True]
-        time_ratio = [True,True,0.0,True,True,True]
-        income = [True,True,True,True,True,True]
-        income_ratio = [True,True,True,True,True,True]
+        description_len = [6,1,3,2,1,1,1]
+        suggestion_len = [True,1,1,2,5,1,1]
+        problem_count = [True,True,True,2.0,1.0,True,True]
+        total_time = [True,True,True,57674709.54,True,True,True]
+        time_ratio = [True,True,True,0.0,True,True,True]
+        income = [True,True,True,True,True,True,True]
+        income_ratio = [True,True,True,True,True,True,True]
         try:
             df = pd.read_excel(self.RESULT_EXCEL.get("all",None), sheet_name='problems',header=0)
         except FileNotFoundError:
diff --git a/profiler/test/ut/advisor/common/test_enum_params_parser.py b/profiler/test/ut/advisor/common/test_enum_params_parser.py
index 8e5ddb680444c944898201bb58f7b71a520b924b..c27a632f64c844eaa4fa079214d87a2caae022ca 100644
--- a/profiler/test/ut/advisor/common/test_enum_params_parser.py
+++ b/profiler/test/ut/advisor/common/test_enum_params_parser.py
@@ -17,7 +17,7 @@ class TestEnumParamsParser(unittest.TestCase):
 
     def setUp(self) -> None:
         self.enum_params_parser = EnumParamsParser()
-        self.argument_keys = sorted(["cann_version", "torch_version", "analysis_dimensions", "profiling_type"])
+        self.argument_keys = sorted(["cann_version", "torch_version", "analysis_dimensions", "profiling_type", "mindspore_version"])
        self.env_keys = ["ADVISOR_ANALYZE_PROCESSES", "DISABLE_PROFILING_COMPARISON", "DISABLE_AFFINITY_API"]
 
    def test_get_keys(self):
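
Both bean changes above aim at the same defensive parsing of CSV fields; a standalone sketch (the VOID_STEP sentinel value is assumed here, the real one lives in Constant.VOID_STEP) is:

VOID_STEP = -1  # assumed sentinel for "no step id"

def parse_step_id(raw):
    # Non-numeric step ids (e.g. "N/A" or empty) fall back to the sentinel instead of raising.
    return int(raw) if raw.isnumeric() else VOID_STEP

def parse_duration(raw):
    # Non-numeric durations fall back to 0 instead of aborting the whole row, as in StepTraceTimeBean.
    try:
        return float(raw)
    except (TypeError, ValueError):
        return 0.0

assert parse_step_id("3") == 3
assert parse_step_id("N/A") == VOID_STEP
assert parse_duration("12.5") == 12.5
assert parse_duration("") == 0.0
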