diff --git a/.gitignore b/.gitignore index a81c8ee121952cf06bfaf9ff9988edd8cded763c..7e605b88a3833292ae0ddae977d6a6513381c004 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +.vscode # PyInstaller # Usually these files are written by a python script from a template @@ -136,3 +137,9 @@ dmypy.json # Cython debug symbols cython_debug/ + +# advisor analysis output +att_advisor*.html +*.xlsx +operator_tuning_file*.cfg +.ipynb_checkpoints/ \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..6068a6df083d6490a8adb4a5a333705b35eff77e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +recursive-include profiler/ * +recursive-include profiler/advisor/display/html/templates * +global-exclude */__pycache__/* +global-exclude *.pyc diff --git a/profiler/advisor/__init__.py b/profiler/advisor/__init__.py index 0428ee03f05fac6a068642ccd7c36d56d219ea81..cea5ae3406c7f8268a2262fc49da50c4dfc31f3e 100644 --- a/profiler/advisor/__init__.py +++ b/profiler/advisor/__init__.py @@ -1,15 +1 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- +from profiler.advisor.interface.interface import Interface \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/__init__.py b/profiler/advisor/advisor_backend/__init__.py deleted file mode 100644 index a0e9f748f4b10347a874f60cec1fa9f6e5285a5e..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/advice_base.py b/profiler/advisor/advisor_backend/advice_base.py deleted file mode 100644 index 35939bcea9c87fb09f2113bd19f77ea18ba54e34..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_base.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from abc import abstractmethod - - -class AdviceBase: - DATA = "data" - BOTTLENECK = "bottleneck" - ADVICE = "advice" - - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - self.bottelneck = '' - self.output_format_data = { - self.DATA: [], - self.BOTTLENECK: '', - self.ADVICE: '' - } - - @abstractmethod - def path_check(self): - """ - check whether input path is valid - """ - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/advice_factory/__init__.py b/profiler/advisor/advisor_backend/advice_factory/__init__.py deleted file mode 100644 index a0e9f748f4b10347a874f60cec1fa9f6e5285a5e..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_factory/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
\ No newline at end of file diff --git a/profiler/advisor/advisor_backend/advice_factory/advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/advice_factory.py deleted file mode 100644 index 639f4800cfe8c9acdc8fe7ea5f65a43fc8892b2b..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_factory/advice_factory.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from common_func.path_manager import PathManager - - -class AdviceFactory: - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - - @staticmethod - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - - def produce_advice(self, advice: str, kwargs: dict): - """ - produce data for input mode and advice - """ - self.path_check() - self.advice_check(advice) - return self.run_advice(advice, kwargs) - - def path_check(self): - """ - check whether input path is valid - """ - PathManager.input_path_common_check(self.collection_path) - - def advice_check(self, advice: str): - """ - check whether input advice is valid - """ - if advice not in self.ADVICE_LIB.keys(): - msg = '[ERROR]Input advice is illegal.' 
- raise RuntimeError(msg) diff --git a/profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py deleted file mode 100644 index 6bb93f46704eb13fef14d070f891e350446829ea..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from advice_factory.advice_factory import AdviceFactory -from cluster_advice.slow_link_advice import SlowLinkAdvice -from cluster_advice.slow_rank_advice import SlowRankAdvice -from cluster_advice.cluster_pipeline_advice import ClusterPipelineAdvice -from cluster_advice.kernel_cluster_advice import KernelClusterAdvice -from common_func_advisor.constant import Constant - - -class ClusterAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.SLOW_RANK: SlowRankAdvice, - Constant.SLOW_LINK: SlowLinkAdvice, - Constant.PIPELINE: ClusterPipelineAdvice, - Constant.KERNEL: KernelClusterAdvice - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path, kwargs).run() diff --git a/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py deleted file mode 100644 index 336bef7dd8553eb82586d52260443a7d01e84ab0..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from common_func_advisor.constant import Constant -from advice_factory.advice_factory import AdviceFactory -from compute_advice.npu_fused_advice import NpuFusedAdvice -from compute_advice.npu_slow_advice import NpuSlowAdvice - - -class ComputeAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.NPU_FUSED: NpuFusedAdvice, - Constant.NPU_SLOW: NpuSlowAdvice, - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path).run() diff --git a/profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py deleted file mode 100644 index baf80cc200f4c3cd1057b7fc28e750948a450cf1..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from advice_factory.advice_factory import AdviceFactory -from common_func_advisor.constant import Constant -from overall_advice.overall_summary_advice import OverallSummaryAdvice - - -class OverallAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.SUMMARY: OverallSummaryAdvice - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path, kwargs).run() diff --git a/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py deleted file mode 100644 index 44b352e95a7bb1007bc7373193603c2a0b9d8b6c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from advice_factory.advice_factory import AdviceFactory -from common_func_advisor.constant import Constant -from timeline_advice.optimizer_advice import OptimizerAdvice -from timeline_advice.op_schedule_advice import OpScheduleAdvice - - -class TimelineAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.OPTIM: OptimizerAdvice, - Constant.OP_SCHE: OpScheduleAdvice, - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path).run() diff --git a/profiler/advisor/advisor_backend/cluster_advice/__init__.py b/profiler/advisor/advisor_backend/cluster_advice/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py b/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py deleted file mode 100644 index e9be4675963a9cd48da3b4cd91ee646f8e82468b..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from abc import abstractmethod -from common_func.constant import Constant -from advice_base import AdviceBase -from cluster_analysis import Interface - - -class ClusterAdviceBase(AdviceBase): - def __init__(self, collection_path: str): - super().__init__(collection_path) - - @staticmethod - def compute_max_gap_ratio(data: list, mean: float): - if mean == 0: - return 0 - else: - return (max(data) - min(data)) / mean - - def path_check(self): - """ - check whether input path is valid - """ - for file in os.listdir(self.collection_path): - if file == 'cluster_analysis_output': - print("[INFO]Cluster has been analyzed " - "because of the existence of cluster analysis output directory.") - print("[INFO]Skip Cluster analyze backend.") - return - print("[INFO] cluster analysis is in the process, please wait...") - self.cluster_analyze() - - def cluster_analyze(self): - parameter = { - Constant.COLLECTION_PATH: self.collection_path, - Constant.ANALYSIS_MODE: "all" - } - try: - Interface(parameter).run() - except Exception as e: - raise ValueError(f"Cluster analyze backend failed:{e}") from e - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py b/profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py deleted file mode 100644 index 7f8846f1d99e9bc81636df32d04148df99d12920..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py +++ /dev/null @@ -1,437 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import multiprocessing -from typing import Dict -from typing import Optional -from typing import Deque -from typing import List -from typing import Tuple -from collections import defaultdict -from collections import deque -from decimal import Decimal -from dataclasses import dataclass - -from common_func.file_manager import FileManager -from common_func_advisor.constant import Constant -from common_func_advisor.trace_view_preprocessor import FineTraceViewData -from common_func_advisor.trace_view_preprocessor import TraceViewPreProcessor -from cluster_advice.cluster_advice_base import ClusterAdviceBase -from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor - - -@dataclass -class PipelineTimeSlice: - start: str = "" - end: str = "" - slice_type: str = "" - bp_timeslice: list = None - - def __post_init__(self): - self.bp_timeslice = self.bp_timeslice or [] - - -class PipelineTraceViewer: - STAGE_COLOR = "good" - BUBBLE_COLOR = "generic_work" - FP_COLOR = "good" - BP_COLOR = "bad" - PIPLINE_VIEW = "Pipeline View" - STAGE = "Stage" - BUBBLE = "Bubble" - FP = "FP" - BP = "BP" - - COLORS = { - STAGE: STAGE_COLOR, - BUBBLE: BUBBLE_COLOR, - FP: FP_COLOR, - BP: BP_COLOR - } - - def _gen_trace_pair(self, name: str, start_ts: str, end_ts: str, pid: str, tid: str) -> Dict: - data = { - Constant.OP_NAME: name, - Constant.CNAME: self.COLORS.get(name, self.BUBBLE), - Constant.PH: Constant.PH_X, - Constant.PID: pid, - Constant.OP_TID: tid, - Constant.TS: start_ts, - Constant.DUR: str(Decimal(end_ts) - 
Decimal(start_ts)) - } - - return data - - def gen_stage_bubble_trace_data(self, rank_id: int, timeslice_list: List[PipelineTimeSlice]) -> List[Dict]: - """ - generate stage bubble trace json data - """ - rank_str = f'Rank {rank_id}' - trace_data = [] - - for timeslice in timeslice_list: - data = self._gen_trace_pair(timeslice.slice_type, timeslice.start, - timeslice.end, self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - - return trace_data - - def gen_fp_bp_trace_data(self, rank_id: int, timeslice_list: List[PipelineTimeSlice]) -> List[Dict]: - """ - generate fp bp trace json data - """ - rank_str = f'Rank {rank_id}' - trace_data = [] - - for timeslice in timeslice_list: - if timeslice.slice_type == self.BUBBLE: - data = self._gen_trace_pair(timeslice.slice_type, timeslice.start, - timeslice.end, self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - else: - last_end = timeslice.start - for bp_bound in timeslice.bp_timeslice: - data = self._gen_trace_pair(self.FP, last_end, - bp_bound[0], self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - last_end = bp_bound[1] - - data = self._gen_trace_pair(self.BP, bp_bound[0], - bp_bound[1], self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - - last_data = self._gen_trace_pair(self.FP, last_end, - timeslice.end, self.PIPLINE_VIEW, rank_str) - trace_data.append(last_data) - - return trace_data - - -class ClusterPipelineAdvice(ClusterAdviceBase): - BUBBLE = "Bubble" - STAGE = "Stage" - PIPELINE_VIEW = "Pipeline View" - SAVE_JSON = "pipeline_view.json" - - def __init__(self, collection_path: str, kwargs: dict): - super().__init__(collection_path) - self.rank_ids = list(set(kwargs.get("rank_ids", []))) - self.worker_num = kwargs.get("worker_num", int(multiprocessing.cpu_count() / 2)) - self.rank_prof_dirs = {} - self.cur_data = [] - self.cur_bottleneck = {} - self.cur_advices = "" - - def run(self) -> dict: - """ - Unified entrance interface - """ - self.rank_prof_dirs = self.get_rank_prof_dirs(self.rank_ids) - 
if not self.rank_prof_dirs: - print("[ERROR] No rank profiling data found, please check the rank ids or dir path.") - return {} - - self.process() - self.output() - self.identify_bottleneck() - return self.output_format_data - - def process(self) -> None: - """ - process all rank profiling data by using multi-process - """ - start_time = time.time() - print(f"[INFO] Start to process {len(self.rank_prof_dirs)} rank profiling data with {self.worker_num} workers.") - with multiprocessing.Pool(self.worker_num) as pool: - results = pool.map(self.work, self.rank_prof_dirs.items()) - - for (rank_id, _), (res, show_fp_bp) in zip(self.rank_prof_dirs.items(), results): - if show_fp_bp: - self.cur_data += PipelineTraceViewer().gen_fp_bp_trace_data(rank_id, res) - else: - self.cur_data += PipelineTraceViewer().gen_stage_bubble_trace_data(rank_id, res) - print(f"[INFO] Pipline view data process finished, cost {time.time() - start_time:.2f}s.") - - @staticmethod - def _align_trace_bound(results: List) -> None: - """ - align all rank trace bound for better visualization - """ - start_list, end_list = [], [] - for res in results: - start_list.append(res[0].start) - end_list.append(res[-1].end) - - # update all rank trace bound - for res in results: - res[0].start = min(start_list) - res[-1].end = max(end_list) - - def work(self, kv: Tuple[int, str]) -> Tuple[List[PipelineTimeSlice], bool]: - """ - single process worker function - """ - show_fp_bp = False - rank_id, rank_prof_dir = kv - print(f"[INFO] [Rank {rank_id}] Start to process rank profiling data.") - json_path = os.path.join(rank_prof_dir, Constant.ASCEND_PROFILER_OUTPUT, Constant.TRACE_VIEW_JSON) - fine_data = self.load_trace_view_data(json_path) - if not fine_data.hcom_ops or not fine_data.hcom_tids: - print(f"[ERROR] [Rank {rank_id}] No hcom send recv ops found, make sure the trace view data is pipeline " - f"parallel sense.") - return [], show_fp_bp - - timeslice_list = self.get_pipeline_timeslice(fine_data.hcom_ops, 
fine_data.hcom_tids, fine_data.min_ts, - fine_data.max_ts) - if not fine_data.fp_ops or not fine_data.bp_ops: - print(f"[INFO] [Rank {rank_id}] No frameWork data in trace view, only show stage and bubble.") - elif len(fine_data.hcom_tids) > 1: - print(f"[WARN] [Rank {rank_id}] More than one hcom tid found, only show stage and bubble.") - else: - print(f"[INFO] [Rank {rank_id}] Found frameWork data in trace view, show fp bp and bubble.") - bp_ops = self.get_fp_bp_bound_ops(fine_data) - self.update_stage_fp_bp(timeslice_list, bp_ops) - show_fp_bp = True - print(f"[INFO] [Rank {rank_id}] Rank profiling data process finished.") - - return timeslice_list, show_fp_bp - - def identify_bottleneck(self) -> None: - pass - - def output(self) -> None: - """ - output result - """ - self.cur_data.append( - { - Constant.OP_NAME: Constant.PROCESS_NAME, - Constant.PH: Constant.PH_META, - Constant.PID: self.PIPELINE_VIEW, - Constant.OP_TID: self.PIPELINE_VIEW, - Constant.ARGS: { - Constant.OP_NAME: self.PIPELINE_VIEW - } - } - ) - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - self.output_format_data[self.ADVICE] = self.cur_advices - - def get_rank_prof_dirs(self, rank_ids: list) -> Dict[int, str]: - """ - get rank profiling directories by rank ids - """ - rank_prof_dirs = defaultdict(str) - prof_dirs = [] - for prof_dir in os.listdir(self.collection_path): - if prof_dir.endswith(Constant.PT_PROF_SUFFIX): - prof_dirs.append(os.path.join(self.collection_path, prof_dir)) - - data_map = PytorchDataPreprocessor(prof_dirs).get_data_map() - for rank_id in rank_ids: - if rank_id in data_map: - rank_prof_dirs[rank_id] = data_map[rank_id] - else: - print(f'[Warning] Rank {rank_id} not found in {self.collection_path}') - - return rank_prof_dirs - - @staticmethod - def load_trace_view_data(json_path) -> Optional[FineTraceViewData]: - """ - load trace view data from json file and preprocess - """ - raw_data = 
FileManager.read_json_file(json_path) - return TraceViewPreProcessor().process(raw_data) - - @staticmethod - def double_queue_pop(fp_que: Deque[dict], bp_que: Deque[dict]) -> Tuple[list, list]: - """ - double queue (fp and bp que) pop alternating algorithm implementation - """ - res_fp_ops, res_bp_ops = [], [] - pop_fp = fp_que[0][Constant.TS] < bp_que[0][Constant.TS] - fp_start_op, fp_end_op = fp_que[0], fp_que[0] - bp_start_op, bp_end_op = bp_que[0], bp_que[0] - - def update_bound_op(que: Deque[dict], start_op: dict, end_op: dict) -> Tuple[dict, dict]: - """ - update fp and bp bound op - """ - op = que.popleft() - op_s = Decimal(op[Constant.TS]) - op_e = op_s + Decimal(op[Constant.DUR]) - - start_op = op if op_s < Decimal(start_op[Constant.TS]) else start_op - end_op = op if op_e > Decimal(end_op[Constant.TS]) + Decimal(end_op[Constant.DUR]) else end_op - - return start_op, end_op - - while fp_que and bp_que: - if pop_fp: - if len(fp_que) > 1 and bp_que and fp_que[1][Constant.TS] > bp_que[0][Constant.TS]: - pop_fp = False # pop bp que - if len(fp_que) == 1: - pop_fp = False # pop bp que - - fp_start_op, fp_end_op = update_bound_op(fp_que, fp_start_op, fp_end_op) - - # time to pop bp que, need to record fp ops and update bp start op - if not pop_fp: - res_fp_ops.append((fp_start_op, fp_end_op)) - if fp_que: - bp_start_op, bp_end_op = bp_que[0], bp_que[0] - else: - if len(bp_que) > 1 and fp_que and bp_que[1][Constant.TS] > fp_que[0][Constant.TS]: - pop_fp = True # pop fp que - if len(bp_que) == 1: - pop_fp = True # pop fp que - - bp_start_op, bp_end_op = update_bound_op(bp_que, bp_start_op, bp_end_op) - - # time to pop fp que, need to record bp ops and update fp start op - if pop_fp: - res_bp_ops.append((bp_start_op, bp_end_op)) - if bp_que: - fp_start_op, fp_end_op = fp_que[0], fp_que[0] - - if fp_que: - fp_start_op, fp_end_op = fp_que[0], fp_que[0] - while fp_que: - fp_start_op, fp_end_op = update_bound_op(fp_que, fp_start_op, fp_end_op) - 
res_fp_ops.append((fp_start_op, fp_end_op)) - - if bp_que: - bp_start_op, bp_end_op = bp_que[0], bp_que[0] - while bp_que: - bp_start_op, bp_end_op = update_bound_op(bp_que, bp_start_op, bp_end_op) - res_bp_ops.append((bp_start_op, bp_end_op)) - - return res_fp_ops, res_bp_ops - - @staticmethod - def update_ops_time(ops_list: List[List[dict]], torch_to_npu_links: List[dict], - npu_ops_ts_dur: dict) -> List[List[dict]]: - """ - update fp and bp bound ops time at device by using torch_to_npu_links - """ - ops_que = deque(ops_list) - torch_to_npu_que = deque(torch_to_npu_links) - res = [] - link_stack = [] - while ops_que and torch_to_npu_que: - link = torch_to_npu_que.popleft() - link_s = Decimal(link[Constant.TS]) - - # bound op at framework level - cpu_op_l, cpu_op_r = ops_que[0][0], ops_que[0][1] - cpu_op_s = Decimal(cpu_op_l[Constant.TS]) - cpu_op_e = Decimal(cpu_op_r[Constant.TS]) + Decimal(cpu_op_r[Constant.DUR]) - - if cpu_op_s < link_s < cpu_op_e: - link_stack.append(link) - if link_s > cpu_op_e or \ - (link_stack and not torch_to_npu_que): - min_link = link_stack[0] - max_link = link_stack[-1] - - min_link_s = str(min_link[Constant.ID]) - max_link_s = str(max_link[Constant.ID]) - # for compatibility with old data (ts is float type) - if isinstance(min_link[Constant.ID], float): - cpu_op_l["npu_op_ts"] = min_link_s - cpu_op_r["npu_op_ts"] = max_link_s - else: - cpu_op_l["npu_op_ts"] = f"{min_link_s[:-3]}.{min_link_s[-3:]}" - cpu_op_r["npu_op_ts"] = f"{max_link_s[:-3]}.{max_link_s[-3:]}" - cpu_op_l["npu_op_dur"] = npu_ops_ts_dur.get(cpu_op_l["npu_op_ts"], 0) - cpu_op_r["npu_op_dur"] = npu_ops_ts_dur.get(cpu_op_r["npu_op_ts"], 0) - - res.append([cpu_op_l, cpu_op_r]) - ops_que.popleft() - link_stack.clear() - - return res - - def get_fp_bp_bound_ops(self, fine_data: FineTraceViewData) -> List[List[dict]]: - """ - get fp and bp bound ops by using double queue alternating pop algorithm and - update fp and bp bound ops time at device by using torch_to_npu_links - 
""" - fp_que = deque(fine_data.fp_ops) - bp_que = deque(fine_data.bp_ops) - - # get fp and bp bound ops - _, res_bp_ops = self.double_queue_pop(fp_que, bp_que) - - # according to torch_to_npu_links, split fp and bp timeslice - bp_ops = self.update_ops_time(res_bp_ops, fine_data.torch_to_npu_links, fine_data.npu_ops_ts_dur) - return bp_ops - - def get_pipeline_timeslice(self, hcom_ops: list, hcom_tids: list, - min_ts: str, max_ts: str) -> List[PipelineTimeSlice]: - """ - get pipeline timeslice by using hcom ops - """ - timeslice_list = [] - last_op_end = None - if len(hcom_tids) > 1: - print("[WARN] More than one hcom tid found, default to show minimal tid pipeline view.") - - for op in hcom_ops: - if op[Constant.OP_TID] == min(hcom_tids): - # gap between two hcom ops - if last_op_end: - timeslice_list.append(PipelineTimeSlice(str(last_op_end), op[Constant.TS], self.STAGE)) - # hcom op - last_op_end = Decimal(op[Constant.TS]) + Decimal(op[Constant.DUR]) - timeslice_list.append(PipelineTimeSlice(op[Constant.TS], str(last_op_end), self.BUBBLE)) - - # add start STAGE and end STAGE - timeslice_list.insert(0, PipelineTimeSlice(min_ts, timeslice_list[0].start, self.STAGE)) - timeslice_list.insert(len(timeslice_list), PipelineTimeSlice(timeslice_list[-1].end, max_ts, self.STAGE)) - return timeslice_list - - def update_stage_fp_bp(self, timeslice_list: List[PipelineTimeSlice], - bp_ops: List[List[dict]]) -> None: - """ - update stage fp and bp time - """ - pipeline_que = deque(timeslice_list) - bp_bound_que = deque(bp_ops) - - while pipeline_que and bp_bound_que: - while pipeline_que[0].slice_type != self.STAGE: - pipeline_que.popleft() - if not pipeline_que: - return None - - bp_bound_data = bp_bound_que[0] - bp_bound_s = Decimal(bp_bound_data[0]['npu_op_ts']) - bp_bound_e = Decimal(bp_bound_data[1]['npu_op_ts']) + Decimal(bp_bound_data[1]['npu_op_dur']) - - pipeline_s = Decimal(pipeline_que[0].start) - pipeline_e = Decimal(pipeline_que[0].end) - - if pipeline_s <= 
bp_bound_s and bp_bound_e <= pipeline_e: - pipeline_que[0].bp_timeslice.append((str(bp_bound_s), str(bp_bound_e))) - bp_bound_que.popleft() - elif bp_bound_s > pipeline_e: - pipeline_que.popleft() - else: - bp_bound_que.popleft() diff --git a/profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py b/profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py deleted file mode 100644 index 6fa83c765f5fe1f4ac20dcc62895fe0450e338ce..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import pandas as pd -from common_func.path_manager import PathManager -from common_func.constant import Constant -from common_func_advisor.constant import Constant as AdvisorConstant -from cluster_advice.cluster_advice_base import ClusterAdviceBase -from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor - - -class KernelClusterAdvice(ClusterAdviceBase): - COLUMNS_TO_GROUP = ["Name", "Input Shapes", "Input Data Types", "Output Shapes"] - COLUMNS_TO_CAL = ["Duration(us)"] - CAL_FUN = ['mean', 'var', 'max', 'min', 'count', 'sum'] - - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) - self.all_kernel_data = pd.DataFrame() - - def run(self): - self.load_kernel_details_data() - return self.calculate_data() - - def load_kernel_details_data(self): - prof_dirs = self.get_prof_dirs(self.collection_path) - if not prof_dirs: - msg = "[ERROR] There is no profile in this collection path, terminate analysis." 
- raise RuntimeError(msg) - - data_map = PytorchDataPreprocessor(prof_dirs).get_data_map() - self.all_kernel_data = pd.DataFrame() - for rank_id, profiling_dir_path in data_map.items(): - kernel_file = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.KERNEL_DETAILS_CSV) - if kernel_file: - # 判断csv文件大小 - PathManager.check_path_readable(kernel_file) - # 读取CSV文件 - df_temp = pd.read_csv(kernel_file) - columns_to_keep = self.COLUMNS_TO_GROUP + self.COLUMNS_TO_CAL - if [1 for element in columns_to_keep if element not in list(df_temp)]: - msg = "[ERROR] Kernel details.csv has wrong data columns, terminate analysis." - raise RuntimeError(msg) - df = df_temp[columns_to_keep] - df.insert(loc=0, column='rank id', value=rank_id) - # 将数据添加到最终的数据框中 - self.all_kernel_data = pd.concat([self.all_kernel_data, df], ignore_index=True) - - def calculate_data(self): - # 存储所有合并后的数据 - calculate_dict = {self.COLUMNS_TO_CAL[i]: self.CAL_FUN - for i in range(len(self.COLUMNS_TO_CAL))} - group_col = ["rank id"] + self.COLUMNS_TO_GROUP - view_data = self.all_kernel_data.groupby(group_col).agg(calculate_dict).reset_index() - view_data.columns = [''.join(col) if col[1] == "" else '_'.join(col) for col in view_data.columns] - return view_data - - def get_prof_dirs(self, collection_path): - prof_dirs = [] - for prof_dir in os.listdir(collection_path): - if prof_dir.endswith(AdvisorConstant.PT_PROF_SUFFIX): - prof_dirs.append(os.path.join(collection_path, prof_dir)) - - return prof_dirs \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/common_func_advisor/__init__.py b/profiler/advisor/advisor_backend/common_func_advisor/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/common_func_advisor/constant.py b/profiler/advisor/advisor_backend/common_func_advisor/constant.py deleted file mode 100644 index 46a7fb24c2dade75c157f18118f29233eb924b88..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/constant.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from enum import Enum - - -class CsvTitle: - MODEL_NAME = "Model Name" - MODEL_ID = "Model ID" - TASK_ID = "Task ID" - STREAM_ID = "Stream ID" - INFER_ID = "Infer ID" - TASK_START_TIME = "Task Start Time(us)" - TASK_WAIT_TIME = "Task Wait Time(us)" - BLOCK_DIM = "Block Dim" - MIX_BLOCK_DIM = "Mix Block Dim" - HF32_ELIGIBLE = "HF32 Eligible" - INPUT_SHAPES = "Input Shapes" - INPUT_DATA_TYPES = "Input Data Types" - INPUT_FORMATS = "Input Formats" - OUTPUT_SHAPES = "Output Shapes" - OUTPUT_DATA_TYPES = "Output Data Types" - OUTPUT_FORMATS = "Output Formats" - CONTEXT_ID = "Context ID" - AICORE_TIME = "aicore_time(us)" - AIC_TOTAL_CYCLES = "aic_total_cycles" - AIC_MAC_TIME = "aic_mac_time(us)" - AIC_MAC_RATIO = "aic_mac_ratio" - AIC_SCALAR_TIME = "aic_scalar_time(us)" - AIC_SCALAR_RATIO = "aic_scalar_ratio" - AIC_MTE1_TIME = "aic_mte1_time(us)" - AIC_MTE1_RATIO = "aic_mte1_ratio" - AIC_MTE2_TIME = "aic_mte2_time(us)" - AIC_MTE2_RATIO = "aic_mte2_ratio" - AIC_FIXPIPE_TIME = "aic_fixpipe_time(us)" - AIC_FIXPIPE_RATIO = "aic_fixpipe_ratio" - AIC_ICACHE_MISS_RATE = "aic_icache_miss_rate" - AIV_TIME = "aiv_time(us)" - AIV_TOTAL_CYCLES = "aiv_total_cycles" - AIV_VEC_TIME = "aiv_vec_time(us)" - AIV_VEC_RATIO = "aiv_vec_ratio" - AIV_SCALAR_TIME = "aiv_scalar_time(us)" - AIV_SCALAR_RATIO = "aiv_scalar_ratio" - AIV_MTE2_TIME = "aiv_mte2_time(us)" - AIV_MTE2_RATIO = "aiv_mte2_ratio" - AIV_MTE3_TIME = "aiv_mte3_time(us)" - AIV_MTE3_RATIO = "aiv_mte3_ratio" - AIV_ICACHE_MISS_RATE = "aiv_icache_miss_rate" - CUBE_UTILIZATION = "cube_utilization( %)" - TASK_DURATION_SUM = "Task Duration Sum(us)" - TASK_DURATION_MEAN = "Task Duration Mean(us)" - TASK_DURATION_STD = "Task Duration Std(us)" - TASK_DURATION_RATIO = "Task Duration Ratio(100%)" - SIZE = "size(MB)" - THROUGHPUT = "throughput(GB/s)" - COLOR = "color" - GAP = "Gap(us)" - DURATION_SUM = "Duration Sum(us)" - COUNT = "Count" - MAX_DURATION = "Max Duration(us)" - MIN_DURATION = "Min Duration(us)" - AVG_DURATION = "Avg 
Duration(us)" - DURATION_RATIO = "Duration Ratio" - INDEX = "Index" - - -# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配旧版csv -class CsvTitleV1(CsvTitle): - OP_NAME = "Op Name" - OP_TYPE = "OP Type" - TASK_TYPE = "Task Type" - TASK_DURATION = "Task Duration(us)" - - -# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配新版csv -class CsvTitleV2(CsvTitle): - OP_NAME = "Name" - OP_TYPE = "Type" - TASK_TYPE = "Accelerator Core" - TASK_DURATION = "Duration(us)" - - -class Constant: - DTYPE_SIZE_MAP = {"int8": 1, "uint8": 1, - "int16": 2, "uint16": 2, - "int32": 4, "uint32": 4, - "int64": 8, "uint64": 8, - "float16": 2, - "bfloat16": 2, - "bf16": 2, - "dt_bf16": 2, - "float32": 4, - "float": 4, - "float64": 8, - "complex64": 8, - "complex128": 16, - "bool": 1} - TP_THRESHOLD = 1150 - MAX_INPUT_MODE_LEN = 30 - MAX_INPUT_ADVICE_LEN = 30 - SMALL_OP_DUR_RATIO = 0.2 - SMALL_OP_NUM_RATIO = 0.2 - BYTE_UNIT_TRANS = 1024 - UNIT_TRANS = 1000 - - # mode list - COMPUTE = "compute" - TIMELINE = "timeline" - CLUSTER = "cluster" - OVERALL = "overall" - PIPELINE = "pipeline" - - # advice list - SLOW_RANK = "slow rank" - SLOW_LINK = "slow link" - KERNEL = "kernel" - - # compute - NPU_FUSED = "npu_fused" - NPU_SLOW = "npu_slow" - - # timeline - OPTIM = "optimizer" - OP_SCHE = "op_schedule" - - # overall - SUMMARY = "summary" - - PT_PROF_SUFFIX = "ascend_pt" - ASCEND_PROFILER_OUTPUT = "ASCEND_PROFILER_OUTPUT" - COLLECTION_PATH = "collection_path" - CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" - KERNEL_DETAILS_CSV = "kernel_details.csv" - CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" - CLUSTER_COMM_JSON = "cluster_communication.json" - - # pipline - OP_NAME = "name" - OP_TID = "tid" - PID = "pid" - TS = "ts" - DUR = "dur" - CAT = "cat" - ARGS = "args" - PH = "ph" - ID = "id" - PH_START = "s" - PH_BEGIN = "B" - PH_END = "E" - PH_META = "M" - PH_X = "X" - CNAME = "cname" - PROCESS_NAME = "process_name" - FRAMEWORK_NAME = "Python" - ASCEND_HARDWARE_NAME = "Ascend Hardware" - ASYNC_NPU = "async_npu" - 
STEP_PREFIX = "ProfilerStep#" - FP_ATEN_OP = "aten" - FP_C10D_OP = "c10d" - HCOM_OP_PREFIX = "hcom_" - BP_AUTOGRAD_OP = "autograd" - TRACE_VIEW_JSON = "trace_view.json" - - # pattern_dict key: pattern, value: pattern name - PATTERN_DICT = {("Add", "DropOutDoMask", "Add"): "bias_dropout_add", - ("BatchMatMul", "Mul", "Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast", "DropOutDoMask", - "AsStrided", "BatchMatMul", "Transpose"): "FA", - ("Transpose", "Transpose", "Transpose", "Mul", "Transpose", "BatchMatMulV2", "MaskedFill", - "Cast", "SoftmaxV2", "Cast", "DropOutDoMask", "BatchMatMulV2", "Transpose"): "FA", - ("Transpose", "BatchMatMulV2", "Transpose", "Transpose", "BatchMatMulV2", "ZerosLike", - "DropOutDoMask", "Cast", "SoftmaxGrad", "Cast", "MaskedFill", "BatchMatMulV2", - "BatchMatMulV2", "Mul"): "FA", - ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Cast", "Cast", "Mul", "Cast", "Cast", - "Mul", "Cast"): "RMSNORM", - ("Cast", "LayerNorm", "Cast"): "LayerNorm", - ("Add", "LayerNorm"): "AddLayerNorm", - ("Add", "LayerNormV3"): "AddLayerNorm", - ("Gelu", "Add"): "GeluAdd", - ("Cast", "Square", "MemSet", "ReduceMean", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "RMSNorm", - ("BatchMatMul", "RealDiv", "Add", "Maximum", "SoftmaxV2", "Cast", "BatchMatMul"): "FA", - ("BatchMatMulV2", "RealDiv", "Add", "Cast", "Maximum", "Cast", "SoftmaxV2", "AsStrided", - "BatchMatMulV2"): "FA", - ("BatchMatMulV2", "RealDiv", "Add", "Cast", "SoftmaxV2", "Cast", "BroadcastTo", - "BatchMatMulV2"): "FA", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Cast", "Mul", "Add"): "RotaryMul", - ("Mul", "AsStrided", "Neg", "AsStrided", "ConcatD", "Mul", "Add"): "RotaryMul", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul", "Add"): "RotaryMul", - ("MatMulV2", "Swish", "MatMulV2", "Mul", "MatMulV2"): "FFN", - ("Transpose", "Transpose", "GatherElement", "Transpose"): "GatherElement", - ("Slice", "Slice", "Swish", "Mul"): "torch_npu.npu_swiglu", - ("Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast"): 
"torch_npu.npu_scaled_masked_softmax", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul"): "torch_npu.npu_rotary_mul", - ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "torch_npu.npu_rms_norm"} - TITLE = CsvTitleV2 - - @classmethod - def update_title(cls): - cls.TITLE = CsvTitleV1 - - -class CoreType: - AIV = "AI_VECTOR_CORE" - AIC = "AI_CORE" - AICPU = "AI_CPU" - MIX_AIV = "MIX_AIV" - MIX_AIC = "MIX_AIC" - HCCL = "HCCL" - - -class PerfColor(Enum): - WHITE = 0 - GREEN = 1 - YELLOW = 2 - RED = 3 diff --git a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py b/profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py deleted file mode 100644 index 8171f06ee235fc02da715044b4d310087c36c102..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from abc import abstractmethod -from dataclasses import dataclass -from dataclasses import field -from typing import Dict -from typing import List - -import pandas as pd - -from common_func.file_manager import FileManager - - -@dataclass -class TraceObj: - ph: str = "" - bp: str = "" - cat: str = "" - name: str = "" - pid: int = 0 - tid: int = 0 - id: int = 0 - ts: str = "" - dur: float = 0.0 - args: dict = field(default='unknown') - - @abstractmethod - def hash(self): - raise Exception("To be implemented") - - def valid(self): - return self.name != "" - - def check_hashable(self): - if not self.valid(): - raise Exception("Illegal {} to hash".format(self.__class__.name)) - - -@dataclass -class Process(TraceObj): - def hash(self): - self.check_hashable() - # msprof 保证name唯一性 - return self.args.get("name") - - -@dataclass -class Thread(TraceObj): - def hash(self): - self.check_hashable() - # msprof 保证name唯一性 - return self.args.get("name") - - -@dataclass -class DurationEvent(TraceObj): - def hash(self): - self.check_hashable() - return self.ts - - -@dataclass -class FlowEvent(TraceObj): - s_point_ts: str = "" - e_point_ts: str = "" - - def hash(self): - self.check_hashable() - return self.e_point_ts - - -class TraceViewJson: - - def __init__(self, path): - self.processes: Dict[str, Process] = dict() - self.threads: Dict[str, Thread] = dict() - self.python_dur_events: Dict[str, DurationEvent] = dict() - self.cann_dur_events: Dict[str, DurationEvent] = dict() - self.ascend_hardware_dur_events: Dict[str, DurationEvent] = dict() - self.torch_2_npu_flow_events: Dict[str, FlowEvent] = dict() - traces = FileManager.read_json_file(path) - self._load_obj(traces) - - def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: - if ts_col not in data.columns.tolist(): - print("[ERROR] No {} col found in data columns.".format(ts_col)) - return "" - row = data.loc[index_id] - timestamp = row[ts_col] - flow_event = 
self.get_torch_2_npu_flow_event(timestamp) - if not flow_event.valid(): - print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) - return "" - flow_event_s_key = flow_event.s_point_ts - python_dur_events = self.get_python_dur_events_contain_ts(flow_event_s_key) - if not python_dur_events: - print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) - return "" - # 保持新老版本callstack兼容性 - if python_dur_events[0].args.get("Call stack"): - # 旧版本 - call_stack_list = python_dur_events[0].args.get("Call stack").split(";") - else: - python_dur_events.sort(key=lambda e: e.ts) - # 新版本 - call_stack_list = [event.name for event in python_dur_events if event.cat == "python_function"] - call_stack = "\n".join(call_stack_list) - return call_stack - - def get_torch_2_npu_flow_event(self, end_time) -> FlowEvent: - if not self.torch_2_npu_flow_events or not self.torch_2_npu_flow_events.get(end_time): - print("[ERROR] Find flow event failed for ts: {}".format(end_time)) - return FlowEvent() - return self.torch_2_npu_flow_events.get(end_time) - - def get_python_dur_events_contain_ts(self, ts) -> List[DurationEvent]: - res = [] - for event in self.python_dur_events.values(): - if float(event.ts) <= float(ts) <= float(event.ts) + event.dur: - res.append(event) - return res - - def _load_obj(self, traces): - self._load_format(traces) - if not self._check_format(): - print("[ERROR] parse json failed for error format") - return - self._load_duration_events(traces) - self._load_torch_to_npu_flow_events(traces) - - def _check_format(self): - # 当前功能只需要这两个process,可扩展 - check_processes = ['Python', 'Ascend Hardware'] - for check_process in check_processes: - if check_process in self.processes: - continue - print("[ERROR] {} process not found in json.".format(check_process)) - return False - return True - - # 加载pid, tid头 - def _load_format(self, traces: List[Dict]): - for i, trace in enumerate(traces): - if trace.get('name') == 'process_name': - if not 
trace.get('args') or not trace.get('args').get('name') or not trace.get('pid'): - continue - process = Process(**trace) - self.processes[process.hash()] = process - if trace.get('name') == 'thread_name': - if not trace.get('args') or not trace.get('args').get('name') or not trace.get('tid'): - continue - thread = Thread(**trace) - self.threads[thread.hash()] = thread - - def _load_duration_events(self, traces: List[Dict]): - def check_events(_trace): - return _trace.get('name') and _trace.get("ts") and _trace.get("dur") - - python_pid = self.processes.get("Python").pid - cann_pid = self.processes.get("CANN").pid - ascend_hardware_pid = self.processes.get("Ascend Hardware").pid - for i, trace in enumerate(traces): - if trace.get('ph') != 'X': - continue - if not check_events(trace): - continue - event = DurationEvent(**trace) - if trace.get('pid') == python_pid: - self.python_dur_events[event.hash()] = event - elif trace.get('pid') == cann_pid: - self.cann_dur_events[event.hash()] = event - elif trace.get("pid") == ascend_hardware_pid: - self.ascend_hardware_dur_events[event.hash()] = event - - def _load_torch_to_npu_flow_events(self, traces: List[Dict]): - def check_events(_trace): - return _trace.get('name') and _trace.get("id") and _trace.get("ts") - - flow_events_table_by_id = dict() - - python_pid = self.processes.get("Python") - for i, trace in enumerate(traces): - if trace.get('ph') != 's' and trace.get('ph') != 'f' and trace.get('pid') != python_pid: - continue - if not check_events(trace): - continue - event = flow_events_table_by_id.get(trace.get("id")) - if not event: - event = FlowEvent(**trace) - if trace.get('ph') == 's': - event.s_point_ts = trace.get('ts') - else: - event.e_point_ts = trace.get('ts') - flow_events_table_by_id[event.id] = event - - self.torch_2_npu_flow_events = {eve.hash(): eve for eve in flow_events_table_by_id.values()} diff --git a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py 
b/profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py deleted file mode 100644 index 7b9baa32d9423a46bf93d563a6fabbbbb652aaf8..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import sys -from typing import Optional -from dataclasses import dataclass - -from common_func_advisor.constant import Constant - - -@dataclass -class FineTraceViewData: - py_pid: int = -1 - fp_tid: int = -1 - bp_tid: int = -1 - ascend_pid: int = -1 - min_ts: str = str(sys.maxsize) - max_ts: str = "0" - hcom_tids: list = None - fp_ops: list = None - bp_ops: list = None - hcom_ops: list = None - npu_ops_ts_dur: dict = None - torch_to_npu_links: list = None - - def __post_init__(self): - self.hcom_tids = self.hcom_tids or [] - self.fp_ops = self.fp_ops or [] - self.bp_ops = self.bp_ops or [] - self.hcom_ops = self.hcom_ops or [] - self.npu_ops_ts_dur = self.npu_ops_ts_dur or {} - self.torch_to_npu_links = self.torch_to_npu_links or [] - - def sort(self): - self.fp_ops.sort(key=lambda x: x[Constant.TS]) - self.bp_ops.sort(key=lambda x: x[Constant.TS]) - self.hcom_ops.sort(key=lambda x: x[Constant.TS]) - self.torch_to_npu_links.sort(key=lambda x: x[Constant.TS]) - - -class TraceViewPreProcessor: - """ - Trace view data 
preprocess - """ - - @staticmethod - def _is_fp_op(op_name: str) -> bool: - """ - check whether op is fp op - """ - return op_name.startswith(Constant.FP_ATEN_OP) or op_name.startswith(Constant.FP_C10D_OP) - - @staticmethod - def _is_fp_data(data: dict, fp_tid: int, py_pid: int) -> bool: - """ - check whether data is valid fp data - """ - return data[Constant.OP_TID] == fp_tid and \ - Constant.TS in data and Constant.DUR in data and \ - not data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) and \ - data[Constant.PID] == py_pid - - @staticmethod - def _is_bp_op(op_name: str) -> bool: - """ - check whether op is bp op - """ - return op_name.startswith(Constant.BP_AUTOGRAD_OP) - - @staticmethod - def _is_bp_data(data: dict, bp_tid: int, py_pid: int) -> bool: - """ - check whether data is valid bp data - """ - return data[Constant.OP_TID] == bp_tid and \ - Constant.TS in data and Constant.DUR in data and \ - data[Constant.PID] == py_pid - - @staticmethod - def _is_torch_to_npu_link(data: dict, fp_tid: int) -> bool: - """ - check whether data is torch to npu link - """ - return Constant.CAT in data and data[Constant.CAT] == Constant.ASYNC_NPU and \ - data[Constant.PH] == Constant.PH_START and \ - data[Constant.PID] == fp_tid - - @staticmethod - def _is_send_recv_op(op_name: str) -> bool: - """ - check whether op is hcom send or recv op - """ - # eg: hcom_BatchSendRecv__101_0_1 - p1 = re.compile(r'hcom_\w+SendRecv__\d+') - # eg: hcom_send__101_0_1 - p2 = re.compile(r'hcom_send__\d+') - # eg: hcom_receive__101_0_1 - p3 = re.compile(r'hcom_receive__\d+') - return bool(p1.match(op_name)) or bool(p2.match(op_name)) or bool(p3.match(op_name)) - - @staticmethod - def _is_hcom_op(op_name: str) -> bool: - """ - check whether data is hcom data - """ - return op_name.startswith(Constant.HCOM_OP_PREFIX) - - @staticmethod - def _is_python_process(data: dict) -> bool: - """ - check whether data is python process - """ - return Constant.PH in data and data[Constant.PH] == 
Constant.PH_META and \ - data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ - data[Constant.ARGS][Constant.OP_NAME] == Constant.FRAMEWORK_NAME - - @staticmethod - def _is_step_op(data: dict) -> bool: - """ - check whether data is step data - """ - return data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) - - @staticmethod - def _is_ascend_process(data: dict) -> bool: - """ - check whether data is ascend process data - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ - data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ - data[Constant.ARGS][Constant.OP_NAME] == Constant.ASCEND_HARDWARE_NAME - - @staticmethod - def _is_npu_op(data: dict, ascend_pid: int) -> bool: - """ - check whether data is npu op - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_X and \ - not data[Constant.OP_NAME].isupper() and \ - data[Constant.PID] == ascend_pid - - def process(self, raw_data: list) -> Optional[FineTraceViewData]: - """ - preprocess raw data - """ - if not raw_data: - print("[ERROR] No raw data found in trace view data.") - return None - - raw_fp_tids, raw_bp_tids, raw_hcom_tids = set(), set(), set() - fine_data = FineTraceViewData() - - # counting fp ops and bp ops tid and ascend pid - for data in raw_data: - if self._is_fp_op(data[Constant.OP_NAME]): - raw_fp_tids.add(data[Constant.OP_TID]) - elif self._is_bp_op(data[Constant.OP_NAME]): - raw_bp_tids.add(data[Constant.OP_TID]) - elif self._is_send_recv_op(data[Constant.OP_NAME]): - fine_data.hcom_ops.append(data) - raw_hcom_tids.add(data[Constant.OP_TID]) - elif self._is_python_process(data): - fine_data.py_pid = data[Constant.PID] - elif self._is_ascend_process(data): - fine_data.ascend_pid = data[Constant.PID] - - # find max and min ts in hcom ops - if self._is_hcom_op(data[Constant.OP_NAME]): - # for compatibility with old data (ts is float type) - ts = data[Constant.TS] if not isinstance(data[Constant.TS], float) else str(data[Constant.TS]) - 
fine_data.min_ts = min(fine_data.min_ts, ts) - fine_data.max_ts = max(fine_data.max_ts, ts) - - unique_fp_tid = list(raw_fp_tids - raw_bp_tids) - unique_bp_tid = list(raw_bp_tids) - fine_data.hcom_tids = list(raw_hcom_tids) - - if not unique_fp_tid or not unique_bp_tid: - print("[INFO] No fp or bp tid found in trace view data.") - else: - fine_data.fp_tid, fine_data.bp_tid = unique_fp_tid[0], unique_bp_tid[0] - - # filter fp ops and bp ops and torch_to_npu_links - for data in raw_data: - if self._is_fp_data(data, fine_data.fp_tid, fine_data.py_pid): - fine_data.fp_ops.append(data) - elif self._is_bp_data(data, fine_data.bp_tid, fine_data.py_pid): - fine_data.bp_ops.append(data) - elif self._is_torch_to_npu_link(data, fine_data.fp_tid): - fine_data.torch_to_npu_links.append(data) - elif self._is_npu_op(data, fine_data.ascend_pid): - fine_data.npu_ops_ts_dur[data[Constant.TS]] = data[Constant.DUR] - - fine_data.sort() - return fine_data diff --git a/profiler/advisor/advisor_backend/compute_advice/__init__.py b/profiler/advisor/advisor_backend/compute_advice/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py deleted file mode 100644 index cafbafd8e28c162bc76edb2f77ebd0645fed552f..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from abc import abstractmethod -from collections import defaultdict -import os - -from advice_base import AdviceBase -from common_func.file_manager import FileManager - - -class ComputeAdviceBase(AdviceBase): - ASCEND_PT = 'ascend_pt' - ASCEND_PROFILER_OUTPUT = 'ASCEND_PROFILER_OUTPUT' - KERNEL_DETAIL_FILE = "kernel_details.csv" - TRACE_VIEW_FILE = "trace_view.json" - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.kernel_details_path = "" - self.has_preparse = False - self.preparse_data = defaultdict(list) - self.call_stack = None - self.trace_view_path = "" - - def path_check(self): - """ - check whether input path is valid - """ - if not os.path.exists(self.collection_path): - print("[ERROR] Path: {} is not exist.".format(self.collection_path)) - return False - if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"): - self.kernel_details_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", - "kernel_details.csv") - if not os.path.exists(self.kernel_details_path): - print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format( - os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT"))) - return False - elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path) == "kernel_details.csv": - self.kernel_details_path = self.collection_path - else: - print("[ERROR] Please input ascend_pt or kernel_details.csv") - return False - print("[INFO] Start to analyse the target file: {}".format(self.kernel_details_path)) - self.preparse() - return True - - def has_callstack(self): - if self.call_stack is not None: - return self.call_stack - profiler_info_json_path = "" - for file in os.listdir(self.collection_path): - if file.startswith("profiler_info"): - profiler_info_json_path = os.path.join(self.collection_path, file) - break - if not profiler_info_json_path: - self.call_stack = False - return self.call_stack - self.trace_view_path = 
os.path.join(self.collection_path, self.ASCEND_PROFILER_OUTPUT, "trace_view.json") - if not os.path.exists(profiler_info_json_path) or not os.path.exists(self.trace_view_path): - self.call_stack = False - return self.call_stack - info = FileManager.read_json_file(profiler_info_json_path) - if not info.get("config") or not info.get("config").get("common_config") \ - or not info.get("config").get("common_config").get("with_stack"): - self.call_stack = False - return self.call_stack - activities = info.get("config").get("common_config").get("activities") - if not activities or "ProfilerActivity.CPU" not in activities: - self.call_stack = False - return self.call_stack - self.call_stack = info.get("config").get("common_config").get("with_stack") - return self.call_stack - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - self.output_format_data[self.ADVICE] = self.cur_advice - - def preparse(self): - if self.has_preparse: - return diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py deleted file mode 100644 index c85c14d618ceda199c9c376abc27a3581eed97b8..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import multiprocessing - -import pandas as pd -import numpy as np - -from common_func_advisor.constant import Constant -from .op_perf import OpPerfFactory - - -class CSVAnalyzer: - def __init__(self, path) -> None: - self._path = path - - def process(self): - df = pd.read_csv(self._path, dtype={"Start Time(us)": str}) - # 分析是否存在可融合的算子 - op_type_list = df["Type"].tolist() - duration_list = df["Duration(us)"].tolist() - start_times = df["Start Time(us)"].tolist() - # 去除末尾的\t分隔符 - start_times = [start_time[:-1] for start_time in start_times] - result_list = [] - for pattern in Constant.PATTERN_DICT.keys(): - result_list.extend(self.find_all_sub_lists(op_type_list, duration_list, start_times, pattern)) - data_frame = pd.DataFrame(result_list) - data_frame.columns = ["pattern_name", "pattern", "len", "count", "duration sum(us)", "op durations(us)", - "index", "first_timestamp"] - return data_frame - - @staticmethod - def find_all_sub_lists(op_type_list, duration_list, start_times, expect_sub_list): - # 创建一个空字典,用来存储子列表和它们的出现次数和起始位置 - len_sub_list = len(expect_sub_list) - expect_sub_list = tuple(expect_sub_list) - sublist_dict = {} - # 遍历列表,从每个位置开始,取长度为N的子列表 - for i in range(len(op_type_list) - len_sub_list + 1): - sublist = tuple(op_type_list[i:i + len_sub_list]) - if sublist != expect_sub_list: - continue - # 如果子列表已经在字典中,就增加它的出现次数,否则就初始化为1 - if sublist in sublist_dict: - # count - sublist_dict[sublist][0] += 1 - # index - sublist_dict[sublist][1].append(i) - # total duration - sublist_dict[sublist][2] += sum(duration_list[i:i + len_sub_list]) - # duration - zip_data = zip(sublist_dict[sublist][3], duration_list[i:i + len_sub_list]) - sublist_dict[sublist][3] = [a + b for a, b in zip_data] - else: - sublist_dict[sublist] = [1, [i], sum(duration_list[i:i + len_sub_list]), - duration_list[i:i + len_sub_list], len_sub_list, start_times[i]] - # 创建一个空列表,用来存储所有重复的子列表 - repeated_sublists = [] - for sublist, (count, index, duration_sum, op_durations, sublist_len, first_time) 
in sublist_dict.items(): - pattern_name = Constant.PATTERN_DICT.get(sublist, "unknown") - op_durations = [round(num, 2) for num in op_durations] - repeated_sublists.append([pattern_name, sublist, sublist_len, count, - duration_sum, op_durations, index, first_time]) - if len(sublist_dict) == 0: - pattern_name = Constant.PATTERN_DICT.get(expect_sub_list, "unknown") - repeated_sublists.append([pattern_name, expect_sub_list, 0, 0, 0, 0, 0, 0]) - # 返回所有重复的子列表 - return repeated_sublists diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py deleted file mode 100644 index fd2a72ffa39bfde1b3e59450c6d76f51d98110d9..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pandas as pd - -from common_func_advisor.trace_view_json import TraceViewJson - - -class JSONAnalyzer(object): - def __init__(self, path): - self._path = path - - def get_custom_code(self, data: pd.DataFrame, ts_col: str, output_col: str): - trace_json = TraceViewJson(self._path) - callstacks = pd.DataFrame(columns=[output_col]) - - for i, row in data.iterrows(): - if ts_col not in data.columns.tolist(): - print("[ERROR] No {} col found in data columns.".format(ts_col)) - return callstacks - timestamp = row[ts_col] - flow_event = trace_json.get_torch_2_npu_flow_event(timestamp) - if not flow_event.valid(): - print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) - callstacks.loc[i] = "" - continue - flow_event_s_key = flow_event.s_point_ts - python_dur_events = trace_json.get_python_dur_events_contain_ts(flow_event_s_key) - if not python_dur_events: - print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) - callstacks.loc[i] = "" - continue - # 保持新老版本callstack兼容性 - if python_dur_events[0].args.get("Call stack"): - # 旧版本 - callstack = python_dur_events[0].args.get("Call stack").split(";") - else: - python_dur_events.sort(key=lambda e: e.ts) - # 新版本 - callstack = [event.name for event in python_dur_events if event.cat == "python_function"] - callstack_str = "\n".join(callstack) - callstacks.loc[i] = callstack_str - return callstacks diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py deleted file mode 100644 index 7bcbed5a75807b57a55787c743cfaaff55a68589..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -from typing import Dict - -from common_func_advisor.constant import Constant -from common_func_advisor.constant import CoreType -from common_func_advisor.constant import PerfColor - - -class OpPerfFactory: - @classmethod - def build(cls, op_row: Dict): - if op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIV: - return VecOpPerf(op_row) - elif op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIC: - return CubeOpPerf(op_row) - else: - return OpPerf(op_row) - - -class OpPerf: - def __init__(self, op_row: Dict): - if "OP Type" in op_row.keys(): - Constant.update_title() - self.row = op_row - self.model_name = op_row.get("Model Name") - self.model_id = op_row.get("Model ID") - self.task_id = op_row.get("Task ID") - self.stream_id = op_row.get("Stream ID") - self.infer_id = op_row.get("Infer ID") - self.op_name = op_row.get("Name") - self.op_type = op_row.get("Type") - self.task_type = op_row.get("Accelerator Core") - self.task_start_time = op_row.get("Start Time(us)") - self.task_duration = op_row.get("Duration(us)") - self.task_wait_time = op_row.get("Wait Time(us)") - self.block_dim = op_row.get("Block Dim") - self.mix_block_dim = op_row.get("Mix Block Dim") - - self.hf32_eligible = op_row.get("HF32 Eligible") - self.input_shapes = op_row.get("Input Shapes") - self.input_data_types = op_row.get("Input Data Types") - self.input_formats = op_row.get("Input Formats") - self.output_shapes = op_row.get("Output 
Shapes") - self.output_data_types = op_row.get("Output Data Types") - self.output_formats = op_row.get("Output Formats") - self.context_id = op_row.get("Context ID") - self.aicore_time = op_row.get("aicore_time(us)") - self.aic_total_cycles = op_row.get("aic_total_cycles") - - self.aic_mac_time = op_row.get("aic_mac_time(us)") - self.aic_mac_ratio = op_row.get("aic_mac_ratio") - self.aic_scalar_time = op_row.get("aic_scalar_time(us)") - self.aic_scalar_ratio = op_row.get("aic_scalar_ratio") - self.aic_mte1_time = op_row.get("aic_mte1_time(us)") - self.aic_mte1_ratio = op_row.get("aic_mte1_ratio") - self.aic_mte2_time = op_row.get("aic_mte2_time(us)") - self.aic_mte2_ratio = op_row.get("aic_mte2_ratio") - self.aic_fixpipe_time = op_row.get("aic_fixpipe_time(us)") - self.aic_fixpipe_ratio = op_row.get("aic_fixpipe_ratio") - self.aic_icache_miss_rate = op_row.get("aic_icache_miss_rate") - self.aiv_time = op_row.get("aiv_time(us)") - self.aiv_total_cycles = op_row.get("aiv_total_cycles") - self.aiv_vec_time = op_row.get("aiv_vec_time(us)") - self.aiv_vec_ratio = op_row.get("aiv_vec_ratio") - self.aiv_scalar_time = op_row.get("aiv_scalar_time(us)") - self.aiv_scalar_ratio = op_row.get("aiv_scalar_ratio") - self.aiv_mte2_time = op_row.get("aiv_mte2_time(us)") - - self.aiv_mte2_ratio = op_row.get("aiv_mte2_ratio") - self.aiv_mte3_time = op_row.get("aiv_mte3_time(us)") - self.aiv_mte3_ratio = op_row.get("aiv_mte3_ratio") - self.aiv_icache_miss_rate = op_row.get("aiv_icache_miss_rate") - self.cube_utilization = op_row.get("cube_utilization( %)") - - @staticmethod - def get_dtype_size(dtype_str: str): - return Constant.DTYPE_SIZE_MAP.get(dtype_str.lower(), 0) - - @staticmethod - def get_element_count(shape: list): - return functools.reduce(lambda x, y: int(x) * int(y), shape) - - @staticmethod - def shape_to_tuple(shape_str: str) -> tuple: - if not isinstance(shape_str, str): - return [] - shape_str = shape_str.strip('"') - split_shape = shape_str.strip(';') - if not 
split_shape: - return [] - pairs = split_shape.split(';') - shape_result = [] - for pair in pairs: - pair = pair.strip(";") - elements = pair.split(',') - elements = tuple(int(element) if "" != element else 0 for element in elements) - shape_result.append(elements) - return tuple(shape_result) - - @staticmethod - def dtype_to_tuple(dtypes_str: str) -> tuple: - if not isinstance(dtypes_str, str): - return [] - dtypes_str = dtypes_str.strip('"') - split_dtypes = dtypes_str.strip(';') - if not split_dtypes: - return [] - pairs = split_dtypes.split(';') - return tuple(pairs) - - def get_mac_ratio(self): - return self.aic_mac_ratio - - def get_size(self, shapes_str, dtypes_str): - shapes = self.shape_to_tuple(shapes_str) - dtypes = self.dtype_to_tuple(dtypes_str) - if len(shapes) > len(dtypes): - print(f"[ERROR] The size of shape is greater than that of dtypes.") - return 0 - if len(shapes) < len(dtypes): - shapes = list(shapes) - shapes.extend([(1,)] * (len(dtypes) - len(shapes))) - all_size = 0 - for index, shape in enumerate(shapes): - element_count = self.get_element_count(shape) - dtype_size = self.get_dtype_size(dtypes[index]) - all_size += element_count * dtype_size - return all_size - - def get_calc_size(self): - # input and output bytes (MB) - if not self.input_shapes or not self.output_shapes: - print("[ERROR] There is no tensor data, do not assess vector op performance.") - return 0 - intput_size = self.get_size(self.input_shapes, self.input_data_types) - output_size = self.get_size(self.output_shapes, self.output_data_types) - return (intput_size + output_size) / (Constant.BYTE_UNIT_TRANS * Constant.BYTE_UNIT_TRANS) - - def get_throughput(self): - # throughput(GB/s) - if not self.task_duration or abs(self.task_duration) < 1e-6: - print("[ERROR] There is no task_duration, do not assess vector op performance.") - return 0 - return self.row[Constant.TITLE.SIZE] / Constant.BYTE_UNIT_TRANS / self.task_duration * Constant.UNIT_TRANS * Constant.UNIT_TRANS - - def 
get_perf_color(self): - return PerfColor.WHITE - - def update(self): - self.row[Constant.TITLE.SIZE] = self.get_calc_size() - self.row[Constant.TITLE.THROUGHPUT] = self.get_throughput() - self.row[Constant.TITLE.COLOR] = self.get_perf_color().name - return self.row - - -class VecOpPerf(OpPerf): - def get_perf_color(self) -> PerfColor: - throughput = self.row[Constant.TITLE.THROUGHPUT] - op_duration = self.task_duration - tp_threshold = Constant.TP_THRESHOLD - if throughput == 0: - return PerfColor.WHITE - if throughput < tp_threshold / 2 and op_duration > 20: - return PerfColor.RED - elif tp_threshold / 2 <= throughput < tp_threshold: - return PerfColor.YELLOW - else: - return PerfColor.GREEN - - -class CubeOpPerf(OpPerf): - def get_perf_color(self) -> PerfColor: - aic_mac_ratio = self.get_mac_ratio() - if not aic_mac_ratio: - print("[WARNING] There is no aic_mac_ratio, do not assess cube op performance.") - return PerfColor.WHITE - elif aic_mac_ratio < 0.6: - return PerfColor.RED - elif 0.6 <= aic_mac_ratio < 0.8: - return PerfColor.YELLOW - else: - return PerfColor.GREEN diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py deleted file mode 100644 index fd5610bbbbb98d15fbab22bb646b2dd7de36ac3d..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from abc import ABC - -import pandas as pd - -from compute_advice.compute_advice_base import ComputeAdviceBase -from compute_advice.npu_fused.csv_analyzer import CSVAnalyzer -from compute_advice.npu_fused.json_analyzer import JSONAnalyzer - - -class NpuFusedAdvice(ComputeAdviceBase, ABC): - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.cur_data = dict() - self.cur_bottleneck = str() - self.cur_advice = str() - self.kernel_details_path = "" - self.call_stack = None - - def run(self): - if not self.path_check(): - return self.output_format_data - self.process() - self.output() - return self.output_format_data - - def process(self): - csv_analyzer = CSVAnalyzer(self.kernel_details_path) - all_pattern_data = csv_analyzer.process() - all_pattern_data = all_pattern_data.sort_values(by='duration sum(us)', ascending=False) - filter_data = all_pattern_data.get(all_pattern_data.get("duration sum(us)", 0) > 0) - if not self.has_callstack(): - print("[Warning] No call stack info found, advice will be incomplete") - self.cur_data = filter_data - else: - json_analyzer = JSONAnalyzer(self.trace_view_path) - custom_code = json_analyzer.get_custom_code(filter_data, "first_timestamp", "custom code") - self.cur_data = pd.concat([filter_data, custom_code], axis=1) - op_num = len(self.cur_data.index) - op_dur = filter_data["duration sum(us)"].sum() - if op_num > 0: - index = 0 - self.cur_bottleneck = f"The computing time of fusable op is {round(op_dur, 2)} ms." - self.cur_advice = "" - for _, row in self.cur_data.iterrows(): - advice = f"Advice {index}:\n" - cur_op = "[" + ", ".join(row.loc["pattern"]) + "]" - npu_fused_op = row.loc["pattern_name"] - advice += f"Replace {cur_op} with {npu_fused_op}. 
" - if self.call_stack: - advice += f"This pattern first happened in: \n{row['custom code']}" - if index != op_num - 1: - advice += "\n" - index += 1 - self.cur_advice += advice diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py b/profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py deleted file mode 100644 index caff1c792c2171c33a4dd876b0741d6c215c5766..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from abc import ABC -import multiprocessing - -import pandas as pd - -from compute_advice.compute_advice_base import ComputeAdviceBase -from compute_advice.npu_fused.op_perf import OpPerfFactory -from common_func_advisor.constant import Constant -from common_func_advisor.constant import PerfColor -from advisor_backend.common_func_advisor.trace_view_json import TraceViewJson - - -class NpuSlowAdvice(ComputeAdviceBase, ABC): - OP_PERF_SHEET = "op_perf" - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.kernel_details_path = "" - self.data = pd.DataFrame() - - @staticmethod - def save_to_excel(data: pd.DataFrame, file_path: str) -> None: - writer = pd.ExcelWriter(file_path, engine="xlsxwriter", mode="w") - data.index.name = Constant.TITLE.INDEX - data.to_excel(writer, index=True, sheet_name=NpuSlowAdvice.OP_PERF_SHEET) - NpuSlowAdvice.color_sheet(data, writer.book, writer.sheets[NpuSlowAdvice.OP_PERF_SHEET]) - writer.sheets[NpuSlowAdvice.OP_PERF_SHEET].freeze_panes = "A2" - writer.close() - - @staticmethod - def color_sheet(data: pd.DataFrame, workbook, worksheet): - color_rgb = { - PerfColor.GREEN.name: workbook.add_format({'bg_color': '#C6EFCE'}), - PerfColor.YELLOW.name: workbook.add_format({'bg_color': '#FFEB9C'}), - PerfColor.RED.name: workbook.add_format({'bg_color': '#FFC7CE'}), - } - for row in data.iterrows(): - color = row[1][Constant.TITLE.COLOR] - fill_format = color_rgb.get(color) - if not fill_format: - continue - worksheet.set_row(row[0] + 1, None, fill_format) - - @staticmethod - def update_op_row(row: tuple): - return OpPerfFactory.build(row[1]).update() - - def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: - if not self.has_callstack(): - print("There is no call stack info, please set 'with_stack=True'") - return "" - trace_json = TraceViewJson(self.trace_view_path) - return trace_json.get_call_stack(data, index_id, ts_col) - - def run(self): - if not self.path_check(): - return 
self.data - self.process() - return self.data - - def process(self): - self.data = pd.read_csv(self.kernel_details_path, dtype={"Start Time(us)": str}) - # 去除末尾的\t分隔符 - self.data["Start Time(us)"] = self.data["Start Time(us)"].apply(lambda x: x[:-1]) - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - result = pool.map(self.update_op_row, self.data.iterrows()) - pool.close() - self.data = pd.DataFrame(result) diff --git a/profiler/advisor/advisor_backend/interface.py b/profiler/advisor/advisor_backend/interface.py deleted file mode 100644 index 3e20c26d4d7bb000b20c28439b28ddf4811f057f..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/interface.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys - -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "advisor_backend")) -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "compare_tools")) -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "cluster_analyse")) -from common_func_advisor.constant import Constant -from advisor_backend.advice_factory.cluster_advice_factory import ClusterAdviceFactory -from advisor_backend.advice_factory.compute_advice_factory import ComputeAdviceFactory -from advisor_backend.advice_factory.timeline_advice_factory import TimelineAdviceFactory -from advisor_backend.advice_factory.overall_advice_factory import OverallAdviceFactory - - -class Interface: - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - self._factory_controller = FactoryController(collection_path) - - def get_data(self: any, mode: str, advice: str, **kwargs): - if len(mode) > Constant.MAX_INPUT_MODE_LEN or len(advice) > Constant.MAX_INPUT_ADVICE_LEN: - msg = '[ERROR]Input Mode is illegal.' 
- raise RuntimeError(msg) - factory = self._factory_controller.create_advice_factory(mode, kwargs.get("input_path", "")) - return factory.produce_advice(advice, kwargs) - - -class FactoryController: - FACTORY_LIB = { - Constant.CLUSTER: ClusterAdviceFactory, - Constant.COMPUTE: ComputeAdviceFactory, - Constant.TIMELINE: TimelineAdviceFactory, - Constant.OVERALL: OverallAdviceFactory - } - - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - self.temp_input_path = None - - def create_advice_factory(self, mode: str, input_path: str): - collection_path = input_path if input_path else self.collection_path - return self.FACTORY_LIB.get(mode)(collection_path) - - -if __name__ == "__main__": - Interface() diff --git a/profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py b/profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/advisor/advisor_backend/timeline_advice/__init__.py b/profiler/advisor/advisor_backend/timeline_advice/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py b/profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py deleted file mode 100644 index 9e492b2156c6faee6c023206f3cfc4f852eeb547..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from decimal import Decimal -from common_func_advisor.constant import Constant -from timeline_advice.timeline_advice_base import TimelineAdviceBase - - -class OpScheduleAdvice(TimelineAdviceBase): - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.cur_data = list() - self.cur_bottleneck = str() - self.cur_advice = str() - - def run(self): - if not self.path_check(): - return self.output_format_data - self.preparse() - self.process() - self.output() - return self.output_format_data - - def process(self): - cpt_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_CPT] - free_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_FREE] - if not cpt_data or not free_data: - print("[ERROR] Fail to find Overlap data.") - return - - op_dur = [entry.get("dur", 0) for entry in cpt_data] - op_free = [0.0] * len(cpt_data) - merge_data = list() - merge_data.extend(cpt_data) - merge_data.extend(free_data) - merge_data.sort(key=lambda x : Decimal(x.get("ts"))) - idx = free_idx = 0 - while idx < len(merge_data) and free_idx < len(op_free): - entry = merge_data[idx] - entry_name = entry.get("name") - if entry_name == 'Free': - op_free[free_idx] = merge_data[idx].get('dur') - elif entry_name == 'Computing': - free_idx += 1 - idx += 1 - self.cur_data.append(op_dur) - self.cur_data.append(op_free) - free_ratio, cpt_ratio, _ = self.get_ratio() - if free_ratio < 0.2: - return - self.cur_bottleneck = f"NPU Utilication: {round(free_ratio * 100, 2)}%, " \ - f"NPU Free Utilization: {round(cpt_ratio * 100, 2)}%." 
- if len(self.preparse_data[self.PREPARSE_TYPE.SYNCHRONIZE]) > 1: - self.cur_advice = f"Device synchronize {len(self.preparse_data[self.PREPARSE_TYPE.SYNCHRONIZE])} times, " \ - "try to reduce synchronization statements to alleviate the bottleneck of operator delivery.\n" - small_op_num = self.small_op_block(op_free, op_dur) - small_op_ratio = small_op_num / len(op_dur) if op_dur else 0.0 - if small_op_ratio > Constant.SMALL_OP_NUM_RATIO: - self.cur_advice += "There are too many small operators, you can increase the batch size appropriately." - - def small_op_block(self, op_frees, op_durs): - small_op_num = 0 - for op_free, op_dur in zip(op_frees, op_durs): - if op_free > op_dur * Constant.SMALL_OP_DUR_RATIO: - small_op_num += 1 - return small_op_num - - def get_ratio(self): - cpt_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_CPT] - free_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_FREE] - cmu_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_CMU] - cpt_time = sum([x.get("dur", 0) for x in cpt_data]) - free_time = sum([x.get("dur", 0) for x in free_data]) - cmu_time = sum([x.get("dur", 0) for x in cmu_data]) - total_time = cpt_time + free_time + cmu_time - if total_time > 0.0: - return (free_time / total_time, cpt_time / total_time, cmu_time / total_time) - return (0.0, 0.0, 0.0) diff --git a/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py b/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py deleted file mode 100644 index dee2e7ba563d0d00b4459333dffb4099dee9240a..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from timeline_advice.timeline_advice_base import TimelineAdviceBase - - -class OptimizerAdvice(TimelineAdviceBase): - OPTIMIZER_MAP = { - "Optimizer.step#SGD.step": "torch_npu.optim.NpuFusedSGD", - "Optimizer.step#Adadelta.step": "torch_npu.optim.NpuFusedAdadelta", - "Optimizer.step#Lamb.step": "torch_npu.optim.NpuFusedLamb", - "Optimizer.step#Adam.step": "torch_npu.optim.NpuFusedAdam", - "Optimizer.step#AdamW.step": "torch_npu.optim.NpuFusedAdamW", - "Optimizer.step#AdamP.step": "torch_npu.optim.NpuFusedAdamP", - "Optimizer.step#BertAdam.step": "torch_npu.optim.NpuFusedBertAdam", - "Optimizer.step#RMSprop.step": "torch_npu.optim.NpuFusedRMSprop", - "Optimizer.step#RMSpropTF.step": "torch_npu.optim.NpuFusedRMSpropTF", - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.cur_data = list() - self.cur_bottleneck = str() - self.cur_advice = str() - - def run(self): - if not self.path_check(): - return self.output_format_data - self.preparse() - self.process() - self.output() - return self.output_format_data - - def process(self): - if not self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER]: - return - - self.cur_data = list(set([entry.get("name", None) for entry in self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER]])) - for index, opt_name in enumerate(self.cur_data): - self.cur_advice += f"You can choose {self.OPTIMIZER_MAP.get(opt_name)} to replace the current Optimizer: {opt_name}." 
- if index != len(self.cur_data) - 1: - self.cur_advice += "\n" - self.cur_bottleneck = self.cur_advice diff --git a/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py b/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py deleted file mode 100644 index 4c7ac96cd22673741accd6bb2abb463566a2e652..0000000000000000000000000000000000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from abc import abstractmethod -from collections import defaultdict -import json -import os - -from advice_base import AdviceBase -from common_func.file_manager import FileManager - - -class TimelineAdviceBase(AdviceBase): - class PREPARSE_TYPE: - OPTIMIZER = 0 - STEP = 1 - OVERLAP_CPT = 2 - OVERLAP_FREE = 3 - OVERLAP_CMU = 4 - ENQUEUE = 5 - DEQUEUE = 6 - HOST_TO_DEVICE = 7 - SYNCHRONIZE = 8 - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.trace_view_path = "" - self.has_preparse = False - self.preparse_data = defaultdict(list) - self.entry_map = { - 'Computing': self.PREPARSE_TYPE.OVERLAP_CPT, - 'Free': self.PREPARSE_TYPE.OVERLAP_FREE, - 'AscendCL@aclrtSynchronizeDevice': self.PREPARSE_TYPE.SYNCHRONIZE - } - - def path_check(self): - """ - check whether input path is valid - """ - if not os.path.exists(self.collection_path): - print("[ERROR] Path: {} is not exist.".format(self.collection_path)) - return False - if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"): - self.trace_view_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", "trace_view.json") - if not os.path.exists(self.trace_view_path): - print("[ERROR] trace_view.json is not exist in the Path: {}.".format(os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT"))) - return False - elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path) == "trace_view.json": - self.trace_view_path = self.collection_path - else: - print("[ERROR] Please input ascend_pt or trace_view.json.") - return False - print("[INFO] Start to analyse the target file: {}".format(self.trace_view_path)) - return True - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - 
self.output_format_data[self.ADVICE] = self.cur_advice - - def preparse(self): - if self.has_preparse: - return - json_reader = FileManager.read_json_file(self.trace_view_path) - if not isinstance(json_reader, list): - return - for entry in json_reader: - name = entry.get("name", None) - if not name: - continue - if name.startswith("Optimizer.step#") and name.endswith(".step"): - self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER].append(entry) - elif name.startswith("ProfilerStep#"): - self.preparse_data[self.PREPARSE_TYPE.STEP].append(entry) - elif name in self.entry_map: - self.preparse_data[self.entry_map[name]].append(entry) - self.has_preparse = True diff --git a/profiler/advisor/advisor_backend/overall_advice/__init__.py b/profiler/advisor/analyzer/__init__.py similarity index 100% rename from profiler/advisor/advisor_backend/overall_advice/__init__.py rename to profiler/advisor/analyzer/__init__.py diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..e2f3abc53723db5834f1c48c2c8a130213a69eac --- /dev/null +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -0,0 +1,95 @@ +import logging +from functools import wraps +from typing import Dict, List, Union +from abc import abstractmethod, ABCMeta + +from profiler.advisor.common import constant +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.dataset.dataset import Dataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.display.html.render import HTMLRender + +logger = logging.getLogger() + + +class BaseAnalyzer(VersionControl, metaclass=ABCMeta): + _SUPPORT_VERSIONS = constant.SUPPORTED_CANN_VERSION + + dataset_cls_list = [] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + self.n_processes = n_processes + 
self.cann_version = cann_version + self.torch_version = torch_version + self.html_render = HTMLRender() + self.collection_path = collection_path + self.kwargs = kwargs + self.dataset_list: Dict[str, List[Dataset]] = {} + self.init_dataset_list() + self.result = OptimizeResult() + self.record_list: Dict[str, List] = {} + + @classmethod + def check_data(cls, data_list: tuple): + """ + check if all data in data list is contained + :param data_list: data list to check + :return: func ptr if check success + """ + + def decorate(func): + + @wraps(func) + def wrapper(self): + data = self.dataset_list + if data is None: + return None + for data_key in data_list: + if data_key not in data: + return None + + logger.info("Enable analysis %s with %s", self.__class__.__name__, ",".join(data_list)) + return func(self) + + return wrapper + + return decorate + + @abstractmethod + def optimize(self): + pass + + @abstractmethod + def make_record(self): + pass + + @abstractmethod + def make_render(self): + pass + + def init_dataset_list(self)->None: + dataset_cls_list = self.dataset_cls_list + if len(dataset_cls_list) == 0: + logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) + return + + for dataset_cls in dataset_cls_list: + if dataset_cls and callable(dataset_cls): + dataset = dataset_cls(collection_path=self.collection_path, data=self.dataset_list, **self.kwargs) + key = dataset_cls.get_key() + if key not in self.dataset_list: + self.dataset_list[key] = [] + self.dataset_list[key].append(dataset) + + @staticmethod + def get_first_data_by_key(data, key) -> Union[Dataset, None]: + """ + get the first member from data with key + :param data: input data + :param key: data key + :return: the first dataset in dataset list + """ + if key in data and len(data[key]) > 0: + return data[key][0] + return None diff --git a/profiler/advisor/analyzer/cluster/__init__.py b/profiler/advisor/analyzer/cluster/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py new file mode 100644 index 0000000000000000000000000000000000000000..52da3965f6c3a5384a2e0bb8718845bb6f56cf22 --- /dev/null +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -0,0 +1,117 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections import defaultdict +from typing import Dict, List +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataSet + + +class SlowLinkAnalyzer(BaseAnalyzer): + RDMA_TIME_MS = "RDMA time(ms)" + RDMA_SIZE_MB = "RDMA size(mb)" + SDMA_TIME_MS = "SDMA time(ms)" + SDMA_SIZE_MB = "SDMA size(mb)" + RDMA_BANDWIDTH = "RDMA bandwidth(GB/s)" + SDMA_BANDWIDTH = "SDMA bandwidth(GB/s)" + COMMUNICATION_BANDWIDTH_INFO = "Communication Bandwidth Info" + TRANSIT_TIME = "Transit Time(ms)" + TRANSIT_SIZE = "Transit Size(MB)" + SDMA = "SDMA" + RDMA = "RDMA" + SLOW_LINK_ANALYSIS = "slow_link_analysis" + dataset_cls_list = [ClusterCommunicationDataSet] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + key = ClusterCommunicationDataSet.get_key() + self.communication_data_class = self.get_first_data_by_key(self.dataset_list, key) + self.rank_bw_dict = self.communication_data_class.get_data() + self.result = OptimizeResult() + self.bottelneck = '' + self.suggestion = '' + self.format_datas = [] + + def optimize(self, **kwargs): + if self.rank_bw_dict is None: + print("slow_link 分析失败,原因是数据加载失败,请检查你的cluster_analysis_outpu文件夹, \ + 如不关心这类数据请忽略") + return self.result + self.process() + self.format_datas = self.format_details() + self.make_record() + self.make_render() + return self.result + + def process(self): + if self.rank_bw_dict: + self.produce_bottleneck(self.RDMA_BANDWIDTH) + self.produce_bottleneck(self.SDMA_BANDWIDTH) + + def produce_bottleneck(self, link_type: str): + data_list = 
[rank_dict.get(link_type, 0) for rank_id, rank_dict in self.rank_bw_dict.items()] + avg_bw = round(sum(data_list) / len(data_list), 3) + if avg_bw == 0: + return + self.bottelneck += f'{link_type}: \n' \ + f'The average is {avg_bw}, ' \ + f'while the maximum is {round(max(data_list), 3)}GB/s and ' \ + f'the minimum is {round(min(data_list), 3)}GB/s. ' \ + f'the difference is {round(max(data_list) - min(data_list), 3)}GB/s. \n' + + def format_details(self): + details_dict = {} + headers = ['rank_id'] + list(self.rank_bw_dict[0].keys()) + data_list = [] + for rank_id, rank_bw in self.rank_bw_dict.items(): + data_list.append([rank_id] + list(rank_bw.values())) + + details_dict["headers"] = headers + details_dict["data"] = data_list + + return details_dict + + def make_record(self): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem( + SlowLinkAnalyzer.SLOW_LINK_ANALYSIS, + self.bottelneck, + [""] + ) + self.result.add(OptimizeRecord(optimization_item)) + + for i, data in enumerate(self.format_datas["data"]): + self.result.add_detail(SlowLinkAnalyzer.SLOW_LINK_ANALYSIS, self.format_datas["headers"], data) + + def make_render(self): + result_for_html = { + "Description" : self.bottelneck, + "suggestion" : self.suggestion, + "details" : [self.format_datas] + } + + self.html_render.render_template(key="cluster", + title=SlowLinkAnalyzer.SLOW_LINK_ANALYSIS, + template_dir="templates", + template_name="cluster_analysis.html", + cann_version=self.cann_version, + torch_version=self.torch_version, + result=result_for_html) \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py similarity index 32% rename from profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py rename to profiler/advisor/analyzer/cluster/slow_rank_analyser.py index 4e789fb7fb688626df7e8f5b25b84e4955d6c2a3..6b1400485e3d967baadf13037e9d44d964c27f6e 
100644 --- a/profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -13,50 +13,50 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os from collections import defaultdict -from common_func_advisor.constant import Constant -from common_func.file_manager import FileManager -from cluster_advice.cluster_advice_base import ClusterAdviceBase -from prof_bean_advisor.cluster_step_trace_time_bean import ClusterStepTraceTimeBean +from typing import Dict, List +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet -class SlowRankAdvice(ClusterAdviceBase): +class SlowRankAnalyzer(BaseAnalyzer): + SLOW_RANK_ANALYSIS = "slow_rank_analysis" RANK = "rank" RATIO_THRESHOLD = 0.05 BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] + dataset_cls_list = [ClusterStepTraceTimeDataSet] - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + key = ClusterStepTraceTimeDataSet.get_key() + self.step_trace_class = self.get_first_data_by_key(self.dataset_list, key) + self.step_trace_dict = self.step_trace_class.get_data() + self.result = OptimizeResult() + self.bottelneck = '' + self.suggestion = '' + self.format_datas = [] - def load_step_time(self): - csv_path = os.path.join(self.collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT, Constant.CLUSTER_STEP_TIME_CSV) - if not 
os.path.exists(csv_path): - msg = "[ERROR] cluster_step_trace_time.csv doesn't exist, terminate analysis." - raise RuntimeError(msg) - step_time = FileManager.read_csv_file(csv_path, ClusterStepTraceTimeBean) - return step_time - - def run(self): - self.path_check() - step_data = self.load_step_time() - step_dict = self.process(step_data) - self.output(step_dict) - return self.output_format_data - - def process(self, step_data: list): - step_dict = defaultdict(lambda: [0, 0, 0, 0]) - for step_bean in step_data: - if step_bean.type == self.RANK: - step_dict[step_bean.index][0] += step_bean.compute - step_dict[step_bean.index][1] += step_bean.communication - step_dict[step_bean.index][2] += step_bean.free - total_time_list = [sum(data_tuple) for rank_id, data_tuple in step_dict.items()] + def optimize(self, **kwargs): + if self.step_trace_dict is None: + print("slow_rank 分析失败,原因是数据加载失败,请检查你的cluster_analysis_outpu文件夹 \ + 如不关心这类数据请忽略") + return self.result + self.process() + self.format_datas = self.format_details() + self.make_record() + self.make_render() + return self.result + + def process(self): + total_time_list = [sum(data_tuple) for rank_id, data_tuple in self.step_trace_dict.items()] if total_time_list: mean_total_time = sum(total_time_list) / len(total_time_list) for i in range(len(self.BOTTLENECK_LIST)): - self.produce_bottleneck(step_dict, i, mean_total_time) - return step_dict + self.produce_bottleneck(self.step_trace_dict, i, mean_total_time) def produce_bottleneck(self, step_dict: dict, produce_type: int, mean_total_time: float): data_list = [data_tuple[produce_type] for rank_id, data_tuple in step_dict.items()] @@ -66,6 +66,47 @@ class SlowRankAdvice(ClusterAdviceBase): f'because the max difference of {self.BOTTLENECK_LIST[produce_type]} time ' \ f'has reached {round(max_ratio * mean_total_time / 1000, 3)}ms. 
\n' - def output(self, step_dict: dict): - self.output_format_data[self.DATA] = step_dict - self.output_format_data[self.BOTTLENECK] = self.bottelneck + def make_record(self): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem( + SlowRankAnalyzer.SLOW_RANK_ANALYSIS, + self.bottelneck, + [""] + ) + self.result.add(OptimizeRecord(optimization_item)) + for i, data in enumerate(self.format_datas["data"]): + self.result.add_detail(SlowRankAnalyzer.SLOW_RANK_ANALYSIS, self.format_datas["headers"], data) + + def format_details(self): + details_dict = {} + headers = ["rank_id", "comupte", "communication", "free"] + data_list = [] + for key,value in self.step_trace_dict.items(): + data_list.append([key] + value) + details_dict["headers"] = headers + details_dict["data"] = data_list + return details_dict + + def make_render(self): + result_for_html = { + "Description" : self.bottelneck, + "suggestion" : self.suggestion, + "details" : [self.format_datas] + } + + self.html_render.render_template(key="cluster", + title=SlowRankAnalyzer.SLOW_RANK_ANALYSIS, + template_dir="templates", + template_name="cluster_analysis.html", + cann_version=self.cann_version, + torch_version=self.torch_version, + result=result_for_html) + + @staticmethod + def compute_max_gap_ratio(data: list, mean: float): + if mean == 0: + return 0 + else: + return (max(data) - min(data)) / mean diff --git a/profiler/advisor/analyzer/communication/__init__.py b/profiler/advisor/analyzer/communication/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/communication/bandwidth/__init__.py b/profiler/advisor/analyzer/communication/bandwidth/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/communication/environment/__init__.py 
b/profiler/advisor/analyzer/communication/environment/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/computation/__init__.py b/profiler/advisor/analyzer/computation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/computation/aicpu/__init__.py b/profiler/advisor/analyzer/computation/aicpu/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..4eca1c6c0278349cf4068544d2a53d8de7f0d5e1 --- /dev/null +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -0,0 +1,278 @@ +import copy +import os +from functools import partial +from typing import List, Dict, Optional + +import yaml +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker, logger +from profiler.advisor.analyzer.schedule.fusion_ops.timeline_api_stack_checker import OpStackFinder +from profiler.advisor.common import constant +from profiler.advisor.dataset.dataset import Dataset +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + + +class AicpuChecker(OperatorChecker): + _CHECKER = "aicpu operator" + _PROBLEM = "AICPU operator" + _MIN_TASK_DURATION = 20 + _description = f"Some operators and task duration exceed {_MIN_TASK_DURATION} us, such as :\n" + _SUGGESTION: List[str] = ["Modify code to avoid aicpu operator"] + STACK_INFO_ITEMS = "stack_info" + SUGGESTION_INFO_ITEMS = "suggestions" + _ITEMS = [ + "op_name", "op_type", "task_duration", "input_shapes", 
"input_data_types", "input_formats", "output_shapes", + "output_data_types", "output_formats" + ] + + def __init__(self, cann_version): + super(AicpuChecker, self).__init__(cann_version=cann_version) + self.aicpu_rules: Dict = {} + self.aicpu_checker: Dict = {} + self.load_aicpu_rules() + + def _check_data(self, profiling_data: ProfilingDataset) -> bool: + if not self._check_summary(profiling_data): + return False + return True + + def _check_operator(self, op_info) -> bool: + return op_info.task_type == constant.AI_CPU + + def load_aicpu_rules(self, rule_path="rules/aicpu_rules.yaml") -> Dict: + if not os.path.isabs(rule_path): + rule_path = os.path.join(os.path.dirname(__file__), + "../../../", rule_path) + + if not os.path.exists(rule_path): + logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) + return {} + with open(rule_path, 'r') as f: + self.aicpu_rules = yaml.safe_load(f) + self.filter_aicpu_rules(self.aicpu_rules) + for checker_name, check_rule in self.aicpu_rules.items(): + if not isinstance(check_rule, (list, dict,)): + continue + + if checker_name not in AICPU_CHECKER.keys(): + logger.warning("Skip %s, which is not support now.", checker_name) + continue + + self.aicpu_checker[checker_name] = AICPU_CHECKER[checker_name](check_rule) + + def filter_aicpu_rules(self, aicpu_rules): + support_checkers = [] + for checkers in aicpu_rules['CommonChecker']: + for key, value in checkers.items(): + if key == 'DataTypeChecker' and self.cann_version in value['cann_version']: + support_checkers.append(checkers) + aicpu_rules['CommonChecker'] = support_checkers + return + + def check_aicpu_attr(self, op_info) -> List[str]: + suggestions = [] + for _, checker in self.aicpu_checker.items(): + suggestions.extend(checker.check(op_info)) + return suggestions + + def check(self, profiling_data: ProfilingDataset) -> bool: + """ + check if any operator need optimize + :param profiling_data: profiling datasest + :return: true or false + """ + + 
if not self._check_data(profiling_data): + return False + op_summary = profiling_data.op_summary + + def get_opeartor_stack_info(api_stack_finder: OpStackFinder, op_name_list: list) -> list: + data: Dict[str, Dataset] = {} + event_dataset = TimelineEventDataset(collection_path=profiling_data.collection_path, data=data, task_type=constant.AI_CPU) + + # disable multiprocessing, avoid cost time of enable new process for light task + api_stack_finder.get_api_stack_by_op(event_dataset, op_name_list, constant.AI_CPU, + disable_multiprocess=True) + return api_stack_finder._stack_record + + self._op_list = [] + total_task_duration = 0.0 + max_task_duration = 0.0 + for op_info in op_summary.op_list: + if self._check_operator(op_info): + self._op_list.append(op_info) + + task_duration = float(op_info.task_duration) + total_task_duration += task_duration + max_task_duration = max(max_task_duration, task_duration) + if (not self._op_list) or (max_task_duration < self._MIN_TASK_DURATION): + return False + + # 获取所有算子堆栈的信息 + op_name_list = [] + for op in self._op_list: + if op.op_name not in op_name_list: + op_name_list.append(op.op_name) + api_stack_finder = OpStackFinder() + stack_record = get_opeartor_stack_info(api_stack_finder, op_name_list) + + # task_id 到 stack 信息的对应 + self._op_list.sort(key=lambda x: int(x.task_id)) + stack_record.sort(key=lambda x: x[0]) + task_id_to_stack = dict() + for stack in stack_record: + task_id_to_stack[stack[0]] = stack[-1] + + # 算子追加堆栈属性 + for op in self._op_list: + stack = task_id_to_stack.get(int(op.task_id)) + op.add_attr(self.STACK_INFO_ITEMS, stack) + suggestions = self.check_aicpu_attr(op) + op.add_attr(self.SUGGESTION_INFO_ITEMS, suggestions) + + # double 类型算子判断 + double_type_ai_cpu_operator = [] + for op in self._op_list: + if not op.has_attr("input_data_types"): + logger.warning( + "Skip checking of input data in AICPU checker because of not containing input_data_dtypes in op summary") + break + if op.has_attr( + "input_data_types") 
and "DOUBLE" in op.input_data_types and op.op_name not in double_type_ai_cpu_operator: + double_type_ai_cpu_operator.append(op.op_name) + if bool(double_type_ai_cpu_operator): + self._SUGGESTION.append("Try to convert double type operator to float, such as {}".format( + ",".join(double_type_ai_cpu_operator))) + return True + + def make_render(self, html_render, record): + html_render.render_template(key="computation", + template_dir="templates", + template_name="operator_ai_cpu.html", + format_result=self.format_operator_result(record, constant.OPERATOR_LIST_UNLIMIT)) + + def format_operator_result(self, record, limit): + """ + Format operator result to html + :param record: profiling check record + :param limit: Limit number of operator statistics lists. + :return: + """ + optimization_item = record.optimization_item + release_suggestion_list = [] + for suggestion in optimization_item.suggestion: + release_suggestion_list.append(suggestion.replace('\n', '
')) + logger.debug("suggestion list is %s", release_suggestion_list) + format_result = {"record": record.__dict__, "suggestion": '
'.join(release_suggestion_list), + "task_duration": round(record.statistics_item.task_duration, 2)} + + statistic = self.group_by(copy.deepcopy(self._op_list), op_key='op_type', + limit=limit) + format_result["statistic"] = statistic + stack_key_list = ["stack_info", "input_data_types", "output_data_types"] + if statistic: + for key, info in statistic: + op_info_list = self.group_by_list(info.get("op_info_list"), stack_key_list, limit) + info["op_info_list"] = op_info_list + return format_result + + def group_by_list(self, op_list, op_key_list: List = ["stack_info", "input_data_types", "output_data_types"], + limit: int = constant.OPERATOR_LIST_UNLIMIT): + if op_list is None: + op_list = [] + + # op_key_list 合并添加合并的属性,作为 groupby 的 key value + op_key = '+'.join(op_key_list) # str, json + for op_info in op_list: + attribute = "" + for _op in op_key_list: + if op_info.get_attr(_op): + attribute += op_info.get_attr(_op) + op_info.add_attr(op_key, attribute) + + return self.group_by(op_list, op_key=op_key, limit=limit) + + +class BaserChecker: + def __init__(self, *args, **kwargs): + self.checker_list = [] + + def build(self): + raise NotImplementedError + + def check(self, op_info) -> List[str]: + suggestions = [] + for checker in self.checker_list: + suggestion = checker(op_info) + if suggestion is not None: + suggestions.append(suggestion) + return suggestions + + +class CommonChecker(BaserChecker): + def __init__(self, check_rules: List[Dict] = None): + super(CommonChecker, self).__init__() + self.check_rules = check_rules if check_rules is not None else [] + self.supported_checker = dict(DataTypeChecker=self.datatype_checker) + self.build() + + @staticmethod + def datatype_checker(check_item: Dict, op_info) -> Optional[str]: + supported_op_type = check_item.get('op_type', []) + suggestion = check_item.get('suggestion', "") + valid_inputs = check_item.get('input', []) + valid_outputs = check_item.get('output', []) + ignore_type = check_item.get('ignore_type', []) + 
op_type = getattr(op_info, 'op_type', "UNKNOWN") + if "__ALL__" in supported_op_type or \ + op_type.lower() in supported_op_type: + if op_type.lower() in ignore_type: + return None + + op_input_dtype = getattr(op_info, 'input_data_types', "").split(";") + op_input_dtype = [item.lower() for item in op_input_dtype] + op_output_dtype = getattr(op_info, 'output_data_types', "").split(";") + op_output_dtype = [item.lower() for item in op_output_dtype] + input_dtype_diff = set(op_input_dtype).difference(set(valid_inputs)) + output_dtype_diff = set(op_output_dtype).difference(set(valid_outputs)) + unsupported_dtype_diff = input_dtype_diff.union(output_dtype_diff) + if not unsupported_dtype_diff: + return None + + return suggestion.format(",".join(unsupported_dtype_diff).upper(), + op_type, + ",".join(valid_inputs).upper()) + + def build(self): + for check in self.check_rules: + (check_func, check_rule), = check.items() + if check_func not in self.supported_checker: + logger.warning("Skip %s, which has not been implemented.", check_func) + continue + self.checker_list.append(partial(self.supported_checker.get(check_func), check_rule)) + + +class ExampleGuideChecker(BaserChecker): + def __init__(self, check_rules: List[Dict] = None): + super(ExampleGuideChecker, self).__init__() + self.check_rules = check_rules if check_rules is not None else [] + self.build() + + def build(self): + def _guide_url(check_item: Dict, op_info) -> Optional[str]: + supported_op_type = check_item.get('op_type', []) + url = check_item.get('url', "") + suggestion = check_item.get('suggestion', "") + + if getattr(op_info, 'op_type', "UNKNOWN").lower() in supported_op_type: + return suggestion if "{}" not in suggestion else suggestion.format(url) + + for check in self.check_rules: + (_, check_rule), = check.items() + self.checker_list.append(partial(_guide_url, check_rule)) + + +AICPU_CHECKER = { + "CommonChecker": CommonChecker, + "ExampleGuideChecker": ExampleGuideChecker +} diff --git 
a/profiler/advisor/analyzer/computation/bound/__init__.py b/profiler/advisor/analyzer/computation/bound/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..d90ef56c78b52987bc7d1dbb2c17d5caf9eb7706 --- /dev/null +++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py @@ -0,0 +1,77 @@ +import logging + +from typing import List + +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.common import constant +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset + +logger = logging.getLogger() + + +class BlockDimChecker(OperatorChecker): + _SUGGESTION: List[str] = [] + _CHECKER = "block dim" + _PROBLEM = "block dim" + _description = "some operator does not make full use of {} ai core" + _ITEMS = [ + "op_name", "op_type", "task_type", "task_duration", "income", "block_dim", "mix_block_dim", "input_shapes", + "input_data_types", "input_formats", "output_shapes", "output_data_types", "output_formats" + ] + + def _check_data(self, data): + if not self._check_summary(data): + return False + if not Config().get_config("ai_core_num"): + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "ai core num in info.json file") + return False + summary = data.op_summary + op_info = summary.op_list[0] + if not hasattr(op_info, "block_dim"): + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "block dim in op summary") + return False + if Config().get_config("ai_core_num"): + self._aicore_num = int(Config().get_config("ai_core_num")) + if Config().get_config("aiv_num"): + self._aiv_num = int(Config().get_config("aiv_num")) + self._description = 
self._description.format(self._aicore_num) + if self._aiv_num: + self._description += f" or {self._aiv_num} ai vector core" + self._description += f";\n Top-{OperatorChecker._MAX_TUNE_OP_NUM} operator of " \ + "task duration are as follows:\n" + return True + + def make_render(self, html_render, record): + html_render.render_template(key="computation", + template_dir="templates", + template_name="operator_block_dim.html", + format_result=self.format_operator_result(record, constant.OPERATOR_OUT_TOPK)) + + def _check_operator(self, op_info) -> bool: + if op_info.task_type not in ["AI_CORE", "AI_VECTOR_CORE", "MIX_AIC"]: + return False + block_dim = int(op_info.block_dim) + core_num = self.get_core_num(op_info) + if block_dim % core_num == 0: + return False + if op_info.task_type == "MIX_AIC" and hasattr(op_info, "mix_block_dim") \ + and self._aiv_num and int(op_info.mix_block_dim) % self._aiv_num == 0: + return False + return True + + def get_core_num(self, op_info): + """ + get core num of task type + """ + if op_info.task_type == "AI_CORE" or not self._aiv_num: + core_num = self._aicore_num + else: + core_num = self._aiv_num + return core_num + + def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: + if profiling_data.PROF_TYPE == constant.ASCEND_PYTORCH_PROFILER: + self._SUGGESTION.append(self.PyTorch_OPERATOR_TUNE_SUGGESTION) + elif profiling_data.PROF_TYPE == constant.MSLITE: + self._SUGGESTION.append(self.MSLite_OPERATOR_TUNE_SUGGESTION) diff --git a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..4ede3c94e6703f2ce38f9339db8fe9405fcfa82f --- /dev/null +++ b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py @@ -0,0 +1,56 @@ +import logging +from typing import List + +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from 
profiler.advisor.common import constant +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.utils.utils import to_percent + +logger = logging.getLogger() + + +class OperatorBoundChecker(OperatorChecker): + _MIN_TASK_DURATION = 20 # min task duration 20us + _CHECKER = "operator no bound" + _PROBLEM = "operator no bound" + _SUGGESTION: List[str] = [] + _description = ( + f"There is no mte, cube, vector, scalar ratio is more than {to_percent(Config().operator_bound_ratio)};\n" + + f"Top task duration operators need to be tuned are as follows: \n") + _ITEMS = [ + "op_name", "op_type", "task_type", "task_duration", "vec_ratio", "mac_ratio", "scalar_ratio", "mte1_ratio", + "mte2_ratio", "mte3_ratio", "block_dim", "input_shapes", "input_data_types", "input_formats", "output_shapes", + "output_data_types", "output_formats" + ] + + def _check_data(self, data): + if not self._check_summary(data): + return False + for op_info in data.op_summary.op_list: + if self._check_operator(op_info): + return True + + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "ratio in op summary") + return False + + def _check_operator(self, op_info) -> bool: + bound_list = ["vec_ratio", "mac_ratio", "scalar_ratio", "mte1_ratio", "mte2_ratio", "mte3_ratio"] + ratio_list = [self.get_ratio(op_info, attr) for attr in bound_list] + if not any(ratio_list): + return False # no data, skip check + if any(ratio and ratio > Config().operator_bound_ratio for ratio in ratio_list): + return False + return True + + def make_render(self, html_render, record): + html_render.render_template(key="computation", + template_dir="templates", + template_name="operator_no_bound.html", + format_result=self.format_operator_result(record, constant.OPERATOR_OUT_TOPK)) + + def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: + if profiling_data.PROF_TYPE == constant.ASCEND_PYTORCH_PROFILER: + 
self._SUGGESTION.append(self.PyTorch_OPERATOR_TUNE_SUGGESTION) + elif profiling_data.PROF_TYPE == constant.MSLITE: + self._SUGGESTION.append(self.MSLite_OPERATOR_TUNE_SUGGESTION) diff --git a/profiler/advisor/analyzer/computation/op_compile/__init__.py b/profiler/advisor/analyzer/computation/op_compile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..070b3a3b57b0a3d850a2e34bf408c5cf6c2a9610 --- /dev/null +++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py @@ -0,0 +1,91 @@ +import copy +import logging +from typing import List + +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.common import constant +from profiler.advisor.dataset.profiling.info_collection import OpInfo +from profiler.advisor.result.item import OptimizeItem, StatisticsItem, OptimizeRecord + +logger = logging.getLogger() + + +class DynamicShapeChecker(OperatorChecker): + ENABLE_COMPILED_SUGGESTION = "Optimize by enabling compiled operator, such as:\n" \ + "`torch_npu.npu.set_compile_mode(jit_compile=False)`\n" + _SUGGESTION: List[str] = [ENABLE_COMPILED_SUGGESTION] + _CHECKER = "dynamic shape operator" + _PROBLEM = "Dynamic shape operator" + _description = f"Found all operators are dynamic shape" + _op_list: List[OpInfo] = [] + _tune_op_list: List[str] = [] # record op name to be tuned, and save to tune_ops_file.cfg + _op_views: List = [] + + def __init__(self, cann_version) -> None: + super().__init__(cann_version=cann_version) + + def check(self, profiling_database) -> bool: + less_than_cann800_list = [constant.CANN_VERSION_C30, constant.CANN_VERSION_C13, constant.CANN_VERSION_C15] + # CANN 8.0.0 之前从 ge_info 中获取 
op_state 属性,进行动态 shape 逻辑判断 + if self.cann_version in less_than_cann800_list: + if hasattr(profiling_database, "ge_info"): + ge_info = profiling_database.ge_info + static_shape_operators = ge_info.get_static_shape_operators() + if len(static_shape_operators) == 0: + OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True + return True + else: + logger.warning( + "Skip dynamic shape checker because of not containing ge_info.db file in host filefloder.\n" + "To enable dynamic shape checker, please try to set data_simplification=False in experimental_config.\n" + "More details please refer to link : %s", constant.ASCEND_PROFILER_URL) + else: + # CANN 8.0.0 之后 op_state 属性从 op_summary 文件中获取 + if hasattr(profiling_database, "op_summary"): + static_shape_operators = profiling_database.op_summary.get_static_shape_operators() + if len(static_shape_operators) == 0: + OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True + return True + else: + logger.warning( + "Skip dynamic shape checker because of not containing op_summary.csv file in current filefloder." + ) + + return False + + def make_record(self, profiling_database) -> OptimizeRecord: + """ + make record for what and how to optimize + """ + + optimization_item = OptimizeItem( + self._PROBLEM, + self._description, + self._SUGGESTION + ) + statistics_item = StatisticsItem("", "", 1) + return OptimizeRecord(optimization_item, statistics_item) + + def format_operator_result(self, record, limit=-1): + """ + Format operator result to html + :param record: profiling check record + :param limit: Limit number of operator statistics lists. 
+ :return: + """ + optimization_item = record.optimization_item + release_suggestion_list = [] + for suggestion in optimization_item.suggestion: + release_suggestion = copy.deepcopy(suggestion) + if release_suggestion == DynamicShapeChecker.ENABLE_COMPILED_SUGGESTION: + release_suggestion += \ + f"for details please refer to link : LINK" + release_suggestion_list.append(release_suggestion.replace('\n', '
')) + format_result = {"record": record.__dict__, "suggestion": '
'.join(release_suggestion_list)} + return format_result + + def make_render(self, html_render, record): + html_render.render_template(key="computation", + template_dir="templates", + template_name="operator_dynamic_shape.html", + format_result=self.format_operator_result(record)) diff --git a/profiler/advisor/analyzer/computation/operator_checker.py b/profiler/advisor/analyzer/computation/operator_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..6bb837004b4282e406d8e1c7b3c5c2a135b9be0c --- /dev/null +++ b/profiler/advisor/analyzer/computation/operator_checker.py @@ -0,0 +1,282 @@ +import copy +import logging +from textwrap import fill +from typing import List + +from profiler.advisor.common import constant +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.info_collection import OpInfo +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.result.item import OptimizeItem, StatisticsItem, OptimizeRecord +from profiler.advisor.utils.utils import safe_division + +logger = logging.getLogger() + + +class OperatorChecker(VersionControl): + _SUPPORT_VERSIONS = [constant.CANN_VERSION_C30, constant.CANN_VERSION_C13, constant.CANN_VERSION_C15, constant.CANN_VERSION_C17] + IS_ALL_OPERATOR_DYNAMIC_SHAPE = False + _MAX_TUNE_OP_NUM = constant.OPERATOR_OUT_TOPK + _MIN_TASK_DURATION = 0 + _MIN_TASK_DURATION_RATIO = 1.0 + _MIN_TOTAL_DURATION_RATIO = 1.0 + _CHECKER = str() + _PROBLEM = str() + _description = str() + STACK_INFO_ITEMS = "" + _ITEMS: List[str] = [] + _SUGGESTION: List[str] = [] + SKIP_CHECK_MSG = "Skip %s checker because of not containing %s" + _tune_op_info_list: List[OpInfo] = [] + PyTorch_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE, such as:\n" \ + f"'aoe --job_type=2 --model_path=$user_dump_path " \ + f"--tune_ops_file={Config().tune_ops_file}'\n" + 
MSLite_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE in mindspore lite framework, such as:\n" \ + f"converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \ + f"--modelFile=$user_model.onnx --outputFile=user_model --configFile=./config.txt\n" + _tune_op_list: List[str] = [] + + def __init__(self, cann_version: str): + self.cann_version = cann_version + self._op_list: List[OpInfo] = [] + + def check(self, profiling_data: ProfilingDataset) -> bool: + """ + check if any operator need optimize + :param profiling_data: profiling datasest + :return: true or false + """ + if not self._check_data(profiling_data): + return False + + summary = profiling_data.op_summary + total_task_duration = 0.0 + max_task_duration = 0.0 + for op_info in summary.op_list: + if not self._check_operator(op_info): + continue + task_duration = float(op_info.task_duration) + total_task_duration += task_duration + max_task_duration = max(max_task_duration, task_duration) + self._op_list.append(op_info) + if task_duration > self._MIN_TASK_DURATION: + self._tune_op_info_list.append(op_info) + + if any([ + max_task_duration > self._MIN_TASK_DURATION, + round(safe_division(max_task_duration, summary.get_total_task_duration()), + 4) > self._MIN_TASK_DURATION_RATIO, + round(safe_division(total_task_duration, summary.get_total_task_duration()), 4) > + self._MIN_TOTAL_DURATION_RATIO, + ]): + self._op_list.sort(key=lambda x: float(x.get_attr("task_duration")), reverse=True) + self._tune_op_info_list.sort(key=lambda x: float(x.get_attr("task_duration")), reverse=True) + for op in self._op_list: + if op.op_name not in self._tune_op_list and len(self._tune_op_list) < constant.OPERATOR_OUT_TOPK: + self._tune_op_list.append(op.op_name) + return True + return False + + def make_record(self, profiling_data: ProfilingDataset): + """ + Make record for what and how to optimize + :param profiling_data: profiling data + :return: optimize record + """ + task_duration_list = 
[float(op_info.get_attr("task_duration")) for op_info in self._op_list if + hasattr(op_info, "get_attr")] + total_cost_time = sum(task_duration_list) + total_task_duration = profiling_data.op_summary.get_total_task_duration() + count = len(task_duration_list) + statistics_item = StatisticsItem(total_task_duration, total_cost_time, count, self.get_incomes()) + optimization_item = OptimizeItem( + self._PROBLEM, + self._get_description(self._description, self.get_op_type_list(self._op_list)[:self._MAX_TUNE_OP_NUM]), + self._SUGGESTION + ) + return OptimizeRecord(optimization_item, statistics_item) + + def _get_description(self, description, op_type_list=None): + if not op_type_list: + return description + + desc_suffix = [] + for i in range(len(op_type_list)): + if i % 3 == 0 and i != 0: + desc_suffix.append("\n") + + desc_suffix.append(f"{op_type_list[i]}") + + if i < len(op_type_list) - 1: + desc_suffix.append(", ") + + description += "".join(desc_suffix) + return description + + def pre_check(self, profiling_data) -> bool: + self.format_suggestion_content(profiling_data) + return not (OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE and ( + OperatorChecker.PyTorch_OPERATOR_TUNE_SUGGESTION or OperatorChecker.MSLite_OPERATOR_TUNE_SUGGESTION + ) in self._SUGGESTION) + + def format_operator_result(self, record, limit): + """ + Format operator result to html + :param record: profiling check record + :param limit: Limit number of operator statistics lists. 
+ :return: + """ + optimization_item = record.optimization_item + release_suggestion_list = [] + for suggestion in optimization_item.suggestion: + release_suggestion = copy.deepcopy(suggestion) + if release_suggestion == OperatorChecker.PyTorch_OPERATOR_TUNE_SUGGESTION: + release_suggestion += \ + (f"for details please refer to link : LINK") + elif release_suggestion == OperatorChecker.MSLite_OPERATOR_TUNE_SUGGESTION: + release_suggestion += \ + (f"\nThe config file for MSLite AOE usage is as follows:\n" \ + f"[ascend_context]\n" \ + f"aoe_mode=\"operator tuning\"\n" \ + f"--tune_ops_file={Config().tune_ops_file}\n" + f"\nFor details please refer to link : LINK") + release_suggestion_list.append(release_suggestion.replace('\n', '
')) + format_result = {"record": record.__dict__, + "suggestion": fill('
'.join(release_suggestion_list), width=200), + "task_duration": round(record.statistics_item.task_duration, 2)} + statistic = self.group_by(copy.deepcopy(self._op_list), limit=limit) + format_result["statistic"] = statistic + return format_result + + def group_by(self, op_list, op_key="op_type", + limit: int = constant.OPERATOR_LIST_UNLIMIT): + """ + group by Profiling.OpInfo's attribute key, then return top limit tuple by duration + :param op_list: input a OpInfo list + :param op_key: group by Profiling.OpInfo's attribute key + :param limit: top limit num, if you do not need to limit the length of tuple, input -1(int) + :return: + """ + if op_list is None: + op_list = [] + statistic = {} # str, json + for op_info in op_list: + if statistic.get(op_info.get_attr(op_key)): + statistic[op_info.get_attr(op_key)]["summary"]["total_duration"] = float( + statistic[op_info.get_attr(op_key)]["summary"]["total_duration"]) + float( + op_info.get_attr("task_duration", constant.DEFAULT_DURATION_ZERO)) + statistic[op_info.get_attr(op_key)]["summary"]["counts"] += 1 + stack_info = op_info.get_attr("stack_info") + if stack_info: + op_info.stack_info = stack_info.replace('\r\n', '
') + statistic[op_info.get_attr(op_key)]["op_info_list"].append(op_info) + else: + statistic[op_info.get_attr(op_key)] = {"summary": {}, "op_info_list": []} + statistic[op_info.get_attr(op_key)]["summary"]["op_type"] = op_info.get_attr( + "op_type", constant.DEFAULT_OPERATOR_TYPE) + statistic[op_info.get_attr(op_key)]["summary"]["total_duration"] = float( + op_info.get_attr("task_duration", constant.DEFAULT_DURATION_ZERO)) + statistic[op_info.get_attr(op_key)]["summary"]["counts"] = 1 + stack_info = op_info.get_attr("stack_info") + if stack_info: + op_info.stack_info = stack_info.replace('\r\n', '
') + statistic[op_info.get_attr(op_key)]["op_info_list"] = [op_info] + + if statistic: + for op_key in statistic.keys(): + statistic[op_key]["summary"]["total_duration"] = round( + statistic[op_key]["summary"]["total_duration"], 2) + # Grouped by op_type, sorted by total_duration, and obtained the top 10 operators that take the most time. + if limit > 0: + statistic = sorted( + statistic.items(), key=lambda kv: kv[1]["summary"]["total_duration"], reverse=True)[:limit] + else: + statistic = sorted(statistic.items(), key=lambda kv: kv[1]["summary"]["total_duration"], reverse=True) + else: + logger.warning("%s checker do not has results to format html", str(self.__class__.__name__)) + return statistic + + def _check_data(self, profiling_data): + return True + + def _check_operator(self, op_info): + return False + + def _get_income(self, _op_info: OpInfo) -> float: + return 0 + + def get_tune_op_list(self): + """ + get tune op list + :return: tune op list + """ + return self._tune_op_list + + def get_views(self, _graph_data): + """Get node views.""" + return [] + + @classmethod + def get_name(cls): + """ + get name of checker + :return: checker name + """ + return cls._PROBLEM + + def get_incomes(self) -> float: + """get incomes""" + incomes = 0.0 + for op_info in self._op_list: + income = self._get_income(op_info) + setattr(op_info, "income", round(income, 2)) + incomes += income + return incomes + + def get_op_type_list(self, op_list: List[OpInfo]): + """get op type list""" + op_type_list = [] + for op_info in op_list: + if op_info.op_type not in op_type_list: + op_type_list.append(op_info.op_type) + return op_type_list + + def _check_summary(self, data: ProfilingDataset): + if not hasattr(data, "op_summary"): + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "op summary") + return False + return True + + @staticmethod + def get_ratio(op_info: OpInfo, attr: str) -> float: + if not op_info.has_attr(attr): + return 0 + value = op_info.get_attr(attr) + if not value 
or value == "N/A": + return 0 + return float(value) + + def get_details(self) -> list: + """ + get details of operator to be optimized + :return: detail list + """ + op_list = self._op_list + if not op_list or not (self._ITEMS + [self.STACK_INFO_ITEMS]): + return [] + details = [] + attrs = [attr for attr in (self._ITEMS + [self.STACK_INFO_ITEMS]) if op_list[0].has_attr(attr)] + details.append(attrs) + op_list = sorted(op_list, key=lambda x: float(x.get_attr("task_duration")), reverse=True) + for op_info in op_list: + content = [ + op_info.get_attr(attr) if attr != "aicore_time" + else op_info.get_float_attr(attr, strict_mode=True) + + op_info.get_float_attr("aiv_time", strict_mode=True) for attr in attrs + ] + details.append(content) + return details + + def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: + return diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..98d3c5c49b74362137126ec1276c3684284662f0 --- /dev/null +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -0,0 +1,71 @@ +import logging +from abc import ABC +from typing import Dict, List + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.aicpu.aicpu_checker import AicpuChecker +from profiler.advisor.analyzer.computation.bound.block_dim_checker import BlockDimChecker +from profiler.advisor.analyzer.computation.bound.operator_bound_checker import OperatorBoundChecker +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.analyzer.computation.op_compile.dynamic_shape_checker import DynamicShapeChecker +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from 
profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.utils.utils import get_supported_subclass + +logger = logging.getLogger() + + +class ProfilingAnalyzer(BaseAnalyzer, ABC): + dataset_cls_list = [ProfilingDataset] + + def __init__(self, collection_path, **kwargs) -> None: + cann_version = kwargs.get("cann_version", constant.DEFAULT_CANN_VERSION) + torch_version = kwargs.get("torch_version", constant.DEFAULT_TORCH_VERSION) + super().__init__(collection_path, cann_version=cann_version, torch_version=torch_version, **kwargs) + self.checker_list = [checker(cann_version) for checker in get_supported_subclass(OperatorChecker, cann_version)] + # 动态 shape checker 放到首位,因为动态 shape 情形下AOE算子调优现在不支持,AOE 算子调优 checker 可以跳过 + index = next((i for i, item in enumerate(self.checker_list) if isinstance(item, DynamicShapeChecker)), None) + self.checker_list.insert(0, self.checker_list.pop(index)) + self.html_render = HTMLRender() + self.result = OptimizeResult() + + @BaseAnalyzer.check_data((ProfilingDataset.get_key(),)) + def optimize(self) -> OptimizeResult: + """ + optimize operator + :param data: input datasets + :return: result + """ + profiling_data = self.get_first_data_by_key(self.dataset_list, ProfilingDataset.get_key()) + for checker in self.checker_list: + if not checker.pre_check(profiling_data): + continue + if checker.check(profiling_data): + # add record + record = checker.make_record(profiling_data) + checker.make_render(self.html_render, record) + self.result.add(record) + # add details + details = checker.get_details() + if details: + for i, detail in enumerate(details): + if i == 0: + # the first row is header + self.result.add_detail(checker.get_name(), headers=detail) + else: + self.result.add_detail(checker.get_name(), detail=detail) + # add tune op list + tune_op_list = checker.get_tune_op_list() + if tune_op_list: + 
self.result.add_tune_op_list(tune_op_list) + + return self.result + + def make_record(self): + pass + + def make_render(self): + pass diff --git a/profiler/advisor/analyzer/dataloader/__init__.py b/profiler/advisor/analyzer/dataloader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/graph_fusion/__init__.py b/profiler/advisor/analyzer/graph_fusion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..713e1184299944ce506afbd48c4c2f1ec3f7d6e4 --- /dev/null +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py @@ -0,0 +1,49 @@ +from typing import List +from functools import partial + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.dataset.graph_dataset import GraphDataset +from profiler.advisor.analyzer.graph_fusion.graph_fusion_checker import GraphFusionRules +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.display.html.render import HTMLRender + + +class FusionOPAnalyzer(BaseAnalyzer): + """ + fusion optimizer + """ + RULES = dict(graph_dataset=partial(GraphFusionRules, "rules/op_fusion_pass.yaml")) + dataset_cls_list = [GraphDataset, ProfilingDataset] + + def __init__(self, collection_path, **kwargs) -> None: + super(FusionOPAnalyzer, self).__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render = HTMLRender() + + @BaseAnalyzer.check_data((GraphDataset.get_key(),)) + def optimize(self): + """ + :return: result + """ + self._check(self.dataset_list.get("GraphDataset"), 
self.dataset_list.get("ProfilingDataset")) + return self.result + + def _check(self, graph_data: List[GraphDataset], + profiling_data: List[ProfilingDataset] = None) -> None: + if len(graph_data) == 0 or graph_data[0].is_empty(): + return + for _, rule in self.RULES.items(): + checker = rule() + if profiling_data is None: + checker.find_fusion_matched_issues(graph_data) + else: + checker.find_fusion_matched_issues_with_times(graph_data, profiling_data) + checker.make_record(self.result) + checker.make_render(self.html_render) + + def make_record(self): + pass + + def make_render(self): + pass diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..e64020fdfe2ace37172e82ed562db1b66971d3d6 --- /dev/null +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py @@ -0,0 +1,207 @@ +import logging +from typing import List + +from tqdm import tqdm + +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord, StatisticsItem +from profiler.advisor.common.graph.graph import Graph +from profiler.advisor.common.graph.graph_parser import QueryGraphParser +from profiler.advisor.dataset.graph_dataset import GraphDataset +from profiler.advisor.common.graph.graph_match import find_isomorphisms + +logger = logging.getLogger() + + +class GraphFusionRules: + def __init__(self, fusion_rules: str): + self.fusion_rules = fusion_rules + self.candidates = [] + self.task_duration_list = [] + + @staticmethod + def build_query_graph(query_graphs) -> List[Graph]: + for _, query_graph in query_graphs.fusion_rules.items(): + for sub_graph in query_graph: + graph = Graph(*sub_graph) + graph.build() + yield graph + + def find_fusion_matched_issues(self, graphs: List[GraphDataset]): + query_graphs = QueryGraphParser(self.fusion_rules) + with 
tqdm(total=query_graphs.num_rules, leave=False, ncols=100, unit=" rules") as pbar: + pbar.set_description(f"Searching Isomorphic Subgraph") + for query_graph in self.build_query_graph(query_graphs): + query_candidates = find_isomorphisms(query_graph.graph, graphs[0].graphs[-1].graph) + pbar.update(1) + if len(query_candidates) > 0: + self.candidates.append(query_candidates) + + def find_fusion_matched_issues_with_times(self, graphs: List[GraphDataset], profiling): + self.find_fusion_matched_issues(graphs) + if len(self.candidates) == 0 or len(profiling) == 0: + return + + if not hasattr(profiling[0], 'op_summary') or profiling[0].op_summary is None: + if hasattr(profiling[0], 'msprof'): + self.match_time_from_msprof(profiling[0].msprof) + return + else: + logger.warning("Skip analyze operator because of not containing op summary.") + return + + self.match_time_from_summary(profiling[0].op_summary) + time_duration_sum = [] + for task_duration in self.task_duration_list: + time_duration_sum.append(sum([sum(duration) for duration in task_duration])) + time_duration_index = sorted(range(len(time_duration_sum)), + key=time_duration_sum.__getitem__, + reverse=True) + self.task_duration_list = [self.task_duration_list[i] for i in time_duration_index] + self.candidates = [self.candidates[i] for i in time_duration_index] + + def match_time_from_summary(self, op_summary): + op_dict = op_summary.task_dict + for candidates in self.candidates: + candidate_duration = [] + for candidate in candidates: + duration_list = [] + for node in candidate.values(): + if node.op_name not in op_dict or op_dict[node.op_name][0].op_type.lower() != node.op_type.lower(): + logger.warning("Operator %s is missing in op summary, which will be set to 0.", node.op_name) + duration_list.append(0.0) + continue + duration_list.append(float(op_dict[node.op_name][0].task_duration)) + candidate_duration.append(duration_list) + self.task_duration_list.append(candidate_duration) + + def 
match_time_from_msprof(self, msprof): + op_dict = dict() + for task in msprof.tasks: + if "item_id" not in task.args: + continue + op_dict[task.args["item_id"]] = {"task_duration": task.dur} + for candidates in self.candidates: + candidate_duration = [] + for candidate in candidates: + duration_list = [] + for node in candidate.values(): + if node.op_name not in op_dict: + logger.warning("Operator %s is missing in msprof, which will be set to 0.", node.op_name) + duration_list.append(0.0) + continue + duration_list.append(float(op_dict[node.op_name].get("task_duration"))) + candidate_duration.append(duration_list) + self.task_duration_list.append(candidate_duration) + + def make_render(self, html_render): + if not self.candidates: + return + + candidates_list = [] + for case_id, nodes in enumerate(self.candidates): + candidate_dict = dict() + candidate_dict['counts'] = len(nodes) + candidate_dict['matches'] = [] + has_time_info = False + if self.task_duration_list: + has_time_info = True + candidate_dict['total_duration'] = round(sum(sum(duration) for duration in + self.task_duration_list[case_id]), 2) + for node_index, refer_node in enumerate(nodes): + match = [] + index = 0 + pass_name = ','.join(item.op_type for item in refer_node.keys()) + for query_node, host_node in refer_node.items(): + fusion_pattern = query_node.op_pass + + if 'op_pass' not in candidate_dict: + candidate_dict['op_pass'] = fusion_pattern + if 'fusion_pattern' not in candidate_dict: + candidate_dict['fusion_pattern'] = pass_name + match_attr = dict() + match_attr['op_name'] = host_node.op_name + match_attr['dtype'] = query_node.op_type + if has_time_info: + match_attr['duration'] = round(self.task_duration_list[case_id][node_index][index], 2) + index += 1 + match.append(match_attr) + match_attr = dict() + match_attr['op_name'] = "-" + match_attr['dtype'] = "-" + if has_time_info: + match_attr['duration'] = round(sum(self.task_duration_list[case_id][node_index]), 2) + match.append(match_attr) 
+ candidate_dict['matches'].append(match) + candidates_list.append(candidate_dict) + html_render.render_template(key="computation", + template_dir="templates", + template_name="fusion.html", + candidates=candidates_list) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.candidates: + return + + optimization_item = OptimizeItem( + "fusion issue", + f"Found {len(self.candidates)} fusion issues", + ["Check fusion issues detail in att_advisor*.html"] + ) + total_time = 0.0 + for candidate in self.task_duration_list: + for duration in candidate: + total_time += sum(duration) + statistics_item = StatisticsItem(0, + total_time, + sum([len(candidate) for candidate in self.candidates]) + ) + result.add(OptimizeRecord(optimization_item, statistics_item)) + + record_title = [ + "issue_id", "graph_name", "op_name", "fusion_structure", "fusion_pattern", + "op_type", "input_shape", "input_format", + "input_dtype", "output_shape", "output_format", "output_dtype" + ] + result.add_detail('fusion issues', headers=record_title) + + for case_id, nodes in enumerate(self.candidates): + for _, refer_node in enumerate(nodes): + pass_name = ','.join(item.op_type for item in refer_node.keys()) + for query_node, host_node in refer_node.items(): + fusion_pattern = query_node.op_pass + detail = [ + case_id, + host_node.graph_name, + host_node.op_name, + pass_name, + fusion_pattern, + query_node.op_type, + self.get_attr_shape(host_node, "input", "shape"), + self.get_attr_type(host_node, "input", "format"), + self.get_attr_type(host_node, "input", "dtype"), + self.get_attr_shape(host_node, "output", "shape"), + self.get_attr_type(host_node, "output", "format"), + self.get_attr_type(host_node, "output", "dtype"), + ] + result.add_detail('fusion issues', detail=detail) + + @staticmethod + def get_attr_shape(node, type_name: str, attr_name: str) -> str: + attr_shape = [] + node_attrs = getattr(node, type_name, []) + for attrs in 
node_attrs: + attr = getattr(attrs, attr_name, []) + attr_shape.append(",".join(attr)) + return ";".join(attr_shape) + + @staticmethod + def get_attr_type(node, type_name: str, attr_name: str) -> str: + attr_type = [] + node_attrs = getattr(node, type_name, []) + for attr in node_attrs: + attr_type.append(getattr(attr, attr_name, "")) + return ";".join(attr_type) diff --git a/profiler/advisor/analyzer/overall/__init__.py b/profiler/advisor/analyzer/overall/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..e31a5d4288f71afc6644ec2001cf03d603866a5e --- /dev/null +++ b/profiler/advisor/analyzer/overall/overall_analyzer.py @@ -0,0 +1,45 @@ +import logging +from typing import Dict, List + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.result.result import OptimizeResult +from profiler.compare_tools.compare_backend.utils.constant import Constant +from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface + +logger = logging.getLogger() + + +class OverallSummaryAnalyzer(BaseAnalyzer): + + def __init__(self, profiling_path, benchmark_profiling_path=None, **kwargs): + self.benchmark_profiling_path = benchmark_profiling_path or profiling_path + self.profiling_path = profiling_path + self.html_render = HTMLRender() + self.result = OptimizeResult() + + def optimize(self): + compare_result = ComparisonInterface(self.benchmark_profiling_path, self.profiling_path).compare( + Constant.OVERALL_COMPARE) + + headers = compare_result.get('Model Profiling Time Distribution').get("headers", []) + rows = compare_result.get('Model Profiling Time Distribution').get("rows", []) + + 
self.make_record() + self.make_render(headers=headers, rows=rows) + return compare_result + + def make_record(self): + pass + + def make_render(self, **kwargs): + headers = kwargs.get("headers") + rows = kwargs.get("rows") + + if not headers or not rows: + logger.info("Empty headers or rows, skip render overall analysis html") + self.html_render.render_template(key="overall", + template_dir="templates", + template_name="overall_analysis.html", + headers=kwargs.get("headers"), + rows=kwargs.get("rows")) diff --git a/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py similarity index 64% rename from profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py rename to profiler/advisor/analyzer/overall/overall_summary_analyzer.py index bdee8029b8470d568b2e8888e84a1e14dc3d03a4..b7c320b7aa8f91f6a792ed30d62b4c66845d95f3 100644 --- a/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -13,17 +13,27 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os +import copy -from advisor_backend.advice_base import AdviceBase -from compare_backend.utils.constant import Constant -from compare_interface.comparison_interface import ComparisonInterface +import logging +from typing import Dict, List +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.compare_tools.compare_backend.utils.constant import Constant +from profiler.advisor.common import constant as const +from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface +from profiler.advisor.utils.utils import get_file_path_from_directory, load_parameter -class OverallSummaryAdvice(AdviceBase): + +class OverallSummaryAnalyzer(BaseAnalyzer): + OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis" advice_map = { - "Computing Time": "if you want more detailed advice please go to compute_perf_analysis.ipynb.", - "Uncovered Communication Time": "if you want more detailed advice please go to cluster_perf_analysis.ipynb.", - "Free Time": "if you want more detailed advice please go to timeline_perf_analysis.ipynb." 
+ "Computing Time": "if you want more detailed advice please go to att_advisor_*.html", + "Uncovered Communication Time": "if you want more detailed advice please go to att_advisor_*.html", + "Free Time": "if you want more detailed advice please go to att_advisor_*.html" } time_name_map = { "Computing Time": "computing", @@ -43,17 +53,24 @@ class OverallSummaryAdvice(AdviceBase): "Free Time": ['SDMA Time(Num)'] } - def __init__(self, collection_path: str, kwargs: dict): - super().__init__(collection_path) + def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, + torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): + profile_path = get_profile_path(collection_path) + super().__init__(profile_path, n_processes, cann_version, torch_version, **kwargs) self.base_collection_path = kwargs.get("base_collection_path", "") self._has_base_collection = False self._is_minimal_profiling = False self.cur_data = {} + self.cur_data_table = {} self.cur_bottleneck = {} self.cur_advices = "" self._headers = [] self._base_data = [] self._comparison_data = [] + self.html_render = HTMLRender() + self.result = OptimizeResult() + self.bottleneck_str = "" + self.bottleneck_table = {} @staticmethod def split_duration_and_num(time_value: str) -> tuple: @@ -77,13 +94,6 @@ class OverallSummaryAdvice(AdviceBase): return float("inf") return dividend / divisor - def run(self): - if self.path_check(): - self.process() - self.output() - self.identify_bottleneck() - return self.output_format_data - def path_check(self): if self.base_collection_path: if os.path.exists(self.base_collection_path): @@ -170,7 +180,79 @@ class OverallSummaryAdvice(AdviceBase): self.cur_bottleneck["overall_data"] = overall_bottleneck self.cur_bottleneck["comparison_result"] = comparison_bottleneck - def output(self): - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - self.output_format_data[self.ADVICE] = 
def format_cur_data(self):
    """
    Convert every non-empty entry of ``self.cur_data`` into a one-row table
    ({"headers": [...], "data": [[...]]}) stored in ``self.cur_data_table``.

    Entries whose name is one of the values of ``self.time_name_map`` get each cell
    joined into a comma separated string, other entries keep their raw values.
    """
    if not self.cur_data:
        return
    for category, values_by_header in self.cur_data.items():
        if not values_by_header:
            continue
        if category in list(self.time_name_map.values()):
            row = [','.join(str(part) for part in cell) for cell in values_by_header.values()]
        else:
            row = [*values_by_header.values()]
        table = {"headers": [*values_by_header.keys()], "data": [row]}
        # store a deep copy, as the original implementation does
        self.cur_data_table[category] = copy.deepcopy(table)
def get_profile_path(collection_path):
    """
    Find the directory that contains the profiler info file.

    :Param collection_path: root directory to search recursively
    :Return: the first visited directory holding a file whose name starts with
        "profiler_info", or None when there is no such file or no path was given
    """
    if not collection_path:
        # os.walk raises TypeError for None, an unset path simply has no profile directory
        return None
    for root, _, files in os.walk(collection_path):
        if any(file.startswith("profiler_info") for file in files):
            return root
    return None
class TimelineFusionOpsAnalyzer(BaseAnalyzer):
    """
    Scan the timeline events for operator combinations listed in the fusion operators
    database and record the code stacks where a torch_npu api could replace them.
    """
    dataset_cls_list = [TimelineEventDataset]

    def __init__(self, collection_path, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION,
                 torch_version=const.DEFAULT_TORCH_VERSION, **kwargs):
        super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs)
        # "<npu_api>:<rule>" -> matched dataset indexes; a Manager dict is used as soon as
        # more than one process is configured
        self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict()
        # "<npu_api>:<rule>" -> {stack: called counts}
        self.matched_op_stacks = {}
        # stays True until at least one matched event carries a call stack
        self.empty_stacks = True
        key = TimelineEventDataset.get_key()
        self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key)

    def optimize(self, **kwargs):
        """
        Match every rule of the aten and optimizer op/api maps against the timeline,
        query the stacks of the matched events, then make the record and the html.
        :Return: self.result
        """
        for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]:

            for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version),
                                                      f"_{mode}_op_api_map").items(), leave=False, ncols=100,
                                              desc="Scanning timeline for affinity apis"):
                # several apis for one rule are separated by "/"
                for npu_api in npu_apis.split("/"):
                    self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode)

        self.query_stack(self.timeline_event_dataset)

        logger.info("Finish timeline analysis")
        self.make_record()
        self.make_render()
        return self.result

    def find_fusion_ops(self, event_dataset, ops: str, npu_api: str, mode: str):
        """
        :Param event_dataset: dataset of timeline event
        :Param ops: operator combination with '-' as separator , e.g. permute-reshape
        :Param npu_api: api of torch_npu, generally more efficient than torch api
        :Param mode: aten or dequeue or optimizer
        :Return: None, matched indexes are stored in self._matched_op_index
        """
        op_rule_pattern, enable_regex = self._format_rule_to_pattern(ops)
        if not enable_regex:
            self._match_ops(event_dataset, op_rule_pattern, npu_api, mode)
        else:
            # best effort: a rule that fails to match with regex is reported and skipped
            try:
                self._match_ops_with_regex(event_dataset, op_rule_pattern, npu_api, mode)
            except Exception as e:
                logger.warning("Failed to find fusion operators with regex %s, reason is %s", ops, e)

    def _match_ops(self, event_dataset, ops: str, npu_api: str, mode: str):
        """ match operator based on fusion operators rule(without regex),
            only strictly equals of op name list means matched
        :Param event_dataset: dataset of timeline event
        :Param ops: operator combination with '-' as separator , e.g. permute-reshape
        :Param npu_api: api of torch_npu, generally more efficient than torch api
        :Param mode: aten or dequeue or optimizer
        """
        op_list = ops.split(const.OP_SEP)

        matched_op_index = set()
        api_ops_matched = False

        for index, event in enumerate(getattr(event_dataset, mode)):
            # cheap check on the first operator before comparing the whole window
            if self._replace_op_name_prefix(event.name, mode) != op_list[0]:
                continue
            tmp_dequeue_event_names = [self._replace_op_name_prefix(window_event.name, mode) for window_event in
                                       getattr(event_dataset, mode)[index: index + len(op_list)]]
            if tmp_dequeue_event_names != op_list:
                continue
            api_ops_matched = True
            # only the first event of the matched combination is recorded
            matched_op_index.add(event.dataset_index)

        if api_ops_matched:
            self._matched_op_index[npu_api + f":{ops}"] = matched_op_index

    def _match_ops_with_regex(self, event_dataset, op_rule_pattern: str, npu_api: str,
                              mode: str):
        """ match operator based on fusion operators rule(with regex),
            using regex to support condition like 'a = torch.mul(xxx) if xxx else torch.add(xxx)'
        :Param event_dataset: dataset of timeline event
        :Param op_rule_pattern: fusion operators rule with regex definition , e.g. add-mul{0,10}, add-mul*
        :Param npu_api: api of torch_npu, generally more efficient than torch api
        :Param mode: aten or dequeue or optimizer
        """
        matched_op_index = set()
        # every op name is wrapped with the separator, e.g. "-add--mul--div-"
        total_op_name = "".join([f"{const.OP_SEP}{self._replace_op_name_prefix(event.name, mode)}{const.OP_SEP}"
                                 for event in
                                 getattr(event_dataset, mode)])

        matched_pattern_index_tuple = [(x.start(0), x.end(0)) for x in re.finditer(op_rule_pattern, total_op_name)]
        # convert list of index tuple to a whole list: [(3, 25), ...] -> [3, 25, ...]
        total_ops_split_points = [num for sublist in matched_pattern_index_tuple for num in sublist]

        api_ops_matched = len(total_ops_split_points) != 0

        op_index = []
        if 0 not in total_ops_split_points:
            total_ops_split_points = [0] + total_ops_split_points
        if len(total_op_name) not in total_ops_split_points:
            total_ops_split_points.append(len(total_op_name))

        # convert total ops name like "-add-mul-xxx-div-" to small pieces like [["add", "mul"], [...], ["div"]]
        # by the regex index and then calculate the real index for matched fusion operators in event dataset
        for left, right in zip(total_ops_split_points, total_ops_split_points[1:]):
            matched_op_flag = (left, right) in matched_pattern_index_tuple
            matched_ops_list = total_op_name[left: right].strip(const.OP_SEP).split(const.OP_SEP + const.OP_SEP)
            op_index.append([matched_op_flag, len(matched_ops_list)])
        for i, _ in enumerate(op_index):
            if i > 0:
                # calculate cumsum for indexing matched operator
                op_index[i][1] = op_index[i][1] + op_index[i - 1][1]
        # leading sentinel, so that op_index[i - 1][1] is the number of operators before piece i
        op_index = [[False, 0]] + op_index

        for i, _ in enumerate(op_index):
            if not op_index[i][0]:
                continue
            index = op_index[i - 1][1]
            # NOTE(review): this adds the position inside the `mode` event list next to the
            # dataset index added below, while _match_ops only stores dataset indexes - confirm
            matched_op_index.add(index)

            if index > len(getattr(event_dataset, mode)) - 1:
                continue
            dataset_index = getattr(event_dataset, mode)[index].get("dataset_index")
            matched_op_index.add(dataset_index)

        if api_ops_matched:
            self._matched_op_index[npu_api + f":{op_rule_pattern}"] = sorted(matched_op_index)

    def make_record(self):
        """
        make record for what and how to optimize
        """
        if not self.matched_op_stacks:
            return

        desc = f"Found {len(format_timeline_result(self.matched_op_stacks))} apis to be replaced" \
               f" based on the runtime env cann-{self.cann_version} and torch-{self.torch_version}"
        suggestion = "Please replace training api according to sub table 'Affinity training api'"
        if self.empty_stacks:
            # no matched event carried a call stack, so point to the doc about collecting stacks
            desc += ", but with no stack"
            suggestion = const.TIMELINE_EMPTY_STACKS_PROMPT.format(
                timeline_profiling_doc_url=const.TIMELINE_WITH_STACK_DOC_URL
            )

        optimization_item = OptimizeItem(
            SupportedScopes.TIMELINE_FUSION_OPS,
            desc,
            [suggestion]
        )

        self.result.add(OptimizeRecord(optimization_item))

        record_title = ["Affinity API", "Code stacks", "Stack called counts"]
        self.result.add_detail(SupportedScopes.TIMELINE_FUSION_OPS, headers=record_title)

        for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items():
            if not stacks_info:
                detail = [api_name, "null", "null"]
                self.result.add_detail(SupportedScopes.TIMELINE_FUSION_OPS, detail=detail)
            else:
                for stack in stacks_info:
                    detail = [api_name, *stack]
                    self.result.add_detail(SupportedScopes.TIMELINE_FUSION_OPS, detail=detail)

    def make_render(self):
        """Render the matched affinity apis with the affinity_api.html template."""
        format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True)

        self.html_render.render_template(key="schedule",
                                         template_dir="templates",
                                         template_name="affinity_api.html",
                                         cann_version=self.cann_version,
                                         torch_version=self.torch_version,
                                         empty_stacks=self.empty_stacks,
                                         with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL,
                                         api_doc_url=const.TIMELINE_API_DOC_URL,
                                         result=format_result_for_html)

    def query_stack(self, event_dataset):
        """
        Count the code stacks of all matched events into self.matched_op_stacks.
        :Param event_dataset: dataset of timeline event
        """
        if all(len(matched_index) == 0 for matched_index in self._matched_op_index.values()):
            return

        op_stack_list = event_dataset.parse_data_with_generator(self._query_stack_by_matched_index)
        for op_stack in op_stack_list:
            for op_rule, stack in op_stack.items():
                # the rule is registered even without stack, so that the api is still reported
                if op_rule not in self.matched_op_stacks:
                    self.matched_op_stacks[op_rule] = {}
                if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG:
                    continue
                if stack not in self.matched_op_stacks[op_rule]:
                    self.matched_op_stacks[op_rule][stack] = 0
                self.matched_op_stacks[op_rule][stack] += 1

    def _query_stack_by_matched_index(self, index, event):
        """
        :Param index: index of the event, compared with the indexes stored in self._matched_op_index
        :Param event: raw timeline event
        :Return: dict of op rule and stack for every rule matched at this index, the no-stack flag
            is used as value when none of the matched rules got a stack
        """
        stack_record = {}
        event = TimelineEvent(event)

        matched_op_rules = []
        for op_rule, matched_index in self._matched_op_index.items():
            if index not in matched_index:
                continue

            matched_op_rules.append(op_rule)
            stack = event.args.get(const.CALL_STACKS)

            if not stack:
                logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event)
                continue

            if self.empty_stacks and stack:
                self.empty_stacks = False

            stack_record[op_rule] = stack

        if matched_op_rules and not stack_record:
            for op_rule in matched_op_rules:
                stack_record[op_rule] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG

        return stack_record

    def _replace_op_name_prefix(self, event_name, mode):
        """
        :Param event_name: name of timeline event
        :Param mode: aten or dequeue or optimizer
        :Return: event name without the prefix of the mode
        """
        if mode == const.DEQUEUE.lower():
            op_name_prefix = f"{const.DEQUEUE}{const.DEQUEUE_SEP}"
        elif mode in (const.ATEN, const.ATEN.lower()):
            # optimize() passes the lowercase mode, so accept it like the dequeue branch does
            op_name_prefix = f"{const.ATEN}{const.ATEN_SEP}"
        else:
            op_name_prefix = f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"

        return event_name.replace(op_name_prefix, "")

    def _format_rule_to_pattern(self, op_rule):
        """
        Args:
            op_rule: like (mul){0,1}-(add|neg){0,2}-dropout-(softmax)*

        Returns: op_pattern like (-mul-){0,1}(-add-|-neg-){0,2}(-dropout-)(-softmax-)*
            and a flag telling whether the rule has to be matched with regex
        """
        enable_regex = False
        if "(" not in op_rule and ")" not in op_rule:
            # op_rule which requires fuzzy matching must consist of "()"
            return op_rule, enable_regex

        enable_regex = True
        op_pattern_list = op_rule.split(const.OP_SEP)
        format_op_pattern = ""
        for op_pattern in op_pattern_list:
            matched_res = re.search(r'\((.*?)\)', op_pattern)

            # range of the op names inside "()", or the whole piece when there are no brackets
            ops_index_range = (matched_res.start() + 1, matched_res.end() - 1) if matched_res else (
                0, len(op_pattern))

            op_names = op_pattern[ops_index_range[0]: ops_index_range[1]]
            tmp_op_names_record = []
            for op_name in op_names.split("|"):
                tmp_op_names_record.append(f"{const.OP_SEP}{op_name.strip(' ')}{const.OP_SEP}")
            # whatever follows the closing bracket, e.g. "{0,1}" or "*"
            op_suffix = op_pattern[ops_index_range[1] + 1:]
            op_names_format = f"({'|'.join(tmp_op_names_record)}){op_suffix}"

            format_op_pattern += op_names_format
        return format_op_pattern, enable_regex
class OpStackFinder:
    """Find the code stacks of operators, selected by name and/or task type, in a timeline dataset."""

    def __init__(self):
        self.n_processes = get_analyze_processes()
        # rows of [task id, op name, task type, stack]
        self._stack_record = []
        # matched index -> list of [task id, op name, task type]
        self._task_id_record = {}
        self.op_name = None
        self.task_type = None
        self.matched_index = set()

    def get_api_stack_by_op(self, event_dataset: "TimelineEventDataset", op_name: List[str] = None,
                            task_type: str = None, disable_multiprocess=False):
        """
        :Param event_dataset: dataset of timeline event
        :Param op_name: operator name, e.g. IndexPutV2
        :Param task_type: operator task type, optionals are AI_CPU and AI_CORE
        :Param disable_multiprocess: disable multiprocessing, avoid cost time of enable new process for light task
        """
        if not op_name:
            op_name = []
        if not isinstance(op_name, list):
            op_name = [op_name]

        self.op_name = ",".join(op_name)
        self.task_type = task_type
        # without explicit names every task op name of the dataset is searched
        op_name_list = event_dataset.task_op_names if not op_name else op_name

        if self.n_processes <= 1 or disable_multiprocess:
            self._query_stacks_multiprocess(event_dataset, op_name_list, task_type)
        else:
            event_num_per_process = int(len(op_name_list) / self.n_processes) + 1
            parallel_analyzer = ParallelJob(
                self._query_stacks_multiprocess,
                [[event_dataset, op_name_list[i:i + event_num_per_process], task_type]
                 for i in range(0, len(op_name_list), event_num_per_process)],
                job_name="Analyzing operator stacks from timeline"
            )
            parallel_analyzer.start(self.n_processes)
        self.query_stack(event_dataset)

    def make_record(self, result: "OptimizeResult"):
        """
        make record for what and how to optimize
        """
        if not self._stack_record:
            return

        desc = f"Found {len(self._stack_record)} called stacks for"
        if self.op_name and self.task_type:
            desc += f" operators with name '{self.op_name}' with task type '{self.task_type}'"
        elif self.op_name and not self.task_type:
            desc += f" operators with name '{self.op_name}'"
        elif self.task_type and not self.op_name:
            desc += f" operators with task type '{self.task_type}'"
        else:
            desc += " all operators"

        suggestion = "Please use command 'ma-advisor analyze profiling' to analyze operators"
        optimization_item = OptimizeItem(
            "Operator stacks",
            desc,
            [suggestion]
        )
        result.add(OptimizeRecord(optimization_item))

        record_title = ["Task ID", "op name", "op type", "code stacks"]
        result.add_detail('operator stacks', headers=record_title)

        for op_info in self._stack_record:
            result.add_detail('operator stacks', detail=op_info)

    def _get_api_stack_by_op(self, event_dataset: "TimelineEventDataset", op_name: str, task_type: str):
        """
        Record, for every event with the given name and task type, the index of the event that
        holds its stack (or a no-stack code) in self.matched_index and self._task_id_record.
        """
        for _, src_op_event in event_dataset.ops_with_task_type.items():

            op_task_type = src_op_event.get(const.TASK_TYPE)
            if not (src_op_event.name == op_name and op_task_type and op_task_type == task_type):
                continue

            # the flow event is looked up with several key layouts
            torch_to_npu_key = f"s-{src_op_event.tid}-{src_op_event.ts}"
            torch_to_npu_event = event_dataset.torch_to_npu.get(torch_to_npu_key) or event_dataset.torch_to_npu.get(
                f"s-{src_op_event.ts}") or event_dataset.torch_to_npu.get(f"s-{src_op_event.ts.replace('.', '')}")

            acl_to_npu_event = src_op_event.ts in event_dataset.acl_to_npu

            if not torch_to_npu_event and not acl_to_npu_event:
                continue

            # query stack by torch_to_npu first, due to each operator had acl_to_npu incoming flow in cann6.3
            if torch_to_npu_event:
                dst_op_index = self._query_index_by_torch_to_npu(event_dataset, torch_to_npu_event)
            else:
                dst_op_index = self._query_index_by_acl_to_npu(acl_to_npu_event)

            # compare with None, a plain truth test would drop the valid dataset index 0
            if dst_op_index is None:
                continue

            task_id = src_op_event.task_id
            if not task_id:
                continue
            self.matched_index.add(dst_op_index)
            if dst_op_index not in self._task_id_record:
                self._task_id_record[dst_op_index] = []
            self._task_id_record[dst_op_index].append([task_id, op_name, task_type])

    def _query_index_by_torch_to_npu(self, event_dataset, torch_to_npu_event):
        """
        :Return: "dataset_index" of the event with stack found by the ts of the flow event,
            or the backward no-stack code when there is no such event
        """
        dst_op_event_key = torch_to_npu_event.ts
        dst_op_event = event_dataset.ops_with_stack.get(dst_op_event_key)

        if not dst_op_event:
            return const.TIMELINE_BACKWARD_NO_STACK_CODE

        return dst_op_event.get("dataset_index")

    def _query_index_by_acl_to_npu(self, acl_to_npu_event):
        """
        :Return: the acl_to_npu no-stack code when the flag is set, otherwise None
        """
        if acl_to_npu_event:
            return const.TIMELINE_ACL_TO_NPU_NO_STACK_CODE
        return None

    def _query_stacks_multiprocess(self, event_dataset, op_name_list, task_type):
        """Query every operator name, with both AI_CORE and AI_CPU when no task type is given."""
        for op_name in op_name_list:
            if task_type is not None:
                self._get_api_stack_by_op(event_dataset, op_name, task_type)
            else:
                self._get_api_stack_by_op(event_dataset, op_name, const.AI_CORE)
                self._get_api_stack_by_op(event_dataset, op_name, const.AI_CPU)

    def _format_stack_record(self):
        """
        :Return: list of [key of self._task_id_record, *records of this key]
        """
        stack_list = []
        for task_id, stack_info in self._task_id_record.items():
            stack_list.append([task_id, *stack_info])
        return stack_list

    def _query_stack_by_matched_index(self, index, event):
        """
        Append the stack of a matched event to self._stack_record.
        :Param index: index of the event, compared with self.matched_index
        :Param event: raw timeline event
        :Return: always None
        """
        if index not in self.matched_index:
            return None
        event = TimelineEvent(event)
        stack = event.args.get(const.CALL_STACKS)
        stack = stack if stack else const.NO_STACK_REASON_MAP.get(const.TIMELINE_BACKWARD_NO_STACK_CODE)
        for matched_op_info in self._task_id_record.get(index, []):
            self._stack_record.append([*matched_op_info, stack])

        # NOTE(review): this loop runs for every matched event, so the acl_to_npu no-stack
        # records look like they are appended once per matched index - confirm it is intended
        for matched_op_info in self._task_id_record.get(const.TIMELINE_ACL_TO_NPU_NO_STACK_CODE, []):
            self._stack_record.append([*matched_op_info,
                                       const.NO_STACK_REASON_MAP.get(const.TIMELINE_ACL_TO_NPU_NO_STACK_CODE)])
        return None

    def query_stack(self, event_dataset: "TimelineEventDataset"):
        """Parse the dataset and collect the stacks of the matched events, nothing is done for an empty dataset."""
        if not event_dataset.dataset_len:
            return
        _ = event_dataset.parse_data_with_generator(self._query_stack_by_matched_index)
集群分析的数据准备\n", - "首先我们当前支持PyTorch多卡大模型的集群分析,您需要输入集群分析的profiling_path路径,例如: \n", - "--{profiling_path} \n", - " -- xxxx_ascend_pt \n", - " -- xxxx_ascend_pt \n", - " -- xxxx_ascend_pt \n", - " ...... \n", - " -- xxxx_ascend_pt \n", - "里面每张卡的profiling文件都是ascend_pt结尾的文件。 \n", - "\n", - "## 2. 集群分析解决的问题 \n", - "当前的功能主要有四项: \n", - "1). 识别多卡间的计算慢卡(根据计算时间等推断) \n", - "2). 识别多卡间的通信慢现象(根据通信链路的带宽判断) \n", - "3). 对多卡间的计算算子进行统计展示(识别不同卡的算子差异) \n", - "4). 展示集群流水并行图(根据时间轴展示多卡间的计算和通信时间) " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "36b7a24cc7ca5da2", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:38.379699800Z", - "start_time": "2023-11-21T12:53:38.363755900Z" - }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# EDIT THE PROFILING DATA PATH\n", - "cluster_path = \"YOUR PATH\"\n", - "interface = Interface(cluster_path)" - ] - }, - { - "cell_type": "markdown", - "id": "cf832ac2e0dfa30f", - "metadata": { - "collapsed": false - }, - "source": [ - "## 1) 识别慢卡" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "40aac93278dd6e34", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:41.815599700Z", - "start_time": "2023-11-21T12:53:41.783393700Z" - }, - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO]Cluster has been analyzed because of the existence of cluster analysis output directory.\n", - "[INFO]Skip Cluster analyze backend.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('cluster', 'slow rank')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "cd3fceda-49f0-439f-9c54-cc31490fc99e", - "metadata": {}, - "outputs": [], - "source": [ - "# EDIT THE DATA TO SHOW WHAT YOU WANT\n", - "data = dataset.get('data')\n", - "words = dataset.get('bottleneck')\n", - "rank_ids = list(data.keys())\n", - "# 柱状图显示属性\n", - "compute_time = [data.get(key, {})[0] for key in rank_ids]\n", - "communication_time = 
[data.get(key, {})[1] for key in rank_ids]\n", - "free_time = [data.get(key, {})[2] for key in rank_ids]\n", - "# 柱宽\n", - "width = 0.2\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6a1d82fb-a31b-49ab-a859-6d4bb898c512", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Communication has some issues in the cluster, because the max difference of Communication time has reached 88.476ms. \n", - "Free has some issues in the cluster, because the max difference of Free time has reached 29.224ms. \n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 设置展示图大小\n", - "fig, ax = plt.subplots(figsize=(10,8))\n", - "\n", - "x = np.arange(len(rank_ids)) # the label locations\n", - "\n", - "rects1 = ax.bar(x - width, compute_time, width, label='Computing')\n", - "rects2 = ax.bar(x, communication_time, width, label='Communication')\n", - "rects3 = ax.bar(x + width, free_time, width, label='Free')\n", - "\n", - "\n", - "# Add some text for labels, title and custom x-axis tick labels, etc.\n", - "ax.set_ylabel('Time(us)')\n", - "ax.set_xlabel('Rank ID')\n", - "ax.set_title('Step Time')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(rank_ids)\n", - "ax.legend()\n", - "print(words)" - ] - }, - { - "cell_type": "markdown", - "id": "3511befaff513e8e", - "metadata": { - "collapsed": false - }, - "source": [ - "## 2)识别通信链路慢" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "2a1e617d2a117125", - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO]Cluster has been analyzed because of the existence of cluster analysis output directory.\n", - "[INFO]Skip Cluster analyze backend.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('cluster', 'slow link')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c8bca314-a8da-4a5b-985a-c36f00154552", - "metadata": {}, - "outputs": [], - "source": [ - "# EDIT THE DATA TO SHOW WHAT YOU WANT\n", - "data = dataset.get('data')\n", - "words = dataset.get('bottleneck')\n", - "rank_ids = list(data.keys())\n", - "# 柱状图显示属性\n", - "sdma_bw = [data.get(key, {}).get(\"SDMA bandwidth(GB/s)\") for key in rank_ids]\n", - "rdma_bw = [data.get(key, {}).get(\"RDMA bandwidth(GB/s)\") for key in rank_ids]\n", - "# 柱宽\n", - "width = 0.4" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "99ef04c9-ec07-4790-bbb6-0de9bf6c99d0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", 
- "output_type": "stream", - "text": [ - "RDMA bandwidth(GB/s): \n", - "The average is 0.041, while the maximum is 0.041GB/s and the minimum is 0.041GB/s. the difference is 0.0GB/s. \n", - "SDMA bandwidth(GB/s): \n", - "The average is 0.054, while the maximum is 0.056GB/s and the minimum is 0.052GB/s. the difference is 0.003GB/s. \n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 设置展示图大小\n", - "fig, ax = plt.subplots(figsize=(10,8))\n", - "\n", - "x = np.arange(len(rank_ids)) # the label locations\n", - "\n", - "rects1 = ax.bar(x - width/2, sdma_bw, width, label='SDMA')\n", - "rects2 = ax.bar(x + width/2, rdma_bw, width, label='RDMA')\n", - "\n", - "# Add some text for labels, title and custom x-axis tick labels, etc.\n", - "ax.set_ylabel('Bandwidth(GB/s)')\n", - "ax.set_xlabel('Rank ID')\n", - "ax.set_title('Transport Bandwidth')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(rank_ids)\n", - "ax.legend()\n", - "print(words)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "77d6efa1-48e3-409f-82c4-3e2b3d868898", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RDMA bandwidth(GB/s): \n", - "The average is 0.041, while the maximum is 0.041GB/s and the minimum is 0.041GB/s. the difference is 0.0GB/s. \n", - "SDMA bandwidth(GB/s): \n", - "The average is 0.054, while the maximum is 0.056GB/s and the minimum is 0.052GB/s. the difference is 0.003GB/s. \n" - ] - } - ], - "source": [ - "print(dataset.get('bottleneck'))" - ] - }, - { - "cell_type": "markdown", - "id": "ce27a1d3-1354-45f7-88d8-dcb8e438b2b2", - "metadata": {}, - "source": [ - "## 3) 分布式卡上的kernel算子统计展示" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e05774e9-c47e-400f-8421-b4b71bcdcbc4", - "metadata": {}, - "outputs": [], - "source": [ - "dataset = interface.get_data('cluster', 'kernel')" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "e95b6849-1738-4975-929f-734edff5d1c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rank idNameInput ShapesInput Data TypesOutput ShapesDuration(us)_meanDuration(us)_varDuration(us)_maxDuration(us)_minDuration(us)_countDuration(us)_sum
00Add\"1024,2,5120;1024,2,5120\"DT_BF16;DT_BF16\"1024,2,5120\"45.01205082.95274855.925535.310816720.1928
10Add\"2,8192,5120;2,8192,5120\"DT_BF16;DT_BF16\"2,8192,5120\"447.183700NaN447.1837447.18371447.1837
20Add\"8192,2,1920;1920\"DT_BF16;DT_BF16\"8192,2,1920\"54.3308501.34284655.245652.64634217.3234
30Add\"8192,2,2560;2560\"DT_BF16;DT_BF16\"8192,2,2560\"75.4853750.76131576.280274.24074301.9415
40Add\";\"FLOAT;FLOAT\"\"1.2008840.0172571.49960.95975060.0442
....................................
144115atomic_memset-1_67_1998432_1_0\"\"UNDEFINED\"\"3.160000NaN3.16003.160013.1600
144215trans_Cast_14\"1\"FLOAT\"1\"1.3900000.0230671.60001.260045.5600
144315trans_Cast_15\"\"INT32\"\"64.44500036.27610070.300059.20004257.7800
144415trans_Cast_4\"1\"FLOAT\"1\"1.5550000.0358571.94001.3200812.4400
144515trans_Cast_5\"\"INT32\"\"62.89500015.58420069.860056.76008503.1600
\n", - "

1446 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " rank id Name Input Shapes \\\n", - "0 0 Add \"1024,2,5120;1024,2,5120\" \n", - "1 0 Add \"2,8192,5120;2,8192,5120\" \n", - "2 0 Add \"8192,2,1920;1920\" \n", - "3 0 Add \"8192,2,2560;2560\" \n", - "4 0 Add \";\" \n", - "... ... ... ... \n", - "1441 15 atomic_memset-1_67_1998432_1_0 \"\" \n", - "1442 15 trans_Cast_14 \"1\" \n", - "1443 15 trans_Cast_15 \"\" \n", - "1444 15 trans_Cast_4 \"1\" \n", - "1445 15 trans_Cast_5 \"\" \n", - "\n", - " Input Data Types Output Shapes Duration(us)_mean Duration(us)_var \\\n", - "0 DT_BF16;DT_BF16 \"1024,2,5120\" 45.012050 82.952748 \n", - "1 DT_BF16;DT_BF16 \"2,8192,5120\" 447.183700 NaN \n", - "2 DT_BF16;DT_BF16 \"8192,2,1920\" 54.330850 1.342846 \n", - "3 DT_BF16;DT_BF16 \"8192,2,2560\" 75.485375 0.761315 \n", - "4 FLOAT;FLOAT \"\" 1.200884 0.017257 \n", - "... ... ... ... ... \n", - "1441 UNDEFINED \"\" 3.160000 NaN \n", - "1442 FLOAT \"1\" 1.390000 0.023067 \n", - "1443 INT32 \"\" 64.445000 36.276100 \n", - "1444 FLOAT \"1\" 1.555000 0.035857 \n", - "1445 INT32 \"\" 62.895000 15.584200 \n", - "\n", - " Duration(us)_max Duration(us)_min Duration(us)_count Duration(us)_sum \n", - "0 55.9255 35.3108 16 720.1928 \n", - "1 447.1837 447.1837 1 447.1837 \n", - "2 55.2456 52.6463 4 217.3234 \n", - "3 76.2802 74.2407 4 301.9415 \n", - "4 1.4996 0.9597 50 60.0442 \n", - "... ... ... ... ... 
\n", - "1441 3.1600 3.1600 1 3.1600 \n", - "1442 1.6000 1.2600 4 5.5600 \n", - "1443 70.3000 59.2000 4 257.7800 \n", - "1444 1.9400 1.3200 8 12.4400 \n", - "1445 69.8600 56.7600 8 503.1600 \n", - "\n", - "[1446 rows x 11 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "27b75df4-792b-43dc-aa5c-d3c265642c1e", - "metadata": {}, - "outputs": [], - "source": [ - "# 保存到csv查看, 可修改保存路径\n", - "dataset.to_csv('cluster_kernel_details.csv', index=False, sep='\\t')" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 4) 展示集群流水并行图\n", - "使用说明: \n", - "1). 需要使用Ascend Torch Profiler采集数据,如果需要展示FP和BP需要将activities设置为采集CPU和NPU \n", - "2). rank_ids为要展示的rank id列表,必选参数, 可视化顺序与rank_ids的顺序一致 \n", - "3). worker_num为多进程数量,可选参数,请根据机器配置调整,默认值为机器可用核心数的一半 \n", - "4). 如果没有采集CPU数据,则展示Stage和Bubble的流水图 \n", - "5). 生成的json文件可以在chrome trace中查看 \n", - "\n", - "示例图:\n", - "![pipeline_view](../../profiler/test/resource/pipeline_view.png)" - ], - "metadata": { - "collapsed": false - }, - "id": "ae45826394463cc4" - }, - { - "cell_type": "code", - "outputs": [], - "source": [ - "import json\n", - "\n", - "# rank_ids为要呈现的rank id列表,必选参数\n", - "# 可以使用列表推导式生成需要的rank_ids,最终展示顺序和rank_ids的顺序一致\n", - "# worker_num为多进程数量,可选参数,请根据机器配置调整,默认值为机器可用核心数的一半\n", - "dataset = interface.get_data(\"cluster\", \"pipeline\", rank_ids=[0, 1, 2, 3, 4, 5, 6, 7], worker_num=8)\n", - "\n", - "# 保存json数据,在chrome trace中查看\n", - "with open(\"./pipeline_view.json\", \"w\") as f:\n", - " json.dump(dataset.get(\"data\", []), f)" - ], - "metadata": { - "collapsed": false - }, - "id": "baf66781eccfbca1" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": 
"python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/profiler/advisor/common/__init__.py b/profiler/advisor/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py new file mode 100644 index 0000000000000000000000000000000000000000..44f09d0a58b8ec5327846572af65f7161ee93e43 --- /dev/null +++ b/profiler/advisor/common/analyzer_scopes.py @@ -0,0 +1,10 @@ +class SupportedScopes: + + # used for specify fourth-level commands and define the key of the result dict + # the key defined bellow must be the same as value + TIMELINE_FUSION_OPS = "timeline_fusion_ops" + GRAPH = "graph" + SLOW_RANK = "slow_rank" + SLOW_LINK = "slow_link" + OVER_ALL = "over_all" + PROFILING_OPERATOR_ANALYSIS = "profiling_operator_analysis" diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..4c3fc42ff3a67462065498cd3ea569ef4c7b054c --- /dev/null +++ b/profiler/advisor/common/constant.py @@ -0,0 +1,140 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ---------------------------------------------------------------------------
# timeline
# Literal names and separators used when reading timeline/profiling records.
# ---------------------------------------------------------------------------
DEQUEUE = "Dequeue"
DEQUEUE_SEP = "@"
ATEN = "aten"
NPU = "npu"
ATEN_SEP = "::"
OPTIMIZER = "Optimizer"
OPTIMIZER_SEP = "#"
OPTIMIZER_STEP = "step"
ENQUEUE = "enqueue"
TORCH_TO_NPU = "torch_to_npu"
OP_COMPILE_NAME = "AscendCL@aclopCompileAndExecute"
OP_COMPILE_ID = "aclopCompileAndExecute"
MAX_OP_COMPILE_NUM = 20
ACL_TO_NPU = "acl_to_npu"
TASK_TYPE = "Task Type"
CPU_OP = "cpu_op"
AI_CORE = "AI_CORE"
AI_CPU = "AI_CPU"
CALL_STACKS = "Call stack"
INPUT_DIMS = "Input Dims"
OP_SEP = "-"
MA_ADVISOR_MAX_PROCESSES = 16
# name of a setting, not its value (the string equals the constant's own name)
MA_ADVISOR_ANALYZE_PROCESSES = "MA_ADVISOR_ANALYZE_PROCESSES"
TIMELINE_OP_STACKS_DATASET = "timeline_op_stacks_dataset"

# Human-readable reasons for a missing call stack, and the integer codes that
# stand for them.
TIMELINE_BACKWARD_NO_STACK = "Backward broadcast, without call stacks in profiling."
TIMELINE_ACL_TO_NPU_NO_STACK = "Incoming flow is 'acl_to_npu', without call stacks in profiling."
TIMELINE_BACKWARD_NO_STACK_CODE = -1
TIMELINE_ACL_TO_NPU_NO_STACK_CODE = -2
TIMELINE_FUSION_OPS_NO_STACK_FLAG = "NO STACK"
# code -> reason; built from the constants above so the two cannot drift apart
NO_STACK_REASON_MAP = {
    TIMELINE_BACKWARD_NO_STACK_CODE: TIMELINE_BACKWARD_NO_STACK,
    TIMELINE_ACL_TO_NPU_NO_STACK_CODE: TIMELINE_ACL_TO_NPU_NO_STACK,
}

# Documentation links embedded in generated suggestions.
TIMELINE_API_DOC_URL = "https://support.huaweicloud.com/bestpractice-modelarts/modelarts_10_2516.html"
AFFINITY_TRAINING_API = "Affinity training api"
TIMELINE_WITH_STACK_DOC_URL = (
    "https://www.hiascend.com/document/detail/zh/canncommercial/"
    "70RC1/modeldevpt/ptmigr/AImpug_0067.html"
)
PyTorch_AOE_OPERATOR_TUNE_URL = (
    "https://www.hiascend.com/document/detail/zh/canncommercial/"
    "70RC1/devtools/auxiliarydevtool/aoe_16_045.html"
)
# NOTE: "OPEATOR" is misspelled in the name; kept as-is because other modules
# may import it under this exact identifier.
MSLite_Infer_AOE_OPEATOR_TUNE_URL = (
    "https://www.mindspore.cn/lite/docs/en/master/use/cloud_infer/converter_tool_ascend.html#aoe-auto-tuning"
)
ENABLE_COMPILED_TUNE_URL = (
    "https://www.hiascend.com/document/detail/zh/canncommercial/"
    "70RC1/modeldevpt/ptmigr/AImpug_0059.html"
)

# same target as TIMELINE_WITH_STACK_DOC_URL, kept as its own name
ASCEND_PROFILER_URL = "https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/modeldevpt/ptmigr/AImpug_0067.html"
# template: expects a ``timeline_profiling_doc_url`` field when formatted
TIMELINE_EMPTY_STACKS_PROMPT = (
    "These APIs have no code stack. If parameter 'with_stack=False' while profiling, "
    "please refer to {timeline_profiling_doc_url} to set 'with_stack=True'. "
    "Otherwise, ignore following affinity APIs due to backward broadcast lack of stack."
)

CLUSTER_ANALYSIS = "Cluster analysis"
SLOW_RANK_TIME_RATIO_THRESHOLD = 0.05

# ---------------------------------------------------------------------------
# version_control
# ---------------------------------------------------------------------------
CANN_VERSION_C30 = '6.3.RC2'
CANN_VERSION_C13 = '7.0.RC1'
CANN_VERSION_C15 = '7.0.0'
CANN_VERSION_C17 = '8.0.0'
SUPPORTED_CANN_VERSION = [CANN_VERSION_C30, CANN_VERSION_C13, CANN_VERSION_C15, CANN_VERSION_C17]
DEFAULT_CANN_VERSION = CANN_VERSION_C17
ASCEND_PYTORCH_PROFILER = "ascend_pytorch_profiler"
MSLITE = "mslite"
MSPROF = "msprof"
SUPPORTED_PROFILING_TYPE = [ASCEND_PYTORCH_PROFILER, MSLITE, MSPROF]
DEFAULT_PROFILING_TYPE = ASCEND_PYTORCH_PROFILER
TORCH_VERSION_1_11_0 = '1.11.0'
TORCH_VERSION_2_1_0 = '2.1.0'

SUPPORTED_TORCH_VERSION = [TORCH_VERSION_1_11_0, TORCH_VERSION_2_1_0]
DEFAULT_TORCH_VERSION = TORCH_VERSION_2_1_0

TERMINAL_OUTPUT_HEADERS = ["No.", "Problem", "Description", "Suggestion"]
SKIP_ANALYZE_PROMPT = "Finish analysis, no optimization suggestions"
SKIP_QUERY_PROMPT = "Finish query operator stack, no operators"

# ---------------------------------------------------------------------------
# operator output constant
# ---------------------------------------------------------------------------
OPERATOR_OUT_TOPK = 10
OPERATOR_LIST_UNLIMIT = -1

DEFAULT_OPERATOR_TYPE = 'None_type'
DEFAULT_DURATION_ZERO = 0.0

# name of the log-level setting, its default, and the accepted values
ADVISOR_LOG_LEVEL = "ADVISOR_LOG_LEVEL"
DEFAULT_LOG_LEVEL = "INFO"
SUPPORTED_LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

# Cloud rule storage: region identifiers and endpoint templates.
RULE_BUCKET = "RULE-BUCKET"
CLOUD_RULE_REGION_CN_NORTH_9 = "cn-north-9"
CLOUD_RULE_REGION_CN_NORTH_7 = "cn-north-7"
CLOUD_RULE_REGION_CN_SOUTHWEST_2 = "cn-southwest-2"
CLOUD_RULE_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7, CLOUD_RULE_REGION_CN_NORTH_9, CLOUD_RULE_REGION_CN_SOUTHWEST_2]
INNER_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7]
DEFAULT_CLOUD_RULE_REGION = CLOUD_RULE_REGION_CN_SOUTHWEST_2

HTTP_PREFIXES = "http://"
HTTPS_PREFIXES = "https://"
COMMON_YAML_DIR = "modelarts/solution/ma_advisor_rules/"
# both suffixes take one positional ``str.format`` field
COMMON_ENDPOINT_SUFFIX = "obs.{}.myhuaweicloud.com"
INNER_ENDPOINT_SUFFIX = "obs.{}.ulanqab.huawei.com"

AICPU_RULES_YAML_NAME = "aicpu_rules.yaml"
FUSION_PASS_YAML_NAME = "op_fusion_pass.yaml"
# ---------------------------------------------------------------------------
# profiler/advisor/common/constant.py (remaining constants)
# ---------------------------------------------------------------------------
TIMELINE_FUSION_OPS_YAML_NAME = "timeline_fusion_ops.yaml"
# the three rule YAML file names collected in one list
CLOUD_YAML_NAME_LIST = [AICPU_RULES_YAML_NAME, FUSION_PASS_YAML_NAME, TIMELINE_FUSION_OPS_YAML_NAME]

MAX_RETRIES = 3
TIMEOUT = 3

ADVISOR_RULE_PATH = "ADVISOR_RULE_PATH"
CLOUD_RULE_PATH = "rules/cloud/"
DEFAULT_RULE_PATH = "./rules/"

TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID = -1

DEFAULT_TEMPLATE_HEADER = "Performance Optimization Suggestions"

PT_PROF_SUFFIX = "ascend_pt"
ASCEND_PROFILER_OUTPUT = "ASCEND_PROFILER_OUTPUT"
COLLECTION_PATH = "collection_path"
CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output"
KERNEL_DETAILS_CSV = "kernel_details.csv"
CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv"
CLUSTER_COMM_JSON = "cluster_communication.json"

BOTTLENECK = "bottleneck"
DATA = "data"

# diff --git a/profiler/advisor/common/graph/__init__.py b/profiler/advisor/common/graph/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
# (empty file)

# ---------------------------------------------------------------------------
# diff --git a/profiler/advisor/common/graph/graph.py b/profiler/advisor/common/graph/graph.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..6bab2042de3a09f9317f71fc6a5c9740743cc790
# ---------------------------------------------------------------------------
import logging
from typing import Dict, List, Tuple, Callable, Any, Optional, Union

import networkx as nx

from profiler.advisor.common.graph.graph_parser import HostGraphNode, QueryGraphNode

logger = logging.getLogger()


class Graph:
    """
    Wrapper around a ``networkx.DiGraph`` populated from node objects.

    ``nodes`` maps a name to a node object and ``edges`` is a list of
    ``(pre_node, next_node)`` pairs; :meth:`build` copies both into
    ``self.graph``.
    """

    # pylint: disable=too-many-instance-attributes
    def __init__(self,
                 nodes: Dict[str, Optional[Union[HostGraphNode, QueryGraphNode]]] = None,
                 edges: List[Tuple[Optional[Union[HostGraphNode, QueryGraphNode]],
                                   Optional[Union[HostGraphNode, QueryGraphNode]]]] = None,
                 name: str = None):
        """
        :param nodes: Mapping of name -> node object; defaults to an empty dict
        :param edges: List of (pre_node, next_node) pairs; defaults to an empty list
        :param name: Graph name, forwarded to ``nx.DiGraph`` and used by :meth:`draw`
        """
        self.name = name
        self.graph = nx.DiGraph(name=name)
        self.nodes = nodes if nodes is not None else {}
        self.edges = edges if edges is not None else []

    def build(self):
        """
        Add every entry of ``self.nodes`` and ``self.edges`` to ``self.graph``.

        :return: The populated ``nx.DiGraph``
        """
        for node in self.nodes.values():
            if node is None:
                # The annotation allows None values. add_node() would ignore
                # them, but reading ``node.op_type`` first would raise.
                continue
            # attach op_type as a node attribute
            self.add_node(node, op_type=node.op_type)
        for edge in self.edges:
            self.add_edge(*edge)
        return self.graph

    def get_size(self) -> Dict[str, int]:
        """Return ``{"edges": <count>, "nodes": <count>}`` for ``self.graph``."""
        if not hasattr(self.graph, "nodes"):
            return {"edges": 0, "nodes": 0}

        return {"edges": len(self.graph.edges),
                "nodes": len(self.graph.nodes)}

    def add_node(self, node: HostGraphNode, **kwargs):
        """Add ``node`` with ``kwargs`` as its attributes; ``None`` is ignored."""
        if node is None:
            return
        self.graph.add_node(node, **kwargs)

    def add_edge(self, pre_node: HostGraphNode, next_node: HostGraphNode):
        """
        Add the edge ``pre_node -> next_node``.

        Nothing is added when either end is ``None``; when either end is not
        already a node of the graph an error is logged and nothing is added.
        """
        if pre_node is None or next_node is None:
            return

        if pre_node not in self.graph or \
                next_node not in self.graph:
            logger.error("Nodes between edge should be both exists.")
            return

        self.graph.add_edge(pre_node, next_node)

    def add_node_with_edge(self, node, adj_nodes: List[HostGraphNode]):
        """Add ``node`` and then an edge from it to each node in ``adj_nodes``."""
        self.add_node(node)
        for adj in adj_nodes:
            self.add_edge(node, adj)

    def remove_node(self, node: HostGraphNode = None) -> None:
        """Remove ``node`` from the graph; ``None`` is ignored."""
        if node is None:
            return

        self.graph.remove_node(node)

    def remove_edge(self, pre_node: HostGraphNode = None, next_node: HostGraphNode = None) -> None:
        """
        Remove the edge ``pre_node -> next_node`` from the graph.

        :raises ValueError: If either end is ``None``
        """
        if pre_node is None or next_node is None:
            raise ValueError(f"Invalid edge from {pre_node} to {next_node}.")

        # Delegate to the wrapped DiGraph. Calling self.remove_edge here would
        # recurse without end.
        self.graph.remove_edge(pre_node, next_node)

    def get_subgraph(self, nodes: List[HostGraphNode]) -> nx.DiGraph:
        """
        Return ``self.graph.subgraph(...)`` over the de-duplicated ``nodes``.

        :raises ValueError: If any node is not in the graph
        """
        nodes = list(set(nodes))
        for node in nodes:
            if not self.is_node_exists(node):
                raise ValueError(f"Failed to subtract subgraph because {node.op_name} is not in the graph.")

        return self.graph.subgraph(nodes)

    def highlight_subgraph(self, subgraph: nx.DiGraph = None) -> None:
        """Placeholder; does nothing."""
        pass

    def get_node(self, node: HostGraphNode):
        """
        Return ``self.graph[node]``, or ``None`` when the node is absent.

        NOTE(review): indexing a networkx graph by node yields that node's
        adjacency mapping (its neighbours), not its attribute dict; confirm
        this is what callers expect.
        """
        if node not in self.graph:
            return None

        return self.graph[node]

    def get_node_by_name(self, node_name: str):
        """Look ``node_name`` up in ``self.nodes``; ``None`` when missing."""
        return self.nodes.get(node_name, None)

    def is_node_exists(self, node: HostGraphNode):
        """Return True when ``node`` is a node of ``self.graph``."""
        return node in self.graph

    def draw(self,
             graph: nx.DiGraph = None,
             with_labels: bool = False,
             labels: Dict[HostGraphNode, Any] = None,
             pos_func: Callable = None,
             font_weight: str = "bold",
             savefig: bool = False,
             node_size: int = 50,
             **kwargs
             ):
        """
        Draw ``graph`` (default: ``self.graph``) with ``nx.draw`` and show it.

        :param graph: Graph to draw instead of ``self.graph``
        :param with_labels: Whether to draw labels; default labels are generated when ``labels`` is None
        :param labels: Explicit node -> label mapping
        :param pos_func: Callable mapping the graph to node positions
        :param font_weight: Forwarded to ``nx.draw``
        :param savefig: Save the figure to ``<name>.png`` before showing it
        :param node_size: Forwarded to ``nx.draw``
        :param kwargs: Extra keyword arguments forwarded to ``nx.draw``
        """
        try:
            import matplotlib.pylab as plt
        except ImportError:
            logger.error('Please install matplotlib first by using `pip install matplotlib`.')
            return

        if graph is None:
            graph = self.graph

        pos = pos_func(graph) if pos_func is not None else None

        if with_labels and labels is None:
            labels = {}
            for node, attrs in graph.nodes.items():
                # Node attributes hold only what add_node() received (build()
                # stores op_type), so fall back to the node object's op_name
                # rather than raising KeyError.
                op_name = attrs.get('op_name', getattr(node, 'op_name', ''))
                labels[node] = f"{node}\n({op_name})"

        nx.draw(graph,
                with_labels=with_labels,
                pos=pos,
                node_size=node_size,
                font_weight=font_weight,
                labels=labels,
                **kwargs
                )
        if savefig:
            # name defaults to None in __init__; None + ".png" would raise
            file_stem = self.name if self.name is not None else "graph"
            plt.savefig(file_stem + ".png")
        plt.show()


# ---------------------------------------------------------------------------
# diff --git a/profiler/advisor/common/graph/graph_match.py b/profiler/advisor/common/graph/graph_match.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..d0dfc162952b0c52bf9ed73cef2ff18ff5ffda24
# ---------------------------------------------------------------------------
import itertools
import logging
from functools import lru_cache
from collections import deque
from typing import Dict, Generator, List, Callable, Hashable, Tuple

import networkx as nx


def _attrs_match(query_attrs, host_attrs) -> bool:
    """
    Return True when every query attribute exists in ``host_attrs`` with an equal value.

    Two string values are compared case-insensitively; anything else with ``!=``.
    """
    for attr, val in query_attrs.items():
        if attr not in host_attrs:
            return False
        host_val = host_attrs[attr]
        if isinstance(host_val, str) and isinstance(val, str):
            if host_val.lower() != val.lower():
                return False
        elif host_val != val:
            return False
    return True


# NOTE: the three matchers below are memoised with lru_cache, so the key is
# built from the arguments themselves (graphs included). Every argument must
# be hashable, and a cached answer is not refreshed if a graph is modified
# after the call.
@lru_cache()
def match_node_attr_fun(query_node: Hashable,
                        host_node: Hashable,
                        query_graph: nx.Graph,
                        host_graph: nx.Graph
                        ) -> bool:
    """
    Check query node matches the attributes in host graph

    :param query_node: Query graph node
    :param host_node: Host graph node
    :param query_graph: Query Graph
    :param host_graph: Host graph
    :return: bool, match or not (False when either node is missing from its graph)
    """
    if query_node not in query_graph.nodes or host_node not in host_graph.nodes:
        return False

    return _attrs_match(query_graph.nodes[query_node], host_graph.nodes[host_node])


@lru_cache()
def match_node_struct_fun(query_node: Hashable,
                          host_node: Hashable,
                          query_graph: nx.Graph,
                          host_graph: nx.Graph
                          ) -> bool:
    """
    Check query node matches the structure in host graph

    The host node matches when its degree is at least the query node's degree.

    :param query_node: Query graph node
    :param host_node: Host graph node
    :param query_graph: Query Graph
    :param host_graph: Host graph
    :return: bool, match or not (False when either node is missing from its graph)
    """
    if query_node not in query_graph.nodes or host_node not in host_graph.nodes:
        return False

    return host_graph.degree(host_node) >= query_graph.degree(query_node)


@lru_cache()
def match_edge_attr_fun(query_edge: Tuple[Hashable, Hashable],
                        host_edge: Tuple[Hashable, Hashable],
                        query_graph: nx.Graph,
                        host_graph: nx.Graph
                        ) -> bool:
    """
    Check query edge matches the attr in host graph

    :param query_edge: Query graph edge
    :param host_edge: Host graph edge
    :param query_graph: Query Graph
    :param host_graph: Host graph
    :return: bool, match or not (False when either edge is missing from its graph)
    """
    if query_edge not in query_graph.edges or host_edge not in host_graph.edges:
        return False

    return _attrs_match(query_graph.edges[query_edge], host_graph.edges[host_edge])


def find_isomorphisms(query_graph: nx.Graph,
                      host_graph: nx.Graph,
                      *args,
                      _node_attr_fun: Callable = match_node_attr_fun,
                      _node_struct_fun: Callable = match_node_struct_fun,
                      _edge_attr_fun: Callable = match_edge_attr_fun,
                      limit: int = None,
                      **kwargs) -> List[Dict[Hashable, Hashable]]:
    """
    Find all the sub graphs that are isomorphic to query_graph in host_graph .

    :param query_graph: The graph object to query
    :param host_graph: The graph object to be queried
    :param args: Position args
    :param _node_attr_fun: The function to match node attr
    :param _node_struct_fun: The function to match node structural
    :param _edge_attr_fun: The function to match edge attr
    :param limit: The limitation for the number of returned mappings
        (``None`` or ``0`` means no limit)
    :param kwargs: Keyword args
    :return: Matched node mapping list
        ```
        [{query_id: host_id, ...}, ...]
        ```
    """
    candidates = []
    for query_result in find_isomorphisms_iter(
            query_graph,
            host_graph,
            *args,
            _node_attr_fun=_node_attr_fun,
            _node_struct_fun=_node_struct_fun,
            _edge_attr_fun=_edge_attr_fun,
            **kwargs
    ):
        candidates.append(query_result)
        if limit and len(candidates) >= limit:
            return candidates
    return candidates


def find_isomorphisms_iter(query_graph: nx.Graph,
                           host_graph: nx.Graph,
                           directed: bool = None,
                           _node_attr_fun: Callable = None,
                           _node_struct_fun: Callable = None,
                           _edge_attr_fun: Callable = None,
                           ) -> Generator[Dict[Hashable, Hashable], None, None]:
    """
    A generation to find one isomorphic subgraph in host_graph for query_graph.

    Partial mappings are kept in a queue; each round extends one of them by a
    single query node and yields the mappings that cover the whole query graph.

    :param query_graph: The graph object to query
    :param host_graph: The graph object to be queried
    :param directed: Whether direction should be considered during search;
        ``None`` means "directed only if both graphs are ``nx.DiGraph``"
    :param _node_attr_fun: The function to match node attr (None: match_node_attr_fun)
    :param _node_struct_fun: The function to match node structural (None: match_node_struct_fun)
    :param _edge_attr_fun: The function to match edge attr (None: match_edge_attr_fun)
    :return: Yield mappings from query node IDs to host graph IDs: {query_id: host_id, ...}

    """
    if directed is None:
        # direction is only meaningful when both graphs carry it
        directed = isinstance(query_graph, nx.DiGraph) and isinstance(host_graph, nx.DiGraph)

    # The signature defaults are None, but the matchers are called
    # unconditionally further down, so resolve them here.
    if _node_attr_fun is None:
        _node_attr_fun = match_node_attr_fun
    if _node_struct_fun is None:
        _node_struct_fun = match_node_struct_fun
    if _edge_attr_fun is None:
        _edge_attr_fun = match_edge_attr_fun

    # Initialize queue with the empty mapping
    dq = deque()
    dq.appendleft({})

    while dq:
        backbone = dq.pop()
        next_candidate_backbones = get_next_candidates(backbone=backbone,
                                                       query_graph=query_graph,
                                                       host_graph=host_graph,
                                                       directed=directed,
                                                       _node_attr_fun=_node_attr_fun,
                                                       _node_struct_fun=_node_struct_fun,
                                                       _edge_attr_fun=_edge_attr_fun,
                                                       )
        for candidate in next_candidate_backbones:
            if len(candidate) == len(query_graph):
                # every query node is mapped: a legal isomorphism
                yield candidate
            else:
                # continue to search
                dq.appendleft(candidate)


def get_next_candidates(
        backbone: Dict,
        query_graph: nx.Graph,  # noqa
        host_graph: nx.Graph,  # noqa
        next_node: Hashable = None,
        directed: bool = True,  # noqa
        _node_attr_fun: Callable = None,  # noqa
        _node_struct_fun: Callable = None,  # noqa
        _edge_attr_fun: Callable = None  # noqa
) -> List[Dict[Hashable, Hashable]]:
    """
    Get a list of candidate node assignments for the next "step" of this map.

    :param backbone: Mapping of query node IDs to one set of host graph IDs
    :param query_graph: The graph object to query
    :param host_graph: The graph object to be queried
    :param next_node: Optional suggestion for the next node to assign. It is
        used only when ``backbone`` is empty; otherwise the next node is
        always recomputed from the backbone.
    :param directed: Whether edge direction is considered
    :param _node_attr_fun: The function to match node attr (None: match_node_attr_fun)
    :param _node_struct_fun: The function to match node structural (None: match_node_struct_fun)
    :param _edge_attr_fun: The function to match edge attr (None: match_edge_attr_fun)
    :return: List[Dict[Hashable, Hashable]]: A new list of node mappings with one additional element mapped
    """
    if _node_attr_fun is None:
        _node_attr_fun = match_node_attr_fun
    if _node_struct_fun is None:
        _node_struct_fun = match_node_struct_fun

    # all query nodes currently share the same priority
    node_priority = {n: 1 for n in query_graph.nodes}

    if not backbone:
        # Start case: seed the search with every host node that can play the
        # first query node.
        if next_node is None:
            if not node_priority:
                # empty query graph: nothing to map
                return []
            next_node = max(node_priority, key=node_priority.get)

        return [
            {next_node: node}
            for node in host_graph.nodes
            if _node_attr_fun(next_node, node, query_graph, host_graph)
            and _node_struct_fun(next_node, node, query_graph, host_graph)
        ]

    # Collect the unmapped query nodes that touch the current backbone.
    connected_query_nodes = []
    for query_node_id in query_graph.nodes:
        if query_node_id in backbone:
            continue

        if directed:
            # nx.DiGraph.pred: A <- B: find previous node from B to A
            # nx.DiGraph.adj: A -> B : find next node from A to B
            backbone_neighbors = set(query_graph.adj[query_node_id]).union(query_graph.pred[query_node_id])
        else:
            backbone_neighbors = query_graph.adj[query_node_id]

        if any(_node in backbone for _node in backbone_neighbors):
            connected_query_nodes.append(query_node_id)

    if not connected_query_nodes:
        # max() on an empty sequence would raise; this happens when no
        # remaining query node is linked to the mapped part.
        logging.warning("No unmapped query node is connected to the current backbone.")
        return []

    # next_node is connected to the current backbone.
    next_node = max(connected_query_nodes, key=lambda x: node_priority.get(x, 0))

    # verify all edges between `next_node` and nodes in the backbone are exist in host graph
    # Step1: find all edges between `next_node` and nodes in the backbone
    required_edges = []
    for _node in query_graph.adj[next_node]:
        if _node in backbone:
            # `next_node` -> `_node`
            required_edges.append((None, next_node, _node))

    if directed:
        for _node in query_graph.pred[next_node]:
            if _node in backbone:
                # `_node` -> `next_node`
                required_edges.append((_node, next_node, None))

    if not required_edges:
        logging.warning("Find node without any edge, which is invalid.")
        return []

    # Step2: host nodes must satisfy *every* required edge. Each edge yields
    # the set of host nodes that have it; keep the ones common to all.
    neighbor_views = []
    for source, _, target in required_edges:
        if not directed:
            neighbor_views.append(host_graph.adj[backbone[target]])
        elif source is not None:
            # backbone node -> next_node: successors of the mapped source
            neighbor_views.append(host_graph.adj[backbone[source]])
        else:
            # next_node -> backbone node: predecessors of the mapped target
            neighbor_views.append(host_graph.pred[backbone[target]])

    # Start from the first edge's neighbours and filter by the rest. This
    # keeps a stable order and never re-grows after an empty intersection.
    candidate_nodes = list(neighbor_views[0])
    for view in neighbor_views[1:]:
        candidate_nodes = [_node for _node in candidate_nodes if _node in view]

    used_host_nodes = set(backbone.values())
    tentative_results = [
        {**backbone, next_node: _node}
        for _node in candidate_nodes
        if _node not in used_host_nodes
        and _node_attr_fun(next_node, _node, query_graph, host_graph)
        and _node_struct_fun(next_node, _node, query_graph, host_graph)
    ]

    return check_edges_mapping(tentative_results,
                               query_graph=query_graph,
                               host_graph=host_graph,
                               _edge_attr_fun=_edge_attr_fun)


def check_edges_mapping(candidates: List[Dict[Hashable, Hashable]],
                        query_graph: nx.Graph,
                        host_graph: nx.Graph,
                        _edge_attr_fun: Callable = None
                        ) -> List[Dict[Hashable, Hashable]]:
    """
    Check that all edges between the assigned nodes exist in the host graph.

    Complete mappings must have every query edge present in the host graph
    with matching attributes. Every mapping, partial or complete, is dropped
    if the host graph has an edge between two mapped nodes that the query
    graph lacks.

    :param candidates: mapping nodes candidates
    :param query_graph: The graph object to query
    :param host_graph: The graph object to be queried
    :param _edge_attr_fun: The function to match edge attr (None: match_edge_attr_fun)
    :return: The candidates that pass both checks, in their original order
    """
    if _edge_attr_fun is None:
        # The default used to reject every complete mapping; fall back to
        # the module matcher instead.
        _edge_attr_fun = match_edge_attr_fun

    monomorphism_candidates = []
    for candidate in candidates:
        if len(candidate) != len(query_graph):
            # partial mapping: edge check is deferred until it is complete
            monomorphism_candidates.append(candidate)
            continue

        all_pass_flag = all(
            host_graph.has_edge(candidate[edge_start], candidate[edge_end])
            and _edge_attr_fun((edge_start, edge_end),
                               (candidate[edge_start], candidate[edge_end]),
                               query_graph,
                               host_graph)
            for edge_start, edge_end in query_graph.edges
        )
        if all_pass_flag:
            monomorphism_candidates.append(candidate)

    # Isomorphisms check: no host edge between mapped nodes without a
    # corresponding query edge.
    final_candidates = []
    for candidate in monomorphism_candidates:
        has_extra_host_edge = any(
            not query_graph.has_edge(edge_start, edge_end)
            and host_graph.has_edge(candidate[edge_start], candidate[edge_end])
            for edge_start, edge_end in itertools.product(candidate.keys(), repeat=2)
        )
        if not has_extra_host_edge:
            final_candidates.append(candidate)
    return final_candidates

# diff --git a/profiler/advisor/common/graph/graph_parser.py b/profiler/advisor/common/graph/graph_parser.py
new file mode 100644 index 0000000000000000000000000000000000000000..d4c67fc1918af37a837e016bd9e5b813957b1aef --- /dev/null +++ b/profiler/advisor/common/graph/graph_parser.py @@ -0,0 +1,413 @@ +import os +import logging +import yaml +import itertools +from collections import deque +from dataclasses import dataclass +from typing import List, Tuple, Dict + +logger = logging.getLogger() + + +@dataclass +class Tensor: + def __init__(self): + super().__init__() + self.shape = [] + self.origin_shape = [] + self.shape_range = [] + self.origin_shape_range = [] + self.dtype = "" + self.origin_data_type = "" + self.format = "" + self.origin_format = [] + + +@dataclass +class Attr: + + def __init__(self): + super().__init__() + self.key = str() + self.value = [] + + +class HostGraphNode: + def __init__(self): + super().__init__() + self.graph_name = str() + self.op_name = str() + self.op_type = str() + self.inputs = [] + self.input = [] + self.outputs = [] + self.output = [] + self.strides = [] + self.pads = [] + self.groups = "" + self.dilations = [] + self.kernelname = "" + self._attrs = [] + + def __repr__(self): + return f"" + + +@dataclass +class HostGraph: + def __init__(self): + super().__init__() + self.name = "" + self.nodes = {} + self.inputs = [] + self.edges = [] + self.model_name = None + self.file_path = None + + def build(self): + """build a graph""" + for name, node in self.nodes.items(): + for input_node in node.inputs: + if input_node not in self.nodes: + continue + self.nodes[input_node].outputs.append(name) + + +class HostGraphParser: + """ + Parse graph metadata from text file + """ + def __init__(self, file_path): + self.buffer = deque(maxlen=100) + self.line_no = 0 + self._file_path = file_path + self.edges: List[Tuple[HostGraphNode, HostGraphNode]] = [] + self.nodes: Dict[str, HostGraphNode] = {} + self.graphs = self._parse(self._file_path) + self._get_node_dict() + self._get_edges_list() + del self.graphs[0] + + @staticmethod + def _get_key_value( 
line): + res = line.split(':', 1) + return res[0].strip(), res[1].strip().strip('"') + + @staticmethod + def _parse_attr(key, value, obj): + if not isinstance(obj, list) and not obj: + return + if key == "dim" and hasattr(obj, "shape"): + obj.shape.append(value) + elif key == "name" and hasattr(obj, "op_name"): + obj.op_name = value + elif key == "name" and hasattr(obj, "name"): + obj.name = value + elif key == "dtype" and hasattr(obj, "dtype"): + obj.dtype = value + elif key == "layout" and hasattr(obj, "format"): + obj.format = value + elif key == "type" and hasattr(obj, "op_type"): + obj.op_type = value + elif key == "input" and hasattr(obj, "input"): + obj.inputs.append(value.strip('"').split(':')[0]) + elif key == "key" and hasattr(obj, "key"): + obj.key = value + elif hasattr(obj, key): + setattr(obj, key, value) + elif isinstance(obj, list) and key != "val_type": + obj.append(value) + + def _parse_struct(self, in_file, key, in_obj): + + def parse_shape(file, obj): + obj = self._parse_line(file, obj) + + def parse_input_desc(file, obj): + tensor = self._parse_line(file, Tensor()) + if obj and hasattr(obj, "input"): + obj.input.append(tensor) + + def parse_out_desc(file, obj): + tensor = self._parse_line(file, Tensor()) + if obj and hasattr(obj, "output"): + obj.output.append(tensor) + + def parse_op(file, obj: HostGraph): + node = self._parse_line(file, HostGraphNode()) + if hasattr(obj, "name"): + node.graph_name = obj.name + if obj and hasattr(obj, "nodes") and node.op_name: + obj.nodes[node.op_name] = node + + def parse_graph(file, obj): + graph = self._parse_line(file, HostGraph()) + obj.append(graph) + + def parse_attr(file, obj): + attr = self._parse_line(file, Attr()) + if hasattr(obj, attr.key): + if attr.key not in ['format']: + setattr(obj, attr.key, attr.value) + elif attr.key.endswith("_kernelname"): + setattr(obj, "kernelname", attr.value) + if obj and hasattr(obj, "get_attrs"): + obj.get_attrs().append(attr) + + def parse_list(file, obj): + 
value = [] + self._parse_line(file, value) + if isinstance(obj, list): + obj.append(value) + else: + obj = value + + def parse_value(file, obj): + if hasattr(obj, "value"): + obj.value = self._parse_line(file, obj.value) + + def parse_default(file, _obj=None): + """function with unused argument""" + self._parse_line(file, None) + + parse_methods = { + "shape": parse_shape, + "input_desc": parse_input_desc, + "output_desc": parse_out_desc, + "op": parse_op, + "graph": parse_graph, + "attr": parse_attr, + "list_list_int": parse_list, + "list_list_i": parse_list, + "list": parse_list, + "value": parse_value, + } + parse_methods.get(key, parse_default)(in_file, in_obj) + + def _read_line(self, file): + self.line_no += 1 + line = file.readline() + if line.strip().endswith('}'): + end_line = "" + while self.buffer and not end_line.strip().endswith("{"): + end_line = self.buffer.pop() + else: + self.buffer.append(line) + return line.strip() + + def _parse_line(self, file, obj=None): + line = self._read_line(file) + try: + while line and not line.endswith("}"): + if line.endswith('{'): + key = line.rstrip('{').strip() + self._parse_struct(file, key, obj) + else: + key, value = self._get_key_value(line) + self._parse_attr(key, value, obj) + line = self._read_line(file) + except Exception as exception: + if self.buffer: + logger.debug("***********************graph content**************************") + while self.buffer: + line = self.buffer.popleft() + logger.debug(line) + logger.debug("***********************graph content**************************") + raise exception + return obj + + def _parse(self, graph_file): + # pylint:disable=broad-except + graph_list = [] + with open(graph_file, "r", encoding="gbk") as file: + try: + graph_list = self._parse_line(file, graph_list) + except Exception: + logger.error( + "Parse line %s of file %s failed, make sure the format is correct.", self.line_no, graph_file + ) + graphs = [] + for graph in graph_list: + if isinstance(graph, 
HostGraph): + graphs.append(graph) + for graph in graphs: + graph.model_name = graphs[0].name + graph.file_path = self._file_path + graph.build() + return graphs + + def _get_edges_list(self) -> None: + if len(self.graphs) <= 0: + return + + def is_repeat_edge(edge, edge_collector): + for _edge in edge_collector: + if edge[0].op_name == _edge[0].op_name and edge[1].op_name == _edge[1].op_name: + return True + return False + + for node in self.nodes.values(): + for input_node_name in node.inputs: + if input_node_name not in self.nodes: + continue + input_node = self.nodes[input_node_name] + if not is_repeat_edge((input_node, node), self.edges): + self.edges.append((input_node, node)) + for output_node_name in node.outputs: + if output_node_name not in self.nodes: + continue + output_node = self.nodes[output_node_name] + if not is_repeat_edge((node, output_node), self.edges): + self.edges.append((node, output_node)) + + def _get_node_dict(self) -> None: + if not self.graphs: + self.nodes = {} + return + self.nodes = {node.op_name: node for graph in self.graphs for node in graph.nodes.values()} + + +class QueryGraphNode: + """ + Graph Node + """ + _ID = 0 + + def __init__(self, op_type: str, op_pass: str): + self._op_type = op_type + self._id = QueryGraphNode._ID + self._op_pass = op_pass + QueryGraphNode._ID += 1 + + def get_property(self, name): + """ + get property + """ + return getattr(self, name, lambda: None) + + @property + def op_type(self): + return self._op_type + + @property + def op_name(self): + return self._op_type + "_id_" + str(self._id) + + @property + def op_pass(self): + return self._op_pass + + @op_type.setter + def op_type(self, op_type): + self._op_type = op_type + + def __eq__(self, other): + return self._op_type == other._op_type and \ + self._id == other._id + + def __hash__(self): + return hash(self._op_type + str(self._id)) + + @staticmethod + def trim_string(string: str, length: int = -1): + """ + + Trim string to target length + :param 
string: Original string + :param length: Target length of string, -1 indicates original string. + :return: Trimmed string + """ + if string is None or not isinstance(string, str): + raise TypeError(f"Param string must be a string type but got {type(string)}.") + + if length <= -1 or len(string) <= length: + return string + + return string[:length] + + +class QueryGraphParser: + def __init__(self, rule_database_path: str): + self._fusion_rules: Dict[str, List[Tuple]] = dict() + self.load_database(rule_database_path) + self.num_rules = sum([len(v) for v in self._fusion_rules.values()]) + + @property + def fusion_rules(self): + return self._fusion_rules + + def load_database(self, rule_database): + if not os.path.isabs(rule_database): + rule_database = os.path.join(os.path.dirname(__file__), + "../", "../", + rule_database) + + if not os.path.exists(rule_database): + raise FileNotFoundError(f"Path {rule_database} does not exist.") + with open(rule_database, 'r') as f: + database = yaml.safe_load(f) + self.parse_yaml(database) + + def parse_yaml(self, yaml_database): + fusion_strategy_list = yaml_database.get("GraphFusion", []) + if yaml_database.get("UBFusion", []): + fusion_strategy_list.extend(yaml_database.get("UBFusion", [])) + for fusion_strategy in fusion_strategy_list: + if not isinstance(fusion_strategy, dict): + continue + (fusion_name, strategy), = fusion_strategy.items() + version = strategy.get("version", 0) + if version == 0 or version == "0": + self._fusion_rules[fusion_name] = self.build_query_graph_v0(fusion_name, + strategy.get('struct', [])) + elif version == 1 or version == "1": + self._fusion_rules[fusion_name] = self.build_query_graph_v1(fusion_name, + strategy.get('nodes', []), + strategy.get('edges', [])) + + @staticmethod + def build_query_graph_v0(graph_name: str, graph_struct: List[str]) -> List[Tuple]: + nodes = dict() + graphs = [] + edges = [] + + pre_node, next_node = None, None + for node in graph_struct: + pre_node = next_node + 
next_node = QueryGraphNode(node, graph_name) + nodes[next_node.op_name] = next_node + if pre_node is None or next_node is None: + continue + edges.append((pre_node, next_node,)) + graphs.append((nodes, edges, graph_name,)) + return graphs + + @staticmethod + def build_query_graph_v1(graph_name: str, + nodes_list: List[Dict], + edges_list: List[List[str]]) -> List[Tuple]: + graphs = [] + node_index = dict() + multi_node_list = [] + for index, node in enumerate(nodes_list): + (node_name, op_type), = node.items() + if isinstance(op_type, str): + op_type = [op_type] + multi_node_list.append([QueryGraphNode(op, graph_name) for op in op_type]) + node_index[node_name] = index + + multi_node = list(itertools.product(*multi_node_list)) + + for index, sub_nodes in enumerate(multi_node): + sub_graph_name = graph_name if index == 0 else f"{graph_name}#{index}" + sub_edge = [] + sub_node = dict() + for node in sub_nodes: + sub_node[node.op_name] = node + for edge in edges_list: + pre_node, next_node = edge + pre_node_index, next_node_index = node_index.get(pre_node), node_index.get(next_node) + sub_edge.append((sub_nodes[pre_node_index], sub_nodes[next_node_index])) + sub_graph = (sub_node, sub_edge, sub_graph_name,) + graphs.append(sub_graph) + return graphs diff --git a/profiler/advisor/common/profiling/__init__.py b/profiler/advisor/common/profiling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/common/profiling/ge_info.py b/profiler/advisor/common/profiling/ge_info.py new file mode 100644 index 0000000000000000000000000000000000000000..9996ec611a2a835bd8dffd24c3fbe7d8817ec29a --- /dev/null +++ b/profiler/advisor/common/profiling/ge_info.py @@ -0,0 +1,47 @@ +""" +DB +""" +import logging +import os +from typing import Any, List + +from sqlalchemy import text + +from profiler.advisor.dataset.profiling.db_manager import ConnectionManager +from 
profiler.advisor.dataset.profiling.profiling_parser import ProfilingParser + +logger = logging.getLogger() + + +class GeInfo(ProfilingParser): + """ + ge info file + """ + FILE_PATTERN = r"ge_info.db" + FILE_PATTERN_MSG = "ge_info.db" + FILE_INFO = "ge info" + STATIC_OP_STATE = "0" + DYNAMIC_OP_STATE = "1" + + def __init__(self, path: str) -> None: + super().__init__(path) + self.op_state_info_list = None + + def parse_from_file(self, profiling_db_file): + """ + ge info + """ + db_path, db_file = os.path.split(profiling_db_file) + if not ConnectionManager.check_db_exists(db_path, [db_file]): + return False + conn = ConnectionManager(db_path, db_file) + if conn.check_table_exists(['TaskInfo']): + with conn().connect() as sql_conn: + self.op_state_info_list = sql_conn.execute(text("select op_name, op_state from TaskInfo")).fetchall() + return True + + def get_static_shape_operators(self) -> List[Any]: + return [op for op, state in self.op_state_info_list if state == self.STATIC_OP_STATE] + + def get_dynamic_shape_operators(self) -> List[Any]: + return [op for op, state in self.op_state_info_list if state == self.DYNAMIC_OP_STATE] diff --git a/profiler/advisor/common/profiling/msprof.py b/profiler/advisor/common/profiling/msprof.py new file mode 100644 index 0000000000000000000000000000000000000000..9453986b8225ccad68f2135d674e3832d987fcf0 --- /dev/null +++ b/profiler/advisor/common/profiling/msprof.py @@ -0,0 +1,144 @@ +""" +msprof +""" +import logging +from typing import Dict, List + +from profiler.advisor.dataset.profiling.info_collection import TaskInfo +from profiler.advisor.dataset.profiling.profiling_parser import ProfilingParser + +logger = logging.getLogger() + + +class TaskChecker: + """ + check task info + """ + + def __init__(self): + self.sqe_keys = set() + + def is_sqe(self, task: TaskInfo) -> bool: + """check sqe""" + key = (task.pid, task.tid) + if task.args.get('name', '').endswith('_SQE'): + self.sqe_keys.add(key) + return False + + return key in 
self.sqe_keys + + +class Msprof(ProfilingParser): + """ + msprof + + """ + FILE_PATTERN = r"^msprof[_\d]+.json$" + FILE_PATTERN_MSG = "msprof_*.json" + FILE_INFO = "msprof" + + def __init__(self, path: str) -> None: + super().__init__(path) + self._tasks: List[TaskInfo] = [] + self._iteration_time = 0.0 + self._model_id = None + self._iteration_id = None + self._process_pid: Dict[str, str] = {} + self._min_time = 0.0 + self._max_time = 0.0 + self._data_process_time = 0.0 + self._start_point = 0.0 + + def parse_from_file(self, file: str): + if not self._parse_json(file): + return False + min_time = float('inf') + max_time = 0.0 + task_checker = TaskChecker() + is_iter = False + for item in self._raw_data: + task = TaskInfo(item) + if task.cat == "Iteration Time": + self._min_time = task.start_time + self._max_time = task.end_time + self._iteration_time = task.dur + is_iter = True + if task.cat == "Data_aug Bound" and "Data_aug Bound(us)" in task.args: + self._data_process_time = task.args["Data_aug Bound(us)"] + + if self._start_point == 0 and task.start_time > 0: + self._start_point = task.start_time + + if task_checker.is_sqe(task): + continue + + self._tasks.append(task) + self._parse_task(task) + + start_time = task.start_time + dur = task.dur + if start_time == -1 or dur == -1 or dur == 0: + continue + if start_time < min_time: + min_time = start_time + end_time = start_time + dur + if end_time > max_time: + max_time = end_time + if not is_iter: + self._iteration_time = dur + self._max_time = max_time + self._min_time = min_time + if self._tasks: + return True + return False + + def _parse_task(self, task): + if "Iteration Refresh" in task.name: + self._iteration_id = task.args.get("Iteration ID") + elif "Model ID" in task.name: + self._model_id = int(task.name.split(":")[1]) + elif "process_name" == task.name: + self._process_pid[task.args.get("name")] = task.pid + + @property + def step_time(self): + return self._iteration_time + self._data_process_time + + 
@property + def iteration_time(self): + return self._iteration_time + + @property + def iter_max_time(self): + return self._max_time + + @property + def iter_min_time(self): + return self._min_time + + @property + def data_process_time(self): + return self._data_process_time + + @property + def tasks(self): + return self._tasks + + @property + def model_id(self): + return self._model_id + + @property + def iteration_id(self): + return self._iteration_id + + @property + def process_pid(self): + return self._process_pid + + def __len__(self): + return len(self._tasks) + + @property + def start_point(self): + return self._start_point diff --git a/profiler/advisor/common/profiling/op_summary.py b/profiler/advisor/common/profiling/op_summary.py new file mode 100644 index 0000000000000000000000000000000000000000..d79439dbad8e2c105bed737c1a1c3be1a2cecfc1 --- /dev/null +++ b/profiler/advisor/common/profiling/op_summary.py @@ -0,0 +1,76 @@ +""" +summary +""" +import logging +from decimal import Decimal +from typing import List, Any + +from profiler.advisor.dataset.profiling.info_collection import OpInfo +from profiler.advisor.dataset.profiling.profiling_parser import ProfilingParser +from profiler.advisor.utils.utils import format_excel_title, lazy_property + +logger = logging.getLogger() + + +class OpSummary(ProfilingParser): + """ + op summary + """ + + FILE_PATTERN = r"^op_summary_[_\d]+\.csv$" + FILE_PATTERN_MSG = "op_summary_*.csv" + FILE_INFO = "op summary" + STATIC_OP_STATE = "static" + DYNAMIC_OP_STATE = "dynamic" + + def __init__(self, path: str) -> None: + super().__init__(path) + self.op_list: List[OpInfo] = [] + self._total_task_duration = 0.0 + self._total_task_wait_time = 0.0 + self._raw_data: List[List[str]] = [] + + def parse_from_file(self, file: str): + if not self._parse_csv(file): + return False + title_dict = dict(enumerate(self._raw_data[0])) + for op_data in self._raw_data[1:]: + op_info = OpInfo() + for idx, value in enumerate(op_data): + title = 
title_dict.get(idx, "") + formatted_title = format_excel_title(title) + if formatted_title == 'task_start_time' and 'us' in title and \ + value.replace('.', '').replace("E+", "").isnumeric(): + value = str(Decimal(value) * Decimal(1000)) + op_info.add_attr(formatted_title, value) + self.op_list.append(op_info) + self._total_task_duration += self.get_float(op_info.get_attr("task_duration")) + self._total_task_wait_time += self.get_float(op_info.get_attr("task_wait_time")) + if not self.op_list: + logger.error("No valid op info in %s", file) + return False + return True + + def get_static_shape_operators(self) -> List[Any]: + return [op_info.get_attr("op_name") for op_info in self.op_list if op_info.get_attr("op_state") == self.STATIC_OP_STATE] + + def get_total_task_duration(self): + """ + get total task duration of all operators + :return: + """ + return self._total_task_duration + + @lazy_property + def task_dict(self): + """ + task dict + """ + task_dict = {} + for op_info in self.op_list: + if op_info.op_name not in task_dict: + task_dict[op_info.op_name] = [op_info] + else: + task_dict[op_info.op_name].append(op_info) + + return task_dict diff --git a/profiler/advisor/common/profiling/tasktime.py b/profiler/advisor/common/profiling/tasktime.py new file mode 100644 index 0000000000000000000000000000000000000000..3ce09a783851e94163aa72f423788a373da5eb3a --- /dev/null +++ b/profiler/advisor/common/profiling/tasktime.py @@ -0,0 +1,75 @@ +""" +task time +""" +import logging +from typing import Dict, List + +from profiler.advisor.dataset.profiling.info_collection import TaskInfo +from profiler.advisor.dataset.profiling.profiling_parser import ProfilingParser + +logger = logging.getLogger() + +AICPU_TASK_TYPE = "AI_CPU" +AICORE_TASK_TYPE = "AI_CORE" + + +class TaskTime(ProfilingParser): + """ + task time info + """ + + FILE_PATTERN = r"^task_time_[_\d]+\.json$" + FILE_PATTERN_MSG = "task_time*.json" + FILE_INFO = "task time" + + def __init__(self, path: str) -> None: 
+ super().__init__(path) + self._tasks: List[TaskInfo] = [] + self._aicore_tasks: List[TaskInfo] = [] + self._aicpu_tasks: List[TaskInfo] = [] + self._process_map: Dict[str, str] = {} + self._pid_map: Dict[str, str] = {} + + def get_aicpu_tasks(self): + """ + get aicpu tasks + :return: aicpu tasks + """ + return self._aicpu_tasks + + def get_aicore_tasks(self): + """ + get aicore tasks + :return: aicore tasks + """ + return self._aicore_tasks + + def parse_from_file(self, file: str): + if not self._parse_json(file): + return False + for item in self._raw_data: + if item.get("ph") != "M": # header + continue + if item.get("name") != "process_name": + continue + pid = item.get("pid") + pname = item["args"]["name"] + self._process_map[pid] = pname + self._pid_map[pname] = pid + for item in self._raw_data: + if item.get("ph") == "M": # header + continue + task = TaskInfo(item) + self._tasks.append(task) + if task.pid != self._pid_map.get("Task Scheduler"): + continue + if task.task_type == AICORE_TASK_TYPE: + self._aicore_tasks.append(task) + elif task.task_type == AICPU_TASK_TYPE: + self._aicpu_tasks.append(task) + self._aicore_tasks.sort(key=lambda x: x.start_time) + self._aicpu_tasks.sort(key=lambda x: x.start_time) + if not self._tasks: + logger.error("No valid task info in %s", file) + return False + return True diff --git a/profiler/advisor/common/timeline/__init__.py b/profiler/advisor/common/timeline/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/common/timeline/event.py b/profiler/advisor/common/timeline/event.py new file mode 100644 index 0000000000000000000000000000000000000000..8eebc550d08e8df55541a2070f45e684c2a0c84a --- /dev/null +++ b/profiler/advisor/common/timeline/event.py @@ -0,0 +1,23 @@ +class AdvisorDict(dict): + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, d): + self.__dict__.update(d) + + def __getattr__(self, 
key: str): + if key not in self: + return {} + + value = self[key] + if isinstance(value, dict): + value = AdvisorDict(value) + return value + + +class TimelineEvent(AdvisorDict): + + def ts_include(self, event): + + return float(self.ts) <= float(event.ts) and float(self.ts) + float(self.dur) >= float(event.ts) + float( + event.dur) \ No newline at end of file diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py new file mode 100644 index 0000000000000000000000000000000000000000..f37cfe50d14c3994f5fc6d6a740cb1f79a54b31e --- /dev/null +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -0,0 +1,269 @@ +import logging +import os + +import yaml + +from profiler.advisor.common import constant +from profiler.advisor.common.timeline.fusion_ops_rule import OpRule +from profiler.advisor.common.timeline.fusion_ops_rule_handler import TimelineOpRuleHandler +from profiler.advisor.utils.log import get_log_level +from profiler.advisor.utils.utils import get_file_path_by_walk + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +def init_timeline_ops_db(cann_version=None, torch_version=None): + logger.debug("init operators database") + + return FusionOperatorDB(cann_version=cann_version, torch_version=torch_version) + + +def get_timeline_fusion_ops_yaml_path(): + # 环境变量 ADVISOR_RULE_PATH 不为空且该路径存在, os.walk遍历其下文件, 若存在相应的规则文件则返回路径 + advisor_rule_path = os.getenv(constant.ADVISOR_RULE_PATH) + if advisor_rule_path and os.path.exists(advisor_rule_path): + specified_file_path = get_file_path_by_walk(advisor_rule_path, constant.TIMELINE_FUSION_OPS_YAML_NAME) + if len(specified_file_path.strip()) and os.path.exists(specified_file_path): + logger.debug("Successfully find The %s file which is specified by the environment variable: %s.", + specified_file_path, constant.ADVISOR_RULE_PATH) + return specified_file_path + logger.warning("The %s does not exist in path: %s. 
Try to use cloud or default local YAML file.", + constant.TIMELINE_FUSION_OPS_YAML_NAME, os.path.normpath(advisor_rule_path)) + # 检查云文件默认保存路径文件夹下是否存在相应文件, 默认路径 ~/rules/cloud/ + cloud_file_path = os.path.join(os.path.expanduser("~"), constant.CLOUD_RULE_PATH, constant.TIMELINE_FUSION_OPS_YAML_NAME) + if os.path.exists(cloud_file_path): + logger.debug("Successfully find The cloud %s file in %s.", constant.TIMELINE_FUSION_OPS_YAML_NAME, + cloud_file_path) + return cloud_file_path + # 检查本地默认文件 + local_file_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), + constant.DEFAULT_RULE_PATH, constant.TIMELINE_FUSION_OPS_YAML_NAME) + if not os.path.exists(local_file_path): + # 若本地默认文件不存在, 则log异常信息并 + logger.error("The default local YAML file does not exist. Please check the YAML file in the default path %s.", + local_file_path) + return local_file_path + + +class FusionOperatorDB: + + def __init__(self, file_path=None, cann_version=None, torch_version=None): + self.timeline_fusion_ops_yaml_path = os.path.normpath(get_timeline_fusion_ops_yaml_path()) + + self.cann_version = cann_version or constant.DEFAULT_CANN_VERSION + self.torch_version = torch_version or constant.DEFAULT_TORCH_VERSION + + self._supported_version_dict = {} + + self.is_empty = False + self.timeline_op_rule_handler = TimelineOpRuleHandler() + self.fusion_operator = self._load_yaml(self.timeline_fusion_ops_yaml_path) + + self._dequeue_op_names = [] + self._aten_op_names = [] + self._optimizer_op_names = [] + self._dequeue_op_api_map = {} + self._aten_op_api_map = {} + self._optimizer_op_api_map = {} + self._parse_db() + + @property + def dequeue_op_names(self): + return self._dequeue_op_names + + @property + def aten_op_names(self): + return self._aten_op_names + + @property + def optimizer_op_names(self): + return self._optimizer_op_names + + @property + def dequeue_op_api_map(self): + return self._dequeue_op_api_map + + @property + def aten_op_api_map(self): + 
return self._aten_op_api_map + + @property + def optimizer_op_api_map(self): + return self._optimizer_op_api_map + + def get_fusion_operator_with_unique_id(self, unique_id): + if unique_id == constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID: + logger.warning("The specified unique id: %s is invalid.Please check whether the rule of the unique id " + "exists and modify the rule.", constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + return {} + result_tmp_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(unique_id) + result_op_rule = OpRule(result_tmp_rule) + return result_op_rule.get_final_rules() + + def regenerate_timeline_op_rule_with_unique_id(self, unique_id): + self.fusion_operator.clear() + logger.debug("Program try to regenerate the rule to version %s.", unique_id) + self.fusion_operator = self.get_fusion_operator_with_unique_id(unique_id) + self.regenerate_op_api_map_and_op_names() + + def regenerate_timeline_op_rule_with_version(self, cann_version=None, torch_version=None): + cann_version = cann_version or self.cann_version + torch_version = torch_version or self.torch_version + unique_id = self._get_unique_id_in_supported_version_dict(cann_version=cann_version, + torch_version=torch_version) + self.regenerate_timeline_op_rule_with_unique_id(unique_id) + + def regenerate_op_api_map_and_op_names(self): + self._dequeue_op_names.clear() + self._aten_op_names.clear() + self._optimizer_op_names.clear() + self._dequeue_op_api_map.clear() + self._aten_op_api_map.clear() + self._optimizer_op_api_map.clear() + self._parse_db() + + def _is_version_supported(self, db_content): + """校验当前版本是否被规则库中的版本支持, 保存版本支持信息数组, 按数组或字符串的可变方式保存""" + if db_content is None: + logger.warning( + "The rule library is empty. 
Check the rule library file: %s", + self.timeline_fusion_ops_yaml_path + ) + return False + for rule_dic in db_content: + if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: + continue + cann_version_list = rule_dic.get("cann_version") + torch_version_list = rule_dic.get("torch_version") + if not cann_version_list or not torch_version_list: + continue + supported_version = [cann_version_list, torch_version_list] + + unique_id = rule_dic.get("unique_id") + if unique_id < 0: + logger.warning( + "The unique id: %s of the rule should be a positive integer. " + "Please check and modify the rule configuration in the YAML file: %s.", + unique_id, os.path.normpath(self.timeline_fusion_ops_yaml_path) + ) + self._supported_version_dict[unique_id] = supported_version + + # 若解析timeline规则库的版本支持数组为空, 则存在问题 + if not self._supported_version_dict: + logger.warning( + "The rule library does not contain rules that support the current version. " + "Check the rule library file: %s", + self.timeline_fusion_ops_yaml_path + ) + return False + + # 检验当前版本是否被规则库支持 + is_version_supported = self._is_version_supported_in_supported_version_dict() + if not is_version_supported: + # 若规则库不支持当前版本, 则log警告信息 + logger.warning("Unsupported versions: cann-%s and torch-%s, supported version list of ['cann', 'torch'] " + "is %s", self.cann_version, self.torch_version, self._supported_version_dict.values()) + return is_version_supported + + def _is_version_supported_in_supported_version_dict(self, cann_version=None, torch_version=None): + """校验当前版本是否存在在规则库中的版本支持字典中""" + for _, supported_version in self._supported_version_dict.items(): + if self._is_version_supported_in_versions(supported_version, cann_version, torch_version): + return True + return False + + def _get_unique_id_in_supported_version_dict(self, cann_version=None, torch_version=None) -> int: + """校验当前版本是否存在在规则库中的版本支持字典中, 在使用前请检查是否支持该版本""" + for key_unique_id, supported_version in self._supported_version_dict.items(): + if 
self._is_version_supported_in_versions(supported_version, cann_version, torch_version): + return key_unique_id + return constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID + + def _is_version_supported_in_versions(self, supported_version, cann_version=None, torch_version=None): + """校验当前cann版本和torch版本是否存在在规则库中的版本支持数组的元素中""" + cann_version_list = supported_version[0] + if not isinstance(cann_version_list, list): + cann_version_list = [cann_version_list] + + torch_version_list = supported_version[1] + if not isinstance(torch_version_list, list): + torch_version_list = [torch_version_list] + + cann_version = cann_version or self.cann_version + torch_version = torch_version or self.torch_version + + if (cann_version in cann_version_list) and (torch_version in torch_version_list): + return True + return False + + def _parse_db(self): + """生成输出的规则库""" + self._parse(constant.ATEN) + self._parse(constant.DEQUEUE) + self._parse(constant.OPTIMIZER) + + def _parse(self, mode): + """生成输出的规则库中指定部分, 如aten, Optimizer等""" + op_info = self.fusion_operator.get(mode, []) or [] + for ops in op_info: + for npu_api, op_combined in ops.items(): + if not isinstance(op_combined, list): + self._parse_in_list(mode, op_combined, npu_api) + for _op_combined in op_combined: + self._parse_in_list(mode, _op_combined, npu_api) + + def _parse_in_list(self, mode, op_combined, npu_api): + """生成输出的规则库中具体部分, 如{silu: torch_npu.npu_silu/torch_npu.contrib.module.SiLU}等""" + if not isinstance(op_combined, str): + logger.warning("Error type in yaml: %s", op_combined) + return + mode_str = mode.lower() + getattr(self, f"{mode_str}_op_names", []).extend(op_combined.split("-")) + + new_npu_api = npu_api + pre_npu_api = getattr(self, f"{mode_str}_op_api_map", {}).get(op_combined) + if pre_npu_api: + new_npu_api = f"{pre_npu_api}/{npu_api}" + getattr(self, f"{mode_str}_op_api_map", {})[op_combined] = new_npu_api + logger.debug("Output rule: %s: %s: %s: %s ", mode, op_combined, new_npu_api, op_combined.split("-")) + + 
def _load_yaml(self, file_path):
    """Load the timeline fusion-operator rule library from a YAML file.

    Args:
        file_path: Path of the rule YAML file.

    Returns:
        The rule set selected for the current environment, or an empty dict
        when the file is missing, the version check fails, or no rule is
        found. In every empty case ``self.is_empty`` is set to True.
    """
    logger.debug("Try to use the following yaml file as timeline ops rule: %s.", os.path.abspath(file_path))

    # A missing file is reported and answered with an empty rule set.
    if not os.path.exists(file_path):
        logger.warning("Path: '%s' does not exist, please specific existed path of "
                       "fusion operators yaml file by setting env '%s'",
                       os.path.abspath(file_path), constant.ADVISOR_RULE_PATH)
        self.is_empty = True
        return {}

    logger.debug("The rule yaml file is successfully found in path: %s", os.path.abspath(file_path))

    with open(file_path, "rb") as yaml_file:
        db_content = yaml.safe_load(yaml_file)

    version_ok = self._is_version_supported(db_content)
    if not version_ok:
        self.is_empty = True
        return {}

    logger.debug("The rule library supports the current environment version.")

    # Hand every version of the rule library to the handler.
    self.timeline_op_rule_handler.set_db_content(db_content)

    # Select the rule version that matches the current environment.
    unique_id = self._get_unique_id_in_supported_version_dict()
    logger.debug("Program is using version %s of the rule.", unique_id)
    result_op_rule = self.get_fusion_operator_with_unique_id(unique_id)

    if not (result_op_rule and len(result_op_rule) > 0):
        logger.warning(
            "Failed to load fusion operators database, skip analyze timeline for affinity api,"
            " please refer to database yaml %s to customize your yaml.",
            self.timeline_fusion_ops_yaml_path
        )
        self.is_empty = True
        return {}

    return result_op_rule
class OpRule:
    """Assemble the operator rules of one rule-library version.

    The working copy ``_tmp_rule`` maps a section key to a dict of
    ``{rule key: list of values}``. ``merge`` applies the operations requested
    by a YAML ``operator_rules`` mapping, and ``get_final_rules`` converts the
    working copy into ``{section: [{rule key: value}, ...]}``.
    """

    # Operations that an ``operator_rules`` section may request by name.
    _OPERATIONS = ("add", "overwrite", "exclude", "inherit_unique_id")

    def __init__(self, rule=None, timeline_op_rule_handler=None):
        """Start from a deep copy of ``rule`` (empty when None).

        Args:
            rule: Working rules of the globally inherited version, if any.
            timeline_op_rule_handler: Object providing
                ``get_tmp_timeline_op_rule_with_unique_id``; deep-copied.
                An empty dict is stored when None.
        """
        self._tmp_rule = {} if rule is None else copy.deepcopy(rule)
        if timeline_op_rule_handler is None:
            self.timeline_op_rule_handler = {}
        else:
            self.timeline_op_rule_handler = copy.deepcopy(timeline_op_rule_handler)
        self._rule = {}

    @property
    def tmp_rule(self):
        """Working rule dict (not a copy)."""
        return self._tmp_rule

    @staticmethod
    def _format_rule(rule):
        """Return ``rule`` with every value wrapped in a list.

        This lets ``operator_rules`` entries written as ``key: str`` in the
        YAML file be read the same way as ``key: [str, ...]``.
        """
        return {key: val if isinstance(val, list) else [val] for key, val in rule.items()}

    def _section(self, key):
        """Return the working dict of section ``key``, creating it when absent."""
        if self._tmp_rule.get(key) is None:
            self._tmp_rule[key] = {}
        return self._tmp_rule[key]

    def merge(self, extra_rule):
        """Apply the operations of ``extra_rule`` to the working rules.

        Args:
            extra_rule: Mapping ``{section key: {operation name: argument}}``.
                ``None``/empty mappings and ``None`` sections are ignored.
                Operation names outside ``_OPERATIONS`` are logged and skipped.
        """
        if not extra_rule:
            return
        for key, val in extra_rule.items():
            if val is None:
                continue
            if not isinstance(val, dict):
                logger.warning("Error type rule in section %s: %s", key, val)
                continue
            for func, op_rules in val.items():
                # Only dispatch to the known operations; the name comes from
                # the YAML file and must not reach arbitrary attributes.
                operation = getattr(self, func, None) if func in self._OPERATIONS else None
                if operation is None:
                    logger.error("Undefined field and function name. Ensure that %s is correct in the rule "
                                 "library.", func)
                    continue
                operation(key, op_rules)

    def get_final_rules(self):
        """Return the final rules as ``{section: [{rule key: value}, ...]}``."""
        self._restore_rule()
        return self._rule

    def add(self, key, add_rules: dict):
        """Add rules to section ``key``.

        A rule key that is already present is replaced, with a warning that
        it belongs in the ``overwrite`` operation.
        """
        if add_rules is None:
            return
        if not isinstance(add_rules, dict):
            logger.warning("Error type rule in add: %s", add_rules)
            return
        section = self._section(key)
        for add_key, add_val in self._format_rule(add_rules).items():
            logger.debug("add: %s: %s", add_key, add_val)
            if add_key in section:
                logger.warning("This key has been written to the rule, "
                               "%s: %s should be written in the overwrite section", add_key, add_val)
            section[add_key] = add_val

    def overwrite(self, key, overwrite_rules: dict):
        """Replace rules of section ``key``.

        A rule key that is not present yet is still stored, with a warning
        that it belongs in the ``add`` operation.
        """
        if overwrite_rules is None:
            return
        if not isinstance(overwrite_rules, dict):
            logger.warning("Error type rule in overwrite: %s", overwrite_rules)
            return
        section = self._section(key)
        for overwrite_key, overwrite_val in self._format_rule(overwrite_rules).items():
            logger.debug("overwrite: %s: %s", overwrite_key, overwrite_val)
            if overwrite_key not in section:
                logger.warning("This key is not written to the rule. "
                               "%s: %s should be written in the add section", overwrite_key, overwrite_val)
            section[overwrite_key] = overwrite_val

    def exclude(self, key, exclude_rules: list):
        """Remove the listed rule keys from section ``key``.

        Keys that are absent (including when the whole section is absent) and
        non-string entries are reported with a warning and skipped.
        """
        if exclude_rules is None:
            return
        if isinstance(exclude_rules, str):
            # Accept ``exclude: name`` the same way as ``exclude: [name]``.
            exclude_rules = [exclude_rules]
        if not isinstance(exclude_rules, (list, tuple)):
            logger.warning("Error type rule in exclude: %s", exclude_rules)
            return
        section = self._tmp_rule.get(key) or {}
        for exclude_key in exclude_rules:
            logger.debug("exclude: %s", exclude_key)
            if not isinstance(exclude_key, str):
                logger.warning("Error type rule in exclude: %s", exclude_key)
                continue
            if exclude_key not in section:
                logger.warning("This key is not written to the rule. "
                               "do not need to exclude: %s.", exclude_key)
                continue
            section.pop(exclude_key)

    def inherit_unique_id(self, key, inherit_unique_id):
        """Replace section ``key`` with a deep copy of that section from another version.

        The other version is fetched from ``self.timeline_op_rule_handler``.
        An error is logged when the version or the section cannot be found.
        """
        getter = getattr(self.timeline_op_rule_handler, "get_tmp_timeline_op_rule_with_unique_id", None)
        result_rule = getter(inherit_unique_id) if callable(getter) else None
        if result_rule is not None and result_rule.get(key) is not None:
            self._tmp_rule[key] = copy.deepcopy(result_rule.get(key))
            return
        logger.error("Rule library version %s does not exist. ", inherit_unique_id)

    def _restore_rule(self):
        """Convert each working section into a list of single-entry dicts."""
        for key, op_api_map in self._tmp_rule.items():
            self._rule[key] = [{op_combined: api} for op_combined, api in op_api_map.items()]
+import copy +import logging + +from profiler.advisor.common import constant +from profiler.advisor.common.timeline.fusion_ops_rule import OpRule +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class TimelineOpRuleHandler: + """基于线性规划思想保存OpRule,用于局部继承、全局继承等功能""" + + def __init__(self): + self._db_content = None + # 具体生成的timeline规则,key为unique_id + self._all_tmp_timeline_op_rule = {} + # 所有timeline规则的dict集合,key为unique_id + self._all_origin_timeline_op_rule_dict = {} + # 已生成timeline规则的id数组 + self._exist_timeline_op_rule_unique_id_list = [] + + @staticmethod + def _get_local_inherit_id_list(op_rule: dict): + local_inherit_id_list = [] + for _, val in op_rule.items(): + if val.get("inherit_unique_id") is not None: + local_inherit_id_list.append(val.get("inherit_unique_id")) + return local_inherit_id_list + + @staticmethod + def _is_duplicated_element_in_lists(list_a, list_b): + """检查两个数组中是否存在重复的元素,若有任意元素重复,返回True""" + if not isinstance(list_a, list): + list_a = [list_a] + if not isinstance(list_b, list): + list_b = [list_b] + # 将两个数组合并为一个列表,使用集合(set)判断列表中是否存在重复元素 + combined_list = list_a + list_b + if len(combined_list) != len(set(combined_list)): + return True + return False + + def set_db_content(self, db_content): + # 过滤非 dict 格式, 或 dict 中没有定义 unique_id 的数据, 并保存到 _all_origin_timeline_op_rule_dict 中 + self._db_content = copy.deepcopy(db_content) + for rule_dic in self._db_content: + if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: + continue + self._all_origin_timeline_op_rule_dict[rule_dic.get("unique_id")] = rule_dic + if self._all_origin_timeline_op_rule_dict: + self.generate_all_timeline_op_rule() + + def generate_basic_timeline_op_rules(self): + """用于实现获取无全局继承规则, 无全局继承的规则认为是基础版本规则, 默认不会存在局部继承""" + for _, rule_dic in self._all_origin_timeline_op_rule_dict.items(): + if rule_dic.get("inherit_unique_id") is None: + self.add_basic_timeline_op_rule(rule_dic) + + def 
add_basic_timeline_op_rule(self, rule_dic): + # 若基础规则中存在局部继承的规则,则跳过 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + return + + temp_rule = OpRule() + temp_rule.merge(rule_dic.get("operator_rules")) + + unique_id = rule_dic.get("unique_id") + logger.debug("The rule of version %s is basic rule.", unique_id) + self.add_new_timeline_op_rule(unique_id, temp_rule.tmp_rule) + + def add_empty_timeline_op_rule(self, unique_id): + if self._all_origin_timeline_op_rule_dict.get(unique_id) is None: + self._all_origin_timeline_op_rule_dict[unique_id] = {} + tmp_rule = {} + logger.debug("The rule of version %s is empty.", unique_id) + self.add_new_timeline_op_rule(unique_id, tmp_rule) + + def add_new_timeline_op_rule(self, unique_id, tmp_rule): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + self._exist_timeline_op_rule_unique_id_list.append(unique_id) + self._all_tmp_timeline_op_rule[unique_id] = tmp_rule + logger.debug("The rule of version %s is successfully generated.", unique_id) + + def generate_specified_list_timeline_op_rule(self, specified_unique_id_list, kid_id_list=None): + for specified_unique_id in specified_unique_id_list: + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + self.generate_specified_timeline_op_rule(specified_unique_id, kid_id_list) + + def generate_specified_timeline_op_rule(self, specified_unique_id, kid_id_list=None): + """用于实现生成特定版本规则 + + 若不存在相应specified_unique_id的规则、或是已生成、循环继承等情况,将该规则置空并返回 + 规则库文件结构设置为多叉树, 结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成, + 直到specified_unique_id规则依赖继承的规则库全部生成完毕, 再生成该指定规则库, 将specified_unique_id的规则库归档 + + 参数: + specified_unique_id: 指定版本规则id + kid_id_list: 子规则id数组, 用于防止循环继承, 如间接继承自身或直接继承自身等情况 + 返回: + None + """ + if kid_id_list is None: + kid_id_list = [] + + # 若该unique_id规则在timeline_fusion_ops.yaml中没有相应的规则, 生成该id规则,置为空 + if self._all_origin_timeline_op_rule_dict.get(specified_unique_id) is None: + logger.warning("The 
specified version %s does not exist in the rule library. " + "Ensure that the corresponding rule is configured in the YAML file. " + "The version %s is left blank.", + specified_unique_id, + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + # 若该unique_id规则已经生成,则无需再次生成 + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + logger.warning("The rule has been generated and does not need to be generated again. " + "Check whether unique id %s in the YAML file is duplicate.", + specified_unique_id) + return + + # 若kid_id_list不为空,且间接继承自身,则尝试生成空规则用于继承 + if kid_id_list and self._is_duplicated_element_in_lists(specified_unique_id, kid_id_list): + logger.warning("It cannot be inherited indirectly. Ensure that the corresponding rules are correctly " + "configured in the YAML file and leave Version %s blank.", + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + rule_dic = self._all_origin_timeline_op_rule_dict.get(specified_unique_id) + if rule_dic is not None: + kid_id_list.append(specified_unique_id) + + global_inherit_id = rule_dic.get("inherit_unique_id") + if global_inherit_id and global_inherit_id not in self._exist_timeline_op_rule_unique_id_list: + logger.debug("The rule of version %s global inherit the rule of version %s", + specified_unique_id, global_inherit_id) + self.generate_specified_timeline_op_rule(global_inherit_id, kid_id_list) + + # 若局部继承的规则未生成, 生成该规则 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + logger.debug("The rule of version %s local inherit the rule of version %s", + specified_unique_id, local_inherit_id_list) + self.generate_specified_list_timeline_op_rule(specified_unique_id_list=local_inherit_id_list, + kid_id_list=kid_id_list) + logger.debug("Start to generate rule of version %s", specified_unique_id) + # 实现全局继承与局部继承 + temp_rule = OpRule(timeline_op_rule_handler=self, + 
rule=self._all_tmp_timeline_op_rule.get(global_inherit_id)) + temp_rule.merge(rule_dic.get("operator_rules")) + # 将生成的规则归档保存 + self.add_new_timeline_op_rule(specified_unique_id, temp_rule.tmp_rule) + return + logger.error("Failed to generate the rule whose unique_id is %s. Ensure that the rule is configured in " + "the YAML file and the version %s is empty.", specified_unique_id, specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + + def generate_all_timeline_op_rule(self): + """用于实现获取所有版本规则 + + 查找db_content中的规则库, 规则库文件结构设置为多叉树, 优先生成无继承的基础规则版本 + 循环并生成其他版本, 文件结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成,直到全部规则库生成后退出函数 + + 参数: + None + 返回: + None + """ + self.generate_basic_timeline_op_rules() + _unique_id_list = copy.deepcopy(list(self._all_origin_timeline_op_rule_dict.keys())) + for unique_id in _unique_id_list: + if unique_id in self._exist_timeline_op_rule_unique_id_list: + continue + self.generate_specified_timeline_op_rule(unique_id) + + def get_tmp_timeline_op_rule_with_unique_id(self, unique_id): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + logger.error("The specified unique_id does not exist in the rule library. Ensure that the " + "corresponding rule is configured in the YAML file and the version %s is empty." + "If the value of unique_id is a negative number, the version may not be supported.", + unique_id) + self.add_empty_timeline_op_rule(unique_id) + if unique_id < 0: + logger.error("Advise to use a positive integer as the unique id of rules. " + "Negative numbers: %s are not recommended to use as unique id. 
" + "If specified invalid unique id: %s is used, an empty rule is returned by default.", + unique_id, constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + return self._all_tmp_timeline_op_rule.get(unique_id) diff --git a/profiler/advisor/common/version_control.py b/profiler/advisor/common/version_control.py new file mode 100644 index 0000000000000000000000000000000000000000..e3b3006a80f02343eae31a80afeea4f72b89a778 --- /dev/null +++ b/profiler/advisor/common/version_control.py @@ -0,0 +1,26 @@ +import logging +from typing import List + +logger = logging.getLogger() + + +class VersionControl: + _SUPPORT_VERSIONS = [] + + @classmethod + def is_supported(cls, cann_version: str) -> bool: + """ + Check whether the CANN software version is supported, which can be viewed by executing the following command: + 'cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info' + """ + flag = (cls._SUPPORT_VERSIONS.__contains__(cann_version)) + if not flag: + logger.debug("class type is %s, which is not support current CANN version %s", cls.__name__, cann_version) + return flag + + def get_support_version(self) -> List[str]: + """ + Acquire the CANN software version + :return: supported CANN software version + """ + return self._SUPPORT_VERSIONS diff --git a/profiler/advisor/compute_perf_analysis.ipynb b/profiler/advisor/compute_perf_analysis.ipynb deleted file mode 100644 index e7a663130c8da335129513a5ca1a99cf28fe48b7..0000000000000000000000000000000000000000 --- a/profiler/advisor/compute_perf_analysis.ipynb +++ /dev/null @@ -1,366 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-21T09:19:13.937531900Z", - "start_time": "2024-02-21T09:19:13.267899500Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "import pandas as pd\n", - "\n", - "from advisor_backend.interface import Interface\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "# 算子调优分析\n", - "## 1. 算子分析的数据准备\n", - "当前算子分析工具支持分析Ascend Pyorch Profiler方式生成的ascend_pt目录\n", - "## 2. 融合算子分析\n", - "当前支持分析模型中存在可融合的小算子,并给出优化建议。\n", - "\n", - "\"更多融合算子信息,请查阅 https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/700alpha003/processormodel/hardwaredesc_0001.html\n", - "\n", - "## 3. 异常性能算子分析\n", - "支持分析模型中性能异常的计算算子" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-22T08:41:17.455567500Z", - "start_time": "2024-02-22T08:41:16.716884800Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: D:\\work\\ascend_pt\\ASCEND_PROFILER_OUTPUT\\kernel_details.csv\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pattern_namepatternlencountduration sum(us)op durations(us)index
18torch_npu.npu_swiglu(Slice, Slice, Swish, Mul)4127.53[21.2, 0.05, 3.14, 3.14][0]
\n", - "
" - ], - "text/plain": [ - " pattern_name pattern len count duration sum(us) op durations(us) index\n", - "18 torch_npu.npu_swiglu (Slice, Slice, Swish, Mul) 4 1 27.53 [21.2, 0.05, 3.14, 3.14] [0]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "The computing time of fusable op is 27.53 ms.\n", - "\n", - "\n", - "Advice 0:\n", - "Replace [Slice, Slice, Swish, Mul] with torch_npu.npu_swiglu. This pattern first happened in: \n", - "/root/torch/module.py\n", - "/root/test/slice.py(116)\n" - ] - } - ], - "source": [ - "# EDIT THE PROFILING DATA PATH\n", - "compute_path = \"[YOUR PATH]\"\n", - "interface = Interface(compute_path)\n", - "data = interface.get_data('compute', 'npu_fused')\n", - "pd.set_option('display.max_columns', None)\n", - "pd.set_option('display.width', 900)\n", - "display(data['data'].iloc[:, :-2])\n", - "print('\\n')\n", - "print(data['bottleneck'])\n", - "print('\\n')\n", - "print(data['advice'])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: D:\\work\\ascend_pt\\ASCEND_PROFILER_OUTPUT\\kernel_details.csv\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Step IdModel IDTask IDStream IDNameTypeAccelerator CoreStart Time(us)Duration(us)Wait Time(us)Block DimMix Block DimInput ShapesInput Data TypesInput FormatsOutput ShapesOutput Data TypesOutput FormatsContext IDaicore_time(us)aic_total_cyclesaic_mac_ratioaic_mac_int8_ratioaic_cube_fopsaic_vector_fopsaiv_time(us)aiv_total_cyclesaiv_vec_fp32_ratioaiv_vec_fp16_ratioaiv_vec_int32_ratioaiv_vec_misc_ratioaiv_cube_fopsaiv_vector_fopssize(MB)throughput(GB/s)color
014294967295126516Slice1SliceAI_VECTOR_CORE169952962310675021.20261.56904,1025INT64FORMAT_ND4,1025INT32FORMAT_NDNaN0.00.00.00.00.00.01.7729508.00.00.00.00620.00.05856.00.0469212.161371RED
414294967295126516Add1AddAI_CORE16995296231067543.14261.56904,1025INT64FORMAT_ND4,1025INT32FORMAT_NDNaN2.328888.00.20.10.10.70.000.00.00.00.00000.00.00.00.04692114.592698RED
\n", - "
" - ], - "text/plain": [ - " Step Id Model ID Task ID Stream ID Name Type Accelerator Core Start Time(us) Duration(us) Wait Time(us) Block Dim Mix Block Dim Input Shapes Input Data Types Input Formats Output Shapes Output Data Types Output Formats Context ID aicore_time(us) aic_total_cycles aic_mac_ratio aic_mac_int8_ratio aic_cube_fops aic_vector_fops aiv_time(us) aiv_total_cycles aiv_vec_fp32_ratio aiv_vec_fp16_ratio aiv_vec_int32_ratio aiv_vec_misc_ratio aiv_cube_fops aiv_vector_fops size(MB) throughput(GB/s) color\n", - "0 1 4294967295 1265 16 Slice1 Slice AI_VECTOR_CORE 1699529623106750 21.20 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 0.0 0.0 0.0 0.0 0.0 0.0 1.77 29508.0 0.0 0.0 0.0062 0.0 0.0 5856.0 0.046921 2.161371 RED\n", - "4 1 4294967295 1265 16 Add1 Add AI_CORE 1699529623106754 3.14 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 2.3 28888.0 0.2 0.1 0.1 0.7 0.00 0.0 0.0 0.0 0.0000 0.0 0.0 0.0 0.046921 14.592698 RED" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 异常性能算子识别\n", - "from advisor_backend.compute_advice.npu_slow_advice import NpuSlowAdvice\n", - "\n", - "npu_slow_advice = NpuSlowAdvice(compute_path)\n", - "data = interface.get_data('compute', 'npu_slow')\n", - "slow_op_data = data[data[\"color\"] == \"RED\"]\n", - "display(slow_op_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(compute_path, \"slow_op.xlsx\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "call stack: \n", - "/root/torch/module.py\n", - "/root/test/slice.py(116)\n" - ] - } - ], - "source": [ - "# 异常性能算子call stack\n", - "call_stack = npu_slow_advice.get_call_stack(data, index_id=0, ts_col=\"Start Time(us)\")\n", - "print(\"call stack: \")\n", - "print(call_stack)" - ] - } 
- ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/profiler/advisor/config/__init__.py b/profiler/advisor/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini new file mode 100644 index 0000000000000000000000000000000000000000..7185a6b3bfe6a609cba6d160a18e4305b8996eb7 --- /dev/null +++ b/profiler/advisor/config/config.ini @@ -0,0 +1,16 @@ +[LOG] +# console_logging_level : DEBUG/INFO/WARNING/ERROR +console_logging_level = INFO +[ANALYSE] +# analysis_result_file : filename of analysis result +analysis_result_file = analysis_result_file.xlsx +# tune_ops_file: filename of tune op name list +tune_ops_file = operator_tuning_file.cfg +[THRESHOLD] +# operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker +operator_bound_ratio = 0.8 +[RULE-BUCKET] +# region : URL of different regions where can download rule yaml file +cn-north-9 = cnnorth9-modelarts-sdk +cn-southwest-2 = cnsouthwest2-modelarts-sdk +cn-north-7 = cnnorth7-modelarts-sdk \ No newline at end of file diff --git a/profiler/advisor/config/config.py b/profiler/advisor/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..183c2ed5a25382981e0a7e13b5bd2e4d3d28be49 --- /dev/null +++ b/profiler/advisor/config/config.py @@ -0,0 +1,103 @@ +""" +advisor config +""" +from profiler.advisor.utils.utils import Timer + +import logging +import os +from configparser import ConfigParser + 
@singleton
class Config:
    """
    Advisor configuration read from ``config/config.ini`` plus derived output paths.
    """
    # pylint: disable=too-many-instance-attributes

    _CONFIG_DIR_NAME = "config"
    _CONFIG_FILE_NAME = "config.ini"

    def __init__(self) -> None:
        parser = ConfigParser(allow_no_value=True)
        self._work_path = os.getcwd()  # pwd
        self._root_path = os.path.abspath(os.path.join(__file__, "../../"))
        ini_path = os.path.join(self._root_path, self._CONFIG_DIR_NAME, self._CONFIG_FILE_NAME)
        parser.read(ini_path)
        self.config = parser
        # ANALYSE
        result_file = parser.get("ANALYSE", "analysis_result_file")
        self._analysis_result_file = self._normalize_path(result_file)
        tune_file_name = f"operator_tuning_file_{Timer().strftime}.cfg"
        self._tune_ops_file = os.path.abspath(os.path.join(self._work_path, tune_file_name))

    @property
    def work_path(self) -> str:
        """
        Working directory captured when the object was created.
        :return: work path
        """
        return self._work_path

    @property
    def root_path(self) -> str:
        """
        Directory two levels above this module's file path.
        :return: root path
        """
        return self._root_path

    @property
    def analysis_result_file(self) -> str:
        """
        Path of the analysis result file.
        :return: filename
        """
        return self._analysis_result_file

    @property
    def tune_ops_file(self) -> str:
        """
        Path of the tune-op list file.
        :return: filename
        """
        return self._tune_ops_file

    @property
    def operator_bound_ratio(self) -> float:
        """
        ``operator_bound_ratio`` of the THRESHOLD section, as a float.
        """
        raw_ratio = self.config.get("THRESHOLD", "operator_bound_ratio")
        return float(raw_ratio)

    def _normalize_path(self, file) -> str:
        """Return ``file`` as an absolute path; paths not starting with "/" are joined to the work path."""
        if file.startswith("/"):
            return os.path.abspath(file)
        return os.path.abspath(os.path.join(self._work_path, file))

    def set_config(self, key, value) -> None:
        """
        Store ``value`` as attribute ``key`` of this object.
        :param key: config key
        :param value: config value
        """
        setattr(self, key, value)

    def get_config(self, key) -> str:
        """
        Read attribute ``key`` of this object.
        :param key: config key
        :return: config value, or "" when the attribute lookup fails
        """
        try:
            value = getattr(self, key)
        except AttributeError:
            return ""
        return value

    def set_log_path(self, result_file: str, log_path: str = None):
        """Create the log directory and point the analysis result file into it.

        :param result_file: file name of the analysis result
        :param log_path: target directory; "<work path>/log" when None
        """
        if log_path is None:
            log_path = os.path.join(self._work_path, "log")
        os.makedirs(log_path, exist_ok=True)
        result_path = os.path.join(log_path, result_file)
        self.config._analysis_result_file = result_path
        self._analysis_result_file = result_path
ge_info ] + class_attr: + op_summary: OpSummary + task_time: TaskTime + msprof: Msprof + ge_info: GeInfo + file_attr: + op_summary: ^op_summary_\d+_\d+\.csv$ + task_time: ^task_time_\d+_\d+\.json$ + msprof: ^msprof_\d+_\d+\.json$ + ge_info: ge_info.db + + diff --git a/profiler/advisor/dataset/__init__.py b/profiler/advisor/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/dataset/cluster/__init__.py b/profiler/advisor/dataset/cluster/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py b/profiler/advisor/dataset/cluster/cluster_dataset.py similarity index 41% rename from profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py rename to profiler/advisor/dataset/cluster/cluster_dataset.py index f8a625242f3939602cbb7b8391cd8062e21fe01b..94527cdf5b26b307847eb460c567678c66a2e568 100644 --- a/profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -1,26 +1,100 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
class ClusterDataset(Dataset):
    """Base dataset for files found in the cluster analysis output directory."""

    def __init__(self, collection_path, data: dict, **kwargs) -> None:
        super().__init__(collection_path, data)

    def is_cluster_analysis_output_exist(self):
        """
        Return True when the collection path contains an entry named
        'cluster_analysis_output', i.e. the cluster analysis has already run.
        """
        if 'cluster_analysis_output' not in os.listdir(self.collection_path):
            return False
        print("[INFO]Cluster has been analyzed "
              "because of the existence of cluster analysis output directory.")
        print("[INFO]Skip Cluster analyze backend.")
        return True

    def cluster_analyze(self):
        """Run the cluster analysis backend unless its output already exists.

        Raises:
            ValueError: when the backend raises any exception.
        """
        if self.is_cluster_analysis_output_exist():
            return
        parameter = {
            Constant.COLLECTION_PATH: self.collection_path,
            Constant.ANALYSIS_MODE: "all"
        }
        print("[INFO] cluster analysis is in the process, please wait...")
        try:
            ClusterAnalysis(parameter).run()
        except Exception as e:
            raise ValueError(f"Cluster analyze backend failed:{e}") from e

    def load_csv_data(self, file_name, dataBean):
        """Read ``file_name`` from the cluster analysis output directory as CSV.

        Args:
            file_name: CSV file name inside the output directory.
            dataBean: Bean type passed to ``FileManager.read_csv_file``.

        Raises:
            RuntimeError: when the file does not exist.
        """
        csv_path = os.path.join(self.collection_path, const.CLUSTER_ANALYSIS_OUTPUT, file_name)
        if not os.path.exists(csv_path):
            # Name the file that was actually requested.
            msg = f"[ERROR] {file_name} doesn't exist, terminate analysis."
            raise RuntimeError(msg)
        return FileManager.read_csv_file(csv_path, dataBean)

    def load_json_data(self, file_name):
        """Read ``file_name`` from the cluster analysis output directory as JSON.

        Raises:
            RuntimeError: when the file does not exist.
        """
        json_path = os.path.join(self.collection_path, const.CLUSTER_ANALYSIS_OUTPUT, file_name)
        if not os.path.exists(json_path):
            # Name the file that was actually requested.
            msg = f"[ERROR] {file_name} doesn't exist, terminate analysis."
            raise RuntimeError(msg)
        return FileManager.read_json_file(json_path)


@singleton
class ClusterStepTraceTimeDataSet(ClusterDataset):
    """Per-rank compute / communication / free time taken from the step trace CSV."""

    RANK = "rank"

    def __init__(self, collection_path: str, data: dict, kwargs: dict = None):
        # Set before the base initializer runs.
        # NOTE(review): presumably the base class calls _parse() during
        # initialization - confirm against Dataset.
        self._step_dict = defaultdict()
        super().__init__(collection_path, data)

    def _parse(self):
        """Run the cluster analysis if needed and load the step trace data.

        Returns:
            True on success; False (with ``_step_dict`` set to None) when the
            CSV file is missing.
        """
        self.cluster_analyze()
        try:
            step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean)
        except RuntimeError as e:
            print("捕获到异常:", e)
            self._step_dict = None
            return False
        self._step_dict = self.formate_data(step_data)
        return True

    def formate_data(self, step_data: list):
        """Sum compute, communication and free time per index over rows of type ``RANK``.

        Returns:
            A defaultdict mapping each index to ``[compute, communication, free]``.
        """
        step_dict = defaultdict(lambda: [0, 0, 0])
        for step_bean in step_data:
            if step_bean.type != self.RANK:
                continue
            totals = step_dict[step_bean.index]
            totals[0] += step_bean.compute
            totals[1] += step_bean.communication
            totals[2] += step_bean.free
        return step_dict

    def get_data(self):
        """Return the per-index totals, or None when parsing failed."""
        return self._step_dict
@staticmethod def compute_ratio(dividend: float, divisor: float): @@ -49,29 +123,22 @@ class SlowLinkAdvice(ClusterAdviceBase): else: return round(dividend / divisor, 4) - def load_communication_json(self): - json_path = os.path.join(self.collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT, Constant.CLUSTER_COMM_JSON) - if not os.path.exists(json_path): - msg = "[ERROR] cluster_communication.json doesn't exist, terminate analysis." - raise RuntimeError(msg) - communication_json = FileManager.read_json_file(json_path) - return communication_json - - def run(self): - self.path_check() - communication_json = self.load_communication_json() + def _parse(self): + self.cluster_analyze() + try: + communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) + except RuntimeError as e: + print("捕获到异常:", e) + self.rank_bw_dict = None + return False self.process(communication_json) - self.output() - return self.output_format_data + return True def process(self, communication_json: dict): for comm_group, group_dict in communication_json.items(): for step, step_dict in group_dict.items(): for op, op_dict in step_dict.items(): self.compute_bandwidth(op_dict) - if self.rank_bw_dict: - self.produce_bottleneck(self.RDMA_BANDWIDTH) - self.produce_bottleneck(self.SDMA_BANDWIDTH) def compute_bandwidth(self, op_dict: dict): for rank_id, rank_dict in op_dict.items(): @@ -94,17 +161,5 @@ class SlowLinkAdvice(ClusterAdviceBase): self.rank_bw_dict[rank][self.SDMA_BANDWIDTH] = self.compute_ratio( self.rank_bw_dict[rank][self.SDMA_SIZE_MB], self.rank_bw_dict[rank][self.SDMA_TIME_MS]) - def produce_bottleneck(self, link_type: str): - data_list = [rank_dict.get(link_type, 0) for rank_id, rank_dict in self.rank_bw_dict.items()] - avg_bw = round(sum(data_list) / len(data_list), 3) - if avg_bw == 0: - return - self.bottelneck += f'{link_type}: \n' \ - f'The average is {avg_bw}, ' \ - f'while the maximum is {round(max(data_list), 3)}GB/s and ' \ - f'the minimum is {round(min(data_list), 3)}GB/s. 
logger = logging.getLogger()


class Dataset:
    """
    Dataset base class.

    :param collection_path: dataset path, resolved against ``Config().work_path``
    :param data: optional registry dict; a successfully parsed dataset appends
        itself to ``data[<key>]`` where ``<key>`` comes from :meth:`get_key`
    """

    def __init__(self, collection_path, data=None) -> None:
        registry = {} if data is None else data
        joined = os.path.join(Config().work_path, collection_path)
        self.collection_path = os.path.abspath(joined)
        logger.debug("init %s with %s", self.__class__.__name__, self.collection_path)
        # Only datasets whose _parse() result is truthy get registered.
        if not self._parse():
            return
        registry.setdefault(self.get_key(), []).append(self)

    def _parse(self):
        """Hook for subclasses; the base implementation parses nothing."""
        return None

    @classmethod
    def get_key(cls):
        """
        get key of dataset
        :return: the text after the last '.' in the class name
        """
        _, _, tail = cls.__name__.rpartition('.')
        return tail
logger = logging.getLogger()


class GraphDataset(Dataset):
    """
    Dataset built from a graph dump file found under the data directory.
    """
    FILE_PATTERN = "ATT_ADVISOR_GRAPH_FILE"

    def __init__(self, collection_path, data: dict = None, **kwargs) -> None:
        # Must exist before super().__init__, which calls _parse().
        self.graph_files: List[HostGraphParser] = []
        super().__init__(collection_path, data)

    def _parse(self):
        """Parse the last matching graph file; the returned list is truthy
        only when a file was parsed."""

        def _is_graph_file(file):
            return file.endswith(load_parameter(self.FILE_PATTERN, "_Build.txt"))

        candidates = get_file_path_from_directory(self.collection_path, _is_graph_file)

        # Only the last candidate (if any) is used as the default graph.
        for graph_file_path in candidates[-1:]:
            logger.info("Prepare to parse %s as default graph.", graph_file_path)
            self.graph_files.append(HostGraphParser(graph_file_path))
        return self.graph_files

    @lazy_property
    def graphs(self) -> List[Graph]:
        """
        Build one ``Graph`` per parsed graph file.
        return: List[Graph] sorted by graph name
        """
        built = []
        for parser in self.graph_files:
            graph = Graph(nodes=parser.nodes, edges=parser.edges, name="Default")
            graph.build()
            built.append(graph)
        built.sort(key=lambda g: g.name)
        if self.graph_files:
            del self.graph_files[0]  # remove previous useless data
        return built

    def is_empty(self) -> bool:
        """check empty graph dataset"""
        return not self.graph_files
logger = logging.getLogger()


class ProfilingBuilderBase:
    """
    Profiling base: builds child parsers described by ``DATA_LIST``.

    Each ``DATA_LIST`` entry is a dict read with the keys ``class_name``
    (a callable taking a path), optional ``subdir_name`` and ``attr_name``.
    """
    DATA_LIST: List[Dict] = []

    def __init__(self, path) -> None:
        self._path = path

    def parse_data(self) -> bool:
        """
        Parse data for every entry of ``DATA_LIST``.

        :return: True when at least one child parser succeeded (or when
            ``self`` is itself a ``ProfilingParser``)
        """
        if isinstance(self, ProfilingParser):
            return True
        ret = False
        for data in self.DATA_LIST:
            data_cls = data.get("class_name")
            if data_cls is None:
                continue
            subdir = data.get("subdir_name")
            target = join_prof_path(self._path, subdir) if subdir else self._path
            parser = data_cls(target)
            if parser.parse_data():
                setattr(self, str(data.get("attr_name")), parser)
                ret = True
        return ret


def _normalize_db_names(dbs) -> list:
    """Return ``dbs`` as a list of names; a single string becomes ``[dbs]``."""
    if isinstance(dbs, str):
        return [dbs]
    return list(dbs)


class ConnectionManager:
    """
    Connection manager for one sqlite database file.
    """

    def __init__(self, path, db_name):
        self.db_path = os.path.join(path, db_name)
        self.connection = create_engine(f'sqlite:///{self.db_path}')
        self.metadata = MetaData()
        self.metadata.reflect(bind=self.connection)

    def __call__(self, *args, **kwargs):
        return self.connection

    @staticmethod
    def check_db_exists(db_path: str, dbs: List) -> bool:
        """
        Check that ``db_path`` is a readable directory containing every
        name in ``dbs`` (a list of names, or a single name).
        """
        if not os.path.isdir(db_path):
            return False
        db_names = _normalize_db_names(dbs)
        if not db_names:
            return True
        if not os.access(db_path, os.R_OK):
            return False
        # List the directory once instead of once per database.
        existing = set(os.listdir(db_path))
        return all(name in existing for name in db_names)

    def check_table_exists(self, tables: List) -> bool:
        """
        Check that every name in ``tables`` is a reflected table.
        """
        return all(table in self.metadata.tables for table in tables)

    def check_column_exists(self, table_name: str, columns: List) -> bool:
        """
        Check that ``table_name`` exists and has every column in ``columns``.
        """
        if table_name not in self.metadata.tables:
            return False
        table_columns = self.metadata.tables[table_name].columns
        return all(column in table_columns for column in columns)

    @classmethod
    def get_connection(cls, path, dbs, tables=None, is_host=False):
        """
        Open a connection if the requested databases (and tables) exist.

        :param path: directory holding the database files
        :param dbs: database file name, or list of names that must all exist
        :param tables: optional table names that must exist
        :param is_host: replace the ``/device_<n>`` path component by ``/host``
        :return: a ConnectionManager, or None when a check fails
        """
        if is_host:
            # '+' so that multi-digit ids (device_10) are replaced entirely.
            path = re.sub(r"/device_[0-9]+", "/host", path)
        db_names = _normalize_db_names(dbs)
        if not db_names or not cls.check_db_exists(path, db_names):
            return None
        # os.path.join() rejects a list, so connect to a single file name.
        # NOTE(review): the first entry is assumed to be the database to
        # open when several names are given - confirm against callers.
        conn = cls(path, db_names[0])
        if tables and not conn.check_table_exists(tables):
            return None
        return conn


class DeviceInfoParser:
    """
    Profiling info parser: reads ``info.json.*`` and stores the device id,
    ``aiv_num`` (AI vector core count) and ``ai_core_num`` (AI core count)
    into ``Config``.
    """
    DATA_LIST = []

    def __init__(self, path) -> None:
        self._path = path

    def parse_data(self) -> bool:
        """
        parse profiling data
        :return: True as soon as one info file is parsed successfully
        """
        file_list = get_file_path_from_directory(self._path, lambda x: x.startswith("info.json."))
        if not file_list:
            return False
        return any(self._parse(info) for info in file_list)

    @staticmethod
    def _parse(info_file: str) -> bool:
        """Parse one info file; True only when an ``ai_core_num`` was found."""
        if info_file.endswith("done"):
            return False  # skip info.json.0.done
        try:
            with open(info_file, encoding="utf-8") as file:
                info = json.load(file)
        except (IOError, ValueError) as error:
            logger.error("Parse json info file %s failed : %s", info_file, error)
            return False
        if "DeviceInfo" not in info:
            logger.error("No device info in json info file %s", info_file)
            return False
        config = Config()
        for device_info in info["DeviceInfo"]:
            if "id" in device_info:
                config.set_config("device_id", device_info["id"])
            if "aiv_num" in device_info:
                config.set_config("aiv_num", device_info["aiv_num"])
            if "ai_core_num" in device_info:
                config.set_config("ai_core_num", device_info["ai_core_num"])
                return True
        logger.error("No ai_core_num in json info file %s", info_file)
        return False


class Info:
    """
    Attribute bag for op info; values are stored as instance attributes.
    """
    _attr_pre_fix_list = [""]

    def add_attr(self, key: str, value: str):
        """
        Add an attribute; an empty key or an already-present key is ignored.
        :param key: op info key
        :param value: op info value
        :return: None
        """
        if not key or hasattr(self, key):
            return
        setattr(self, key, value)

    def has_attr(self, key: str, strict_mode=False):
        """
        Check whether the attribute exists.
        :param key: attr key
        :param strict_mode: match ``key`` exactly instead of trying prefixes
        :return: true or false
        """
        if strict_mode:
            return hasattr(self, key)
        return any(hasattr(self, prefix + key) for prefix in self._attr_pre_fix_list)

    def get_attr(self, key, strict_mode=False):
        """
        Get an attribute value by key.
        :param key: attr key
        :param strict_mode: match ``key`` exactly instead of trying prefixes
        :return: attr value, or "" when not found
        """
        if strict_mode:
            return getattr(self, key, "")
        for prefix in self._attr_pre_fix_list:
            if key.startswith("mac") and prefix == "aiv_":
                # e.g mac_ratio must match aic_mac_ratio, not aiv_mac_ratio
                continue
            if key.startswith("vec") and prefix == "aic_":
                # e.g vec_ratio must match aiv_vec_ratio, not aic_vec_ratio
                continue
            attr = prefix + key
            if hasattr(self, attr):
                return getattr(self, attr)
        return ""

    def get_float_attr(self, attr, strict_mode=False):
        """
        Get an attribute as float.
        :param attr: attr key
        :return: float value, or 0 when missing or not convertible
        """
        try:
            return float(self.get_attr(attr, strict_mode))
        except (ValueError, TypeError, FloatingPointError):
            # TypeError covers non-string, non-numeric values such as None.
            return 0

    def get_decimal_attr(self, attr, strict_mode=False):
        """
        Get an attribute as ``decimal.Decimal``.
        :param attr: attr key
        :return: Decimal value, or Decimal(0) when missing or not convertible
        """
        try:
            return decimal.Decimal(self.get_attr(attr, strict_mode))
        except (ValueError, TypeError, decimal.InvalidOperation):
            return decimal.Decimal(0)

    def get_attrs(self) -> dict:
        """
        get all attributes
        :return: the instance ``__dict__``
        """
        return self.__dict__


class OpInfo(Info):
    """
    Operator summary info.
    """

    _attr_pre_fix_list = ["", "aic_", "aiv_"]
    _mac_ratio_attrs = ["mac_ratio", "mac_fp16_ratio", "mac_int8_ratio", "aic_mac_ratio"]
    _aicore_time_key = ["aicore_time", "aiv_time"]
    _total_cycles_key = ["total_cycles", "aic_total_cycles", "aiv_total_cycles"]

    def __lt__(self, other):
        return self.get_float_attr("task_start_time") < other.get_float_attr("task_start_time")

    @lazy_property
    def is_cube_op(self) -> bool:
        """
        True when any mac-ratio attribute is greater than 0.
        """
        op_name = getattr(self, "op_name", "")
        ffts_type = getattr(self, "ffts_type", None)
        for attr in self._mac_ratio_attrs:
            if not hasattr(self, attr):
                continue
            try:
                ratio = float(getattr(self, attr))
            except (ValueError, TypeError):
                continue
            if ratio > 0:
                if ffts_type == "1":
                    logger.warning("ffts type of op %s is vector but mac ratio is not 0", op_name)
                return True
        # not cube op
        if ffts_type == "0":
            logger.warning("ffts type of op %s is cube but mac ratio is 0", op_name)
        return False

    @lazy_property
    def has_mac_ratio(self) -> bool:
        """
        check if op_info has mac ratio
        """
        return any(attr in self.__dict__ for attr in self._mac_ratio_attrs)

    def attr_sum(self, attr_list):
        """sum of a list attrs (strict lookup, missing attrs count as 0)"""
        total = 0
        for attr in attr_list:
            total += self.get_float_attr(attr, strict_mode=True)
        return total

    def get_aicore_time(self):
        """
        get sum of aicore time and ai vector core time
        """
        return self.attr_sum(self._aicore_time_key)

    def get_total_cycles(self):
        """
        get sum of total cycle for aicore and ai vector core
        """
        return self.attr_sum(self._total_cycles_key)


class TaskInfo:
    """
    Read-only view over one trace event dict.
    """
    EVENT_TYPE = {"metadata": ['M'], "duration": ['B', 'E'], "complete": ['X'], 'flow': ['s', 't', 'f']}

    def __init__(self, content: dict) -> None:
        self._name = content.get("name", "")
        self._pid = content.get("pid", 0)
        self._tid = content.get("tid", 0)
        self._start_time = float(content.get("ts", 0.0))
        self._dur = float(content.get("dur", 0.0))
        self._args = content.get("args", {})
        self._cat = content.get("cat", "")
        self._id = content.get("id", "")

    @property
    def pk_id(self):
        """
        get id
        :return: id
        """
        return self._id

    @property
    def pid(self):
        """
        get pid
        :return: pid
        """
        return self._pid

    @property
    def tid(self):
        """
        get tid
        :return: tid
        """
        return self._tid

    @property
    def task_type(self):
        """
        get task type
        :return: args["Task Type"], or "NA" when absent
        """
        return self._args.get("Task Type", "NA")

    @property
    def start_time(self):
        """
        get start time
        :return: start time ("ts" as float)
        """
        return self._start_time

    @property
    def end_time(self):
        """
        get end time
        :return: start time plus duration
        """
        return self._start_time + self._dur

    @property
    def dur(self):
        """
        get duration
        :return: duration
        """
        return self._dur

    @property
    def name(self):
        """
        get task name
        :return: task name
        """
        return self._name

    @property
    def stream_id(self):
        """
        get stream id
        :return: args["Stream Id"], or "NA" when absent
        """
        return self._args.get("Stream Id", "NA")

    @property
    def task_id(self):
        """
        get task id
        :return: args["Task Id"], or "NA" when absent
        """
        return self._args.get("Task Id", "NA")

    @property
    def args(self):
        """
        get args of task
        :return: args
        """
        return self._args

    @property
    def cat(self):
        """
        get category of task
        """
        return self._cat


class ProfilingDataset(Dataset):
    """Profiling dataset assembled from the version pattern config."""
    PROF_TYPE = ""

    def __init__(self, collection_path, data: dict, **kwargs) -> None:
        self.cann_version = kwargs.get("cann_version", constant.DEFAULT_CANN_VERSION)
        self.PROF_TYPE = kwargs.get("profiling_type", constant.DEFAULT_PROFILING_TYPE)
        self.patterns = self.parse_pattern()
        self.current_version_pattern = self.get_current_version_pattern()
        super().__init__(collection_path, data)

    def _parse(self):
        """Parse device info, then build parsers from the version pattern.

        :return: True when a pattern with ``dirs_pattern`` exists for the
            configured CANN version, False otherwise
        """
        info = DeviceInfoParser(self.collection_path)
        if info.parse_data():
            self._info = info
        pattern = self.current_version_pattern
        # get_current_version_pattern() returns an empty dict (never None)
        # when nothing matches, so test truthiness and the key itself.
        if not pattern or "dirs_pattern" not in pattern:
            logger.warning("Skip parse profiling dataset, no pattern for cann version %s", self.cann_version)
            return False
        self.build_from_pattern(pattern["dirs_pattern"], self.collection_path)
        return True

    def build_from_pattern(self, dirs_pattern, current_path):
        """Walk ``dirs_pattern`` recursively and attach parsed data objects.

        A dict maps a sub-directory name to a nested pattern; a list holds
        item names whose parser class name and file pattern are looked up in
        the ``class_attr`` / ``file_attr`` sections of the version pattern.
        """
        if isinstance(dirs_pattern, dict):
            for key, value in dirs_pattern.items():
                self.build_from_pattern(value, join_prof_path(current_path, key))
        elif isinstance(dirs_pattern, list):
            class_attr = self.current_version_pattern.get('class_attr') or {}
            file_attr = self.current_version_pattern.get('file_attr') or {}
            for item in dirs_pattern:
                class_name = class_attr.get(item)
                # Parser classes are resolved by name from module globals.
                data_class = globals().get(class_name)
                if data_class is None:
                    logger.warning("Skip parse %s from local path %s", class_name, current_path)
                    continue
                data_class.FILE_PATTERN = file_attr.get(item)
                data_object = data_class(current_path)
                if data_object.parse_data():
                    setattr(self, item, data_object)
                else:
                    logger.warning("Skip parse %s from local path %s", class_name, current_path)
        else:
            logger.warning("Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__)

    def get_current_version_pattern(self):
        """Return the config entry whose ``version`` equals ``cann_version``,
        or an empty dict when there is none (or no usable config)."""
        # parse_pattern() returns [] when the config file is missing, and
        # yaml.safe_load() returns None for an empty file.
        if not isinstance(self.patterns, dict):
            return dict()
        for version_config_dict in self.patterns.get('versions') or []:
            if version_config_dict['version'] == self.cann_version:
                return version_config_dict
        return dict()

    def parse_pattern(self, config_path="config/profiling_data_version_config.yaml"):
        """Load the YAML version config.

        :param config_path: absolute path, or a path relative to two
            directories above this module
        :return: the loaded YAML content, or [] when the file is missing
        """
        if not os.path.isabs(config_path):
            config_path = os.path.join(os.path.dirname(__file__),
                                       "../", "../", config_path)

        if not os.path.exists(config_path):
            logger.warning("Skip parse profiling dataset, because %s does not exist.", config_path)
            return []

        with open(config_path, 'r') as f:
            patterns = yaml.safe_load(f)

        return patterns

    def collection_path(self):
        """collection_path"""
        # NOTE(review): Dataset.__init__ assigns an instance attribute with
        # this same name, which shadows this method on every instance.
        return self.collection_path
logger = logging.getLogger()


class ProfilingParser:
    """
    Base parser for one profiling file located by ``FILE_PATTERN``.
    """
    FILE_PATTERN = ""
    FILE_PATTERN_MSG = ""
    FILE_INFO = ""
    FILE_PATH = ""

    def __init__(self, path: str) -> None:
        self._path = path
        self._raw_data: List[List[str]] = []
        self._filename = ""

    @staticmethod
    def file_match_func(pattern):
        """Return a predicate that regex-searches ``pattern`` in a file name."""
        compiled = re.compile(pattern)  # compile once, not once per file name
        return lambda x: compiled.search(x)

    def parse_data(self) -> bool:
        """
        Parse the matching file.
        :return: true or false
        """
        return bool(self._parse_from_file())

    def _parse_from_file(self):
        """Locate files matching ``FILE_PATTERN`` and parse the last one."""
        file_list = get_file_path_from_directory(self._path, self.file_match_func(self.FILE_PATTERN))
        if not file_list:
            return False
        # use the last file
        file = file_list[-1]
        self.FILE_PATH = file
        if len(file_list) > 1:
            logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, file)
        return self.parse_from_file(file)

    @staticmethod
    def get_float(data) -> float:
        """
        get float or 0.0
        """
        try:
            return float(data)
        except (FloatingPointError, ValueError):
            return 0.0

    def parse_from_file(self, file):
        """
        Parse from file; subclasses override, the base parses nothing.
        """
        return False

    @staticmethod
    def _check_csv_file_format(csv_file_name: str, csv_content: List[List[str]]):
        """Return False (and log) when the csv content is empty."""
        if not csv_content:
            logger.error("%s is empty", csv_file_name)
            return False
        return True

    def _parse_csv(self, file, check_csv=True) -> bool:
        """Append the rows of csv ``file`` to ``_raw_data``.

        :param check_csv: also run ``_check_csv_file_format`` on the result
        :return: True when rows were read, False otherwise
        """
        logger.debug("Parse file %s", file)
        self._filename = os.path.splitext(os.path.basename(file))[0]
        with SafeOpen(file, encoding="utf-8") as csv_file:
            # presumably SafeOpen yields a falsy handle when the file
            # cannot be opened - check it before reading, not after.
            if not csv_file:
                return False
            try:
                self._raw_data.extend(csv.reader(csv_file))
            except (OSError, csv.Error) as error:
                logger.error("Read csv file failed : %s", error)
                return False

        if check_csv and not self._check_csv_file_format(file, self._raw_data):
            logger.error("Invalid csv file : %s", file)
            return False
        if not self._raw_data:
            logger.warning("File %s has no content", file)
            return False
        return True

    def _parse_json(self, file) -> bool:
        """Load json ``file`` into ``_raw_data``; False on read/parse error."""
        logger.debug("Parse file %s", file)
        self._filename = os.path.splitext(os.path.basename(file))[0]
        try:
            with open(file, encoding="utf-8") as json_file:
                self._raw_data = json.load(json_file)
        except (OSError, ValueError) as error:
            logger.error("Parse json file %s failed : %s", file, error)
            return False
        return True

    def get_raw_data(self):
        """
        get raw file name and data
        """
        return self._filename, self._raw_data

    @staticmethod
    def _get_csv_title(data: List, number=0, title_index=0):
        """
        Map column index to a normalized title.

        number = 0: titles go through ``format_excel_title``
        other: " " is replaced by "_"
        title_index: position of the title row, default 0
        """
        title_dict: Dict[int, str] = {}
        for idx, title in enumerate(data[title_index]):
            if number == 0:
                title_dict[idx] = format_excel_title(title)
            else:
                title_dict[idx] = title.replace(" ", "_")
        return title_dict

    @property
    def path(self):
        """
        path
        """
        return self._path


@singleton
class TimelineEventDataset(Dataset):
    """Events collected from a single ``trace_view.json`` timeline file."""

    def __init__(self, collection_path, data: dict, **kwargs) -> None:
        self._ops_with_task_type = {}
        self._ops_with_stack = {}
        self._torch_to_npu = {}
        self._acl_to_npu = set()
        self._aten: List[TimelineEvent] = []
        self._optimizer: List[TimelineEvent] = []
        # Always defined so the ``task_op_names`` property never raises
        # AttributeError, whatever the analysis mode is.
        self._task_op_names: List[str] = []
        self.timeline_dir = collection_path
        self.timeline_data_list = get_file_path_from_directory(collection_path,
                                                               lambda file: file.endswith("trace_view.json"))
        self.dataset_len = None
        self.analysis_mode = kwargs.get("analysis_mode")
        self.task_type = kwargs.get("task_type")
        self.cann_version = kwargs.get("cann_version")
        self.torch_version = kwargs.get("torch_version")

        if self.analysis_mode in ["fusion_ops", "all"]:
            logger.info("Load fusion operators database for cann version '%s' and torch version '%s'",
                        self.cann_version, self.torch_version)

        super().__init__(collection_path, data)

        if self.analysis_mode in ["op_stack", "all"]:
            # keys look like "<name>-<ts>"; keep the part before the first "-"
            self._task_op_names = list({event_key.split("-")[0] for event_key in self._ops_with_task_type})

        self._post_process()

    @property
    def ops_with_stack(self):
        return self._ops_with_stack

    @property
    def torch_to_npu(self):
        return self._torch_to_npu

    @property
    def acl_to_npu(self):
        return self._acl_to_npu

    @property
    def ops_with_task_type(self):
        return self._ops_with_task_type

    @property
    def task_op_names(self):
        return self._task_op_names

    @property
    def optimizer(self):
        return self._optimizer

    @property
    def aten(self):
        return self._aten

    def _parse(self):
        """Stream the timeline file; False when no trace_view.json exists."""
        if not self.timeline_data_list:
            logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir)
            return False

        if len(self.timeline_data_list) > 1:
            logger.warning("Please ensure only one trace_view.json in %s, "
                           "there will analysis first timeline profiling data.", self.timeline_dir)
            self.timeline_data_list = [self.timeline_data_list[0]]

        result = self.parse_data_with_generator(self._add_event)

        if not self.dataset_len:
            self.dataset_len = len(result)

        return True

    def parse_data_with_generator(self, func):
        """Call ``func(index=..., event=...)`` for every top-level array item.

        :return: list of the non-None results of ``func``; best effort, a
            parse error is logged and the results gathered so far are kept
        """
        result = []
        try:
            # ijson expects file objects opened in binary mode.
            with open(self.timeline_data_list[0], "rb") as f:
                for i, event in tqdm(enumerate(ijson.items(f, "item")),
                                     leave=False, ncols=100, desc="Building dataset for timeline analysis",
                                     total=self.dataset_len):
                    func_res = func(index=i, event=event)
                    if func_res is not None:
                        result.append(func_res)
        except Exception as e:
            logger.warning("Error %s while parsing file %s, continue to timeline analysis", e,
                           self.timeline_data_list[0])
        return result

    def _add_ops_with_task_type(self, event):
        key = f"{event.name}-{event.ts}"
        self._ops_with_task_type[key] = TimelineEvent(
            {
                const.TASK_TYPE: event.args.get(const.TASK_TYPE),
                "task_id": event.args.get("Task Id"),
                "tid": event.tid,
                "name": event.name,
                "ts": str(event.ts)
            }
        )

    def _add_ops_with_stack(self, event):
        self._ops_with_stack[str(event.ts)] = TimelineEvent({"name": event.name, "dataset_index": event.dataset_index})

    def _add_torch_to_npu(self, event):
        key = f"{event.ph}-{event.id}"
        self._torch_to_npu[key] = TimelineEvent({"tid": event.tid, "ts": str(event.ts)})

    def _add_acl_to_npu(self, event):
        # op with task type equals to ai_cpu which derived from acl_to_npu do not have stacks
        self._acl_to_npu.add(str(event.ts))

    def _add_optimizer(self, event: TimelineEvent):
        self._optimizer.append(TimelineEvent({"name": event.name, "dataset_index": event.dataset_index}))

    def _add_aten(self, event: TimelineEvent):
        self._aten.append(TimelineEvent({
            "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur
        }))

    def _add_event(self, index, event):
        """Tag ``event`` with its index and dispatch it by analysis mode."""
        event["dataset_index"] = index
        if not isinstance(event, TimelineEvent):
            event = TimelineEvent(event)

        if self.analysis_mode == "fusion_ops":
            self._add_event_for_fusion_ops(event)
        elif self.analysis_mode == "op_stack":
            self._add_event_for_op_stack(event)
        else:
            self._add_event_for_fusion_ops(event)
            self._add_event_for_op_stack(event)
        return True

    def _add_event_for_fusion_ops(self, event):
        lowered = event.name.lower()
        if lowered.startswith((f"{const.ATEN}{const.ATEN_SEP}", f"{const.NPU}{const.ATEN_SEP}")):
            self._add_aten(event)
            return

        if event.name.startswith(f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"):
            self._add_optimizer(event)
            return

    def _add_event_for_op_stack(self, event):
        # The first matching category wins.
        if event.name.lower() == const.TORCH_TO_NPU:
            self._add_torch_to_npu(event)
            return

        if event.args.get(const.CALL_STACKS):
            self._add_ops_with_stack(event)
            return

        if event.args.get(const.TASK_TYPE) and event.args.get(const.TASK_TYPE) in [const.AI_CORE, const.AI_CPU]:
            self._add_ops_with_task_type(event)
            return

        if event.name and event.ts and event.name == const.ACL_TO_NPU:
            self._add_acl_to_npu(event)
            return

    def _post_process(self):
        # eliminate sub aten operator of the first level aten operator by 'ts' and 'dur',
        # keep the first level aten operator contiguous
        formated_atens = []
        for aten_event in sorted(self._aten, key=lambda x: x.get("ts", -1)):
            if not formated_atens or not formated_atens[-1].ts_include(aten_event):
                formated_atens.append(aten_event)
        self._aten = formated_atens


@singleton
class HTMLRender:
    """Collects rendered template fragments and writes the final HTML page."""

    def __init__(self):
        self.html = ""
        self.render_list: Dict[str, List] = {}

    def render_html(self, template_dir: str = "templates", template_name: str = "main.html",
                    template_header=constant.DEFAULT_TEMPLATE_HEADER):
        """Render the main page from the fragments collected so far."""
        self.html = self.render_template("main", template_dir, template_name, render_list=self.render_list,
                                         template_header=template_header)

    def render_template(self, key: str, template_dir: str, template_name: str, **kwargs):
        """Render one template and store the result under ``key``.

        :param template_dir: absolute, or relative to this module's directory
        :param kwargs: variables passed to the template
        :return: the rendered HTML string
        """
        if not os.path.isabs(template_dir):
            template_dir = os.path.join(os.path.dirname(__file__), template_dir)

        env = Environment(loader=FileSystemLoader(template_dir),
                          autoescape=True)
        template = env.get_template(template_name)
        rendered_html = template.render(**kwargs)
        self.render_list.setdefault(key, []).append(rendered_html)
        return rendered_html

    def save_to_file(self, save_path: str):
        """Write the rendered page to ``save_path`` (must end with ``.html``)."""
        if not save_path.endswith(".html"):
            logger.error("Skip save html file because file name must endswith `.html`, "
                         "but got %s.", os.path.basename(save_path))
            return

        safe_write(self.html, save_path)
        logger.info("Save suggestion to %s.", save_path)
+

Affinity API Issues

+
+ The analysis results of following affinity APIs are based on runtime env + cann-{{ cann_version }} + and + torch-{{ torch_version }} + +
+ + {% if empty_stacks %} + Suggestion: + These APIs have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to + Ascend PyTorch Profiler to set + 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. + {% endif %} + + {% for api_name, stacks in result.items() %} + + {% if empty_stacks %} +
{{api_name|safe}}
+ + {% else %} + +
{{api_name|safe}}
+
+ +
+ {% for stack in stacks %} +
No.{{loop.index|safe}} code stack, called {{stack[1]|safe}} times
+ + {% endfor %} +
+
+ {% endif %} + + {% endfor %} + +
+ +
+
+{% endif %} diff --git a/profiler/advisor/display/html/templates/cluster_analysis.html b/profiler/advisor/display/html/templates/cluster_analysis.html new file mode 100644 index 0000000000000000000000000000000000000000..32379d56fcb87a78269612107d1b7634b722d8d8 --- /dev/null +++ b/profiler/advisor/display/html/templates/cluster_analysis.html @@ -0,0 +1,49 @@ +
+

{{title|safe}}

+
+
+ + {% if result.get("Description") %} +
Description
+ + {% endif %} + + {% if result.get("Suggestion") %} +
Suggestion
+ + {% endif %} + + {% if result.get("details") %} +
details
+
+ {% for item in result.get("details") %} + + + {% for header in item.get("headers") %} + + {% endfor %} + + {% for row in item.get("data") %} + + {% for element in row %} + {% if element is number %} + + {% else %} + + {% endif %} + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|round(2) }}{{ element }}
+ {% endfor %} +
+ {% endif %} + +
+ +
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/compute_analysis.html b/profiler/advisor/display/html/templates/compute_analysis.html new file mode 100644 index 0000000000000000000000000000000000000000..e1907c091b705969004bf709db24211c66c38107 --- /dev/null +++ b/profiler/advisor/display/html/templates/compute_analysis.html @@ -0,0 +1,29 @@ +
+

Abnormal Performance Operator

+
+ {{table.get("title")}} + + + + {% for header in table.get("headers") %} + + {% endfor %} + + {% for row in table.get("rows") %} + + {% for element in row %} + {% if element is number %} + + {% else %} + + {% endif %} + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|round(2) }}{{ element }}
+ {% if call_stack %} + call stack:
+ {{call_stack}} + {% endif %} +
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/fusion.html b/profiler/advisor/display/html/templates/fusion.html new file mode 100644 index 0000000000000000000000000000000000000000..605a9d748f7d4499a603efb87bc310fab9bc02f3 --- /dev/null +++ b/profiler/advisor/display/html/templates/fusion.html @@ -0,0 +1,47 @@ +{% if candidates|length > 0 %} +
+

Fusion Issues

+
+
+ {% for node in candidates %} +
{{node.op_pass|safe}}
+
+ + + + + + + + + + + +
StructureCountsElapsed Time(us)
{{ node.fusion_pattern|safe }}{{ node.counts|safe }}{{ node.total_duration|safe }}
+
+ {% for match in node.matches %} +
SubGraph {{ loop.index|safe }}
+
+ + + + + + + {% for node in match %} + + + + + + {% endfor %} +
OP NameOP TypeElapsed Time(us)
{{ node.op_name|safe }}{{ node.dtype|safe }}{{ node.duration|safe }}
+
+ {% endfor %} +
+
+ {% endfor %} +
+
+
+{% endif %} diff --git a/profiler/advisor/display/html/templates/main.html b/profiler/advisor/display/html/templates/main.html new file mode 100644 index 0000000000000000000000000000000000000000..251961d79dea6f1e5a5aa10d98abcc12b9fbbce9 --- /dev/null +++ b/profiler/advisor/display/html/templates/main.html @@ -0,0 +1,203 @@ + + + + + + + +
+

Performance Optimization Suggestions

+{% for key, renders in render_list.items() %} + {% if key == 'operator'%} +
+

computation

+
+ {% for render in renders %} + {{render|safe}} + {% endfor %} +
+
+ {% else %} +
+

{{ key }}

+
+ {% for render in renders %} + {{render|safe}} + {% endfor %} +
+
+ {% endif %} +{% endfor %} + +
+ + + + + \ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_ai_cpu.html b/profiler/advisor/display/html/templates/operator_ai_cpu.html new file mode 100644 index 0000000000000000000000000000000000000000..b3235a88022fc3973ae0098f543d94cc4b7fac25 --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_ai_cpu.html @@ -0,0 +1,61 @@ +
+

AICPU Issues

+
+ + + + + + + + + + + + + +
DescriptionSuggestionElapsed Time(us)Time Ratio
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}{{ format_result.task_duration|safe }}{{ format_result.record.statistics_item.task_duration_ratio|safe }}
+
+ {% for op_type, op_info in format_result.statistic %} +
{{ op_type|safe }}
+
+ + + + + + + + + + + +
Operator TypeCountsElapsed Time(us)
{{ op_info.summary.op_type|safe }}{{ op_info.summary.counts|safe }}{{ op_info.summary.total_duration|safe }}
+
+ {% for trace_stack, info in op_info.op_info_list %} +
+ {{ info.summary.op_type|safe }} | Input DType:({{info.op_info_list[0].input_data_types|safe}}) | Output DType:({{info.op_info_list[0].output_data_types|safe}}) | Counts:{{ info.summary.counts|safe}} | Elapsed Time(us):{{ + info.summary.total_duration|safe}} +
+
+ {% if info.op_info_list[0].suggestions|length > 0 %} +
+ {% for suggestion in info.op_info_list[0].suggestions %} +

+ Suggestion {{ loop.index|safe }}: {{suggestion|safe}} +

+ {% endfor %} +
+ {% else %} +

Suggestion 1: Modify code to avoid AICPU operator

+ {% endif %} +
+ {{ info.op_info_list[0].stack_info|safe }} +
+ {% endfor %} +
+
+ {% endfor %} +
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_block_dim.html b/profiler/advisor/display/html/templates/operator_block_dim.html new file mode 100644 index 0000000000000000000000000000000000000000..4e2c832f623a4c0a0f315ebdc2b7a97aeb1996a1 --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_block_dim.html @@ -0,0 +1,38 @@ +
+

Block Dim Issues

+
+ + + + + + + + + + + + + +
DescriptionSuggestionElapsed Time(us)Time Ratio
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}{{ format_result.task_duration|safe }}{{ format_result.record.statistics_item.task_duration_ratio|safe }}
+
+ {% for op_type, op_info in format_result.statistic %} +
{{ op_type|safe }}
+
+ + + + + + + + + + + +
Operator TypeCountsElapsed Time(us)
{{ op_info.summary.op_type|safe }}{{ op_info.summary.counts|safe }}{{ op_info.summary.total_duration|safe }}
+
+ {% endfor %} +
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_dynamic_shape.html b/profiler/advisor/display/html/templates/operator_dynamic_shape.html new file mode 100644 index 0000000000000000000000000000000000000000..59920b6c9ec276c9edddfd1906a31b41fb106e26 --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_dynamic_shape.html @@ -0,0 +1,15 @@ +
+

Operator Dynamic Shape Issues

+
+ + + + + + + + + +
DescriptionSuggestion
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_no_bound.html b/profiler/advisor/display/html/templates/operator_no_bound.html new file mode 100644 index 0000000000000000000000000000000000000000..cfbd20baad208216d2d9a1ee856702a163a6abfa --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_no_bound.html @@ -0,0 +1,38 @@ +
+

Operator No Bound Issues

+
+ + + + + + + + + + + + + +
DescriptionSuggestionElapsed Time(us)Time Ratio
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}{{ format_result.task_duration|safe }}{{ format_result.record.statistics_item.task_duration_ratio|safe }}
+
+ {% for op_type, op_info in format_result.statistic %} +
{{ op_type|safe }}
+
+ + + + + + + + + + + +
Operator TypeCountsElapsed Time(us)
{{ op_info.summary.op_type|safe }}{{ op_info.summary.counts|safe }}{{ op_info.summary.total_duration|safe }}
+
+ {% endfor %} +
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/overall_analysis.html b/profiler/advisor/display/html/templates/overall_analysis.html new file mode 100644 index 0000000000000000000000000000000000000000..4c859a7bf9cb10fb5d1f3a6e6ee9a12e5218c511 --- /dev/null +++ b/profiler/advisor/display/html/templates/overall_analysis.html @@ -0,0 +1,15 @@ +

Model Profiling Time Distribution

+ + + {% for header in headers %} + + {% endfor %} + + {% for row in rows %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element }}
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/timeline_analysis.html b/profiler/advisor/display/html/templates/timeline_analysis.html new file mode 100644 index 0000000000000000000000000000000000000000..b5ea89124277e05e7fdea63a34704df52bb322d4 --- /dev/null +++ b/profiler/advisor/display/html/templates/timeline_analysis.html @@ -0,0 +1,34 @@ +
+

{{title|safe}}

+
+
+
+ {% if result.get("img") %} +
+ Image +
+ {% endif %} + + {% if result.get("current") %} + + {% endif %} + + {% if result.get("bottlenect") %} + + {% endif %} + + {% if result.get("advice") %} + + {% endif %} + +
+
+
+
diff --git a/profiler/advisor/img/advisor_result.PNG b/profiler/advisor/img/advisor_result.PNG deleted file mode 100644 index a9652f4ca53ff142a5ebd1033075aad54f8f0297..0000000000000000000000000000000000000000 Binary files a/profiler/advisor/img/advisor_result.PNG and /dev/null differ diff --git a/profiler/advisor/img/jupyter_report.PNG b/profiler/advisor/img/jupyter_report.PNG deleted file mode 100644 index baa860a7893e1801337916aea37475ea69bbaf04..0000000000000000000000000000000000000000 Binary files a/profiler/advisor/img/jupyter_report.PNG and /dev/null differ diff --git a/profiler/advisor/interface/__init__.py b/profiler/advisor/interface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py new file mode 100644 index 0000000000000000000000000000000000000000..c0d04db8ebf4aafd219cc5528ca4cc5802354eb3 --- /dev/null +++ b/profiler/advisor/interface/interface.py @@ -0,0 +1,70 @@ +import os +from collections import OrderedDict +import sys +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), "cluster_analyse")) +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), "compare_tools")) + +from profiler.advisor.utils.utils import Timer +from profiler.advisor.analyzer.computation.profiling_analyzer import ProfilingAnalyzer +from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer +from profiler.advisor.analyzer.graph_fusion.graph_fusion_analyzer import FusionOPAnalyzer +from profiler.advisor.common.analyzer_scopes import SupportedScopes +from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer +from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer +from 
profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer + +class Interface: + supported_analyzer = { + "schedule": OrderedDict({ + SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer + }), + "computation": OrderedDict({ + SupportedScopes.PROFILING_OPERATOR_ANALYSIS: ProfilingAnalyzer, + SupportedScopes.GRAPH: FusionOPAnalyzer + }), + "communication": OrderedDict(), + "overall": OrderedDict({SupportedScopes.OVER_ALL: OverallSummaryAnalyzer}), + "dataloader": OrderedDict(), + "cluster": OrderedDict({ + SupportedScopes.SLOW_RANK: SlowRankAnalyzer, + SupportedScopes.SLOW_LINK: SlowLinkAnalyzer + }) + } + + all_dimension = list(supported_analyzer.keys()) + + def __init__(self, **kwargs): + self.collection_path = os.path.realpath(kwargs.get("profiling_path")) + + @staticmethod + def get_scope(dimension): + return list(Interface.supported_analyzer.get(dimension).keys()) + + @staticmethod + def get_analyzer(dimension, scope): + return Interface.supported_analyzer.get(dimension).get(scope) + + def get_result(self: any, dimension: str, scope: str, render_html=False, output_dict=True, **kwargs): + """ + Run the analyzer registered for (dimension, scope) and return its result, or dict(result.data) when output_dict is true; raises ValueError for an unsupported dimension or scope. 
+ """ + if dimension not in self.all_dimension: + raise ValueError(f"Error dimension {dimension}, supported dimensions are {self.all_dimension}") + + supported_scopes = self.get_scope(dimension) + if scope not in supported_scopes: + raise ValueError(f"Error scope {scope}, supported scopes are {supported_scopes}") + + analyzer = self.get_analyzer(dimension, scope)(collection_path=self.collection_path, **kwargs) + result = analyzer.optimize(**kwargs) + + if render_html: + if hasattr(analyzer, "html_render"): + analyzer.html_render.render_html() + analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + + return result if not output_dict else dict(result.data) + + +if __name__ == "__main__": + Interface() diff --git a/profiler/advisor/overall_perf_analysis.ipynb b/profiler/advisor/overall_perf_analysis.ipynb deleted file mode 100644 index 0d1d5fcf66761bf9eaa18a8524a4d8b3369693e1..0000000000000000000000000000000000000000 --- a/profiler/advisor/overall_perf_analysis.ipynb +++ /dev/null @@ -1,323 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 18, - "id": "initial_id", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T13:31:25.022339600Z", - "start_time": "2023-11-21T13:31:25.016155200Z" - } - }, - "outputs": [], - "source": [ - "from advisor_backend.interface import Interface\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "id": "57d17a21205c3c5e", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "source": [ - "# 总体性能拆解分析\n", - "### 1. 数据准备\n", - "我们当前支持Ascend PyTorch Profiler工具采集到的性能数据,您需要采集到的profiling_path路径,指定到*_ascend_pt。\n", - "\n", - "### 2. 拆解项说明\n", - "将整体耗时拆解为计算(Computing Time)、通信(Uncovered Communication Time)和空闲(Free Time)3个部分。\n", - "\n", - "1). 
Computing Time:指device在执行计算的耗时,若存在多条流并行计算的情况,对于耗时重叠部分只会计算一次\n", - "\n", - "计算耗时细分如下\n", - "\n", - " Cube Time:Cube算子耗时,该耗时占Computing Time的60%以上更能充分发挥NPU的算力\n", - " Vector Time:Vector算子耗时\n", - " Flash Attention Time(Forward):Flash Attention算子前向耗时\n", - " Flash Attention Time(Backward):Flash Attention算子反向耗时\n", - " Oter Time:AI CPU、DSA、TensorMove等其他算子耗时\n", - " \n", - "2). Uncovered Communication Time:未被计算掩盖的通信耗时,即总通信耗时减去通信与计算并行执行的耗时\n", - "\n", - "3). Free Time:指device既不在通信又不在计算的时间,空闲耗时 = 整体耗时 - 计算耗时 - 未被计算掩盖的通信耗时,该时间包含下发调度、SDMA时间(内存拷贝时间)。该耗时建议保持在10%以下\n", - "\n", - "空闲耗时细分如下\n", - "\n", - " SDMA Time:内存拷贝任务的耗时\n", - "\n", - "特别说明:通信(Uncovered Communication Time)和空闲(Free Time)耗时会受profiling性能膨胀的影响,以L0 + NPU采集的profiling为准。" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "36b7a24cc7ca5da2", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:38.379699800Z", - "start_time": "2023-11-21T12:53:38.363755900Z" - }, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [ - "# 数据准备 EDIT THE PROFILING DATA PATH\n", - "profiling_path = \"YOUR PATH\"\n", - "# 若您有GPU上采集到的性能数据,可将NPU的性能数据与GPU之间进行对比,分析性能差距。输入GPU的性能数据路径\n", - "gpu_profiling_path = \"\" #默认为空,若有则可填写\n", - "interface = Interface(profiling_path)" - ] - }, - { - "cell_type": "markdown", - "id": "cf832ac2e0dfa30f", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "source": [ - "## 1) 性能拆解分析" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "40aac93278dd6e34", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:41.815599700Z", - "start_time": "2023-11-21T12:53:41.783393700Z" - }, - "jupyter": { - "outputs_hidden": false - }, - "scrolled": false - }, - "outputs": [], - "source": [ - "print(\"Start performance analysis, please wait...\")\n", - "dataset = interface.get_data('overall', 'summary', base_collection_path=gpu_profiling_path)\n", - "data = dataset.get('data', {}) or {}\n", - "bottleneck = 
dataset.get('bottleneck', {}) or {}\n", - "print(\"Performance analysis is complete, you can edit the data to show what you want.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3f353506", - "metadata": {}, - "outputs": [], - "source": [ - "# 等待性能分析完成后再查看数据" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "cd3fceda-49f0-439f-9c54-cc31490fc99e", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The Model E2E Time is 9.352s.\n", - " --Computing Time is 6.273s\n", - " --Uncovered Communication Time is 0.464s\n", - " --Free Time is 2.615s\n" - ] - } - ], - "source": [ - "# 饼图展示计算、通信、空闲耗时的占比\n", - "overall_data = data.get(\"overall_data\", {})\n", - "plt.figure(figsize=(6, 6)) #设置饼图大小\n", - "plt.pie(x=overall_data.values(), labels=overall_data.keys(), explode=[0.01]*len(overall_data), autopct=\"%1.1f%%\")\n", - "plt.title(\"Model Profiling Time Distribution\")\n", - "plt.show()\n", - "print(bottleneck.get(\"overall_data\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6a1d82fb-a31b-49ab-a859-6d4bb898c512", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Computing Time Subtype Duration(s) Duration Ratio Kernel Number\n", - "0 Cube Time 3.956 63.06% 584\n", - "1 Vector Time 1.994 31.79% 5224\n", - "\n", - "Computing Time is 6.273s\n", - " if you want more detailed advice please go to compute_perf_analysis.ipynb\n" - ] - } - ], - "source": [ - "# 展示计算细分耗时,NPU开启level1或level2,aic_metric设为PipeUtilization\n", - "compute_time = data.get(\"computing\", {})\n", - "print(pd.DataFrame(compute_time))\n", - "print(\"\\n\", bottleneck.get(\"computing\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "35df1f13", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "\n" - ] - } - ], - "source": [ - "# 展示通信细分耗时,通信耗时受profiling性能膨胀的影响,以L0 + NPU采集的profiling为准\n", - "communication_time = data.get(\"communication\", {})\n", - "print(pd.DataFrame(communication_time))\n", - "print(\"\\n\", bottleneck.get(\"communication\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c5e6034e", - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Free Time Subtype Duration(s) Duration Ratio Kernel Number\n", - "0 SDMA Time 0.073 2.79% 852\n", - "\n", - "Free Time is 2.615s\n", - " if you want more detailed advice please go to timeline_perf_analysis.ipynb\n" - ] - } - ], - "source": [ - "# 展示空闲细分耗时,该耗时受profiling性能膨胀的影响,以L0 + NPU采集的profiling为准\n", - "free_time = data.get(\"free\", {})\n", - "print(pd.DataFrame(free_time))\n", - "print(\"\\n\", bottleneck.get(\"free\", \"\"))" - ] - }, - { - "cell_type": "markdown", - "id": "3511befaff513e8e", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "source": [ - "## 2)有对标的GPU数据" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2a1e617d2a117125", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------------------------------------------------------------------------------------------------------------+\n", - "| Model Profiling Time Distribution |\n", - "+-----+----------------+------------------+----------------+------------------------------+-----------+----------+\n", - "| | Cube Time(Num) | Vector Time(Num) | Computing Time | Uncovered Communication Time | Free Time | E2E Time |\n", - "+-----+----------------+------------------+----------------+------------------------------+-----------+----------+\n", - "| GPU | 3.149s(582) | 1.346s(3433) | 4.748s | 0.024s | 0.051s | 4.840s |\n", - "| NPU | 3.956s(584) | 1.994s(5224) | 6.273s | 0.464s | 2.615s | 9.352s |\n", - "+-----+----------------+------------------+----------------+------------------------------+-----------+----------+\n" - ] - } - ], - "source": [ - "# 有可对比的GPU数据情况下,展示比对结果\n", - "from prettytable import PrettyTable\n", - "comparison_result = data.get(\"comparison_result\", {})\n", - "if not comparison_result:\n", - " print(\"Invalid comparison data, you need to set the 
gpu_profiling_path.\")\n", - "if comparison_result:\n", - " for sheet_name, data in comparison_result.items():\n", - " if data.get(\"rows\", []):\n", - " table = PrettyTable()\n", - " table.title = sheet_name\n", - " table.field_names = data.get(\"headers\", [])\n", - " for row in data.get(\"rows\", []):\n", - " table.add_row(row)\n", - " print(table)\n", - " print(bottleneck.get(\"comparison_result\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d968851", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/profiler/advisor/result/__init__.py b/profiler/advisor/result/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/result/item.py b/profiler/advisor/result/item.py new file mode 100644 index 0000000000000000000000000000000000000000..500d32c9875f35e1094f3a38491a2efe4d630130 --- /dev/null +++ b/profiler/advisor/result/item.py @@ -0,0 +1,61 @@ +class OptimizeItem: + + def __init__(self, problem, description, suggestion): + self.problem = problem + self.description = description + self.suggestion = suggestion + + @property + def data(self): + format_suggestions = [] + for index, suggesion in enumerate(self.suggestion): + format_suggestions.append(f"{index + 1}. 
{suggesion}") + suggestion_str = "\n".join(format_suggestions) + return [self.problem, self.description, suggestion_str] + + @property + def headers(self): + return ["problem", "description", "suggestion"] + + +class StatisticsItem: + def __init__(self, total_task_duration, task_duration, count, income=None): + self.total_task_duration = total_task_duration + self.task_duration = task_duration + self.count = count + self.income = income + if not isinstance(task_duration, str): + self.task_duration_ratio = round(task_duration / total_task_duration, 4) if total_task_duration != 0 else 0 + else: + self.task_duration_ratio = "" + + @property + def data(self): + + def _cal_ratio(divisor, dividend): + if divisor and dividend != 0: + return divisor, round(divisor / dividend, 4) + else: + return "", "" + + income, income_ratio = _cal_ratio(self.income, self.total_task_duration) + return [self.count, self.total_task_duration, self.task_duration_ratio, income, income_ratio] + + @property + def headers(self): + return ["problem count", "total_time(us)", "time ratio", "income(us)", "income ratio"] + + +class OptimizeRecord: + + def __init__(self, optimization_item, statistics_item=None) -> None: + self.optimization_item = optimization_item + self.statistics_item = statistics_item or StatisticsItem("", "", "") + + @property + def data(self): + return self.optimization_item.data + self.statistics_item.data + + @property + def headers(self): + return self.optimization_item.headers + self.statistics_item.headers diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py new file mode 100644 index 0000000000000000000000000000000000000000..06a515e7836dec4ca21ce7fa8fe213778c0d8c61 --- /dev/null +++ b/profiler/advisor/result/result.py @@ -0,0 +1,206 @@ +import json +import os +import stat +from textwrap import fill +from collections import OrderedDict + +import click +import xlsxwriter +from prettytable import ALL, PrettyTable + +from profiler.advisor.common 
import constant as const +from profiler.advisor.utils.utils import singleton, logger +from profiler.advisor.config.config import Config + + +class ResultWriter: + def __init__(self, result_path=None): + self.result_path = result_path + self.workbook = xlsxwriter.Workbook(result_path) + + self.header_format = None + self.data_cell_format = None + self._init_header_format() + self._init_data_cell_format() + + def _init_header_format(self): + self.header_format = self.workbook.add_format({ + "bold": True, + "color": "#FFFFFF", + "bg_color": "#187498", + "align": "center", + "border": 1, + "font_name": "Arial", + }) + + def _init_data_cell_format(self): + self.data_cell_format = self.workbook.add_format({ + "bold": False, + "align": "left", + "valign": "top", + "border": 1, + "font_name": "Arial", + 'text_wrap': True + }) + + def add_data(self, sheet_name, headers, data_list): + sheet = self.workbook.add_worksheet(sheet_name) + + if headers: + for col_index, header in enumerate(headers): + sheet.write(0, col_index, header, self.header_format) + + if data_list: + for i, row_data in enumerate(data_list): + row_index = i + 1 + for col_index, value in enumerate(row_data): + sheet.write(row_index, col_index, value, self.data_cell_format) + + sheet.autofit() + + def save(self): + try: + self.workbook.close() + except Exception as e: + logger.error("Failed to save analysis results, reason is %s", e) + + +@singleton +class SheetRecoder: + + def __init__(self): + self._sheet_data = OrderedDict() + + @property + def sheet_data(self): + return self._sheet_data + + def _init_sheet_name(self, sheet_name): + if sheet_name not in self._sheet_data: + self._sheet_data[sheet_name] = {} + + def add_headers(self, sheet_name, headers): + self._init_sheet_name(sheet_name) + + if self._sheet_data[sheet_name].get("headers") is None: + self._sheet_data[sheet_name]["headers"] = headers + + def add_data(self, sheet_name, data): + self._init_sheet_name(sheet_name) + + if not 
isinstance(self._sheet_data[sheet_name].get("data"), list): + self._sheet_data[sheet_name]["data"] = [] + if data not in self._sheet_data[sheet_name]["data"]: + self._sheet_data[sheet_name]["data"].append(data) + + +@singleton +class OptimizeResult: + + def __init__(self): + self.result_writer = ResultWriter(Config().analysis_result_file) + self.sheet_recorder = SheetRecoder() + self.page_dict = False + self._tune_op_list = [] + + @property + def data(self): + return self.sheet_recorder.sheet_data + + def add_tune_op_list(self, tune_op_list) -> None: + """ + add tune op name to tune op list + :param tune_op_list: tune op name list to be added + :return: None + """ + for op_name in tune_op_list: + if op_name not in self._tune_op_list: + self._tune_op_list.append(op_name) + + def add(self, overview_item): + sheet_name = "problems" + + headers = overview_item.headers + data = overview_item.data + self.sheet_recorder.add_headers(sheet_name, headers) + self.sheet_recorder.add_data(sheet_name, data) + + TerminalResult().add(overview_item.optimization_item.data) + self.page_dict = True + + def add_detail(self, sheet_name, headers=None, detail=None): + if headers: + self.sheet_recorder.add_headers(sheet_name, headers) + if detail: + self.sheet_recorder.add_data(sheet_name, detail) + self.page_dict = True + + def show(self): + for sheet_name, sheet_data in self.sheet_recorder.sheet_data.items(): + self.result_writer.add_data(sheet_name, sheet_data.get("headers"), sheet_data.get("data")) + self.result_writer.save() + self._save_op_file_list() + TerminalResult().print() + + def _save_op_file_list(self) -> None: + if not self._tune_op_list: + return + tune_op_dict = {"tune_ops_name": self._tune_op_list} + tune_ops_file = Config().tune_ops_file + try: + + with os.fdopen(os.open(tune_ops_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR), + 'w', encoding="utf-8") as op_tune_file: + json.dump(tune_op_dict, op_tune_file) + except OSError as error: + 
logger.error("Dump op_list to %s failed, %s", tune_ops_file, error) + return + logger.info("Save tune op name list to %s", tune_ops_file) + + +@singleton +class TerminalResult: + """ + Result output to screen + """ + + def __init__(self): + self.width, _ = self.get_terminal_size() + if self.width is None: + self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"]) + else: + self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"], + max_table_width=max(self.width - 20, 180)) + self.table.hrules = ALL + self.result_list = [] + + @staticmethod + def get_terminal_size(): + try: + width, height = os.get_terminal_size() + except OSError: + width, height = None, None + return width, height + + def add(self, result_str): + """ + add a result str + """ + self.result_list.append(result_str) + + def print(self): + """ + print screen result with format table + """ + table_row_cnt = 0 + for result in self.result_list: + table_row_cnt += 1 + result[1] = fill(result[1], width=40) + result[2] = fill(result[2], width=40) + self.table.add_row([table_row_cnt] + result) + self.table.align = "l" + + if table_row_cnt > 0: + click.echo(self.table) + else: + click.echo(click.style(const.SKIP_ANALYZE_PROMPT, fg='red')) diff --git a/profiler/advisor/rules/__init__.py b/profiler/advisor/rules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/rules/aicpu_rules.yaml b/profiler/advisor/rules/aicpu_rules.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9313700c800d337eaea18f5a634521710f09e465 --- /dev/null +++ b/profiler/advisor/rules/aicpu_rules.yaml @@ -0,0 +1,103 @@ +DataTypeSuggeation: &DataTypeSuggeation "Data type {} in {} operator may cause AICPU issues, Try to convert to {} if possible." 
+AICPU_DOC_URL: &AICPU_DOC_URL "https://support.huaweicloud.com/bestpractice-modelarts/modelarts_10_2517.html" + +CommonChecker: + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ tensorequal ] + input: [ float, float32, float16, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ equal ] + input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ nonzero ] + input: [ float16, bool, dt_bf16 ] + output: [ int64 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [7.0.RC1] + op_type: [ mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.0, 7.0.0] + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, 
complex128 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.0, 7.0.0] + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.0, 7.0.0] + op_type: [ tensorequal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.0, 7.0.0] + op_type: [ equal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: [8.0.0, 7.0.0] + op_type: [ mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + suggestion: *DataTypeSuggeation + +ExampleGuideChecker: + - IndexPutChecker: + op_type: [index] + url: *AICPU_DOC_URL + suggestion: 'Please modify source code followed by this LINK, try to replace index operator with equivalent operator.' + + - NonzeroChecker: + op_type: [ indexput, indexputv2 ] + url: *AICPU_DOC_URL + suggestion: 'Please modify source code followed by this LINK, try to replace indexput operator with equivalent operator.' + + - CastChecker: + op_type: [ argmin ] + url: *AICPU_DOC_URL + suggestion: 'Please update your cann-tookit to at least 7.0.RC1 version by this LINK.' + + - CastChecker: + op_type: [ nonzero ] + url: *AICPU_DOC_URL + suggestion: 'Please modify source code followed by this LINK, try to replace nonzero operator with equivalent operator.' 
\ No newline at end of file diff --git a/profiler/advisor/rules/op_fusion_pass.yaml b/profiler/advisor/rules/op_fusion_pass.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ff69a578285ba15d075f2acbb852499d56021a2 --- /dev/null +++ b/profiler/advisor/rules/op_fusion_pass.yaml @@ -0,0 +1,491 @@ +Elementwise: &Elementwise [ Relu, Pow, Add, Sub, Mul, Div, Abs, Ceil, Log, Sqrt, Exp, LeakyRelu ] + +GraphFusion: + - FlashAttentionFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ Mul ] + - node_3: [ Softmax, SoftmaxV2 ] + - node_4: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + + - FlashAttentionFusionPass_V2: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ Mul ] + - node_3: [ TransData ] + - node_4: [ Softmax, SoftmaxV2 ] + - node_5: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + - [ node_4, node_5 ] + + - BMMStridedSliceDGeluFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [StridedSliceD] + - node_3: [Relu] + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - BMMConfusionTransposeDFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ ConfusionTransposeD ] + - node_3: [ Relu ] + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - BMMConfusionTransposeDFusionPass_V2: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ ConfusionTransposeD ] + edges: + - [ node_1, node_2 ] + + - Conv2DAddGroupNormFusionPass: + version: 0 + struct: [ Conv2D, Add, GroupNorm ] + + - RMSnormAddFusionPass: + version: 0 + struct: [ RMSnorm, Add ] + + - ConvToFullyConnectionFusionPass: + version: 0 + struct: [ Conv ] + + - 
ZConcatv2dFusionPass: + version: 0 + struct: [ ConcatV2d, ConcatV2d ] + + - BatchMatMulReduceMeanFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ Add ] + - node_3: [ Relu ] + - node_4: [ ReduceMean ] + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + + - PadDepthwiseConv2dFusionPass: + version: 0 + struct: [ PadD, DepthwiseConv2D ] + + - ConvBatchnormFusionPass: + version: 1 + nodes: + - node_1: [ Conv2d, Conv3d, DepthwiseConv2d ] + - node_2: [ Batchnorm ] + + edges: + - [ node_1, node_2 ] + + - AConv2dMulFusion: + version: 1 + nodes: + - node_1: [ Conv2d, Conv3d ] + - node_2: [ Mul ] + + edges: + - [ node_1, node_2 ] + + - TBEConvAddFusion: + version: 1 + nodes: + - node_1: [ Conv2d, Conv3d ] + - node_2: [ Add ] + + edges: + - [ node_1, node_2 ] + + - ZBNupdateReluV2Conv2DBNreducePass: + version: 0 + struct: [ BNTrainingUpdate, ReluV2, Conv2D, BNTrainingReduce ] + + - ASplitConv2dConcatPass: + version: 1 + nodes: + - node_1: [ MatMul, MatMulV2, BatchMatMul, BatchMatMulV2 ] + - node_2: [ Cast ] + + edges: + - [ node_1, node_2 ] + + - MatMulBiasAddFusionPass: + version: 1 + nodes: + - node_1: [ MatMul, MatMulV2, BatchMatMul, BatchMatMulV2 ] + - node_2: [ BiasAdd, Add ] + + edges: + - [ node_1, node_2 ] + + - Conv2DbpInputBiasAddFusionPass: + version: 0 + struct: [ Conv2DBackpropInput, BiasAdd ] + + - BatchMatmulV2ReduceFusionPass: + version: 0 + struct: [ BatchMatMulV2, ReduceSumD ] + + - BatchMatmulV2ReduceFusionPass_V2: + version: 0 + struct: [ BatchMatMulV2, Cast, ReduceSumD ] + + - Conv3DbpInputBiasAddFusionPass: + version: 0 + struct: [ Conv3DBackpropInputD, BiasAdd ] + + - AFullyConnectionReshapePass: + version: 0 + struct: [ FullyConnection, Reshape ] + + - GemmTransFusionPass: + version: 0 + struct: [ Transpose, Gemm ] + + - Resnet50DbnDwFusionPass: + version: 0 + struct: [ BNTrainingReduceGrad, Conv2DBackpropFilterD ] + + - CastReluCastFusionPass: + version: 0 + struct: 
[ Cast, Relu, Cast ] + + - PadConv2dFusionPass: + version: 1 + nodes: + - node_1: [ PadD, PadDV3 ] + - node_2: [ Conv2D ] + + edges: + - [ node_1, node_2 ] + + - Conv2DTransposeBatchnormFusionPass: + version: 1 + nodes: + - node_1: [ Conv2dTranspose ] + - node_2: [ BatchNorm, BNInference ] + + edges: + - [ node_1, node_2 ] + + - AvgPoolV2GradFusionPass: + version: 0 + struct: [ AvgPooV2lGrad ] + + - DropOutDoMaskFusionPass: + version: 0 + struct: [ DropOutDoMaskV3D ] + + - ConvCastFusionPass: + version: 0 + struct: [ Conv2D, Cast ] + + - ConvCastFusionPass_V2: + version: 0 + struct: [ Conv2D, TransData, Cast ] + + - StridedSliceConcatFusionPass: + version: 1 + nodes: + - node_1: [ StridedSliceD ] + - node_2: [ StridedSliceD ] + - node_3: [ ConcatD ] + + edges: + - [ node_1, node_3 ] + - [ node_2, node_3 ] + + - ConvCastFusionPass: + version: 0 + struct: [ SplitV ] + + - AInplaceAddFusionPass: + version: 0 + struct: [ InplaceAdd ] + + - AInplaceSubFusionPass: + version: 0 + struct: [ InplaceSub ] + + - AInplaceUpdateFusionPass: + version: 0 + struct: [ InplaceUpdate ] + +UBFusion: + - TbeConv3dElemwisePass: + version: 1 + nodes: + - node_1: [ Conv3D ] + - node_2: *Elementwise + edges: + - [ node_1, node_2 ] + + - TbeConv3dDxElemwisePass: + version: 0 + struct: [ Conv3dBackpropInput, AddN, LeakyReluGrad ] + + - TbeConv3dDxElemwisePass_V2: + version: 0 + struct: [ Conv3dBackpropInput, LeakyReluGrad ] + + - MatMulDropoutDoMaskV3dFusionPass: + version: 0 + struct: [ MatMul, Dropout_do_mask_v3_d, Add ] + + - BatchMatMulDropoutDoMaskV3dFusionPass: + version: 0 + struct: [ BatchMatMul, Dropout_do_mask_v3_d, Add ] + + - MatmulReduceSumUbFusion: + version: 0 + struct: [ BatchMatMul, ReduceSum ] + + - TbeBatchMatMulElementWiseFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMul, GEMM ] + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - ATbeMatMulElemwiseFusionPass: + version: 1 + nodes: + - node_1: [ MatMul, GEMM ] + - node_2: *Elementwise + + edges: + - 
[ node_1, node_2 ] + + - MatmulConfusiontransposeUbFusion: + version: 0 + struct: [ MatMul, matmul_transpose ] + + - TbeFullyconnectionElemwiseDequantFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMul, MatMul, FullyConnection ] + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - BatchMatmulConfusiontransposeUbFusion: + version: 0 + struct: [ BatchMatMul, batchmatmul_transpose ] + + - TbeConvSigmoidMulQuantFusionPass: + version: 1 + nodes: + - node_1: [ Conv ] + - node_2: [ Sigmoid ] + - node_3: [ Mul ] + - node_4: [ Quant ] + + edges: + - [ node_1, node_2 ] + - [ node_1, node_3 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + + - TbeConv2DReluv2Pass: + version: 0 + struct: [ Conv2D, ReluV2 ] + + - TbeConvDoubleInFusionPass: + version: 1 + nodes: + - node_1: [ Conv2D ] + - node_2: *Elementwise + - node_3: *Elementwise + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - TbeConv2dAddClipMulDivFusionPass: + version: 0 + struct: [ Conv2D, Add, Clip, Mul, Div ] + + - TbeConv2dAddClipMulDivFusionPass_V2: + version: 0 + struct: [ Conv2D, Add, Clip, Mul ] + + - TbeConv2dAddRelu6MulMulFusionPass: + version: 1 + nodes: + - node_1: [ Conv2D, DepthwiseConv2D ] + - node_2: [ Add ] + - node_3: [ Relu6 ] + - node_4: [ Mul ] + - node_5: [ Mul ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + - [ node_4, node_5 ] + + - ConvClipByValueFusionPass: + version: 1 + nodes: + - node_1: [ Conv2D ] + - node_2: *Elementwise + edges: + - [ node_1, node_2 ] + + - TbeAippConvReluMaxpoolingFusion: + version: 1 + nodes: + - node_1: [ Conv2D ] + - node_2: *Elementwise + - node_3: [ MaxPool, MaxPoolv3 ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - TbeReduceElemwiseFusionPass: + version: 1 + nodes: + - node_1: *Elementwise + - node_2: [ CommReduce ] + edges: + - [ node_1, node_2 ] + + - TbeReadSelectEltwiseFusionPass: + version: 1 + nodes: + - node_1: [ ReadSelect ] + - node_2: *Elementwise + + edges: + - [ node_1, 
node_2 ] + + - TbeEltwiseWriteSelectFusionPass: + version: 1 + nodes: + - node_1: *Elementwise + - node_2: [ write_select ] + + edges: + - [ node_1, node_2 ] + + - TbeEltwiseFusionPass: + version: 1 + nodes: + - node_1: *Elementwise + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - TbeConvBnreduceFusionPass: + version: 0 + struct: [ Convolution, bn_reduce ] + + - TbeBnupdateEltwiseFusionPass: + version: 1 + nodes: + - node_1: [ bn_update ] + - node_2: *Elementwise + edges: + - [ node_1, node_2 ] + + - TbeConv2DBackpropElemwiseFusionPass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ Add, ReluGradV2 ] + + edges: + - [ node_1, node_2 ] + + - TbeDxElemwisePass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ LeakyRelu, Prelu ] + + edges: + - [ node_1, node_2 ] + + - TbeConv2dBackpropRequantFusionPass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ AscendRequant ] + + edges: + - [ node_1, node_2 ] + + - TbeDwTransdataFusionPass: + version: 1 + nodes: + - node_1: [ Transdate ] + - node_2: [ Transdate ] + - node_3: [ Conv2DBackpropFilter ] + + edges: + - [ node_1, node_3 ] + - [ node_2, node_3 ] + + - TbeDxTransdataFusionPass: + version: 1 + nodes: + - node_1: [ Transdate ] + - node_2: [ Transdate ] + - node_3: [ Conv2DBackpropInput ] + + edges: + - [ node_1, node_3 ] + - [ node_2, node_3 ] + + - TbeEltwiseCastFusionPass: + version: 1 + nodes: + - node_1: [ Relu, Add, Mul, Sqrt ] + - node_2: [ Cast ] + + edges: + - [ node_1, node_2 ] + + - TbeEltwiseCastFusionPass_V2: + version: 1 + nodes: + - node_1: [ Cast ] + - node_2: [ Relu, Add, Mul, Sqrt ] + + + edges: + - [ node_1, node_2 ] + + - TbeConv2DBackpropDequantFusionPass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ AscendDequant ] + + + edges: + - [ node_1, node_2 ] diff 
--git a/profiler/advisor/rules/timeline_fusion_ops.yaml b/profiler/advisor/rules/timeline_fusion_ops.yaml new file mode 100644 index 0000000000000000000000000000000000000000..764dd5d50fe3e5c526ccfc762e28c491e76b8ea7 --- /dev/null +++ b/profiler/advisor/rules/timeline_fusion_ops.yaml @@ -0,0 +1,59 @@ +- cann_version: 6.3.RC2 + torch_version: 1.11.0 + unique_id: 0 + operator_rules: + aten: + add: + torch_npu.npu_confusion_transpose: ["(permute|transpose)-(contiguous){0,1}-(reshape|view)", + "(reshape|view)-(contiguous){0,1}-(permute|transpose)"] + torch_npu.fast_gelu: [gelu] + torch_npu.npu_linear: [linear] + torch_npu.npu_mish: [mish] + torch_npu.contrib.module.Mish: [mish] + torch_npu.npu_scaled_masked_softmax: [ "softmax-(mul){0,1}-(masked_fill_|add)" ] + torch_npu.npu_silu: [ silu, mul-sigmoid, sigmoid-mul ] + torch_npu.contrib.module.SiLU: [ silu, mul-sigmoid, sigmoid-mul ] + optimizer.clip_grad_norm_fused_: [add-reciprocal-mul] + Optimizer: + add: + torch_npu.optim.NpuFusedAdamW: [AdamW.step] + torch_npu.optim.NpuFusedSGD: [SGD.step] + torch_npu.optim.NpuFusedAdadelta: [Adadelta.step] + torch_npu.optim.NpuFusedLamb: [Lamb.step] + torch_npu.optim.NpuFusedAdamP: [AdamP.step] + torch_npu.optim.NpuFusedBertAdam: [BertAdam.step] + torch_npu.optim.NpuFusedRMSprop: [RMSprop.step] + torch_npu.optim.NpuFusedRMSpropTF: [RMSpropTF.step] + torch_npu.optim.NpuFusedAdam: [Adam.step] + + +- cann_version: 7.0.RC1 + torch_version: [1.11.0,2.1.0] + unique_id: 1 + inherit_unique_id: 0 + operator_rules: + aten: + add: + torch_npu.npu_fusion_attention: ["matmul-(add){0,1}-(mul){0,1}-(masked_fill_|add){0,1}-softmax-(dropout){0,1}-matmul"] + torch_npu.npu_rotary_mul: ["(chunk|slice)-neg-cat-(mul){0,2}-add"] + +- cann_version: 7.0.0 + torch_version: [1.11.0, 2.1.0] + unique_id: 2 + inherit_unique_id: 1 + operator_rules: + aten: + add: + torch_npu.npu_rms_norm: ["(pow){0,1}-(mean){0,1}-(add){0,1}-rsqrt-mul-(type_as){0,1}"] + torch_npu.npu_swiglu: [ "(slice|chunk)-silu-mul", 
"(slice|chunk)-mul-silu", + "(slice|chunk)-sigmoid-mul-mul", "(slice|chunk)-mul-sigmoid-mul", + "(slice|chunk)-mul-mul-sigmoid" ] + +- cann_version: 8.0.0 + torch_version: [1.11.0, 2.1.0] + unique_id: 3 + inherit_unique_id: 2 + operator_rules: + aten: + add: + torch_npu.npu_geglu: ["(slice|chunk)-gelu-mul", "(slice|chunk)-mul-gelu"] \ No newline at end of file diff --git a/profiler/advisor/timeline_perf_analysis.ipynb b/profiler/advisor/timeline_perf_analysis.ipynb deleted file mode 100644 index 34233db6fe10f6cec0e708e3d829a6be73436d6b..0000000000000000000000000000000000000000 --- a/profiler/advisor/timeline_perf_analysis.ipynb +++ /dev/null @@ -1,163 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from advisor_backend.interface import Interface\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Timeline调优分析\n", - "\n", - "## 1. Timeline分析的数据准备\n", - "我们当前支持Ascend PyTorch Profiler方式采集后的ascend_pt目录,并支持单独分析ascend_pt/ASCEND_PROFILER_OUTPUT目录下的trace_view.json文件。\n", - "\n", - "## 2. 
Timeline分析解决的问题\n", - "当前支持的功能:\n", - "1) 识别当前可选择的NPU亲和优化器。\n", - "2) 分析算子调度瓶颈。" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# EDIT THE PROFILING DATA PATH\n", - "timeline_path = \"[YOUR PATH]\"\n", - "interface = Interface(timeline_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1)亲和优化器识别" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: [YOUR PATH]\n", - "['Optimizer.step#AdamW.step']\n", - "You can choose torch_npu.optim.NpuFusedAdamW to replace the current Optimizer: Optimizer.step#AdamW.step.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('timeline', 'optimizer')\n", - "# 打印当前使用的优化器\n", - "data = dataset.get('data')\n", - "print(data)\n", - "\n", - "# 如果使用了原生优化器,则打印优化建议\n", - "advice = dataset.get('advice')\n", - "print(advice)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2)算子调度分析\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: [YOUR PATH]\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NPU Utilication: 71.33%, NPU Free Utilization: 28.67%.\n", - "Device synchronize 5 times, try to reduce synchronization statements to alleviate the bottleneck of operator delivery.\n", - "There are too many small operators, you can increase the batch size appropriately.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('timeline', 'op_schedule')\n", - "data = dataset.get(\"data\")\n", - "import math\n", - "op_dur = [math.log(i + 1) for i in data[0]]\n", - "op_free = [math.log(i + 1) for i in data[1]]\n", - "x = [i for i in range(len(op_dur))]\n", - "fig = plt.figure(figsize=(15, 8))\n", - "plt.plot(x, op_dur, c='r', ls='-', label='op duration')\n", - "plt.plot(x, op_free, c='g', ls='-', label='op wait')\n", - "\n", - "plt.xlabel('operator')\n", - "plt.ylabel('log(time + 1)')\n", - "plt.title('Op Schedule')\n", - "plt.legend(loc='upper right')\n", - "plt.show()\n", - "\n", - "print(dataset.get('bottleneck'))\n", - "print(dataset.get('advice'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/profiler/advisor/utils/__init__.py b/profiler/advisor/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/advisor/utils/log.py b/profiler/advisor/utils/log.py new file mode 100644 index 
0000000000000000000000000000000000000000..1ca111592f02b41043d8c8ed832c09a8cd3b3442 --- /dev/null +++ b/profiler/advisor/utils/log.py @@ -0,0 +1,63 @@ +""" +log module +""" +import logging +import os + +from profiler.advisor.common import constant as const + + +def get_log_level(): + log_level = os.getenv(const.ADVISOR_LOG_LEVEL, const.DEFAULT_LOG_LEVEL).upper() + if not hasattr(logging, log_level): + raise AttributeError(f"module 'logging' has no attribute '{log_level}', " + f"supported log level: {', '.join(const.SUPPORTED_LOG_LEVEL)}") + return log_level + + +def init_logger(ctx, param, debug_mode) -> logging.Logger: + logging.logThreads = False + logging.logMultiprocessing = False + logging.logProcesses = False + + class LevelFilter(logging.Filter): + """ + level filter, filer only log with level out + """ + + # pylint:disable=too-few-public-methods + def filter(self, record): + if record.levelno == 60: + return False + return True + + console_log_level = getattr(logging, get_log_level()) + console_handle = logging.StreamHandler() + console_handle.setLevel(console_log_level) + console_handle.addFilter(LevelFilter()) + if debug_mode and not ctx.resilient_parsing: + formatter = logging.Formatter(fmt="[%(asctime)s][%(levelname)s][%(filename)s L%(lineno)s] %(message)s", + datefmt='%Y-%m-%d,%H:%M:%S') + else: + formatter = logging.Formatter(fmt="[%(asctime)s][%(levelname)s] %(message)s", + datefmt='%Y-%m-%d,%H:%M:%S') + console_handle.setFormatter(formatter) + + # add log level out + logging.addLevelName(60, 'OUT') + logger = logging.getLogger() + setattr(logger, 'out', lambda *args: logger.log(60, *args)) + output_handle = logging.StreamHandler() + output_handle.setLevel("OUT") + formatter = logging.Formatter("%(message)s") + output_handle.setFormatter(formatter) + + logger.setLevel("DEBUG") + logger.handlers = [] + if not logger.handlers: + logger.addHandler(console_handle) + logger.addHandler(output_handle) + else: + logger.info(logger.handlers) + 
logger.debug("The logger of analysis have initialized successfully.") + return logger diff --git a/profiler/advisor/utils/tools.py b/profiler/advisor/utils/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..1189675e8319d673bf78a6cadc03c09ce00f4604 --- /dev/null +++ b/profiler/advisor/utils/tools.py @@ -0,0 +1,76 @@ +from functools import partial + +import click + +CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help']) + + +class ClickAliasedGroup(click.Group): + """ + Alias click command + """ + FORMAT_LIMIT_LEN = 6 + + def __init__(self, *args, **kwargs): + super(ClickAliasedGroup, self).__init__(*args, **kwargs) + self._alias_dict = {} + self._commands = {} + + def command(self, *args, **kwargs): + alias = kwargs.pop('alias', None) + decorator = super(ClickAliasedGroup, self).command(*args, **kwargs) + if not alias: + return decorator + + return partial(self._decorator_warpper, decorator, alias) + + def group(self, *args, **kwargs): + alias = kwargs.pop('alias', None) + decorator = super(ClickAliasedGroup, self).group(*args, **kwargs) + if not alias: + return decorator + + return partial(self._decorator_warpper, decorator, alias) + + def _decorator_warpper(self, decorator, alias, func=None): + cmd = decorator(func) + self._commands[cmd.name] = alias + self._alias_dict[alias] = cmd.name + return cmd + + def resolve_alias(self, cmd_name): + if cmd_name in self._alias_dict.keys(): + return self._alias_dict[cmd_name] + return cmd_name + + def get_command(self, ctx, cmd_name): + cmd_name = self.resolve_alias(cmd_name) + command = super(ClickAliasedGroup, self).get_command(ctx, cmd_name) + return command if command else None + + def format_commands(self, ctx, formatter): + rows = [] + sub_commands = self.list_commands(ctx) + max_len = 0 + if len(sub_commands) > 0: + max_len = max(len(cmd) for cmd in sub_commands) + + limit = formatter.width - self.FORMAT_LIMIT_LEN - max_len + for sub_command in sub_commands: + cmd = 
self.get_command(ctx, sub_command) + if cmd is None: + continue + if hasattr(cmd, 'hidden') and cmd.hidden: + continue + if sub_command in self._commands: + alias = self._commands[sub_command] + sub_command = f'{sub_command}, {alias}' + if click.__version__[0] < '7': + cmd_help = cmd.short_help or '' + else: + cmd_help = cmd.get_short_help_str(limit) + rows.append((sub_command, cmd_help)) + + if rows: + with formatter.section('Commands'): + formatter.write_dl(rows) diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..84419b67087f8a434361f77479899d10ef91b9f5 --- /dev/null +++ b/profiler/advisor/utils/utils.py @@ -0,0 +1,552 @@ +import inspect +import json +import logging +import multiprocessing as mp +import os +import queue +import re +import stat +import time +import traceback +import types +from functools import wraps +from typing import Any, Set + +import click +import requests +from requests.adapters import HTTPAdapter +from tqdm import tqdm + +from profiler.advisor.common import constant as const +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.utils.log import init_logger, get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) +permission_warned: Set = set() + + +def ignore_warning(exception: Exception = None): + return exception + + +class ContextObject(object): + def __init__(self): + self._debug = False + + def set_debug(self, debug=False): + self._debug = debug + + @property + def debug_mode(self): + return self._debug + + +def debug_option(f): + return click.option('--debug', '-D', + is_flag=True, + expose_value=False, + is_eager=True, + callback=init_logger, + help="Debug Mode. 
Shows full stack trace when error occurs.")(f) + + +def get_class_absolute_path(cls): + module = inspect.getmodule(cls) + if module is not None: + module_path = module.__name__ + class_name = cls.__name__ + return f"{module_path}.{class_name}" + else: + return None + + +def is_static_func(function_obj): + return isinstance(function_obj, staticmethod) + + +def singleton(cls): + """ + :param cls: any class + :return: singleton handle + + When using the singleton function, you need to manually specify collection_path='dataSet_path'. Otherwise, the singleton function + is initialized by class name. + if cls has 'collection_path' property, _instance map will build by class_name and 'collection_path', the default value of + collection path is class absolute path. + + _instance = {cls.name: {collection_path: instance}} + """ + _instance = {} + + def _singleton(*args: any, **kw: any) -> any: + collection_path = kw.get("collection_path") + if not collection_path: + collection_path = get_class_absolute_path(cls) + if cls in _instance and collection_path in _instance[cls]: + return _instance[cls].get(collection_path) + if cls not in _instance: + _instance[cls] = {collection_path: cls(*args, **kw)} + else: + _instance[cls][collection_path] = cls(*args, **kw) + return _instance[cls].get(collection_path) + + # 保留原始类的属性和方法 + _singleton.__name__ = cls.__name__ + _singleton.__module__ = cls.__module__ + _singleton.__doc__ = cls.__doc__ + + # 拷贝原始类的类方法和静态方法 + _singleton.__dict__.update(cls.__dict__) + for base_class in inspect.getmro(cls)[::-1]: + # 获取类的所有成员 + members = inspect.getmembers(base_class) + + # 过滤出函数对象 + function_objs = [member[1] for member in members if inspect.isfunction(member[1]) or inspect.ismethod(member[1])] + for function_obj in function_objs: + if inspect.isfunction(function_obj) and not is_static_func(function_obj): + continue + setattr(_singleton, function_obj.__name__, function_obj) + + return _singleton + + +def lazy_property(func): + """ + Lazy loading of 
class attributes. + which is calculated only once when it is called for the first time, + and will not be repeated for each call after that. + """ + attr_name = "_lazy_" + func.__name__ + + @property + def _lazy_property(instance): + if not hasattr(instance, attr_name): + setattr(instance, attr_name, func(instance)) + return getattr(instance, attr_name) + + return _lazy_property + + +class CheckPathAccess: + """ + check path access permissions + """ + + # pylint: disable=no-member + def __init__(self, func): + wraps(func)(self) + self.warned = permission_warned + + def __call__(self, *args, **kwargs): + path = args[0] + if not os.access(path, os.R_OK) and path not in self.warned: + logger.warning("%s can not read, check the permissions", path) + self.warned.add(path) + return self.__wrapped__(*args, **kwargs) + + def __get__(self, instance, cls): + if instance is None: + return self + return types.MethodType(self, instance) + + +def walk_error_handler(error): + """ + handle dir walk error + """ + if error.filename not in permission_warned: + logger.warning(error) + permission_warned.add(error.filename) + + +@CheckPathAccess +def get_file_path_from_directory(path: str, check_func: Any) -> list: + """ + get file from directory + """ + file_list = [] + for root, _, files in os.walk(path, onerror=walk_error_handler): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +@singleton +class Timer: + def __init__(self): + self.strftime = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) + + +def get_analyze_processes(): + # n_processes not exposed to user through att-advisor command arguments now + return min(int(os.getenv(const.MA_ADVISOR_ANALYZE_PROCESSES, 1)), const.MA_ADVISOR_MAX_PROCESSES) + + +def format_timeline_result(result: dict, dump_html=False): + """ + :Param result: json for api name and stack + :Return: json after format + """ + format_result = {} + if 
dump_html: + result = json.loads(json.dumps(result).replace("\\r\\n", "
").replace("", "<module>")) + + for key, stacks in result.items(): + api_name = key.split(":")[0] + format_result[api_name] = sorted(list(stacks.items()), key=lambda stack: stack[1], reverse=True) + return format_result + + +class ParallelJob: + + def __init__(self, src_func, ops_api_list, job_name=None): + if not callable(src_func): + raise TypeError(f"src_func should be callable") + + if not isinstance(ops_api_list, (list, tuple)): + raise TypeError(f"ops_api_list should be list or tuple") + + self.src_func = src_func + self.ops_api_list = ops_api_list + self.job_name = job_name + + def start(self, n_proccesses): + + job_queue = mp.Queue(len(self.ops_api_list)) + completed_queue = mp.Queue() + for i in range(len(self.ops_api_list)): + job_queue.put(i) + + processes = [] + listen = mp.Process(target=self.listener, args=(completed_queue, len(self.ops_api_list),)) + listen.start() + + for i in range(n_proccesses): + p = mp.Process(target=self.parallel_queue, args=(job_queue, completed_queue,)) + processes.append(p) + p.start() + + for p in processes: + p.join() + + completed_queue.put(None) + listen.join() + + def listener(self, completed_queue, num): + pbar = tqdm(total=num, position=0, leave=False, ncols=100, desc=self.job_name) + for _ in iter(completed_queue.get, None): + pbar.update() + pbar.refresh() + pbar.n = num + + def parallel_queue(self, job_queue, completed_queue): + while True: + try: + if job_queue.empty(): + break + token = job_queue.get(timeout=1) + except queue.Empty: + continue + self.src_func(*self.ops_api_list[token]) + completed_queue.put(token) + + +def mp_queue_to_list(job_queue): + queue_list = [] + while True: + try: + if job_queue.empty(): + break + token = job_queue.get(timeout=1) + queue_list.append(token) + except queue.Empty: + continue + return queue_list + + +def load_parameter(parameter, default): + if not os.environ.get(parameter, None): + return default + else: + return os.environ.get(parameter) + + +def 
get_supported_subclass(clazz: VersionControl.__class__, cann_version: str): + """ + Returns a list of subclasses that support the specified version, because of the __subclasses__(), + you need to import the all subclass first + :param clazz: Class name which is extends to VersionControl.__class__ + :param cann_version: The CANN software version + :return: The list of subclasses that support the specified CANN version + """ + # 获取所有支持这个cann版本的子类 + dataset_classes = clazz.__subclasses__() + sub_class_list = [cls for cls in dataset_classes if cls.is_supported(cann_version)] + logger.debug("The support subclass list is %s, cann version is %s", str(sub_class_list), cann_version) + return sub_class_list + + +def to_percent(num: float) -> str: + """ + change float to percent format + """ + num = num * 100 + return f"{num:.2f}%" + + +def safe_division(numerator, denominator): + """Return 0 if denominator is 0.""" + return denominator and numerator / denominator + + +def safe_write(content, save_path): + if os.path.dirname(save_path) != "": + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + with os.fdopen(os.open(save_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP), "w") as f: + f.write(content) + + +def create_directory_for_file(file: str) -> None: + """ + create directory for file + """ + dirname = os.path.dirname(file) + if not os.path.exists(dirname): + os.makedirs(dirname) + + +class CheckPathAccess: + """ + check path access permissions + """ + + # pylint: disable=no-member + def __init__(self, func): + wraps(func)(self) + self.warned = permission_warned + + def __call__(self, *args, **kwargs): + path = args[0] + if path and not os.access(path, os.R_OK) and path not in self.warned: + logger.warning("%s can not read, check the permissions", path) + self.warned.add(path) + return self.__wrapped__(*args, **kwargs) + + def __get__(self, instance, cls): + if instance is None: + return self + return types.MethodType(self, 
instance) + + +@CheckPathAccess +def get_file_path_from_directory(path, check_func): + """ + get file from directory + """ + file_list = [] + + if not path: + return file_list + + if not os.path.isdir(path): + logger.warning("Expected existed directory, but got %s", path) + + for root, _, files in os.walk(path): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +@CheckPathAccess +def get_dir_path_from_directory(path: str, check_func: Any) -> list: + """ + get file from directory + """ + file_list = [] + for root, _, files in os.walk(path, onerror=walk_error_handler): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +def is_regex_pattern(string: str): + """ + Check if str is a regular expression. + """ + escaped_string = re.escape(string) + return not (escaped_string == string) + + +def join_prof_path(root_dir: str, sub_dir: str) -> str: + """ + regular expression matching method for path concatenation + """ + if is_regex_pattern(sub_dir): + for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): + if re.match(sub_dir, os.path.basename(root)): + return root + logger.debug("Fail to get profiling path %s from local path %s by regular expression matching", sub_dir, root_dir) + else: + sub_dir = os.path.join(root_dir, sub_dir) + if os.path.exists(sub_dir): + return sub_dir + logger.debug("Fail to get profiling path %s from local path %s", sub_dir, root_dir) + return "" + + +def format_excel_title(title: str) -> str: + """ + format excel title + """ + title = title.lower() + title = title.replace("(us)", '') + title = title.replace("(ns)", '') + title = title.replace("(%)", '') + title = title.replace(" ", "_") + return title + + +def format_float(num: float) -> float: + """ + format float num, round to 2 decimal places + """ + return round(num, 2) + + +class SafeOpen: + 
""" + safe open to check file + """ + + # pylint: disable=consider-using-with + def __init__(self, name, mode='r', encoding=None): + self.file = None + if not os.path.exists(name): + logger.warning("%s not exist, please check", name) + return + + if os.access(name, os.R_OK): + self.file = open(name, mode, encoding=encoding, errors="ignore") + else: + logger.warning("%s can not read, check the permissions", name) + + def __enter__(self): + return self.file + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file: + self.file.close() + return True + + +def save_downloaded_file(response, url_path, file_save_path): + """保存响应体中的文件 + + 参数: + response: 请求后获取的响应体 + url_path: url路径 + file_save_path: 保存路径 + 返回: + final_file_path: 文件保存绝对路径 + """ + # 获取url路径中的文件名, 拼接在保存路径下 + file_save_path = os.path.normpath(file_save_path) + file_name = os.path.basename(url_path) + final_file_path = os.path.join(file_save_path, file_name) + # 若目标保存路径不存在,则自动生成 + if not os.path.exists(file_save_path): + os.makedirs(file_save_path) + if response.status_code <= 300: + logger.debug("Response status code is %s", response.status_code) + flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL + modes = stat.S_IWUSR | stat.S_IRUSR + # 若文件已存在,则移除已有的文件并保存最新的文件 + if os.path.exists(final_file_path): + os.remove(final_file_path) + # 保存文件 + with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f: + f.write(response.content) + logger.info("Success to save content in: %s", os.path.abspath(final_file_path)) + else: + # 若响应码不为预期的数值, 显示相应告警 + logger.warning("Failed to save the response body. The response status code is %s. 
" + "Please check the network or try another region", response.status_code) + + +def request_with_retry(url_path, region_name=None): + """使用requests请求获取文件, 失败则进行重试, 最多请求 max_retries+1 次 + + 参数: + url_path: URL路径 + file_save_path: 云文件保存路径 + """ + logger.debug("Requesting or retrying to get file from region: %s", region_name) + + # 若从环境变量指定了保存路径,优先从环境变量中获取,若为空则使用默认的云文件保存路径constant.CLOUD_RULE_PATH + file_save_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH) + if os.getenv(const.ADVISOR_RULE_PATH): + file_save_path = os.getenv(const.ADVISOR_RULE_PATH) + + session = requests.Session() + # 使用session发起的所有请求, 默认最多会重试 max_retries 次, 计入最初请求, 最差情况下请求 max_retries+1 次 + adapter = HTTPAdapter(max_retries=const.MAX_RETRIES) + session.mount(const.HTTP_PREFIXES, adapter) + session.mount(const.HTTPS_PREFIXES, adapter) + + logger.debug('Session try to get response') + response = None + try: + response = session.get(url_path, timeout=const.TIMEOUT) + except Exception as e: + logger.debug("Error: %s: %s", e, traceback.format_exc()) + + if response is None: + logger.warning("Fail to download file from region: %s, response is None, " + "please use the environment variable %s for more detailed information", + region_name, const.ADVISOR_LOG_LEVEL) + else: + try: + # 若响应码为400~600之间,response.raise_for_status抛出HTTPError错误, 跳过调用save_downloaded_file函数逻辑 + response.raise_for_status() + save_downloaded_file(response, url_path=url_path, file_save_path=file_save_path) + except Exception as e: + logger.warning("Error: %s: %s", e, traceback.format_exc()) + # 关闭 session, 清除所有装配器 + session.close() + + +def read_csv(file): + import csv + + raw_data = [] + logger.debug("Parse file %s", file) + with SafeOpen(file, encoding="utf-8") as csv_file: + try: + csv_content = csv.reader(csv_file) + for row in csv_content: + raw_data.append(row) + except OSError as error: + logger.error("Read csv file failed : %s", error) + return [] + + return raw_data + + +def get_file_path_by_walk(root, 
filename): + file_path = "" + for root, _, files in os.walk(root, topdown=True): + for name in files: + if name == filename: + file_path = os.path.join(root, name) + return file_path + return file_path diff --git a/profiler/advisor/version.py b/profiler/advisor/version.py new file mode 100644 index 0000000000000000000000000000000000000000..caf2acb5521a37c3595e8f9e49fb6ebd86ffa99c --- /dev/null +++ b/profiler/advisor/version.py @@ -0,0 +1,38 @@ +import sys + + +def get_package_version(package_name) -> str: + """ + Get package version info by importlib + Args: + package_name: package name + + Returns: + version: version info string + """ + if sys.version_info >= (3, 8): + # Because importlib_metadata has been changed to importlib.metadata in py3.8 + from importlib import metadata + from importlib.metadata import PackageNotFoundError + else: + import importlib_metadata as metadata + from importlib_metadata import PackageNotFoundError + + try: + version = metadata.version(package_name) + except PackageNotFoundError: + version = "UNKNOWN" + return version + + +def print_version_callback(ctx, param, value): # NOQA + import click + + if not value or ctx.resilient_parsing: + return + click.echo('Version {}'.format(get_package_version("att_advisor"))) + ctx.exit() + + +def cli_version(): + return get_package_version("att_advisor") diff --git a/profiler/cli/__init__.py b/profiler/cli/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2cba173de10fbf024aad5c1d2c543ae4304ff447 100644 --- a/profiler/cli/__init__.py +++ b/profiler/cli/__init__.py @@ -0,0 +1,4 @@ +from profiler.advisor.config.config import Config +from profiler.advisor.utils.utils import Timer + +Config().set_log_path(f"att_advisor_{Timer().strftime}.xlsx") diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py new file mode 100644 index 0000000000000000000000000000000000000000..8e056100645a46f531858694457bb13ea73a00e7 --- /dev/null +++ b/profiler/cli/analyze_cli.py @@ -0,0 +1,130 @@ 
+import click +import sys +import os +import logging + +sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), "compare_tools")) +sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), "cluster_analyse")) + +from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup +from profiler.advisor.common import constant +from profiler.advisor.utils.utils import debug_option +from profiler.advisor.interface.interface import Interface +from profiler.cluster_analyse.cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor + +logger = logging.getLogger() + + +def _analyze(dimensions, **kwargs): + result_list = [] + job_list = [] + + def is_cluster(): + profiling_path = kwargs.get("profiling_path") + path_list = [os.path.join(profiling_path, dir_name) for dir_name in os.listdir(profiling_path)] + dir_list = [path for path in path_list if os.path.isdir(path)] + data_processor = PytorchDataPreprocessor(dir_list) + data_map = data_processor.get_data_map() + return len(data_map) > 1 + + is_cluster = is_cluster() + + for dimension in dimensions: + if not is_cluster and dimension == "cluster": + continue + for scope in Interface.get_scope(dimension): + interface = Interface(**kwargs) + job_list.append((dimension, scope, interface)) + + for i, (dimension, scope, interface) in enumerate(job_list[::-1]): + result_list.append( + interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False)) + + for result in result_list[::-1]: + if result and hasattr(result, "show"): + result.show() + break + + +@click.group(name="analyze", cls=ClickAliasedGroup) +def analyze_cli(**kwargs): + """Analyze profiling datasets and give performance optimization suggestion.""" + pass + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="all", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + 
help='Directory of profiling data') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path(), + help='Directory of benchmark profiling data, used for compare performance') +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +# @click.option('--is_inference', is_flag=True, help="Enable performance analysis of inference task") +@click.option("-pt", + "--profiling_type", + metavar="", + default=constant.ASCEND_PYTORCH_PROFILER, + required=False, + type=click.Choice(constant.SUPPORTED_PROFILING_TYPE), + help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") +@debug_option +def analyze_all(**kwargs) -> None: + # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 + if not kwargs.get("benchmark_profiling_path"): + kwargs["benchmark_profiling_path"] = kwargs.get("profiling_path") + + _analyze(Interface.all_dimension, **kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="schedule", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='Directory of profiling data') +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be 
viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@debug_option +def analyze_schedule(**kwargs) -> None: + _analyze(["schedule"], **kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="computation", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='Directory of profiling data') +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option("-pt", + "--profiling_type", + metavar="", + default=constant.ASCEND_PYTORCH_PROFILER, + required=False, + type=click.Choice(constant.SUPPORTED_PROFILING_TYPE), + help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") +@debug_option +def analyze_computation(**kwargs) -> None: + _analyze(["computation"], **kwargs) \ No newline at end of file diff --git a/profiler/cli/cluster_cli.py b/profiler/cli/cluster_cli.py index 62c06c2e21d54ee4540ad5e747efefabb65ed762..5e49e2523b3fef2bcf363e74433980b9d55d1fef 100644 --- a/profiler/cli/cluster_cli.py +++ 
b/profiler/cli/cluster_cli.py @@ -12,16 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import ast import click import os import sys -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse")) - +sys.path.append(os.path.dirname(os.path.dirname(__file__))) +from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup +from profiler.advisor.utils.utils import debug_option from profiler.prof_common.constant import Constant from profiler.cluster_analyse.cluster_analysis import ALL_FEATURE_LIST -from cluster_analysis import Interface +from profiler.cluster_analyse.cluster_analysis import ClusterAnalysis @click.command(context_settings=Constant.CONTEXT_SETTINGS, name="cluster", @@ -34,4 +35,4 @@ def cluster_cli(profiling_path, mode) -> None: Constant.COLLECTION_PATH: profiling_path, Constant.ANALYSIS_MODE: mode } - Interface(parameter).run() + ClusterAnalysis(parameter).run() diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index b25fa039daa5f1ca4bd2b6c2d9becee8bc9abc2d..4dbc8379aa97e59e18e357574786b83fe92b9073 100644 --- a/profiler/cli/compare_cli.py +++ b/profiler/cli/compare_cli.py @@ -12,20 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import ast import click import os import sys -import ast -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse")) -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "compare_tools")) +sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from profiler.prof_common.analyze_dict import AnalyzeDict from profiler.prof_common.constant import Constant -from compare_backend.comparison_generator import ComparisonGenerator - +from profiler.prof_common.analyze_dict import AnalyzeDict +from profiler.compare_tools.compare_backend.comparison_generator import ComparisonGenerator @click.command(context_settings=Constant.CONTEXT_SETTINGS, name="compare", short_help='Compare the performance differences between GPUs and NPUs.') diff --git a/profiler/cli/complete_cli.py b/profiler/cli/complete_cli.py new file mode 100644 index 0000000000000000000000000000000000000000..28f00c5866912efb2f246fdbb52a439129b546ab --- /dev/null +++ b/profiler/cli/complete_cli.py @@ -0,0 +1,29 @@ +import click + +from profiler.advisor.utils.tools import CONTEXT_SETTINGS + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help='Auto complete ma-advisor command in terminal, support "bash(default)/zsh/fish".') +@click.argument('shell_type', nargs=1, default="Bash", type=click.Choice(["Bash", "Zsh", "Fish"], case_sensitive=False)) +def auto_complete_cli(shell_type): + """ + Auto complete ma-advisor command in terminal. 
+ + Example: + + \b + # print bash auto complete command to terminal + ma-advisor auto-completion Bash + """ + click.echo("Tips: please paste following shell command to your terminal to activate auto completion.\n") + if shell_type.lower() == "bash": + bash_str = 'eval "$(_advisor_COMPLETE=bash_source msprof-analyze)"' + elif shell_type.lower() == "zsh": + bash_str = 'eval "$(_advisor_COMPLETE=zsh_source msprof-analyze)"' + elif shell_type.lower() == "fish": + bash_str = 'eval (env _advisor_COMPLETE=fish_source msprof-analyze)' + else: + click.echo(f'Unsupported shell type {shell_type}.') + return + click.echo(f'{bash_str}\n') diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 96fc008f58f3081ac569da7825d8536003af74e3..61d46012dc1f2952ee26b827e8cc6721cdd874e3 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -17,16 +17,21 @@ import logging import click -from profiler.cli.cluster_cli import cluster_cli +from profiler.cli.analyze_cli import analyze_cli +from profiler.cli.complete_cli import auto_complete_cli from profiler.cli.compare_cli import compare_cli +from profiler.cli.cluster_cli import cluster_cli +from profiler.advisor.version import print_version_callback, cli_version logger = logging.getLogger() CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], max_content_width=160) COMMAND_PRIORITY = { - "cluster": 1, - "compare": 2 + "advisor": 1, + "compare": 2, + "cluster": 3, + "auto-completion": 4 } @@ -49,9 +54,14 @@ class SpecialHelpOrder(click.Group): @click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder) -def msprof_analyze_cli(): +@click.option('--version', '-V', '-v', is_flag=True, + callback=print_version_callback, expose_value=False, + is_eager=True, help=cli_version()) +def advisor_cli(**kwargs): pass -msprof_analyze_cli.add_command(cluster_cli, name="cluster") -msprof_analyze_cli.add_command(compare_cli, name="compare") +advisor_cli.add_command(analyze_cli, name="advisor") 
+advisor_cli.add_command(compare_cli, name="compare") +advisor_cli.add_command(cluster_cli, name="cluster") +advisor_cli.add_command(auto_complete_cli, name="auto-completion") diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index 5ece8971915ee530ea790ed48620fff138525d36..514c4d20dbc8ab113928ae6b00611af4f0ffe2f7 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -16,14 +16,14 @@ import argparse import os -from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor -from cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor -from communication_group.communication_group_generator import CommunicationGroupGenerator -from common_func.constant import Constant -from common_func.file_manager import FileManager -from common_func.path_manager import PathManager -from common_func import analysis_loader -from analysis.analysis_facade import AnalysisFacade +from profiler.cluster_analyse.cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor +from profiler.cluster_analyse.cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor +from profiler.cluster_analyse.communication_group.communication_group_generator import CommunicationGroupGenerator +from profiler.cluster_analyse.common_func.constant import Constant +from profiler.cluster_analyse.common_func.file_manager import FileManager +from profiler.cluster_analyse.common_func.path_manager import PathManager +from profiler.cluster_analyse.analysis.analysis_facade import AnalysisFacade +from profiler.cluster_analyse.common_func import analysis_loader COMM_FEATURE_LIST = ['all', 'communication_time', 'communication_matrix'] ALL_FEATURE_LIST = ['all', 'communication_time', 'communication_matrix', 'cann_api_sum'] @@ -40,7 +40,7 @@ def parse_recipe_params(analysis_name, analysis_args): if not analysis_class: 
print("[ERROR] undefined analysis.") return None - + args_parsed = get_analysis_args(analysis_class, analysis_args) recipe_params = { Constant.RECIPE_NAME: analysis_class[0], @@ -50,7 +50,7 @@ def parse_recipe_params(analysis_name, analysis_args): } return recipe_params -class Interface: +class ClusterAnalysis: ASCEND_PT = "ascend_pt" ASCEND_MS = "ascend_ms" @@ -132,4 +132,4 @@ if __name__ == "__main__": } if args_parsed.mode not in COMM_FEATURE_LIST: parameter.update(parse_recipe_params(args_parsed.mode, args_remained)) - Interface(parameter).run() + ClusterAnalysis(parameter).run() diff --git a/profiler/cluster_analyse/common_func/db_manager.py b/profiler/cluster_analyse/common_func/db_manager.py index c0d6ad89be8edd8bbb2a4ee8e0653141550b0129..9d55c7c8b7d818cee3b2d82352218f998e7b3bc3 100644 --- a/profiler/cluster_analyse/common_func/db_manager.py +++ b/profiler/cluster_analyse/common_func/db_manager.py @@ -15,6 +15,9 @@ import os import sqlite3 +import sys + +sys.path.append("../../") from common_func.constant import Constant from common_func.empty_class import EmptyClass diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index 64b7e6c4f7023d252062240f52187b3e2307a07f..1dd5159ea7d234abd01172084e26a288c62ca494 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -17,8 +17,8 @@ import os import csv import json -from common_func.constant import Constant -from common_func.path_manager import PathManager +from profiler.cluster_analyse.common_func.constant import Constant +from profiler.cluster_analyse.common_func.path_manager import PathManager class FileManager: diff --git a/profiler/test/tools/__init__.py b/profiler/test/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/tools/tool.py b/profiler/test/tools/tool.py new 
file mode 100644 index 0000000000000000000000000000000000000000..cb2f1064f73c0110764551020c693f4089e413a5 --- /dev/null +++ b/profiler/test/tools/tool.py @@ -0,0 +1,30 @@ +import os +import re +import shutil +import shlex +from subprocess import Popen, PIPE + + +def delete_file(pattern, work_path): + file_list = os.listdir(work_path) + for file_name in file_list: + if re.match(pattern, file_name): + + os.remove(os.path.join(work_path, file_name)) + + +def recover_env(work_path="./"): + if os.path.exists("./log"): + shutil.rmtree("./log") + + if os.path.exists("./tune_ops_file.cfg"): + os.remove("./tune_ops_file.cfg") + + delete_file(r"att_advisor_+", work_path) + + +def run_command(cmd): + # Make sure the process output can be displayed on the console + p = Popen(shlex.split(cmd, posix=False), stdout=PIPE, bufsize=0, universal_newlines=False) + p.wait() + diff --git a/profiler/test/ut/advisor/profiling/__init__.py b/profiler/test/ut/advisor/profiling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/ut/advisor/profiling/test_profiling_analyzer.py b/profiler/test/ut/advisor/profiling/test_profiling_analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..e6c11f09db39bd66e4bc7372818e1a190e2a88b8 --- /dev/null +++ b/profiler/test/ut/advisor/profiling/test_profiling_analyzer.py @@ -0,0 +1,42 @@ +import os +import unittest + +from build.lib.profiler.advisor.analyzer.computation.profiling_analyzer import ProfilingAnalyzer +from profiler.advisor.common.constant import CANN_VERSION_C15 +from profiler.advisor.utils.utils import get_supported_subclass +from test.tools.tool import recover_env + + +class TestProfilingAnalyzer(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + # def test_profiling_optimize_and_make_render(self): + # data_root_dir = 
class TestProfilingDataset(unittest.TestCase):
    """Checks the dataset loaded from the sample profiling result directory."""

    def setUp(self):
        """Locate the sample profiling data and load it before each test."""
        # Two levels up from this file, i.e. the parent of this file's directory.
        suite_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
        self.data_root_dir = os.path.join(suite_dir, "data/profiling_result",
                                          "cann700cnnTest_0001_20240113084145.001_ascend_pt")
        kwargs = {
            "analysis_mode": "profiling",
            "data_dir": self.data_root_dir
        }
        # NOTE(review): ``Collector`` does not appear among this module's
        # imports; confirm where it comes from, otherwise this raises NameError.
        self.data_list = Collector().load(white_list=['timeline_event_dataset', 'profiling_dataset'], **kwargs)

    @classmethod
    def tearDownClass(cls) -> None:
        """Remove files generated while the tests of this class were running."""
        recover_env()

    def test_profiling_dataset_build(self):
        """The loaded dataset exposes non-empty ge_info, op_summary, task_time and msprof data."""
        # check profiling data dir whether exists.
        self.assertTrue(os.path.exists(self.data_root_dir))
        self.assertIn('profiling_dataset_base', self.data_list)

        dataset = self.data_list['profiling_dataset_base'][0]
        self.assertTrue(dataset)
        # check ge_info.db in profiling data.
        self.assertTrue(hasattr(dataset, 'ge_info'))
        self.assertGreater(len(dataset.ge_info.op_state_info_list), 0)
        # check op_summary in profiling data.
        self.assertTrue(hasattr(dataset, 'op_summary'))
        self.assertGreater(len(dataset.op_summary.op_list), 0)
        # check task_time in profiling data.
        self.assertTrue(hasattr(dataset, 'task_time'))
        self.assertGreater(len(dataset.task_time._tasks), 0)
        # check msprof in profiling data.
        self.assertTrue(hasattr(dataset, 'msprof'))
        self.assertGreater(len(dataset.msprof.tasks), 0)

    def test_profiling_type(self):
        """The dataset reports a profiling type listed in SUPPORTED_PROFILING_TYPE."""
        dataset = self.data_list['profiling_dataset_base'][0]
        # check profiling type
        self.assertTrue(hasattr(dataset, 'PROF_TYPE'))
        self.assertIn(dataset.PROF_TYPE, constant.SUPPORTED_PROFILING_TYPE)
@singleton
class SingletonTest:
    """Minimal class used to exercise the ``singleton`` decorator."""

    def __init__(self, collection_path=None, **kwargs) -> None:
        self._timeline_dir = collection_path
        self._id = kwargs.get('id')

    @property
    def timeline_dir(self):
        """The ``collection_path`` given at construction."""
        return self._timeline_dir

    @property
    def id(self):
        """The ``id`` keyword argument given at construction (``None`` if absent)."""
        return self._id


class TestProfilingAnalyzer(unittest.TestCase):
    """Tests for ``get_supported_subclass`` and the ``singleton`` decorator."""

    @classmethod
    def test_get_supported_subclass(cls):
        """Every known operator checker is reported for the default CANN version."""
        clazz = get_supported_subclass(OperatorChecker, constant.DEFAULT_CANN_VERSION)
        for checker in (OperatorBoundChecker, AicpuChecker, DynamicShapeChecker, BlockDimChecker):
            assert checker in clazz, f"{checker.__name__} missing from supported subclasses"

    @classmethod
    def test_singleton(cls):
        """Instances are distinct per ``collection_path`` and reused for a repeated one."""
        single1 = SingletonTest(collection_path="data_path_1", id='single1')
        single2 = SingletonTest(collection_path="data_path_2", id='single2')
        # Same collection_path as single1: the assertions below expect the
        # first instance back, so the new ``id`` argument must not take effect.
        single3 = SingletonTest(collection_path="data_path_1", id='single3')
        assert single1.id != single2.id
        assert single1.id == single3.id


if __name__ == '__main__':
    # Let unittest discover, run and report the tests instead of calling them by hand.
    unittest.main()
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Packaging script for the ``msprof-analyze`` distribution."""

import os

from setuptools import find_packages, setup  # type: ignore

# Data files are resolved against this script so that the build does not depend
# on the directory it is launched from.
_HERE = os.path.dirname(os.path.abspath(__file__))


def _read_requirements(relative_path):
    """Return the requirement specifiers listed in ``relative_path``.

    Blank lines and ``#`` comment lines are skipped so that they never reach
    ``install_requires``.
    """
    with open(os.path.join(_HERE, relative_path), 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line and not line.startswith('#')]


extras = {
    "test": [
        "pytest==6.2.4",
        "pytest-cookies==0.6.1",
        "pytest-cov==2.12.0",
        "mock==4.0.3",
    ]
}

requires = _read_requirements('requirements/build.txt')

# Test requirements first, then the build requirements; dict.fromkeys drops
# duplicates while keeping a deterministic order (a set would not).
tests_requires = list(dict.fromkeys(_read_requirements('requirements/tests.txt') + requires))

with open(os.path.join(_HERE, 'version.txt'), 'r', encoding='utf-8') as f:
    version = f.read().strip()

setup(
    name="msprof-analyze",
    version=version,
    description="Ascend advisor tools",
    packages=find_packages(),
    include_package_data=True,
    python_requires='>=3.7',
    install_requires=requires,
    # Exposes the pinned test tools as ``pip install msprof-analyze[test]``.
    extras_require=extras,
    package_data={'': ['*.json', '*.ini', '*.txt', '*.yaml', '*.html']},
    tests_require=tests_requires,
    entry_points="""
        [console_scripts]
        msprof-analyze=profiler.cli.entrance:advisor_cli
    """
)

# build cmd: pip install --editable .