diff --git a/profiler/cluster_analyse/analysis/communication_analysis.py b/profiler/cluster_analyse/analysis/communication_analysis.py index a7b27913443f0fdb15d59811bd0aedffbbf01a3a..45fc5cd44840c4ef3ae591d74aa4ad9be38d9b1f 100644 --- a/profiler/cluster_analyse/analysis/communication_analysis.py +++ b/profiler/cluster_analyse/analysis/communication_analysis.py @@ -15,9 +15,9 @@ import os from abc import abstractmethod +from collections import defaultdict from common_func.constant import Constant -from collections import defaultdict from common_func.file_manager import FileManager diff --git a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py index 5f0497811662184c8bd10ef8b6fb96feae94ed7a..f83ba66edc06590e36fd5f6d8345972d203927d8 100644 --- a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py +++ b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py @@ -14,9 +14,9 @@ # limitations under the License. import os +from collections import defaultdict from common_func.constant import Constant -from collections import defaultdict from common_func.file_manager import FileManager from prof_bean.step_trace_time_bean import StepTraceTimeBean @@ -71,7 +71,7 @@ class StepTraceTimeAnalysis: return step_group_dict = {} for data_list in self.step_data_list: - stage_group = 'None' + stage_group = tuple() for stage in stage_list: if data_list[2] in stage: stage_group = tuple(stage) diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index 57c65678ab798b7dc478b744775fc803be25eb79..a27820983c0353c4c9e727540f0fbed933d14c3d 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -58,5 +58,5 @@ class Interface: if __name__ == "__main__": parser = argparse.ArgumentParser(description="cluster analysis module") parser.add_argument('-d', '--collection_path', type=str, required=True, help="profiling data path") - args = parser.parse_args() - Interface(args).run() + args_parsed = parser.parse_args() + Interface(args_parsed).run() diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py index f9c802d9f0b38cbce5b0bcbca0707b4e8f4e2f32..9ee4ad12cbd083e4bf36ca5f8ba527b54c4dfa0f 100644 --- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py +++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py @@ -13,18 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pathlib import Path import sys -import pandas as pd import argparse import re -import plotly.graph_objects as go -from plotly.subplots import make_subplots -from plotly.offline import plot import os import stat import shutil import warnings +from pathlib import Path + +import pandas as pd +import plotly.graph_objects as go +from plotly.subplots import make_subplots +from plotly.offline import plot sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -64,13 +65,13 @@ class FormDataProcessor: # 从文件名提取设备ID try: df['device_id'] = self.getDeviceId(f) - except: + except Exception: print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求,请确保存在[device_]这一级文件夹,具体操作指导见readme\n") continue # 添加新列 "device_id" try: df['node_id'] = self.getNodeId(f) - except: + except Exception: print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求,请确保存在[node*]这一级文件夹,具体操作指导见readme\n") continue # 将数据添加到最终的数据框中 @@ -100,7 +101,7 @@ class FormDataProcessor: class ViewInfoManager: def __init__(self, chip_type): self.chip_type = chip_type - self.op_summary_columns_dict = [] + self.op_summary_columns_dict = {} self.setOpSummaryColumnsParams() def setOpSummaryColumnsParams(self): @@ -140,7 +141,7 @@ class ViewInfoManager: } def getColumnsInfo(self, analyzer_type): - return self.op_summary_columns_dict[self.chip_type][analyzer_type] + return self.op_summary_columns_dict.get(self.chip_type, {}).get(analyzer_type) class OpSummaryAnalyzerBase: @@ -259,6 +260,7 @@ class StatisticalInfoToHtmlAnalyzer(OpSummaryAnalyzerBase): else: return 1 + class DeliverableGenerator: def __init__(self, args): self.args = args diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index e4e0416600c5244be7d460fdd5d08a9ee3e47dba..8fa988bd2957940363df4fa85583746071fa8104 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -40,8 +40,8 @@ class FileManager: reader = csv.DictReader(csv_file) for row in reader: result_data.append(class_bean(row)) - except Exception: - raise RuntimeError(f"Failed to read the file: {base_name}") + except Exception as e: + raise RuntimeError(f"Failed to read the file: {base_name}") from e return result_data @classmethod @@ -56,8 +56,8 @@ class FileManager: try: with open(file_path, "r") as json_file: result_data = json.load(json_file) - except Exception: - raise RuntimeError(f"Failed to read the file: {base_name}") + except Exception as e: + raise RuntimeError(f"Failed to read the file: {base_name}") from e return result_data @classmethod @@ -78,8 +78,8 @@ class FileManager: if headers: writer.writerow(headers) writer.writerows(data) - except Exception: - raise RuntimeError(f"Can't create file: {base_name}") + except Exception as e: + raise RuntimeError(f"Can't create file: {base_name}") from e @classmethod def create_json_file(cls, profiler_path: str, data: dict, file_name: str) -> None: @@ -94,8 +94,8 @@ class FileManager: os.open(output_file, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY), 'w' ) as file: json.dump(data, file) - except Exception: - raise RuntimeError(f"Can't create the file: {base_name}") + except Exception as e: + raise RuntimeError(f"Can't create the file: {base_name}") from e @classmethod def create_output_dir(cls, collection_path: str) -> None: diff --git a/profiler/cluster_analyse/communication_group/communication_group_generator.py b/profiler/cluster_analyse/communication_group/communication_group_generator.py index 31576eed07c9c4ab59b229472310d1b912798ba7..bdda0f65268bbeb9cadc37f2c9ece654c46e25d5 100644 --- a/profiler/cluster_analyse/communication_group/communication_group_generator.py +++ b/profiler/cluster_analyse/communication_group/communication_group_generator.py @@ -15,9 +15,9 @@ import os from copy import deepcopy +from collections import defaultdict from common_func.constant import Constant from common_func.file_manager import FileManager -from collections import defaultdict class CommunicationGroupGenerator: @@ -171,4 +171,4 @@ class UnionFind(object): if p & q: return True else: - False + return False diff --git a/profiler/compare_tools/generation/communication_comparison_generator.py b/profiler/compare_tools/generation/communication_comparison_generator.py index e91126a8b9e4ddcd896ba0f7380e6002137001cf..8e0f260892f7c9493659e50e5a2badfc314c2d4c 100644 --- a/profiler/compare_tools/generation/communication_comparison_generator.py +++ b/profiler/compare_tools/generation/communication_comparison_generator.py @@ -122,13 +122,13 @@ class CommunicationComparisonGenerator: comparison_detail_data[0] = "|" if index < len(base_data): total_dur = sum([data[2] for data in base_data]) - percent = 0.0 if total_dur < Constant.EPS else base_data[index][2] / total_dur + percent = 0.0 if abs(total_dur) < Constant.EPS else base_data[index][2] / total_dur dur_percent = "%.2f%%" % (percent * 100) base_data[index][0] = f"{base_data[index][0]} ({dur_percent})" base_detail_data[1:] = base_data[index] if index < len(comparison_data): total_dur = sum([data[2] for data in comparison_data]) - percent = 0.0 if total_dur < Constant.EPS else comparison_data[index][2] / total_dur + percent = 0.0 if abs(total_dur) < Constant.EPS else comparison_data[index][2] / total_dur dur_percent = "%.2f%%" % (percent * 100) comparison_data[index][0] = f"{comparison_data[index][0]} ({dur_percent})" comparison_detail_data[1:] = comparison_data[index] diff --git a/profiler/compare_tools/profiling_analysis/gpu_parser.py b/profiler/compare_tools/profiling_analysis/gpu_parser.py index 3b470a8d4abdf17842ca9c9c72205f386cafd435..4443562bd4edae71d30c0314ea22756a8d20b534 100644 --- a/profiler/compare_tools/profiling_analysis/gpu_parser.py +++ b/profiler/compare_tools/profiling_analysis/gpu_parser.py @@ -19,6 +19,7 @@ import pandas as pd import profiling_analysis.parser_helper as parser_helper from utils.file_reader import FileReader +from utils.constant import Constant class OpTimeWarper: @@ -134,7 +135,10 @@ class GpuProfilingParser: self.profiling_info.scheduling_time = self.profiling_info.e2e_time - all_op_time / 10 ** 6 - \ self.profiling_info.communication_not_overlapped - self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time + if self.profiling_info.e2e_time < Constant.EPS: + self.profiling_info.scheduling_ratio = 0.0 + else: + self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time self.parse_memory_reserved() def parse_e2e_time(self): diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py index a8725a1486c43cbcd77875285526b6d515c95a72..2c71b0dc4a5e10f5f40f70da618dd017cfc461f7 100644 --- a/profiler/compare_tools/profiling_analysis/npu_parser.py +++ b/profiler/compare_tools/profiling_analysis/npu_parser.py @@ -14,8 +14,8 @@ # limitations under the License. import sys -import pandas as pd from collections import defaultdict +import pandas as pd import profiling_analysis.parser_helper as parser_helper from utils.file_reader import FileReader from common_func.path_manager import PathManager diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py index 34e4ecab49c7143d231b3b1e2b208fa2f93d8696..ef0287b35f862ca5bd807de498cc8684256d7c43 100644 --- a/profiler/compare_tools/utils/file_reader.py +++ b/profiler/compare_tools/utils/file_reader.py @@ -26,9 +26,9 @@ class FileReader: try: with open(file_path, "rt") as file: json_data = json.loads(file.read()) - except Exception: + except Exception as e: msg = f"Can't read file: {file_path}" - raise RuntimeError(msg) + raise RuntimeError(msg) from e return json_data @classmethod @@ -51,9 +51,9 @@ class FileReader: reader = csv.DictReader(csv_file) for row in reader: result_data.append(row) - except Exception: + except Exception as e: msg = f"Failed to read the file: {file_path}" - raise RuntimeError(msg) + raise RuntimeError(msg) from e return result_data @classmethod diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py index ceb24e6c310c838e336f2b9e6ede878a7a416a68..a94887ecc2f6a2b6069d031f0cfada2537f8cf46 100644 --- a/profiler/compare_tools/utils/profiling_parser.py +++ b/profiler/compare_tools/utils/profiling_parser.py @@ -205,7 +205,7 @@ class NPUProfilingParser(ProfilingParser): match_dequeue_data = self._match_cann_memory_data(dequeue_data, ts_time) if match_dequeue_data is not None: correlation_id = match_dequeue_data.get("args", {}).get("correlation_id", "") - ts = enqueue_dict[correlation_id].get("ts", 0) + ts = enqueue_dict.get(correlation_id, {}).get("ts", 0) self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), Constant.TS: ts, Constant.NAME: data.get(Constant.NAME, ""), Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)), diff --git a/profiler/compare_tools/utils/tree_builder.py b/profiler/compare_tools/utils/tree_builder.py index 4010ba0c8855054f5c445ae1f9b41d8ab287b6f6..b08aa6b9703e7b3cfce8db413ea6330659300cd5 100644 --- a/profiler/compare_tools/utils/tree_builder.py +++ b/profiler/compare_tools/utils/tree_builder.py @@ -1,4 +1,5 @@ from queue import Queue +from typing import Optional, Dict, List from utils.constant import Constant from utils.torch_op_node import TorchOpNode @@ -21,7 +22,12 @@ class TreeBuilder: return root_node @classmethod - def update_tree_node(cls, root_node: TorchOpNode, flow_kernel_dict: dict = {}, memory_allocated_list: list = []): + def update_tree_node( + cls, + root_node: TorchOpNode, + flow_kernel_dict: Optional[Dict] = None, + memory_allocated_list: Optional[List] = None, + ): def set_kernel_helper(node_queue, ts, kernel_num, kernel_list): while not node_queue.empty(): tree_node = node_queue.get() @@ -32,6 +38,9 @@ class TreeBuilder: else: tree_node.set_kernel_list(kernel_list) + flow_kernel_dict = flow_kernel_dict if flow_kernel_dict else {} + memory_allocated_list = memory_allocated_list if memory_allocated_list else [] + if flow_kernel_dict: for ts, kernel_list in flow_kernel_dict.items(): matched_child_node = root_node.match_child_node(ts)