diff --git a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py
index 5f0497811662184c8bd10ef8b6fb96feae94ed7a..f83ba66edc06590e36fd5f6d8345972d203927d8 100644
--- a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py
+++ b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py
@@ -14,9 +14,9 @@
 # limitations under the License.
 
 import os
+from collections import defaultdict
 
 from common_func.constant import Constant
-from collections import defaultdict
 from common_func.file_manager import FileManager
 from prof_bean.step_trace_time_bean import StepTraceTimeBean
 
@@ -71,7 +71,7 @@ class StepTraceTimeAnalysis:
             return
         step_group_dict = {}
         for data_list in self.step_data_list:
-            stage_group = 'None'
+            stage_group = tuple()
             for stage in stage_list:
                 if data_list[2] in stage:
                     stage_group = tuple(stage)
diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py
index 57c65678ab798b7dc478b744775fc803be25eb79..a27820983c0353c4c9e727540f0fbed933d14c3d 100644
--- a/profiler/cluster_analyse/cluster_analysis.py
+++ b/profiler/cluster_analyse/cluster_analysis.py
@@ -58,5 +58,5 @@ class Interface:
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="cluster analysis module")
     parser.add_argument('-d', '--collection_path', type=str, required=True, help="profiling data path")
-    args = parser.parse_args()
-    Interface(args).run()
+    args_parsed = parser.parse_args()
+    Interface(args_parsed).run()
diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
index d761a2f60541fa45bf09d664bf8dfe67e622e933..49446da43df74be43700038e3ac8d4f177e5dc2e 100644
--- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
+++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
@@ -100,7 +100,7 @@ class FormDataProcessor:
 class ViewInfoManager:
     def __init__(self, chip_type):
         self.chip_type = chip_type
-        self.op_summary_columns_dict = []
+        self.op_summary_columns_dict = {}
         self.setOpSummaryColumnsParams()
 
     def setOpSummaryColumnsParams(self):
@@ -140,7 +140,7 @@ class ViewInfoManager:
         }
 
     def getColumnsInfo(self, analyzer_type):
-        return self.op_summary_columns_dict[self.chip_type][analyzer_type]
+        return self.op_summary_columns_dict.get(self.chip_type, {}).get(analyzer_type)
 
 
 class OpSummaryAnalyzerBase:
@@ -259,6 +259,7 @@ class StatisticalInfoToHtmlAnalyzer(OpSummaryAnalyzerBase):
         else:
             return 1
 
+
 class DeliverableGenerator:
     def __init__(self, args):
         self.args = args
diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py
index e4e0416600c5244be7d460fdd5d08a9ee3e47dba..8fa988bd2957940363df4fa85583746071fa8104 100644
--- a/profiler/cluster_analyse/common_func/file_manager.py
+++ b/profiler/cluster_analyse/common_func/file_manager.py
@@ -40,8 +40,8 @@ class FileManager:
                 reader = csv.DictReader(csv_file)
                 for row in reader:
                     result_data.append(class_bean(row))
-        except Exception:
-            raise RuntimeError(f"Failed to read the file: {base_name}")
+        except Exception as e:
+            raise RuntimeError(f"Failed to read the file: {base_name}") from e
         return result_data
 
     @classmethod
@@ -56,8 +56,8 @@ class FileManager:
         try:
             with open(file_path, "r") as json_file:
                 result_data = json.load(json_file)
-        except Exception:
-            raise RuntimeError(f"Failed to read the file: {base_name}")
+        except Exception as e:
+            raise RuntimeError(f"Failed to read the file: {base_name}") from e
         return result_data
 
     @classmethod
@@ -78,8 +78,8 @@ class FileManager:
                 if headers:
                     writer.writerow(headers)
                 writer.writerows(data)
-        except Exception:
-            raise RuntimeError(f"Can't create file: {base_name}")
+        except Exception as e:
+            raise RuntimeError(f"Can't create file: {base_name}") from e
 
     @classmethod
     def create_json_file(cls, profiler_path: str, data: dict, file_name: str) -> None:
@@ -94,8 +94,8 @@ class FileManager:
                     os.open(output_file, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY), 'w'
             ) as file:
                 json.dump(data, file)
-        except Exception:
-            raise RuntimeError(f"Can't create the file: {base_name}")
+        except Exception as e:
+            raise RuntimeError(f"Can't create the file: {base_name}") from e
 
     @classmethod
     def create_output_dir(cls, collection_path: str) -> None:
diff --git a/profiler/cluster_analyse/communication_group/communication_group_generator.py b/profiler/cluster_analyse/communication_group/communication_group_generator.py
index 31576eed07c9c4ab59b229472310d1b912798ba7..6611b6406c7511ca4ebc38f1c3bc31f9750f3e74 100644
--- a/profiler/cluster_analyse/communication_group/communication_group_generator.py
+++ b/profiler/cluster_analyse/communication_group/communication_group_generator.py
@@ -15,9 +15,9 @@
 
 import os
 from copy import deepcopy
+from collections import defaultdict
 from common_func.constant import Constant
 from common_func.file_manager import FileManager
-from collections import defaultdict
 
 
 class CommunicationGroupGenerator:
diff --git a/profiler/compare_tools/generation/communication_comparison_generator.py b/profiler/compare_tools/generation/communication_comparison_generator.py
index e91126a8b9e4ddcd896ba0f7380e6002137001cf..8e0f260892f7c9493659e50e5a2badfc314c2d4c 100644
--- a/profiler/compare_tools/generation/communication_comparison_generator.py
+++ b/profiler/compare_tools/generation/communication_comparison_generator.py
@@ -122,13 +122,13 @@ class CommunicationComparisonGenerator:
                 comparison_detail_data[0] = "|"
                 if index < len(base_data):
                     total_dur = sum([data[2] for data in base_data])
-                    percent = 0.0 if total_dur < Constant.EPS else base_data[index][2] / total_dur
+                    percent = 0.0 if abs(total_dur) < Constant.EPS else base_data[index][2] / total_dur
                     dur_percent = "%.2f%%" % (percent * 100)
                     base_data[index][0] = f"{base_data[index][0]} ({dur_percent})"
                     base_detail_data[1:] = base_data[index]
                 if index < len(comparison_data):
                     total_dur = sum([data[2] for data in comparison_data])
-                    percent = 0.0 if total_dur < Constant.EPS else comparison_data[index][2] / total_dur
+                    percent = 0.0 if abs(total_dur) < Constant.EPS else comparison_data[index][2] / total_dur
                     dur_percent = "%.2f%%" % (percent * 100)
                     comparison_data[index][0] = f"{comparison_data[index][0]} ({dur_percent})"
                     comparison_detail_data[1:] = comparison_data[index]
diff --git a/profiler/compare_tools/profiling_analysis/gpu_parser.py b/profiler/compare_tools/profiling_analysis/gpu_parser.py
index 3b470a8d4abdf17842ca9c9c72205f386cafd435..4443562bd4edae71d30c0314ea22756a8d20b534 100644
--- a/profiler/compare_tools/profiling_analysis/gpu_parser.py
+++ b/profiler/compare_tools/profiling_analysis/gpu_parser.py
@@ -19,6 +19,7 @@ import pandas as pd
 
 import profiling_analysis.parser_helper as parser_helper
 from utils.file_reader import FileReader
+from utils.constant import Constant
 
 
 class OpTimeWarper:
@@ -134,7 +135,10 @@ class GpuProfilingParser:
 
         self.profiling_info.scheduling_time = self.profiling_info.e2e_time - all_op_time / 10 ** 6 - \
                                               self.profiling_info.communication_not_overlapped
-        self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time
+        if abs(self.profiling_info.e2e_time) < Constant.EPS:
+            self.profiling_info.scheduling_ratio = 0.0
+        else:
+            self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time
         self.parse_memory_reserved()
 
     def parse_e2e_time(self):
diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py
index a8725a1486c43cbcd77875285526b6d515c95a72..2c71b0dc4a5e10f5f40f70da618dd017cfc461f7 100644
--- a/profiler/compare_tools/profiling_analysis/npu_parser.py
+++ b/profiler/compare_tools/profiling_analysis/npu_parser.py
@@ -14,8 +14,8 @@
 # limitations under the License.
 
 import sys
-import pandas as pd
 from collections import defaultdict
+import pandas as pd
 import profiling_analysis.parser_helper as parser_helper
 from utils.file_reader import FileReader
 from common_func.path_manager import PathManager
diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py
index 34e4ecab49c7143d231b3b1e2b208fa2f93d8696..ef0287b35f862ca5bd807de498cc8684256d7c43 100644
--- a/profiler/compare_tools/utils/file_reader.py
+++ b/profiler/compare_tools/utils/file_reader.py
@@ -26,9 +26,9 @@ class FileReader:
         try:
             with open(file_path, "rt") as file:
                 json_data = json.loads(file.read())
-        except Exception:
+        except Exception as e:
             msg = f"Can't read file: {file_path}"
-            raise RuntimeError(msg)
+            raise RuntimeError(msg) from e
         return json_data
 
     @classmethod
@@ -51,9 +51,9 @@ class FileReader:
                 reader = csv.DictReader(csv_file)
                 for row in reader:
                     result_data.append(row)
-        except Exception:
+        except Exception as e:
             msg = f"Failed to read the file: {file_path}"
-            raise RuntimeError(msg)
+            raise RuntimeError(msg) from e
         return result_data
 
     @classmethod
diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py
index ceb24e6c310c838e336f2b9e6ede878a7a416a68..a94887ecc2f6a2b6069d031f0cfada2537f8cf46 100644
--- a/profiler/compare_tools/utils/profiling_parser.py
+++ b/profiler/compare_tools/utils/profiling_parser.py
@@ -205,7 +205,7 @@ class NPUProfilingParser(ProfilingParser):
                 match_dequeue_data = self._match_cann_memory_data(dequeue_data, ts_time)
                 if match_dequeue_data is not None:
                     correlation_id = match_dequeue_data.get("args", {}).get("correlation_id", "")
-                    ts = enqueue_dict[correlation_id].get("ts", 0)
+                    ts = enqueue_dict.get(correlation_id, {}).get("ts", 0)
                     self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), Constant.TS: ts,
                                               Constant.NAME: data.get(Constant.NAME, ""),
                                               Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)),
diff --git a/profiler/compare_tools/utils/tree_builder.py b/profiler/compare_tools/utils/tree_builder.py
index 4010ba0c8855054f5c445ae1f9b41d8ab287b6f6..b08aa6b9703e7b3cfce8db413ea6330659300cd5 100644
--- a/profiler/compare_tools/utils/tree_builder.py
+++ b/profiler/compare_tools/utils/tree_builder.py
@@ -1,4 +1,5 @@
 from queue import Queue
+from typing import Optional, Dict, List
 
 from utils.constant import Constant
 from utils.torch_op_node import TorchOpNode
@@ -21,7 +22,12 @@ class TreeBuilder:
         return root_node
 
     @classmethod
-    def update_tree_node(cls, root_node: TorchOpNode, flow_kernel_dict: dict = {}, memory_allocated_list: list = []):
+    def update_tree_node(
+        cls,
+        root_node: TorchOpNode,
+        flow_kernel_dict: Optional[Dict] = None,
+        memory_allocated_list: Optional[List] = None,
+    ):
         def set_kernel_helper(node_queue, ts, kernel_num, kernel_list):
             while not node_queue.empty():
                 tree_node = node_queue.get()
@@ -32,6 +38,9 @@ class TreeBuilder:
                 else:
                     tree_node.set_kernel_list(kernel_list)
 
+        flow_kernel_dict = {} if flow_kernel_dict is None else flow_kernel_dict
+        memory_allocated_list = [] if memory_allocated_list is None else memory_allocated_list
+
         if flow_kernel_dict:
             for ts, kernel_list in flow_kernel_dict.items():
                 matched_child_node = root_node.match_child_node(ts)