diff --git a/profiler/compare_tools/comparator/__init__.py b/profiler/compare_tools/comparator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/compare_tools/comparator/index_comparator.py b/profiler/compare_tools/comparator/index_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..d122e3ea3c1e0600c44b2c97949d247a4e6b92d2 --- /dev/null +++ b/profiler/compare_tools/comparator/index_comparator.py @@ -0,0 +1,44 @@ +import pandas as pd + +from utils.args_manager import ArgsManager +from utils.constant import Constant + + +class IndexComparator: + def __init__(self, args: any): + self._args = args + self._args_manager = ArgsManager() + self._base_profiling = self._args_manager.base_profiling + self._comparison_profiling = self._args_manager.comparison_profiling + + def compare(self) -> list: + base_data, comparison_data = [], [] + if not self._base_profiling.communication_data: + print(f"[warning] Can't find any communication op in the file: {self._base_profiling.json_path}") + for data in self._base_profiling.communication_data: + name_list = data.get("name", "").split("_") + if len(name_list) >= 2: + base_data.append([name_list[1].lower(), float(data.get("dur", 0))]) + if not base_data: + base_data = pd.DataFrame(base_data, columns=Constant.COLUMNS) + else: + base_df = pd.DataFrame(base_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + base_data = base_df.groupby(Constant.OP_KEY).agg(["count", "sum", "mean", "max", "min"]).reset_index() + base_data.columns = Constant.COLUMNS + if self._args.base_profiling_path == self._args.comparison_profiling_path: + comparison_data = [] + else: + if not self._comparison_profiling.communication_data: + print(f"[warning] Can't find any communication op in the file: {self._comparison_profiling.json_path}") + for data in self._comparison_profiling.communication_data: + name_list = data.get("name", 
"").split("_") + if len(name_list) >= 2: + comparison_data.append([name_list[1].lower(), float(data.get("dur", 0))]) + if not comparison_data: + comparison_data = pd.DataFrame(comparison_data, columns=Constant.COLUMNS) + else: + comparison_df = pd.DataFrame(comparison_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + comparison_data = comparison_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index() + comparison_data.columns = Constant.COLUMNS + return pd.merge(base_data, comparison_data, how="outer", on=Constant.OP_KEY) diff --git a/profiler/compare_tools/comparator/op_comparator.py b/profiler/compare_tools/comparator/op_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..f299463fe55660fb3334f6de792d887c44837e29 --- /dev/null +++ b/profiler/compare_tools/comparator/op_comparator.py @@ -0,0 +1,129 @@ +from collections import deque + +import numpy as np + +from utils.args_manager import ArgsManager +from utils.name_function import NameFunction +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class OpComparator: + def __init__(self, args: any): + self._args = args + self._args_manager = ArgsManager() + self._base_profiling = self._args_manager.base_profiling + self._comparison_profiling = self._args_manager.comparison_profiling + + def compare(self) -> list: + base_ops = self._get_top_layer_ops(self._base_profiling) + if self._args.base_profiling_path == self._args.comparison_profiling_path: + comparison_ops = [] + else: + comparison_ops = self._get_top_layer_ops(self._comparison_profiling) + if not base_ops and not comparison_ops: + return [] + name_func = NameFunction(self._args).get_name_func() + compare_result_data = self._matching_op(base_ops, comparison_ops, name_func) + if self._args.max_kernel_num is not None: + compare_result_data = self._drill_down(compare_result_data, name_func) + return compare_result_data + + + @classmethod + def 
_matching_op(cls, base_ops: list, comparison_ops: list, name_func: any) -> list: + if not comparison_ops: + result_data = [None] * len(base_ops) + for index in range(len(base_ops)): + result_data[index] = [base_ops[index], None] + return result_data + + result_data = [] + comparison_len, base_len = len(comparison_ops), len(base_ops) + dp = [[0] * (base_len + 1) for _ in range(comparison_len + 1)] + for comparison_index in range(1, comparison_len + 1): + for base_index in range(1, base_len + 1): + if name_func(base_ops[base_index - 1]) == name_func( + comparison_ops[comparison_index - 1]): + dp[comparison_index][base_index] = dp[comparison_index - 1][base_index - 1] + 1 + else: + dp[comparison_index][base_index] = max(dp[comparison_index][base_index - 1], + dp[comparison_index - 1][base_index]) + matched_op = [] + comparison_index, base_index = comparison_len, base_len + while comparison_index > 0 and base_index > 0: + if name_func(base_ops[base_index - 1]) == name_func( + comparison_ops[comparison_index - 1]): + matched_op.append([comparison_index - 1, base_index - 1]) + comparison_index -= 1 + base_index -= 1 + continue + if dp[comparison_index][base_index - 1] > dp[comparison_index - 1][base_index]: + base_index -= 1 + else: + comparison_index -= 1 + if not matched_op: + matched_base_index_list = [] + else: + matched_op.reverse() + matched_op = np.array(matched_op) + matched_base_index_list = list(matched_op[:, 1]) + curr_comparison_index = 0 + for base_index, base_api_node in enumerate(base_ops): + if base_index not in matched_base_index_list: + result_data.append([base_api_node, None]) + continue + matched_comparison_index = matched_op[matched_base_index_list.index(base_index), 0] + for comparison_index in range(curr_comparison_index, matched_comparison_index): + result_data.append([None, comparison_ops[comparison_index]]) + result_data.append([base_api_node, comparison_ops[matched_comparison_index]]) + curr_comparison_index = matched_comparison_index + 1 + if 
curr_comparison_index < len(comparison_ops): + for comparison_index in range(curr_comparison_index, len(comparison_ops)): + result_data.append([None, comparison_ops[comparison_index]]) + return result_data + + def _get_top_layer_ops(self, profiling_instance: any) -> any: + torch_op_data = profiling_instance.torch_op_data + if not torch_op_data: + print(f"[warning] Can't find any torch op in the file: {profiling_instance.json_path}") + root_node = TreeBuilder.build_tree(torch_op_data) + + kernel_dict, memory_list = {}, [] + if not self._args.disable_operator_compare: + kernel_dict = profiling_instance.kernel_dict + if not kernel_dict: + print(f"[warning] Can't find any flow event in the file: {profiling_instance.json_path}") + if not self._args.disable_memory_compare: + memory_list = profiling_instance.memory_list + if not memory_list: + print(f"[warning] Can't find any memory event in the file: {profiling_instance.file_path}") + + TreeBuilder.update_tree_node(root_node, kernel_dict, memory_list) + level1_child_nodes = root_node.child_nodes + result_data = [] + for level1_node in level1_child_nodes: + if level1_node.is_step_profiler(): + result_data.extend(level1_node.child_nodes) + else: + result_data.append(level1_node) + return result_data + + def _drill_down(self, compare_result_data: list, name_func: any) -> list: + drill_down_result = [] + compare_result_data.reverse() + op_deque = deque(compare_result_data) + while op_deque: + match_data = op_deque.pop() + base_op = match_data[0] if match_data[0] else TorchOpNode() + comparison_op = match_data[1] if match_data[1] else TorchOpNode() + if not base_op.child_nodes or not comparison_op.child_nodes: + drill_down_result.append(match_data) + continue + if max(base_op.kernel_num, comparison_op.kernel_num) <= self._args.max_kernel_num: + drill_down_result.append(match_data) + continue + match_list = self._matching_op(base_op.child_nodes, comparison_op.child_nodes, name_func) + match_list.reverse() + for data in 
match_list: + op_deque.append(data) diff --git a/profiler/compare_tools/generation/__init__.py b/profiler/compare_tools/generation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/compare_tools/generation/abstract_cmp.py b/profiler/compare_tools/generation/abstract_cmp.py new file mode 100644 index 0000000000000000000000000000000000000000..20070600d20de390915ab4d25d46dc89089bed94 --- /dev/null +++ b/profiler/compare_tools/generation/abstract_cmp.py @@ -0,0 +1,7 @@ +from abc import ABCMeta, abstractmethod + + +class AbstractCMP(metaclass=ABCMeta): + @abstractmethod + def create_sheet(self): + raise NotImplementedError diff --git a/profiler/compare_tools/generation/communication_comparison_generator.py b/profiler/compare_tools/generation/communication_comparison_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..7838c45aec59e6b794ec26d12f65336a22ee681c --- /dev/null +++ b/profiler/compare_tools/generation/communication_comparison_generator.py @@ -0,0 +1,141 @@ +import math +import pandas as pd + +from openpyxl.styles import Font, PatternFill, Alignment +from openpyxl.workbook import Workbook +from pandas import DataFrame + +from utils.args_manager import ArgsManager +from utils.constant import Constant + + +class CommunicationComparisonGenerator: + def __init__(self, args: any, compare_result_data: DataFrame): + self._args = args + self._args_manager = ArgsManager() + self._compare_result_data = compare_result_data + + def create_sheet(self, workbook: Workbook): + ws = workbook.create_sheet("CommunicationCompare", 0) + ws.sheet_properties.tabColor = Constant.YELLOW_COLOR + # write headers + base_headers = Constant.CMP_COMMUNICATION_HEADER + comparison_headers = Constant.CMP_COMMUNICATION_HEADER + headers = base_headers + comparison_headers + [Constant.DIFF] + base_trace_start_column = 0 + comparison_trace_start_column = len(base_headers) + 
diff_start_column = len(base_headers) + len(comparison_headers) + + for col_index in range(len(headers)): + ws.cell(row=1, column=col_index + 1).border = Constant.BORDER + ws.cell(row=1, column=col_index + 1).font = Font(name='Arial') + ws.cell(row=1, column=col_index + 1).fill = Constant.HEADERS_FILL + ws.cell(row=2, column=col_index + 1).border = Constant.BORDER + ws.cell(row=2, column=col_index + 1).font = Font(name='Arial', bold=True) + ws.cell(row=2, column=col_index + 1).fill = Constant.HEADERS_FILL + header_name = headers[col_index] + if col_index < comparison_trace_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.BASE_PROFILING + elif col_index < diff_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.COMPARISON_PROFILING + else: + ws.cell(row=1, column=col_index + 1).value = header_name + ws.cell(row=2, column=col_index + 1).value = header_name + dim = ws.cell(row=2, column=col_index + 1).coordinate + ws.column_dimensions[dim[0]].width = Constant.COLUMN_WIDTH_CLL.get(header_name) + ws.merge_cells(start_row=1, start_column=base_trace_start_column + 1, + end_row=1, end_column=comparison_trace_start_column) + ws.merge_cells(start_row=1, start_column=comparison_trace_start_column + 1, + end_row=1, end_column=diff_start_column) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF) + 1, + end_row=2, end_column=headers.index(Constant.DIFF) + 1) + + # write lines + row_index = 3 + for _, row in self._compare_result_data.iterrows(): + # write summary lines + base_name = Constant.NA if math.isnan(row[Constant.BASE_CALLS]) else row[Constant.OP_KEY] + comparison_name = Constant.NA if math.isnan(row[Constant.COMPARISON_CALLS]) else row[Constant.OP_KEY] + if math.isnan(row[Constant.BASE_SUM]) or math.isnan(row[Constant.COMPARISON_SUM]) or row[ + Constant.BASE_SUM] == 0: + diff = Constant.NA + else: + diff = (row[Constant.COMPARISON_SUM] - row[Constant.BASE_SUM]) / row[Constant.BASE_SUM] + row_data = [base_name, 
Constant.NA, row[Constant.BASE_CALLS], row[Constant.BASE_SUM], + row[Constant.BASE_AVG], row[Constant.BASE_MAX], row[Constant.BASE_MIN], comparison_name, + Constant.NA, row[Constant.COMPARISON_CALLS], row[Constant.COMPARISON_SUM], + row[Constant.COMPARISON_AVG], row[Constant.COMPARISON_MAX], row[Constant.COMPARISON_MIN], diff] + for index in range(len(headers)): + if headers[index] in ( + Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, + Constant.MIN_DURATION): + ws.cell(row=row_index, column=index + 1).number_format = '0.00' + if headers[index] == Constant.DIFF: + ws.cell(row=row_index, column=index + 1).number_format = '0.00%' + if diff != Constant.NA and diff < 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', + color=Constant.GREEN_COLOR) + elif diff != Constant.NA and diff >= 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) + else: + bold = headers[index] == Constant.COMMUNICAT_OP + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=bold) + value = row_data[index] + if value != Constant.NA: + ws.cell(row=row_index, column=index + 1).value = value + ws.cell(row=row_index, column=index + 1).border = Constant.BORDER + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.SUMMARY_LINE_COLOR) + row_index += 1 + + # write detail lines + base_task_list = self._args_manager.base_profiling.communication_task_data.get(base_name, []) + comparison_task_list = self._args_manager.comparison_profiling.communication_task_data.get(comparison_name, + []) + if base_task_list: + base_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in base_task_list] + base_df = pd.DataFrame(base_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + base_data = base_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() + else: + base_data = [] + if 
comparison_task_list: + comparison_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in comparison_task_list] + comparison_df = pd.DataFrame(comparison_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + comparison_data = comparison_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() + else: + comparison_data = [] + + for index in range(max(len(base_data), len(comparison_data))): + base_detail_data, comparison_detail_data = [Constant.NA] * len(base_headers), \ + [Constant.NA] * len(comparison_headers) + base_detail_data[0] = "|" + comparison_detail_data[0] = "|" + if index < len(base_data): + total_dur = sum([data[2] for data in base_data]) + dur_percent = "%.2f%%" % (base_data[index][2] / total_dur * 100) + base_data[index][0] = f"{base_data[index][0]} ({dur_percent})" + base_detail_data[1:] = base_data[index] + if index < len(comparison_data): + total_dur = sum([data[2] for data in comparison_data]) + dur_percent = "%.2f%%" % (comparison_data[index][2] / total_dur * 100) + comparison_data[index][0] = f"{comparison_data[index][0]} ({dur_percent})" + comparison_detail_data[1:] = comparison_data[index] + + detail_data = base_detail_data + comparison_detail_data + [Constant.NA] + for colum_index in range(len(headers)): + if headers[colum_index] in ( + Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, + Constant.MIN_DURATION): + ws.cell(row=row_index, column=colum_index + 1) .number_format = '0.00' + value = detail_data[colum_index] + if value != Constant.NA: + ws.cell(row=row_index, column=colum_index + 1).value = value + bold = headers[colum_index] == Constant.OP_NAME + ws.cell(row=row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=row_index, column=colum_index + 1).border = Constant.BORDER + if headers[colum_index] == Constant.COMMUNICAT_OP: + ws.cell(row=row_index, column=colum_index + 1).alignment = 
Alignment(horizontal="center", + vertical="center") + row_index += 1 diff --git a/profiler/compare_tools/generation/comparison_generator.py b/profiler/compare_tools/generation/comparison_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..f415262cd239cc282603520f5caaaf3c4819e2bd --- /dev/null +++ b/profiler/compare_tools/generation/comparison_generator.py @@ -0,0 +1,35 @@ +import os + +from openpyxl.workbook import Workbook + +from comparator.index_comparator import IndexComparator +from comparator.op_comparator import OpComparator +from generation.communication_comparison_generator import CommunicationComparisonGenerator +from generation.op_comparison_generator import OpComparisonGenerator +from utils.constant import Constant +from utils.args_manager import ArgsManager + + +class ComparisonGenerator: + def __init__(self, args: any): + self._args = args + self._args_manager = ArgsManager() + + def create_excel(self, file_path: str): + wb = Workbook() + if not self._args.disable_operator_compare or not self._args.disable_memory_compare: + op_compare_result = OpComparator(self._args).compare() + if op_compare_result: + if not self._args.disable_operator_compare: + OpComparisonGenerator(self._args, op_compare_result, Constant.OPERATOR_COMPARE).create_sheet(wb) + if not self._args.disable_memory_compare: + OpComparisonGenerator(self._args, op_compare_result, Constant.MEMORY_COMPARE).create_sheet(wb) + + if not self._args.disable_communication_compare: + index_compare_result = IndexComparator(self._args).compare() + if not index_compare_result.empty: + CommunicationComparisonGenerator(self._args, index_compare_result).create_sheet(wb) + + wb.save(file_path) + wb.close() + os.chmod(file_path, Constant.FILE_AUTHORITY) diff --git a/profiler/compare_tools/generation/op_comparison_generator.py b/profiler/compare_tools/generation/op_comparison_generator.py new file mode 100644 index 
0000000000000000000000000000000000000000..980b683896f79253e12de15c45975f30048c5e7f --- /dev/null +++ b/profiler/compare_tools/generation/op_comparison_generator.py @@ -0,0 +1,158 @@ +import copy + +from openpyxl.styles import Font, PatternFill, Alignment +from openpyxl.workbook import Workbook + +from utils.args_manager import ArgsManager +from utils.constant import Constant +from utils.tree_builder import TreeBuilder + + +class OpComparisonGenerator: + def __init__(self, args: any, compare_result_data: list, compare_type: str): + self._args = args + self._compare_result_data = compare_result_data + self._compare_type = compare_type + self._base_headers = [] + self._comparison_headers = [] + self.update_headers() + + def update_headers(self): + base_profiling_type = ArgsManager().base_profiling_type + comparison_profiling_type = ArgsManager().comparison_profiling_type + if self._compare_type == Constant.MEMORY_COMPARE: + self._base_headers = Constant.CMP_MEMORY_HEADER + self._comparison_headers = Constant.CMP_MEMORY_HEADER + elif self._compare_type == Constant.OPERATOR_COMPARE: + self._base_headers = Constant.GPU_CMP_KERNEL_HEADER if base_profiling_type == Constant.GPU else \ + Constant.NPU_CMP_KERNEL_HEADER + self._comparison_headers = Constant.GPU_CMP_KERNEL_HEADER if comparison_profiling_type == Constant.GPU \ + else Constant.NPU_CMP_KERNEL_HEADER + + def create_sheet(self, workbook: Workbook): + ws = workbook.create_sheet(self._compare_type, 0) + ws.sheet_properties.tabColor = Constant.YELLOW_COLOR + # write headers + headers = self._base_headers + self._comparison_headers + [Constant.DIFF, Constant.OP_NAME_FILTER, + Constant.DIFF_FILTER] + + base_trace_start_column = 0 + comparison_trace_start_column = len(self._base_headers) + diff_start_column = len(self._base_headers) + len(self._comparison_headers) + + for col_index in range(len(headers)): + ws.cell(row=1, column=col_index + 1).border = Constant.BORDER + ws.cell(row=1, column=col_index + 1).font = 
Font(name='Arial') + ws.cell(row=1, column=col_index + 1).fill = Constant.HEADERS_FILL + ws.cell(row=2, column=col_index + 1).border = Constant.BORDER + ws.cell(row=2, column=col_index + 1).font = Font(name='Arial', bold=True) + ws.cell(row=2, column=col_index + 1).fill = Constant.HEADERS_FILL + header_name = headers[col_index] + if col_index < comparison_trace_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.BASE_PROFILING + elif col_index < diff_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.COMPARISON_PROFILING + else: + ws.cell(row=1, column=col_index + 1).value = header_name + ws.cell(row=2, column=col_index + 1).value = header_name + dim = ws.cell(row=2, column=col_index + 1).coordinate + width = Constant.COLUMN_WIDTH.get(header_name) if Constant.COLUMN_WIDTH.get( + header_name) else Constant.DEFAULT_WIDTH + ws.column_dimensions[dim[0]].width = width + ws.merge_cells(start_row=1, start_column=base_trace_start_column + 1, + end_row=1, end_column=comparison_trace_start_column) + ws.merge_cells(start_row=1, start_column=comparison_trace_start_column + 1, + end_row=1, end_column=diff_start_column) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF) + 1, + end_row=2, end_column=headers.index(Constant.DIFF) + 1) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.OP_NAME_FILTER) + 1, + end_row=2, end_column=headers.index(Constant.OP_NAME_FILTER) + 1) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF_FILTER) + 1, + end_row=2, end_column=headers.index(Constant.DIFF_FILTER) + 1) + + # write lines + row_index = 3 + for data in self._compare_result_data: + # write summary lines + base_event_list = TreeBuilder.get_total_compare_event(data[0], self._compare_type) if data[0] else [] + comparison_event_list = TreeBuilder.get_total_compare_event(data[1], self._compare_type) if data[1] else [] + base_summary_data, comparison_summary_data = [Constant.NA] * 
len(self._base_headers), \ + [Constant.NA] * len(self._comparison_headers) + if data[0]: + base_summary_data[0] = data[0].name + base_summary_data[1] = data[0].input_shape + base_summary_data[2] = data[0].input_type + base_summary_data[3] = sum( + [x.compare_index for x in base_event_list]) if base_event_list else Constant.NA + if data[1]: + comparison_summary_data[0] = data[1].name + comparison_summary_data[1] = data[1].input_shape + comparison_summary_data[2] = data[1].input_type + comparison_summary_data[3] = sum( + [x.compare_index for x in comparison_event_list]) if comparison_event_list else Constant.NA + if base_event_list and comparison_event_list and base_summary_data[3]: + diff = (comparison_summary_data[3] - base_summary_data[3]) / base_summary_data[3] + else: + diff = Constant.NA + op_name = data[0].name if data[0] else data[1].name + + summary_data = base_summary_data + comparison_summary_data + [diff, op_name, diff] + for index in range(len(headers)): + value = summary_data[index] + if headers[index] == Constant.DIFF: + ws.cell(row=row_index, column=index + 1).number_format = '0.00%' + if value != Constant.NA and value < 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.GREEN_COLOR) + elif value != Constant.NA and value >= 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) + if headers[index] == Constant.DIFF_FILTER: + if value != Constant.NA and value < 0: + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.GREEN_COLOR) + elif value != Constant.NA and value >= 0: + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor=Constant.RED_COLOR) + elif headers[index] != Constant.OP_NAME_FILTER: + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.SUMMARY_LINE_COLOR) + + if value != Constant.NA: + ws.cell(row=row_index, column=index + 1).value = value + bold = headers[index] == 
Constant.OP_NAME + if headers[index] != Constant.DIFF: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=row_index, column=index + 1).border = Constant.BORDER + row_index += 1 + + # write detail lines + base_event_num, comparison_event_num = len(base_event_list), len(comparison_event_list) + for index in range(max(base_event_num, comparison_event_num)): + base_detail_data, comparison_detail_data = [Constant.NA] * len(self._base_headers), \ + [Constant.NA] * len(self._comparison_headers) + base_detail_data[0] = "|" + comparison_detail_data[0] = "|" + if index < base_event_num: + base_event = base_event_list[index] + base_detail_data[1:] = base_event.get_record() + if index < comparison_event_num: + comparison_event = comparison_event_list[index] + comparison_detail_data[1:] = comparison_event.get_record() + + detail_data = base_detail_data + comparison_detail_data + [Constant.NA, op_name, Constant.NA] + for colum_index in range(len(headers)): + value = detail_data[colum_index] + if value != Constant.NA: + ws.cell(row=row_index, column=colum_index + 1).value = value + bold = headers[colum_index] == Constant.OP_NAME + ws.cell(row=row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=row_index, column=colum_index + 1).border = Constant.BORDER + if headers[colum_index] == Constant.DIFF_FILTER: + if diff != Constant.NA and diff < 0: + ws.cell(row=row_index, column=colum_index + 1).fill = PatternFill("solid", + fgColor=Constant.GREEN_COLOR) + elif diff != Constant.NA and diff >= 0: + ws.cell(row=row_index, column=colum_index + 1).fill = PatternFill("solid", + fgColor=Constant.RED_COLOR) + if headers[colum_index] == Constant.OP_NAME: + ws.cell(row=row_index, column=colum_index + 1).alignment = Alignment(horizontal="center", + vertical="center") + row_index += 1 diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py new file mode 100644 index 
0000000000000000000000000000000000000000..e2c47ef6c58129c2ba3db723e99cb073c47af11b --- /dev/null +++ b/profiler/compare_tools/performance_compare.py @@ -0,0 +1,43 @@ +import argparse +import ast +import datetime +import os.path +import sys +import time + +from generation.comparison_generator import ComparisonGenerator +from utils.args_manager import ArgsManager + + +def main(): + sys.path.append(os.path.dirname(__file__)) + parser = argparse.ArgumentParser(description="Compare trace of GPU and NPU") + parser.add_argument("base_profiling_path", type=str, default='', help="base profiling file path") + parser.add_argument("comparison_profiling_path", type=str, default='', help="comparison profiling file path") + parser.add_argument("--disable_operator_compare", default=False, action='store_true', + help="do not compare operator execution time") + parser.add_argument("--disable_memory_compare", default=False, action='store_true', + help="do not compare memory usage by operator dimensions") + parser.add_argument("--disable_communication_compare", default=False, action='store_true', + help="do not compare communication operator execution time") + parser.add_argument("--output_path", type=str, default='', help="性能数据比对结果的存放路径") + parser.add_argument("--max_kernel_num", type=int, help="每个torch op的kernel数量限制") + parser.add_argument("--op_name_map", type=ast.literal_eval, default={}, + help="配置GPU OP与NPU OP等价的名称映射关系,以字典的形式传入") + parser.add_argument("--use_input_shape", default=False, action='store_true', help="使用input shape作为匹配信息") + parser.add_argument("--gpu_flow_cat", type=str, default='', help="gpu flow event的分类标识") + args = parser.parse_args() + + ArgsManager().init(args) + dir_path = args.output_path if args.output_path else "./" + file_name = "performance_comparison_result_{}.xlsx".format(time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) + result_file_path = os.path.join(dir_path, file_name) + + ComparisonGenerator(args).create_excel(result_file_path) + + +if 
__name__ == "__main__": + start_time = datetime.datetime.now() + main() + end_time = datetime.datetime.now() + print(f'The comparison task has been completed in a total time of {end_time - start_time}') diff --git a/profiler/compare_tools/torch_op_compare.py b/profiler/compare_tools/torch_op_compare.py deleted file mode 100644 index fbbcba95e3859eb0a01746f3e888751705d9f514..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/torch_op_compare.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import ast -import copy -import json -import os.path -import time -from queue import Queue -import numpy as np - -from openpyxl.styles import PatternFill, Font, Border, Side -from openpyxl.workbook import Workbook - -GPU = 0 -NPU = 1 -NA = 'N/A' -LIMIT_KERNEL = 3 -OP_NAME = 'Operator Name' -INPUT_SHAPE = 'Input Shape' -INPUT_TYPE = 'Input Type' -KERNEL_NAME = 'Kernel Name' -DEVICE_DUR = 'Device Duration(us)' -TASK_ID = 'Task Id' -KERNEL_TYPE = 'Kernel Type' -DIFF = 'DIFF: (sum(Trace2 Duration)-sum(Trace1 Duration))/sum(Trace1 Duration)' -OP_NAME_FILTER = 'Operator Name Filter' -DIFF_FILTER = 'DIFF Filter' -BASE_TRACE = 'Base Trace' -COMPARISON_TRACE = 'Comparison Trace' -BASE_TRACE_TYPE = None -COMPARISON_TRACE_TYPE = None -BASE_TYPE = 1 -COMPARISON_TYPE = 2 -GPU_HEADER = [OP_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_NAME, DEVICE_DUR] -NPU_HEADER = [OP_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_NAME, TASK_ID, KERNEL_TYPE, DEVICE_DUR] -FILL_DICT = { - BASE_TYPE: PatternFill("solid", fgColor='003366FF'), COMPARISON_TYPE: PatternFill("solid", fgColor='0033CCCC'), - DIFF: PatternFill("solid", fgColor='00FF0000'), OP_NAME_FILTER: PatternFill("solid", fgColor='00FFFF00'), - DIFF_FILTER: PatternFill("solid", fgColor='00FFFF00') -} -COLUMN_WIDTH = {OP_NAME: 50, INPUT_SHAPE: 25, INPUT_TYPE: 25, KERNEL_NAME: 25, DEVICE_DUR: 25, - TASK_ID: 20, KERNEL_TYPE: 25, DIFF: 25, OP_NAME_FILTER: 25, DIFF_FILTER: 25} -BORDER = Border(top=Side(border_style="thin", color='00000000'), - left=Side(border_style="thin", color='00000000'), - right=Side(border_style="thin", color='00000000'), - bottom=Side(border_style="thin", color='00000000')) - - -class TorchOpNode: - def __init__(self, event=None, parent_node=None): - self._event = event - self._parent_node = parent_node - self._child_nodes = [] - self._kernel_list = [] - self._kernel_num = 0 - - @property - def start_time(self): - return self._event.get("ts", 0) - - @property - def end_time(self): - return self._event.get("ts", 
0) + self._event.get("dur", 0) - - @property - def name(self): - return str(self._event.get("name", NA)) - - @property - def input_shape(self): - return str(self._event.get("args", {}).get("Input Dims", NA)) - - @property - def input_type(self): - return str(self._event.get("args", {}).get("Input type", NA)) - - @property - def parent(self): - return self._parent_node - - @property - def child_nodes(self): - return self._child_nodes - - @property - def kernel_list(self): - return self._kernel_list - - @property - def kernel_num(self): - return self._kernel_num - - def add_child_node(self, child_node): - self._child_nodes.append(child_node) - - def set_kernel_list(self, kernel_list: list): - self._kernel_list = kernel_list - - def add_kernel_num(self, kernel_num: int): - self._kernel_num += kernel_num - - def is_step_profiler(self) -> bool: - return self.name.find("ProfilerStep#") != -1 - - -class TreeBuilder: - @classmethod - def build_tree(cls, event_list: list, flow_kernel_dict: dict) -> TorchOpNode: - root_node = TorchOpNode() - event_list.sort(key=lambda x: x.get("ts", 0)) - last_node = root_node - for event in event_list: - kernel_list = flow_kernel_dict.get(event.get("ts", 0), []) - while last_node: - if last_node == root_node or event.get("ts", 0) < last_node.end_time: - tree_node = TorchOpNode(event, last_node) - last_node.add_child_node(tree_node) - if kernel_list: - tree_node.set_kernel_list(kernel_list) - last_node = tree_node - break - last_node = last_node.parent - return root_node - - @classmethod - def mark_kernel_num(cls, root_node: TorchOpNode, flow_kernel_dict: dict): - for ts, kernel_list in flow_kernel_dict.items(): - curr_node = root_node - while curr_node.child_nodes: - for node in curr_node.child_nodes: - if node.start_time <= ts <= node.end_time: - node.add_kernel_num(len(kernel_list)) - curr_node = node - break - - @classmethod - def get_total_kernels(cls, root_node: TorchOpNode) -> list: - result_list = [] - node_queue = Queue() - for 
child_node in root_node.child_nodes: - node_queue.put(child_node) - while not node_queue.empty(): - tree_node = node_queue.get() - result_list.extend(tree_node.kernel_list) - for child_node in tree_node.child_nodes: - node_queue.put(child_node) - return result_list - - -def read_json_file(file_path: str, trace_type: int) -> any: - event_list = [] - flow_kernel_dict = {} - if not os.path.isfile(file_path): - raise RuntimeError(f"File not exists: {file_path}") - try: - with open(file_path, "rt") as file: - json_data = json.loads(file.read()) - except Exception: - raise RuntimeError(f"Can't read file: {file_path}") - flow_start_dict, flow_end_dict, event_dict = {}, {}, {} - flow_cat = ("async_gpu", "ac2g", "async_npu") - if trace_type == BASE_TYPE: - global BASE_TRACE_TYPE - BASE_TRACE_TYPE = GPU if isinstance(json_data, dict) else NPU - _type = BASE_TRACE_TYPE - else: - global COMPARISON_TRACE_TYPE - COMPARISON_TRACE_TYPE = GPU if isinstance(json_data, dict) else NPU - _type = COMPARISON_TRACE_TYPE - total_events = json_data.get("traceEvents", []) if _type == GPU else json_data - for event in total_events: - if event.get("cat") == "cpu_op" or event.get("cat") in ("Runtime", "cuda_runtime"): - event_list.append(event) - elif event.get("cat") in flow_cat and event.get("ph") == "s": - flow_start_dict[event.get("id")] = event - elif event.get("cat") in flow_cat and event.get("ph") == "f": - flow_end_dict[event.get("id")] = event - elif _type == GPU and event.get("cat", "").capitalize() == "Kernel".capitalize(): - event_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event - elif _type == NPU and event.get("ph") != "f": - event_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event - - for flow_id, start_flow in flow_start_dict.items(): - end_flow = flow_end_dict.get(flow_id) - if end_flow is None: - continue - kernel_event = event_dict.get("{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), 
end_flow.get("ts"))) - if kernel_event is None: - continue - flow_kernel_dict.setdefault(start_flow.get("ts"), []).append(kernel_event) - return event_list, flow_kernel_dict - - -def get_top_layer_apis(file_path: str, trace_type: int, max_kernel_num: int) -> any: - event_list, flow_kernel_dict = read_json_file(file_path, trace_type) - root_node = TreeBuilder.build_tree(event_list, flow_kernel_dict) - if max_kernel_num is not None: - TreeBuilder.mark_kernel_num(root_node, flow_kernel_dict) - level1_child_nodes = root_node.child_nodes - if not level1_child_nodes: - raise RuntimeError(f"Can't find any torch op in the file: {file_path}") - result_data = [] - for level1_node in level1_child_nodes: - if level1_node.is_step_profiler(): - result_data.extend(level1_node.child_nodes) - else: - result_data.append(level1_node) - return result_data - - -def compare(base_top_layer_apis: list, comparison_top_layer_apis: list, op_name_map: dict) -> list: - result_data = [] - comparison_len, base_len = len(comparison_top_layer_apis), len(base_top_layer_apis) - dp = [[0] * (base_len + 1) for _ in range(comparison_len + 1)] - for comparison_index in range(1, comparison_len + 1): - for base_index in range(1, base_len + 1): - base_name = base_top_layer_apis[base_index - 1].name - comparison_name = comparison_top_layer_apis[comparison_index - 1].name - if op_name_map.get(comparison_name, comparison_name) == op_name_map.get(base_name, base_name): - dp[comparison_index][base_index] = dp[comparison_index - 1][base_index - 1] + 1 - else: - dp[comparison_index][base_index] = max(dp[comparison_index][base_index - 1], - dp[comparison_index - 1][base_index]) - matched_op = [] - comparison_index, base_index = comparison_len, base_len - while comparison_index > 0 and base_index > 0: - base_name = base_top_layer_apis[base_index - 1].name - comparison_name = comparison_top_layer_apis[comparison_index - 1].name - if op_name_map.get(comparison_name, comparison_name) == op_name_map.get(base_name, 
base_name): - matched_op.append([comparison_index - 1, base_index - 1]) - comparison_index -= 1 - base_index -= 1 - continue - if dp[comparison_index][base_index - 1] > dp[comparison_index - 1][base_index]: - base_index -= 1 - else: - comparison_index -= 1 - if not matched_op: - matched_base_index_list = [] - else: - matched_op.reverse() - matched_op = np.array(matched_op) - matched_base_index_list = list(matched_op[:, 1]) - curr_comparison_index = 0 - for base_index, base_api_node in enumerate(base_top_layer_apis): - if base_index not in matched_base_index_list: - result_data.append([base_api_node, None]) - continue - matched_comparison_index = matched_op[matched_base_index_list.index(base_index), 0] - for comparison_index in range(curr_comparison_index, matched_comparison_index): - result_data.append([None, comparison_top_layer_apis[comparison_index]]) - result_data.append([base_api_node, comparison_top_layer_apis[matched_comparison_index]]) - curr_comparison_index = matched_comparison_index + 1 - if curr_comparison_index < len(comparison_top_layer_apis): - for comparison_index in range(curr_comparison_index, len(comparison_top_layer_apis)): - result_data.append([None, comparison_top_layer_apis[comparison_index]]) - return result_data - - -def create_data(base_api_node: TorchOpNode, comparison_api_node: TorchOpNode) -> list: - result_data = [] - base_kernel_list = TreeBuilder.get_total_kernels(base_api_node) if base_api_node else [] - comparison_kernel_list = TreeBuilder.get_total_kernels(comparison_api_node) if comparison_api_node else [] - if not base_kernel_list or not comparison_kernel_list: - diff = NA - else: - base_total_dur = sum([kernel.get("dur", 0) for kernel in base_kernel_list]) - comparison_total_dur = sum([kernel.get("dur", 0) for kernel in comparison_kernel_list]) - diff = (comparison_total_dur - base_total_dur) / base_total_dur - op_name = base_api_node.name if base_api_node else comparison_api_node.name - base_kernel_num, comparison_kernel_num = 
len(base_kernel_list), len(comparison_kernel_list) - base_data = [NA] * len(GPU_HEADER) if BASE_TRACE_TYPE == GPU else [NA] * len(NPU_HEADER) - if base_api_node: - base_data[0] = base_api_node.name - base_data[1] = base_api_node.input_shape - base_data[2] = base_api_node.input_type - comparison_data = [NA] * len(GPU_HEADER) if COMPARISON_TRACE_TYPE == GPU else [NA] * len(NPU_HEADER) - if comparison_api_node: - comparison_data[0] = comparison_api_node.name - comparison_data[1] = comparison_api_node.input_shape - comparison_data[2] = comparison_api_node.input_type - if base_kernel_num == 0 and comparison_kernel_num == 0: - data = base_data + comparison_data + [diff, op_name] - result_data.append(data) - return result_data - for index in range(max(base_kernel_num, comparison_kernel_num)): - base_row_data, comparison_row_data = copy.deepcopy(base_data), copy.deepcopy(comparison_data) - if index < base_kernel_num: - base_kernel = base_kernel_list[index] - if BASE_TRACE_TYPE == GPU: - base_row_data[3] = base_kernel.get("name") - base_row_data[4] = base_kernel.get("dur") - else: - base_row_data[3] = base_kernel.get("name") - base_row_data[4] = base_kernel.get("args", {}).get("Task Id") - base_row_data[5] = base_kernel.get("args", {}).get("Task Type") - base_row_data[6] = base_kernel.get("dur") - if index < comparison_kernel_num: - comparison_kernel = comparison_kernel_list[index] - if COMPARISON_TRACE_TYPE == GPU: - comparison_row_data[3] = comparison_kernel.get("name") - comparison_row_data[4] = comparison_kernel.get("dur") - else: - comparison_row_data[3] = comparison_kernel.get("name") - comparison_row_data[4] = comparison_kernel.get("args", {}).get("Task Id") - comparison_row_data[5] = comparison_kernel.get("args", {}).get("Task Type") - comparison_row_data[6] = comparison_kernel.get("dur") - data = base_row_data + comparison_row_data + [diff, op_name] - result_data.append(data) - return result_data - - -def drill_down(compare_result_data: list, max_kernel_num: int, 
op_name_map: dict) -> list: - result_data = [] - for data in compare_result_data: - base_api = data[0] if data[0] else TorchOpNode() - comparison_api = data[1] if data[1] else TorchOpNode() - if max(base_api.kernel_num, comparison_api.kernel_num) <= max_kernel_num: - result_data.append(data) - continue - result_data.extend(compare(base_api.child_nodes, comparison_api.child_nodes, op_name_map)) - return result_data - - -def have_to_drill_down(compare_result_data: list, max_kernel_num: int) -> bool: - for data in compare_result_data: - base_api = data[0] if data[0] else TorchOpNode() - comparison_api = data[1] if data[1] else TorchOpNode() - if max(base_api.kernel_num, comparison_api.kernel_num) > max_kernel_num: - return True - return False - - -def main(): - global BASE_TRACE, COMPARISON_TRACE - parser = argparse.ArgumentParser(description="Compare trace of GPU and NPU") - parser.add_argument("base_trace_path", help="base trace file path") - parser.add_argument("comparison_trace_path", help="comparison trace file path") - parser.add_argument("--output_path", help="性能数据比对结果的存放路径") - parser.add_argument("--max_kernel_num", type=int, help="每个torch op的kernel数量限制") - parser.add_argument("--op_name_map", type=ast.literal_eval, default={}, - help="配置GPU OP与NPU OP等价的名称映射关系,以字典的形式传入") - args = parser.parse_args() - if args.max_kernel_num is not None and args.max_kernel_num <= LIMIT_KERNEL: - raise RuntimeError(f"Invalid param, --max_kernel_num has to be greater than {LIMIT_KERNEL}") - if not isinstance(args.op_name_map, dict): - raise RuntimeError("Invalid param, --op_name_map must be dict, for example: --op_name_map={'name1':'name2'}") - base_top_layer_apis = get_top_layer_apis(args.base_trace_path, BASE_TYPE, args.max_kernel_num) - if BASE_TRACE_TYPE == GPU: - BASE_TRACE += ' [GPU] : ' + os.path.basename(args.base_trace_path) - else: - BASE_TRACE += ' [NPU] : ' + os.path.basename(args.base_trace_path) - comparison_top_layer_apis = 
get_top_layer_apis(args.comparison_trace_path, COMPARISON_TYPE, args.max_kernel_num) - if COMPARISON_TRACE_TYPE == GPU: - COMPARISON_TRACE += ' [GPU] : ' + os.path.basename(args.comparison_trace_path) - else: - COMPARISON_TRACE += ' [NPU] : ' + os.path.basename(args.comparison_trace_path) - compare_result_data = compare(base_top_layer_apis, comparison_top_layer_apis, args.op_name_map) - - if args.max_kernel_num is not None: - while have_to_drill_down(compare_result_data, args.max_kernel_num): - compare_result_data = drill_down(compare_result_data, args.max_kernel_num, args.op_name_map) - - dir_path = args.output_path if args.output_path else "./" - file_name = "torch_op_compare_{}.xlsx".format(time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) - result_file_path = os.path.join(dir_path, file_name) - - wb = Workbook() - ws = wb.create_sheet("CompareResult", 0) - ws.sheet_properties.tabColor = '00CED1' - # write headers - base_trace_headers = GPU_HEADER if BASE_TRACE_TYPE == GPU else NPU_HEADER - comparison_trace_headers = GPU_HEADER if COMPARISON_TRACE_TYPE == GPU else NPU_HEADER - headers = base_trace_headers + comparison_trace_headers + [DIFF, OP_NAME_FILTER, DIFF_FILTER] - base_trace_start_column = 0 - comparison_trace_start_column = len(base_trace_headers) - diff_start_column = len(base_trace_headers) + len(comparison_trace_headers) - - for col_index in range(len(headers)): - ws.cell(row=1, column=col_index + 1).border = BORDER - ws.cell(row=1, column=col_index + 1).font = Font(name='Arial', bold=True) - ws.cell(row=2, column=col_index + 1).border = BORDER - ws.cell(row=2, column=col_index + 1).font = Font(name='Arial', bold=True) - header_name = headers[col_index] - if col_index < comparison_trace_start_column: - ws.cell(row=1, column=col_index + 1).value = BASE_TRACE - ws.cell(row=1, column=col_index + 1).fill = FILL_DICT.get(BASE_TYPE) - ws.cell(row=2, column=col_index + 1).fill = FILL_DICT.get(BASE_TYPE) - elif col_index < diff_start_column: - 
ws.cell(row=1, column=col_index + 1).value = COMPARISON_TRACE - ws.cell(row=1, column=col_index + 1).fill = FILL_DICT.get(COMPARISON_TYPE) - ws.cell(row=2, column=col_index + 1).fill = FILL_DICT.get(COMPARISON_TYPE) - else: - ws.cell(row=1, column=col_index + 1).value = header_name - ws.cell(row=1, column=col_index + 1).fill = FILL_DICT.get(header_name) - ws.cell(row=2, column=col_index + 1).value = header_name - dim = ws.cell(row=2, column=col_index + 1).coordinate - ws.column_dimensions[dim[0]].width = COLUMN_WIDTH.get(header_name) - ws.merge_cells(start_row=1, start_column=base_trace_start_column + 1, - end_row=1, end_column=comparison_trace_start_column) - ws.merge_cells(start_row=1, start_column=comparison_trace_start_column + 1, - end_row=1, end_column=diff_start_column) - ws.merge_cells(start_row=1, start_column=headers.index(DIFF) + 1, - end_row=2, end_column=headers.index(DIFF) + 1) - ws.merge_cells(start_row=1, start_column=headers.index(OP_NAME_FILTER) + 1, - end_row=2, end_column=headers.index(OP_NAME_FILTER) + 1) - ws.merge_cells(start_row=1, start_column=headers.index(DIFF_FILTER) + 1, - end_row=2, end_column=headers.index(DIFF_FILTER) + 1) - - # write lines - start_row_index = 3 - for data in compare_result_data: - rows = create_data(data[0], data[1]) - row_number = 0 - for row in rows: - row_index = start_row_index + row_number - ws.cell(row=row_index, column=len(row) + 1).border = BORDER - for index, value in enumerate(row): - if index == headers.index(DIFF): - ws.cell(row=row_index, column=index + 1).number_format = '0.00%' - if value != NA and value < 0: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor='0000FF00') - ws.cell(row=row_index, column=index + 3).fill = PatternFill("solid", fgColor='0000FF00') - if value != NA and value >= 0: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor='00FF0000') - ws.cell(row=row_index, column=index + 3).fill = PatternFill("solid", fgColor='00FF0000') - 
if index in [key for key, value in enumerate(headers) if value == OP_NAME]: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=True) - else: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial') - ws.cell(row=row_index, column=index + 1).value = value - ws.cell(row=row_index, column=index + 1).border = BORDER - row_number += 1 - if row_number > 1: - # 合并单元格 - merged_index = set( - [key for key, value in enumerate(headers) if value in (OP_NAME, INPUT_SHAPE, INPUT_TYPE, DIFF)]) - for col_index in merged_index: - ws.merge_cells(start_row=start_row_index, start_column=col_index + 1, - end_row=start_row_index + row_number - 1, end_column=col_index + 1) - start_row_index = start_row_index + row_number - - wb.save(result_file_path) - wb.close() - - -if __name__ == "__main__": - main() diff --git a/profiler/compare_tools/utils/__init__.py b/profiler/compare_tools/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/compare_tools/utils/args_manager.py b/profiler/compare_tools/utils/args_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..eba55d72e362123011048fbcaa5cdc6977c176be --- /dev/null +++ b/profiler/compare_tools/utils/args_manager.py @@ -0,0 +1,133 @@ +import os.path + +from utils.constant import Constant +from utils.file_reader import FileReader +from utils.profiling_parser import GPUProfilingParser, NPUProfilingParser + + +class Singleton(object): + def __init__(self, cls): + self._cls = cls + self._instance = {} + + def __call__(self): + if self._cls not in self._instance: + self._instance[self._cls] = self._cls() + return self._instance[self._cls] + + +@Singleton +class ArgsManager: + PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} + + def __init__(self): + self._args = None + self._base_profiling_type = None + self._comparison_profiling_type = None + 
self._base_profiling = None + self._comparison_profiling = None + + @property + def base_profiling_type(self): + return self._base_profiling_type + + @property + def comparison_profiling_type(self): + return self._comparison_profiling_type + + @property + def base_profiling(self): + return self._base_profiling + + @property + def comparison_profiling(self): + return self._comparison_profiling + + @classmethod + def check_profiling_path(cls, file_path: str): + if len(file_path) > Constant.MAX_PATH_LENGTH: + msg = f"The length of file path exceeded the maximum value {Constant.MAX_PATH_LENGTH}: {file_path}" + raise RuntimeError(msg) + if not os.path.exists(file_path): + msg = f"Invalid profiling path: {file_path}" + raise RuntimeError(msg) + if os.path.islink(file_path): + msg = f"Invalid profiling path is soft link: {file_path}" + raise RuntimeError(msg) + if not os.access(file_path, os.R_OK): + msg = f"The file path has no read permission: {file_path}" + raise RuntimeError(msg) + + @classmethod + def check_output_path(cls, output_path: str): + if len(output_path) > Constant.MAX_PATH_LENGTH: + msg = f"Invalid param, the length of output_path exceeded the maximum value {Constant.MAX_PATH_LENGTH}" + raise RuntimeError(msg) + if os.path.islink(output_path): + raise RuntimeError("Invalid param, the output_path is soft link") + if not os.path.exists(output_path): + try: + os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) + except Exception: + msg = f"Can't create directory: {output_path}" + raise RuntimeError(msg) + if not os.path.isdir(output_path): + msg = f"Invalid output_path: {output_path}" + raise RuntimeError(msg) + if not os.access(output_path, os.W_OK): + msg = f"The output path has no write permission: {output_path}" + raise RuntimeError(msg) + + def parse_profiling_path(self, file_path: str): + self.check_profiling_path(file_path) + if os.path.isfile(file_path): + (split_file_path, split_file_name) = os.path.split(file_path) + (shot_name, extension) = 
os.path.splitext(split_file_name) + if extension != ".json": + msg = f"Invalid profiling path suffix: {file_path}" + raise RuntimeError(msg) + json_type = FileReader.check_json_type(file_path) + return {Constant.PROFILING_TYPE: json_type, Constant.PROFILING_PATH: file_path, + Constant.TRACE_PATH: file_path} + ascend_output = os.path.join(file_path, "ASCEND_PROFILER_OUTPUT") + profiler_output = ascend_output if os.path.isdir(ascend_output) else file_path + json_path = os.path.join(profiler_output, "trace_view.json") + memory_path = os.path.join(profiler_output, "operator_memory.csv") + if not os.path.isfile(json_path): + msg = f"Invalid profiling path: {file_path}" + raise RuntimeError(msg) + memory_path = memory_path if os.path.isfile(memory_path) else None + return {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, + Constant.TRACE_PATH: json_path, Constant.MEMORY_DATA_PATH: memory_path} + + def init(self, args: any): + self._args = args + if self._args.max_kernel_num is not None and self._args.max_kernel_num <= Constant.LIMIT_KERNEL: + msg = f"Invalid param, --max_kernel_num has to be greater than {Constant.LIMIT_KERNEL}" + raise RuntimeError(msg) + if not isinstance(self._args.op_name_map, dict): + raise RuntimeError( + "Invalid param, --op_name_map must be dict, for example: --op_name_map={'name1':'name2'}") + if self._args.gpu_flow_cat and len(self._args.gpu_flow_cat) > Constant.MAX_FLOW_CAT_LEN: + msg = f"Invalid param, --gpu_flow_cat exceeded the maximum value {Constant.MAX_FLOW_CAT_LEN}" + raise RuntimeError(msg) + + base_profiling_dict = self.parse_profiling_path(self._args.base_profiling_path) + comparison_profiling_dict = self.parse_profiling_path(self._args.comparison_profiling_path) + + if self._args.output_path: + self.check_output_path(self._args.output_path) + + Constant.BASE_PROFILING = Constant.BASE_PROFILING + self._args.base_profiling_path + self._base_profiling_type = base_profiling_dict.get(Constant.PROFILING_TYPE) + 
self._base_profiling = self.PARSER_DICT.get(self._base_profiling_type)(self._args, base_profiling_dict) + + if self._args.base_profiling_path == self._args.comparison_profiling_path: + Constant.COMPARISON_PROFILING = "Same To Base Profiling" + self._comparison_profiling_type = self._base_profiling_type + self._comparison_profiling = self._base_profiling + else: + Constant.COMPARISON_PROFILING = Constant.COMPARISON_PROFILING + self._args.comparison_profiling_path + self._comparison_profiling_type = comparison_profiling_dict.get(Constant.PROFILING_TYPE) + self._comparison_profiling = self.PARSER_DICT.get(self._comparison_profiling_type)(self._args, + comparison_profiling_dict) diff --git a/profiler/compare_tools/utils/compare_event.py b/profiler/compare_tools/utils/compare_event.py new file mode 100644 index 0000000000000000000000000000000000000000..1ce2d820d7fae4e6d9d779e6e99a30882b513f9c --- /dev/null +++ b/profiler/compare_tools/utils/compare_event.py @@ -0,0 +1,50 @@ +from utils.constant import Constant + + +class KernelEvent: + def __init__(self, event: dict, device_type: int): + self._event = event + self._device_type = device_type + + @property + def kernel_name(self) -> str: + return self._event.get("name", "") + + @property + def device_dur(self) -> float: + return self._event.get("dur", 0) + + @property + def task_id(self) -> int: + return self._event.get("args", {}).get("Task Id") + + @property + def task_type(self) -> str: + return self._event.get("args", {}).get("Task Type") + + @property + def compare_index(self) -> float: + return self.device_dur + + def get_record(self) -> list: + if self._device_type == Constant.GPU: + return [self.kernel_name, Constant.NA, self.device_dur] + return [self.kernel_name, f"{self.task_id}, {self.task_type}", self.device_dur] + + +class MemoryEvent: + def __init__(self, event: dict, name: str): + self._event = event + self._name = name + + @property + def compare_index(self) -> float: + return 
self._event.get(Constant.SIZE, 0) + + def get_record(self) -> list: + if self._event.get(Constant.RELEASE_TIME): + duration = float(self._event.get(Constant.RELEASE_TIME)) - self._event.get(Constant.ALLOCATION_TIME, 0) + else: + duration = Constant.NA + name = self._event.get(Constant.NAME, "") if self._event.get(Constant.NAME, "") else self._name + return [name, self._event.get(Constant.SIZE, 0), duration] diff --git a/profiler/compare_tools/utils/constant.py b/profiler/compare_tools/utils/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..ff39744b6471315b18a6bed386a0c21baecc0f86 --- /dev/null +++ b/profiler/compare_tools/utils/constant.py @@ -0,0 +1,94 @@ +from openpyxl.styles import PatternFill, Border, Side + + +class Constant(object): + GPU = 0 + NPU = 1 + NA = 'N/A' + LIMIT_KERNEL = 3 + MAX_PATH_LENGTH = 4096 + MAX_FLOW_CAT_LEN = 20 + MAX_FILE_SIZE = 1024 * 1024 * 1024 * 5 + BYTE_TO_KB = 1024 + YELLOW_COLOR = "FFFF00" + GREEN_COLOR = "0000FF00" + RED_COLOR = "00FF0000" + SUMMARY_LINE_COLOR = "F0F8FF" + + # autority + FILE_AUTHORITY = 0o640 + DIR_AUTHORITY = 0o750 + + PROFILING_TYPE = "profiling type" + ASCEND_OUTPUT_PATH = "ascend output" + # path + PROFILING_PATH = "profiling_path" + TRACE_PATH = "trace_path" + MEMORY_DATA_PATH = "memory_data_path" + + # excel headers + BASE_PROFILING = 'Base Profiling: ' + COMPARISON_PROFILING = 'Comparison Profiling: ' + + OP_NAME = 'Operator Name' + INPUT_SHAPE = 'Input Shape' + INPUT_TYPE = 'Input Type' + + DIFF = 'DIFF: (sum(comparison)-sum(base))/sum(base)' + OP_NAME_FILTER = 'Operator Name Filter' + DIFF_FILTER = 'DIFF Filter' + + HEADERS_FILL = PatternFill("solid", fgColor='00BFFF') # 1E90FF + + BORDER = Border(top=Side(border_style="thin", color='00000000'), + left=Side(border_style="thin", color='00000000'), + right=Side(border_style="thin", color='00000000'), + bottom=Side(border_style="thin", color='00000000')) + + # kernel + KERNEL_NAME = 'Kernel Name' + DEVICE_DUR = 'Device 
Duration(us)' + TASK_INFO = 'Task Info' + GPU_CMP_KERNEL_HEADER = [OP_NAME, INPUT_SHAPE + " / " + KERNEL_NAME, INPUT_TYPE, DEVICE_DUR] + NPU_CMP_KERNEL_HEADER = [OP_NAME, INPUT_SHAPE + " / " + KERNEL_NAME, INPUT_TYPE + " / " + TASK_INFO, DEVICE_DUR] + + # memory + SIZE = "Size(KB)" + TS = "ts" + ALLOCATION_TIME = "Allocation Time(us)" + RELEASE_TIME = "Release Time(us)" + MEMORY_OP_NAME = 'OP Name' + NAME = "Name" + CMP_MEMORY_HEADER = [OP_NAME, INPUT_SHAPE + " / " + MEMORY_OP_NAME, INPUT_TYPE + " / " + RELEASE_TIME, SIZE] + + # compare type + OPERATOR_COMPARE = "OperatorCompare" + MEMORY_COMPARE = "MemoryCompare" + + DEFAULT_WIDTH = 25 + COLUMN_WIDTH = {OP_NAME: 45, INPUT_SHAPE + " / " + MEMORY_OP_NAME: 30, INPUT_SHAPE + " / " + KERNEL_NAME: 30} + + # communication + COMMUNICAT_OP = "Communication OP Name" + TASK_NAME = "Task Name" + CALLS = "Calls" + TOTAL_DURATION = "Total Duration(us)" + AVG_DURATION = "Avg Duration(us)" + MAX_DURATION = "Max Duration(us)" + MIN_DURATION = "Min Duration(us)" + OP_KEY = COMMUNICAT_OP + BASE_CALLS = CALLS + "_x" + BASE_SUM = TOTAL_DURATION + "_x" + BASE_AVG = AVG_DURATION + "_x" + BASE_MAX = MAX_DURATION + "_x" + BASE_MIN = MIN_DURATION + "_x" + COMPARISON_CALLS = CALLS + "_y" + COMPARISON_SUM = TOTAL_DURATION + "_y" + COMPARISON_AVG = AVG_DURATION + "_y" + COMPARISON_MAX = MAX_DURATION + "_y" + COMPARISON_MIN = MIN_DURATION + "_y" + CMP_COMMUNICATION_HEADER = [COMMUNICAT_OP, TASK_NAME, CALLS, TOTAL_DURATION, AVG_DURATION, MAX_DURATION, + MIN_DURATION] + COLUMNS = [COMMUNICAT_OP, CALLS, TOTAL_DURATION, AVG_DURATION, MAX_DURATION, MIN_DURATION] + COLUMN_WIDTH_CLL = {COMMUNICAT_OP: 25, TASK_NAME: 22, CALLS: 10, TOTAL_DURATION: 20, AVG_DURATION: 20, + MAX_DURATION: 20, MIN_DURATION: 20, DIFF: 20} diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..b536fce0f7c34e064d96baca0687e369441625b8 --- /dev/null +++ 
b/profiler/compare_tools/utils/file_reader.py @@ -0,0 +1,57 @@ +import csv +import json +import os + +from utils.constant import Constant + + +class FileReader: + + @classmethod + def read_trace_file(cls, file_path: str) -> any: + if not os.path.isfile(file_path): + msg = f"File not exists: {file_path}" + raise RuntimeError(msg) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_FILE_SIZE: + print(f"The file size exceeds the preset value {Constant.MAX_FILE_SIZE / 1024 / 1024}MB, " + f"please check the file: {file_path}") + return [] + try: + with open(file_path, "rt") as file: + json_data = json.loads(file.read()) + except Exception: + msg = f"Can't read file: {file_path}" + raise RuntimeError(msg) + return json_data + + @classmethod + def read_csv_file(cls, file_path: str) -> any: + if not os.path.isfile(file_path): + return [] + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_FILE_SIZE: + print(f"[WARN] The file size exceeds the preset value {Constant.MAX_FILE_SIZE / 1024 / 1024}MB, " + f"please check the file: {file_path}") + return [] + result_data = [] + try: + with open(file_path, newline="") as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + result_data.append(row) + except Exception: + msg = f"Failed to read the file: {file_path}" + raise RuntimeError(msg) + return result_data + + @classmethod + def check_json_type(cls, file_path: str) -> str: + json_data = cls.read_trace_file(file_path) + if isinstance(json_data, dict): + return Constant.GPU + return Constant.NPU diff --git a/profiler/compare_tools/utils/name_function.py b/profiler/compare_tools/utils/name_function.py new file mode 100644 index 0000000000000000000000000000000000000000..b6a0d05f49108e90aa514f772d8872e90c7d563f --- /dev/null +++ b/profiler/compare_tools/utils/name_function.py @@ -0,0 +1,43 @@ +from utils.torch_op_node import TorchOpNode + + +class NameFunction: + 
def __init__(self, args: any): + self.args = args + + @classmethod + def get_name(cls, op_node: TorchOpNode) -> str: + return op_node.name + + @classmethod + def get_full_name(cls, op_node: TorchOpNode) -> str: + if isinstance(op_node.origin_input_shape, list): + data = [] + for dim in op_node.origin_input_shape: + data.append(','.join([str(x) for x in dim])) + input_shape = ';\r\n'.join(data) + return f'{op_node.name}{input_shape}' + return f'{op_node.name}{op_node.input_shape}' + + def get_name_func(self): + if not self.args.op_name_map and not self.args.use_input_shape: + name_func = self.get_name + elif self.args.op_name_map and not self.args.use_input_shape: + name_func = self.get_map_name + elif self.args.op_name_map and not self.args.use_input_shape: + name_func = self.get_full_name + else: + name_func = self.get_full_map_name + return name_func + + def get_map_name(self, op_node: TorchOpNode) -> str: + return self.args.op_name_map.get(op_node.name, op_node.name) + + def get_full_map_name(self, op_node: TorchOpNode) -> str: + if isinstance(op_node.origin_input_shape, list): + data = [] + for dim in op_node.origin_input_shape: + data.append(','.join([str(x) for x in dim])) + input_shape = ';\r\n'.join(data) + return f'{self.args.op_name_map.get(op_node.name, op_node.name)}{input_shape}' + return f'{self.args.op_name_map.get(op_node.name, op_node.name)}{op_node.input_shape}' diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..231f91f2b78c3ab68bf535947181c474bfa7fd62 --- /dev/null +++ b/profiler/compare_tools/utils/profiling_parser.py @@ -0,0 +1,298 @@ +from abc import ABCMeta, abstractmethod +from math import ceil + +from utils.compare_event import KernelEvent +from utils.constant import Constant +from utils.file_reader import FileReader + + +class ProfilingParser(metaclass=ABCMeta): + @abstractmethod + def get_torch_op_data(self): 
class GPUProfilingParser(ProfilingParser):
    """Parses a GPU (chrome-trace style) profiling json into comparable structures.

    Parsing is lazy: each public property parses on first access and caches the
    result in the matching underscore attribute.
    """

    def __init__(self, args: any, path_dict: dict):
        self._args = args
        self._profiling_path = path_dict.get(Constant.PROFILING_PATH)
        # For GPU data the profiling path is the trace json file itself.
        self._json_path = path_dict.get(Constant.PROFILING_PATH)
        self._torch_op_data = None
        self._kernel_dict = None
        self._memory_list = None
        self._communication_data = None
        self._communication_task_data = None

    @property
    def file_path(self) -> str:
        return self._profiling_path

    @property
    def json_path(self) -> str:
        return self._json_path

    @property
    def torch_op_data(self) -> list:
        # Lazily parsed list of cpu_op events.
        if self._torch_op_data is None:
            self.get_torch_op_data()
        return self._torch_op_data

    @property
    def kernel_dict(self) -> dict:
        # Lazily parsed mapping: launcher ts -> [KernelEvent, ...].
        if self._kernel_dict is None:
            self.get_kernel_dict()
        return self._kernel_dict

    @property
    def memory_list(self) -> list:
        # Fixed annotation: this is a list of memory records, not a dict.
        if self._memory_list is None:
            self.get_memory_list()
        return self._memory_list

    @property
    def communication_data(self) -> list:
        # Fixed annotation: this is a list of nccl kernel events, not a dict.
        if self._communication_data is None:
            self.get_communication_data()
        return self._communication_data

    @property
    def communication_task_data(self) -> dict:
        # Always {} for GPU traces; kept for interface parity with the NPU parser.
        if self._communication_task_data is None:
            self.get_communication_data()
        return self._communication_task_data

    def get_torch_op_data(self):
        """Collect all framework-side ops (cat == "cpu_op") from the trace."""
        torch_op_list = []
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        for event in total_events:
            if event.get("cat") == "cpu_op":
                torch_op_list.append(event)
        self._torch_op_data = torch_op_list

    def get_kernel_dict(self):
        """Map each flow-start ts to the device kernels it launched.

        Flow start ("s") events carry the cpu-side ts; flow end ("f") events
        land on the kernel, matched by the (pid, tid, ts) triple.
        """
        flow_kernel_dict = {}
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        flow_cat = self._args.gpu_flow_cat if self._args.gpu_flow_cat else "async_gpu"

        flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {}
        for event in total_events:
            if event.get("cat") == flow_cat and event.get("ph") == "s":
                flow_start_dict[event.get("id")] = event
            elif event.get("cat") == flow_cat and event.get("ph") == "f":
                flow_end_dict[event.get("id")] = event
            elif event.get("cat", "").capitalize() == "Kernel".capitalize():
                # capitalize() on both sides makes the match case-insensitive.
                kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event

        for flow_id, start_flow in flow_start_dict.items():
            end_flow = flow_end_dict.get(flow_id)
            if end_flow is None:
                continue
            kernel_event = kernel_dict.get(
                "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), end_flow.get("ts")))
            if kernel_event is None:
                continue
            flow_kernel_dict.setdefault(start_flow.get("ts"), []).append(KernelEvent(kernel_event, Constant.GPU))
        self._kernel_dict = flow_kernel_dict

    def get_memory_list(self):
        """Pair [memory] allocation/release events per address on device 1.

        A record is emitted when its allocation is released (with release time)
        or when its address is re-allocated before release (without one).
        """
        self._memory_list = []
        memory_events = []
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        for event in total_events:
            if event.get("name", "") == "[memory]":
                memory_events.append(event)
        memory_events.sort(key=lambda x: x.get("ts", 0))
        addr_dict = {}
        for memory_event in memory_events:
            args = memory_event.get("args", {})
            if args.get("Device Type", -1) != 1:
                continue
            allocate_bytes = args.get("Bytes", 0) / Constant.BYTE_TO_KB
            record = addr_dict.get(args.get("Addr"))
            if allocate_bytes > 0:
                if record:
                    # Address re-used before a matching release: flush the old record.
                    self._memory_list.append(record)
                addr_dict[args.get("Addr")] = {Constant.SIZE: allocate_bytes,
                                               Constant.TS: memory_event.get("ts", 0),
                                               Constant.ALLOCATION_TIME: memory_event.get("ts", 0)}
            if allocate_bytes < 0 and record:
                if abs(allocate_bytes) == record.get(Constant.SIZE):
                    record[Constant.RELEASE_TIME] = memory_event.get("ts", 0)
                    self._memory_list.append(record)
                    del addr_dict[args.get("Addr")]

    def get_communication_data(self):
        """Collect nccl communication kernels; GPU traces have no per-op task data."""
        self._communication_data, self._communication_task_data = [], {}
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        for data in total_events:
            # NOTE(review): exact-case "Kernel" here vs the case-insensitive
            # match in get_kernel_dict -- confirm which traces this must cover.
            if data.get("cat", "") == "Kernel" and data.get("name", "").split("_")[0] == "ncclKernel":
                self._communication_data.append(data)
class NPUProfilingParser(ProfilingParser):
    """Parses an NPU profiling output (trace json + memory csv) into comparable structures.

    Parsing is lazy: each public property parses on first access and caches the
    result in the matching underscore attribute.
    """

    def __init__(self, args: any, path_dict: dict):
        # Fixed annotation: path_dict is a dict of paths, not a str.
        self._args = args
        self._profiling_path = path_dict.get(Constant.PROFILING_PATH)
        self._json_path = path_dict.get(Constant.TRACE_PATH)
        self._memory_data_path = path_dict.get(Constant.MEMORY_DATA_PATH)
        self._torch_op_data = None
        self._kernel_dict = None
        self._memory_list = None
        self._communication_data = None
        self._communication_task_data = None

    @property
    def file_path(self) -> str:
        return self._profiling_path

    @property
    def json_path(self) -> str:
        return self._json_path

    @property
    def torch_op_data(self) -> list:
        # Lazily parsed list of cpu_op events.
        if self._torch_op_data is None:
            self.get_torch_op_data()
        return self._torch_op_data

    @property
    def kernel_dict(self) -> dict:
        # Lazily parsed mapping: launcher ts -> [KernelEvent, ...].
        if self._kernel_dict is None:
            self.get_kernel_dict()
        return self._kernel_dict

    @property
    def memory_list(self) -> list:
        # Fixed annotation: this is a list of memory records, not a dict.
        if self._memory_list is None:
            self.get_memory_list()
        return self._memory_list

    @property
    def communication_data(self) -> list:
        # Fixed annotation: this is a list of communication op events, not a dict.
        if self._communication_data is None:
            self.get_communication_data()
        return self._communication_data

    @property
    def communication_task_data(self) -> dict:
        # Mapping: lowered op token (e.g. "allreduce") -> [task events].
        if self._communication_task_data is None:
            self.get_communication_data()
        return self._communication_task_data

    def get_torch_op_data(self):
        """Collect all framework-side ops (cat == "cpu_op") from the trace."""
        torch_op_list = []
        json_data = FileReader.read_trace_file(self._json_path)
        for event in json_data:
            if event.get("cat") == "cpu_op":
                torch_op_list.append(event)
        self._torch_op_data = torch_op_list

    def get_kernel_dict(self):
        """Map each flow-start ts to the device kernels it launched.

        Flow start ("s") events carry the cpu-side ts; flow end ("f") events
        land on the kernel, matched by the (pid, tid, ts) triple. Any complete
        ("X") non-cpu_op event is considered a device-side kernel candidate.
        """
        flow_kernel_dict = {}
        json_data = FileReader.read_trace_file(self._json_path)
        flow_cat = "async_npu"

        flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {}
        for event in json_data:
            if event.get("cat") == flow_cat and event.get("ph") == "s":
                flow_start_dict[event.get("id")] = event
            elif event.get("cat") == flow_cat and event.get("ph") == "f":
                flow_end_dict[event.get("id")] = event
            elif event.get("ph") == "X" and event.get("cat") != 'cpu_op':
                kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event

        for flow_id, start_flow in flow_start_dict.items():
            end_flow = flow_end_dict.get(flow_id)
            if end_flow is None:
                continue
            kernel_event = kernel_dict.get(
                "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), end_flow.get("ts")))
            if kernel_event is None:
                continue
            flow_kernel_dict.setdefault(start_flow.get("ts"), []).append(KernelEvent(kernel_event, Constant.NPU))
        self._kernel_dict = flow_kernel_dict

    def get_memory_list(self):
        """Build op-memory records by joining the memory csv with trace events.

        cann-side rows are re-timestamped to the enqueue ts recovered through
        the matching dequeue event; other rows keep their allocation time.

        Fixed: the original tested truthiness of data.get("cat", "enqueue") /
        data.get("cat", "dequeue"), which matched almost every event, left
        dequeue_data empty, and made the cann matching dead code.
        """
        self._memory_list = []
        enqueue_dict, dequeue_data = {}, []
        json_data = FileReader.read_trace_file(self._json_path)
        for data in json_data:
            if data.get("cat") == "enqueue":
                enqueue_dict[data.get("args", {}).get("correlation_id", "")] = data
            elif data.get("cat") == "dequeue":
                dequeue_data.append(data)

        if not self._memory_data_path:
            return
        memory_data = FileReader.read_csv_file(self._memory_data_path)
        for data in memory_data:
            if "cann::" in data.get("Name", ""):
                ts_time = float(data.get(Constant.ALLOCATION_TIME, 0))
                match_dequeue_data = self._match_cann_memory_data(dequeue_data, ts_time)
                if match_dequeue_data is not None:
                    correlation_id = match_dequeue_data.get("args", {}).get("correlation_id", "")
                    # .get guards against a dequeue whose enqueue is missing (was a KeyError).
                    ts = enqueue_dict.get(correlation_id, {}).get("ts", 0)
                    self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), Constant.TS: ts,
                                              Constant.NAME: data.get(Constant.NAME, ""),
                                              Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)),
                                              Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)})
            else:
                # NOTE(review): source indentation was ambiguous here; treated
                # as the non-cann branch (rows recorded at allocation time) --
                # confirm against the original layout.
                self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)),
                                          Constant.TS: float(data.get(Constant.ALLOCATION_TIME, 0)),
                                          Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)),
                                          Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)})

    @classmethod
    def _match_cann_memory_data(cls, dequeue_data: list, ts_time: float):
        """Return the dequeue event whose [ts, ts+dur) span covers ts_time, else None.

        Upper-biased binary search for the last event starting at or before
        ts_time; dequeue_data must be sorted by ts.
        """
        if not dequeue_data:
            return None
        right = len(dequeue_data) - 1
        left = 0
        while right > left:
            mid = left + ceil((right - left) / 2)
            if ts_time >= dequeue_data[mid].get("ts", 0):
                left = mid
            else:
                right = mid - 1
        end_time = dequeue_data[left].get("ts", 0) + dequeue_data[left].get("dur", 0)
        return dequeue_data[left] if end_time > ts_time else None

    def get_communication_data(self):
        """Collect communication op events and group their device tasks.

        Finds the "Communication OP" thread via its thread_name metadata event,
        takes that thread's "X" events as communication ops, then attributes
        every time-overlapping "X" event from other threads of the same pid to
        the op's lowered name token (second "_"-separated field).
        """
        self._communication_data, self._communication_task_data = [], {}
        pid, tid = None, None
        json_data = FileReader.read_trace_file(self._json_path)
        for data in json_data:
            if data.get("ph", "") == "M" and data.get("name", "") == "thread_name" \
                    and data.get("args", {}).get("name", "") == "Communication OP":
                pid = data.get("pid", "")
                tid = data.get("tid", "")
        if not pid or not tid:
            return
        for data in json_data:
            if data.get("ph", "") == "X" and data.get("pid", "") == pid and data.get("tid", "") == tid:
                self._communication_data.append(data)
        if not self._communication_data:
            return
        for data in json_data:
            # Skip non-"X" events, other pids, and the communication thread itself.
            if data.get("ph", "") != "X" or data.get("pid", "") != pid or data.get("tid", "") == tid:
                continue
            ts = data.get("ts", 0)
            for communication_op in self._communication_data:
                if ts < communication_op.get("ts", 0) or ts - communication_op.get("ts", 0) > communication_op.get(
                        "dur", 0):
                    continue
                name_list = communication_op.get("name", "").split("_")
                if len(name_list) >= 2:
                    self._communication_task_data.setdefault(name_list[1].lower(), []).append(data)
                break
file mode 100644 index 0000000000000000000000000000000000000000..8995dafc1e48d71226903912b243c294e192784a --- /dev/null +++ b/profiler/compare_tools/utils/torch_op_node.py @@ -0,0 +1,93 @@ +from math import ceil + +from utils.compare_event import MemoryEvent +from utils.constant import Constant + + +class TorchOpNode: + def __init__(self, event=None, parent_node=None): + self._event = event + self._parent_node = parent_node + self._child_nodes = [] + self._kernel_list = [] + self._kernel_num = 0 + self._memory_allocated_list = [] + + @property + def start_time(self): + return self._event.get("ts", 0) + + @property + def end_time(self): + return self._event.get("ts", 0) + self._event.get("dur", 0) + + @property + def name(self): + return str(self._event.get("name", Constant.NA)) + + @property + def input_shape(self): + return str(self._event.get("args", {}).get("Input Dims", Constant.NA)) + + @property + def origin_input_shape(self): + return self._event.get("args", {}).get("Input Dims", Constant.NA) + + @property + def input_type(self): + return str(self._event.get("args", {}).get("Input type", Constant.NA)) + + @property + def call_stack(self): + return str(self._event.get("args", {}).get("Call stack", Constant.NA)) + + @property + def parent(self): + return self._parent_node + + @property + def child_nodes(self): + return self._child_nodes + + @property + def kernel_list(self): + return self._kernel_list + + @property + def kernel_num(self): + return self._kernel_num + + @property + def memory_allocated(self): + return self._memory_allocated_list + + def add_child_node(self, child_node): + self._child_nodes.append(child_node) + + def set_kernel_list(self, kernel_list: list): + self._kernel_list = kernel_list + + def add_kernel_num(self, kernel_num: int): + self._kernel_num += kernel_num + + def set_memory_allocated(self, memory_allocated: dict): + self._memory_allocated_list.append(MemoryEvent(memory_allocated, self.name)) + + def is_step_profiler(self) -> bool: 
class TreeBuilder:
    """Builds the torch-op call tree and attaches kernels / memory records to it."""

    @classmethod
    def build_tree(cls, event_list: list) -> TorchOpNode:
        """Build a containment tree from cpu_op events sorted by start time.

        An event becomes a child of the deepest currently-open op whose time
        span still covers its start; otherwise we unwind toward the root.
        Note: sorts event_list in place.
        """
        root_node = TorchOpNode()
        event_list.sort(key=lambda x: x.get("ts", 0))
        last_node = root_node
        for event in event_list:
            while last_node:
                if last_node == root_node or event.get("ts", 0) < last_node.end_time:
                    tree_node = TorchOpNode(event, last_node)
                    last_node.add_child_node(tree_node)
                    last_node = tree_node
                    break
                last_node = last_node.parent
        return root_node

    @classmethod
    def update_tree_node(cls, root_node: TorchOpNode, flow_kernel_dict: dict = None,
                         memory_allocated_list: list = None):
        """Attach kernels (keyed by launch ts) and memory records to the deepest covering op.

        Each kernel count is added to every op on the path; the kernel list and
        each memory event land on the deepest matching node.

        Fixed: mutable default arguments replaced with None sentinels, and a
        kernel ts with no matching child now skips that entry (continue)
        instead of aborting the whole update (return), matching the memory
        branch below.
        """
        flow_kernel_dict = flow_kernel_dict or {}
        memory_allocated_list = memory_allocated_list or []
        for ts, kernel_list in flow_kernel_dict.items():
            matched_child_node = root_node.match_child_node(ts)
            if not matched_child_node:
                continue
            kernel_num = len(kernel_list)
            node_queue = Queue()
            node_queue.put(matched_child_node)
            while not node_queue.empty():
                tree_node = node_queue.get()
                tree_node.add_kernel_num(kernel_num)
                matched_child_node = tree_node.match_child_node(ts)
                if matched_child_node:
                    node_queue.put(matched_child_node)
                else:
                    # Deepest covering node: this op owns the kernel list.
                    tree_node.set_kernel_list(kernel_list)
        for memory_allocated in memory_allocated_list:
            ts = memory_allocated.get(Constant.TS)
            matched_child_node = root_node.match_child_node(ts)
            if not matched_child_node:
                continue
            node_queue = Queue()
            node_queue.put(matched_child_node)
            while not node_queue.empty():
                tree_node = node_queue.get()
                matched_child_node = tree_node.match_child_node(ts)
                if matched_child_node:
                    node_queue.put(matched_child_node)
                else:
                    # Deepest covering node: this op owns the memory record.
                    tree_node.set_memory_allocated(memory_allocated)

    @classmethod
    def get_total_compare_event(cls, root_node: TorchOpNode, compare_type: str) -> list:
        """Flatten all attached events of the requested kind below root_node.

        Returns None implicitly for an unknown compare_type (callers pass only
        the two known constants).
        """
        if compare_type == Constant.MEMORY_COMPARE:
            return cls._get_total_memory(root_node)
        elif compare_type == Constant.OPERATOR_COMPARE:
            return cls._get_total_kernels(root_node)

    @classmethod
    def _get_total_kernels(cls, root_node: TorchOpNode) -> list:
        """Breadth-first collection of every node's kernel_list."""
        result_list = []
        result_list.extend(root_node.kernel_list)
        node_queue = Queue()
        for child_node in root_node.child_nodes:
            node_queue.put(child_node)
        while not node_queue.empty():
            tree_node = node_queue.get()
            result_list.extend(tree_node.kernel_list)
            for child_node in tree_node.child_nodes:
                node_queue.put(child_node)
        return result_list

    @classmethod
    def _get_total_memory(cls, root_node: TorchOpNode) -> list:
        """Breadth-first collection of every node's memory_allocated events."""
        result_list = []
        result_list.extend(root_node.memory_allocated)
        node_queue = Queue()
        for child_node in root_node.child_nodes:
            node_queue.put(child_node)
        while not node_queue.empty():
            tree_node = node_queue.get()
            result_list.extend(tree_node.memory_allocated)
            for child_node in tree_node.child_nodes:
                node_queue.put(child_node)
        return result_list