diff --git a/profiler/compare_tools/comparator/base_comparator.py b/profiler/compare_tools/comparator/base_comparator.py
new file mode 100644
index 0000000000000000000000000000000000000000..330fb871ee19b9bac1c0dfff4cae5648ebeedf1c
--- /dev/null
+++ b/profiler/compare_tools/comparator/base_comparator.py
@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+
+
+class BaseComparator(ABC):
+    def __init__(self, origin_data: any, bean: any):
+        self._sheet_name = bean.TABLE_NAME
+        self._headers = bean.HEADERS
+        self._overhead = bean.OVERHEAD
+        self._origin_data = origin_data
+        self._bean = bean
+        self._rows = []
+
+    def generate_data(self) -> dict:
+        '''
+        Generate the data for one sheet (table).
+        Returns a dict that uses the sheet name as the key and holds
+        the headers, rows and overhead of the sheet as the value.
+        '''
+        self._compare()
+        return {self._sheet_name: {"headers": self._headers, "rows": self._rows, "overhead": self._overhead}}
+
+    @abstractmethod
+    def _compare(self):
+        raise NotImplementedError("Function _compare needs to be implemented.")
diff --git a/profiler/compare_tools/comparator/communication_comparator.py b/profiler/compare_tools/comparator/communication_comparator.py
new file mode 100644
index 0000000000000000000000000000000000000000..72ed8576cdd340fd926f577eb630825ead4490a7
--- /dev/null
+++ b/profiler/compare_tools/comparator/communication_comparator.py
@@ -0,0 +1,20 @@
+from comparator.base_comparator import BaseComparator
+from compare_bean.communication_bean import CommunicationBean
+from utils.constant import Constant
+from utils.common_func import update_order_id
+
+
+class CommunicationComparator(BaseComparator):
+    def __init__(self, origin_data: dict, bean: any):
+        super().__init__(origin_data, bean)
+
+    def _compare(self):
+        base_data = self._origin_data.get(Constant.BASE_DATA, {})
+        comparison_data = self._origin_data.get(Constant.COMPARISON_DATA, {})
+        for comm_name, comm_data in base_data.items():
+            comparison_comm_data = comparison_data.pop(comm_name, {})
+            self._rows.extend(CommunicationBean(comm_name, comm_data, comparison_comm_data).rows)
+        for comm_name, comm_data in comparison_data.items():
+            self._rows.extend(CommunicationBean(comm_name, {}, comm_data).rows)
+        update_order_id(self._rows)
+
diff --git a/profiler/compare_tools/comparator/index_comparator.py b/profiler/compare_tools/comparator/index_comparator.py
deleted file mode 100644
index 91b050548def5fe1ec34353621c46462dbf748de..0000000000000000000000000000000000000000
--- a/profiler/compare_tools/comparator/index_comparator.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from utils.args_manager import ArgsManager
-
-
-class IndexComparator:
-    def __init__(self, args: any):
-        self._args = args
-        self._args_manager = ArgsManager()
-        self._base_profiling = self._args_manager.base_profiling
-        self._comparison_profiling = self._args_manager.comparison_profiling
-
-    def compare(self) -> list:
-        base_data_dict, comparison_data_dict = {}, {}
-        if not self._base_profiling.communication_data:
-            print(f"[WARNING] Can't find any communication op in the file: {self._base_profiling.json_path}")
-        for data in self._base_profiling.communication_data:
-            name_list = data.get("name", "").split("_")
-            if len(name_list) >= 2:
-                base_data_dict.setdefault(name_list[1].lower(), []).append(float(data.get("dur", 0)))
-        if self._args.base_profiling_path != self._args.comparison_profiling_path:
-            if not self._comparison_profiling.communication_data:
-                print(f"[WARNING] Can't find any communication op in the file: {self._comparison_profiling.json_path}")
-            for data in self._comparison_profiling.communication_data:
-                name_list = 
data.get("name", "").split("_") - if len(name_list) >= 2: - comparison_data_dict.setdefault(name_list[1].lower(), []).append(float(data.get("dur", 0))) - result_data = [] - for name, base_dur_list in base_data_dict.items(): - base_row = [name, None, len(base_dur_list), sum(base_dur_list), sum(base_dur_list) / len(base_dur_list), - max(base_dur_list), min(base_dur_list)] - if self._args.base_profiling_path == self._args.comparison_profiling_path: - result_data.append(base_row + [None] * 7) - continue - com_dur_list = comparison_data_dict.pop(name, None) - if not com_dur_list: - com_row = [None, None, None, 0, None, None, None] - else: - com_row = [name, None, len(com_dur_list), sum(com_dur_list), sum(com_dur_list) / len(com_dur_list), - max(com_dur_list), min(com_dur_list)] - result_data.append(base_row + com_row) - for name, com_dur_list in comparison_data_dict.items(): - com_row = [name, None, len(com_dur_list), sum(com_dur_list), sum(com_dur_list) / len(com_dur_list), - max(com_dur_list), min(com_dur_list)] - result_data.append([None, None, None, 0, None, None, None] + com_row) - return result_data diff --git a/profiler/compare_tools/comparator/operator_comparator.py b/profiler/compare_tools/comparator/operator_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..d7b22af577571b82cbc71b10005b4fe85b034f5b --- /dev/null +++ b/profiler/compare_tools/comparator/operator_comparator.py @@ -0,0 +1,13 @@ +from comparator.base_comparator import BaseComparator + + +class OperatorComparator(BaseComparator): + def __init__(self, origin_data: any, bean: any): + super().__init__(origin_data, bean) + + def _compare(self): + if not self._origin_data: + return + self._rows = [None] * (len(self._origin_data)) + for index, (base_op, comparison_op) in enumerate(self._origin_data): + self._rows[index] = self._bean(index, base_op, comparison_op).row diff --git a/profiler/compare_tools/comparator/operator_statistic_comparator.py b/profiler/compare_tools/comparator/operator_statistic_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..8ae1285abf32969a7312c376d6862822f387df85 --- /dev/null +++ b/profiler/compare_tools/comparator/operator_statistic_comparator.py @@ -0,0 +1,28 @@ +from comparator.base_comparator import BaseComparator +from utils.common_func import update_order_id + + +class OperatorStatisticComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + def _compare(self): + if not self._origin_data: + return + base_op_dict, comparison_op_dict = self._group_by_op_name() + for op_name, base_data in base_op_dict.items(): + comparison_data = comparison_op_dict.pop(op_name, []) + self._rows.append(self._bean(op_name, base_data, comparison_data).row) + for op_name, comparison_data in comparison_op_dict.items(): + self._rows.append(self._bean(op_name, [], comparison_data).row) + self._rows.sort(key=lambda x: x[-2], reverse=True) + update_order_id(self._rows) + + def _group_by_op_name(self): + base_op_dict, comparison_op_dict = {}, {} + for base_op, comparison_op in self._origin_data: + if base_op: + base_op_dict.setdefault(base_op.name, []).append(base_op) + if comparison_op: + comparison_op_dict.setdefault(comparison_op.name, []).append(comparison_op) + return base_op_dict, comparison_op_dict diff --git a/profiler/compare_tools/comparator/overall_performance_comparator.py b/profiler/compare_tools/comparator/overall_performance_comparator.py new file mode 100644 index 
0000000000000000000000000000000000000000..161f574ba53e91e3efe0e33d27f363744ee559a1
--- /dev/null
+++ b/profiler/compare_tools/comparator/overall_performance_comparator.py
@@ -0,0 +1,52 @@
+from comparator.base_comparator import BaseComparator
+from utils.constant import Constant
+
+
+class OverallPerformanceComparator(BaseComparator):
+    def __init__(self, origin_data: dict, bean: any):
+        super().__init__(origin_data, bean)
+
+    def _compare(self):
+        base_profiling_info = self._origin_data.get(Constant.BASE_DATA)
+        comp_profiling_info = self._origin_data.get(Constant.COMPARISON_DATA)
+        self._headers = ['']
+        base_col = [f'{base_profiling_info.profiling_type}']
+        comp_col = [f'{comp_profiling_info.profiling_type}']
+        if not base_profiling_info.hide_op_details and not comp_profiling_info.hide_op_details:
+            self._headers.extend(['Cube Time(Num)', 'Vector Time(Num)'])
+            base_col.extend([f'{base_profiling_info.cube_time:.3f}s({base_profiling_info.cube_num})',
+                             f'{base_profiling_info.vec_time:.3f}s({base_profiling_info.vec_num})'])
+            comp_col.extend([f'{comp_profiling_info.cube_time:.3f}s({comp_profiling_info.cube_num})',
+                             f'{comp_profiling_info.vec_time:.3f}s({comp_profiling_info.vec_num})'])
+        if base_profiling_info.other_time or comp_profiling_info.other_time:
+            self._headers.append('Other Time')
+            base_col.append(f'{base_profiling_info.other_time:.3f}s')
+            comp_col.append(f'{comp_profiling_info.other_time:.3f}s')
+        if base_profiling_info.fa_time_fwd or comp_profiling_info.fa_time_fwd:
+            self._headers.append('Flash Attention Time(Forward)(Num)')
+            base_col.append(f'{base_profiling_info.fa_time_fwd:.3f}s({base_profiling_info.fa_num_fwd})')
+            comp_col.append(f'{comp_profiling_info.fa_time_fwd:.3f}s({comp_profiling_info.fa_num_fwd})')
+        if base_profiling_info.fa_time_bwd or comp_profiling_info.fa_time_bwd:
+            self._headers.append('Flash Attention Time(Backward)(Num)')
+            base_col.append(f'{base_profiling_info.fa_time_bwd:.3f}s({base_profiling_info.fa_num_bwd})')
+            comp_col.append(f'{comp_profiling_info.fa_time_bwd:.3f}s({comp_profiling_info.fa_num_bwd})')
+        self._headers.extend(['Computing Time'])
+        base_col.extend([f'{base_profiling_info.compute_time:.3f}s'])
+        comp_col.extend([f'{comp_profiling_info.compute_time:.3f}s'])
+        if base_profiling_info.memory_used or comp_profiling_info.memory_used:
+            self._headers.append('Mem Usage')
+            base_col.append(f'{base_profiling_info.memory_used:.2f}G')
+            comp_col.append(f'{comp_profiling_info.memory_used:.2f}G')
+        self._headers.extend(['Uncovered Communication Time'])
+        base_col.extend([f'{base_profiling_info.communication_not_overlapped:.3f}s'])
+        comp_col.extend([f'{comp_profiling_info.communication_not_overlapped:.3f}s'])
+        if base_profiling_info.sdma_time or comp_profiling_info.sdma_time:
+            self._headers.append('SDMA Time(Num)')
+            base_col.append(f'{base_profiling_info.sdma_time:.3f}s({base_profiling_info.sdma_num})')
+            comp_col.append(f'{comp_profiling_info.sdma_time:.3f}s({comp_profiling_info.sdma_num})')
+        cue = '(Not minimal profiling)' if base_profiling_info.is_not_minimal_profiling() or \
+            comp_profiling_info.is_not_minimal_profiling() else ''
+        self._headers.extend(['Free Time', 'E2E Time' + cue])
+        base_col.extend([f'{base_profiling_info.scheduling_time:.3f}s', f'{base_profiling_info.e2e_time:.3f}s'])
+        comp_col.extend([f'{comp_profiling_info.scheduling_time:.3f}s', f'{comp_profiling_info.e2e_time:.3f}s'])
+        self._rows = [base_col, comp_col]
diff --git a/profiler/compare_tools/compare_bean/communication_bean.py b/profiler/compare_tools/compare_bean/communication_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..0af0a7fa9d277f99562b663969e9669b4a68024c
--- /dev/null
+++ b/profiler/compare_tools/compare_bean/communication_bean.py
@@ -0,0 +1,72 @@
+from utils.constant import Constant
+from utils.excel_config import ExcelConfig
+from utils.common_func import calculate_diff_ratio
+
+
+class CommunicationInfo:
+
+    def __init__(self, name: str, data_list: list, is_task: bool):
+        self.comm_op_name = None
+        self.task_name = None
+        self.calls = None
+        self.total_duration = 0
+        self.avg_duration = None
+        self.max_duration = None
+        self.min_duration = None
+        if data_list:
+            self.comm_op_name = "|" if is_task else name
+            self.task_name = name if is_task else None
+            self.calls = len(data_list)
+            self.total_duration = sum(data_list)
+            self.avg_duration = sum(data_list) / len(data_list)
+            self.max_duration = max(data_list)
+            self.min_duration = min(data_list)
+
+
+class CommunicationBean:
+    TABLE_NAME = Constant.COMMUNICATION_TABLE
+    HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME)
+    OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME)
+
+    def __init__(self, name: str, base_comm_data: dict, comparison_comm_data: dict):
+        self._name = name
+        self._base_comm = base_comm_data
+        self._comparison_comm = comparison_comm_data
+
+    @property
+    def rows(self):
+        rows = []
+        base_comm = CommunicationInfo(self._name, self._base_comm.get("comm_list", []), is_task=False)
+        comparison_comm = CommunicationInfo(self._name, self._comparison_comm.get("comm_list", []), is_task=False)
+        rows.append(self._get_row(base_comm, comparison_comm, is_task=False))
+
+        base_task = self._base_comm.get("comm_task", {})
+        comparison_task = self._comparison_comm.get("comm_task", {})
+        if not base_task and not comparison_task:
+            return rows
+
+        for task_name, task_list in base_task.items():
+            base_task_info = CommunicationInfo(task_name, task_list, is_task=True)
+            comparison_task_info = CommunicationInfo("", [], is_task=True)
+            if comparison_task:
+                _task_name = next(iter(comparison_task))
+                comparison_task_info = CommunicationInfo(_task_name, comparison_task.pop(_task_name, []),
+                                                         is_task=True)
+            rows.append(self._get_row(base_task_info, comparison_task_info, is_task=True))
+        for task_name, task_list in comparison_task.items():
+            base_task_info = CommunicationInfo("", [], is_task=True)
+            comparison_task_info = CommunicationInfo(task_name, task_list, is_task=True)
+            rows.append(self._get_row(base_task_info, comparison_task_info, is_task=True))
+
+        return rows
+
+    @classmethod
+    def _get_row(cls, base_info: CommunicationInfo, comparison_info: CommunicationInfo, is_task: bool) -> list:
+        row = [None, base_info.comm_op_name, base_info.task_name, base_info.calls, base_info.total_duration,
+               base_info.avg_duration, base_info.max_duration, base_info.min_duration, comparison_info.comm_op_name,
+               comparison_info.task_name, comparison_info.calls, comparison_info.total_duration,
+               comparison_info.avg_duration, comparison_info.max_duration, comparison_info.min_duration]
+        diff_fields = [None, None] if is_task else calculate_diff_ratio(base_info.total_duration,
+                                                                        comparison_info.total_duration)
+        row.extend(diff_fields)
+        return row
diff --git a/profiler/compare_tools/compare_bean/memory_compare_bean.py b/profiler/compare_tools/compare_bean/memory_compare_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..22af09b531e7c9e33f423800abff2f375832b5fc
--- /dev/null
+++ 
b/profiler/compare_tools/compare_bean/memory_compare_bean.py @@ -0,0 +1,47 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class MemoryCompareBean: + TABLE_NAME = Constant.MEMORY_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, index: int, base_op: TorchOpNode, comparison_op: TorchOpNode): + self._index = index + self._base_op = MemoryInfo(base_op) + self._comparison_op = MemoryInfo(comparison_op) + + @property + def row(self): + row = [self._index + 1, self._base_op.operator_name, self._base_op.input_shape, self._base_op.input_type, + self._base_op.memory_details, self._base_op.size, self._comparison_op.operator_name, + self._comparison_op.input_shape, self._comparison_op.input_type, self._comparison_op.memory_details, + self._comparison_op.size] + diff_fields = calculate_diff_ratio(self._base_op.size, self._comparison_op.size) + row.extend(diff_fields) + return row + + +class MemoryInfo: + def __init__(self, torch_op: TorchOpNode): + self.operator_name = None + self.input_shape = None + self.input_type = None + self.size = 0 + self.memory_details = "" + self._memory_list = [] + if torch_op: + self.operator_name = torch_op.name + self.input_shape = torch_op.input_shape + self.input_type = torch_op.input_type + self._memory_list = TreeBuilder.get_total_memory(torch_op) + self._update_memory_fields() + + def _update_memory_fields(self): + for memory in self._memory_list: + self.size += memory.size + self.memory_details += memory.memory_details diff --git a/profiler/compare_tools/compare_bean/memory_statistic_bean.py b/profiler/compare_tools/compare_bean/memory_statistic_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..827f095704e68ad7f7f58248f67fe512c2cb5a6c --- /dev/null +++ b/profiler/compare_tools/compare_bean/memory_statistic_bean.py @@ -0,0 +1,38 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.tree_builder import TreeBuilder +from utils.excel_config import ExcelConfig + + +class MemoryStatisticBean: + TABLE_NAME = Constant.MEMORY_TOP_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, name: str, base_data: list, comparison_data: list): + self._name = name + self._base_info = MemoryStatisticInfo(base_data) + self._comparison_info = MemoryStatisticInfo(comparison_data) + + @property + def row(self): + row = [None, self._name, self._base_info.duration_ms, self._base_info.size_mb, self._base_info.number, + self._comparison_info.duration_ms, self._comparison_info.size_mb, self._comparison_info.number] + diff_fields = calculate_diff_ratio(self._base_info.size_mb, self._comparison_info.size_mb) + row.extend(diff_fields) + return row + + +class MemoryStatisticInfo: + def __init__(self, data_list: list): + self._data_list = data_list + self.duration_ms = 0 + self.size_mb = 0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for op_data in self._data_list: + memory_list = TreeBuilder.get_total_memory(op_data) + self.duration_ms += sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) + self.size_mb += sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) diff --git a/profiler/compare_tools/compare_bean/operator_compare_bean.py 
b/profiler/compare_tools/compare_bean/operator_compare_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..ee0c71c383203dfc71f429315c7d6565613edd64 --- /dev/null +++ b/profiler/compare_tools/compare_bean/operator_compare_bean.py @@ -0,0 +1,47 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class OperatorCompareBean: + TABLE_NAME = Constant.OPERATOR_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, index: int, base_op: TorchOpNode, comparison_op: TorchOpNode): + self._index = index + self._base_op = OperatorInfo(base_op) + self._comparison_op = OperatorInfo(comparison_op) + + @property + def row(self): + row = [self._index + 1, self._base_op.operator_name, self._base_op.input_shape, self._base_op.input_type, + self._base_op.kernel_details, self._base_op.device_dur, self._comparison_op.operator_name, + self._comparison_op.input_shape, self._comparison_op.input_type, self._comparison_op.kernel_details, + self._comparison_op.device_dur] + diff_fields = calculate_diff_ratio(self._base_op.device_dur, self._comparison_op.device_dur) + row.extend(diff_fields) + return row + + +class OperatorInfo: + def __init__(self, torch_op: TorchOpNode): + self.operator_name = None + self.input_shape = None + self.input_type = None + self.device_dur = 0 + self.kernel_details = "" + self._kernel_list = [] + if torch_op: + self.operator_name = torch_op.name + self.input_shape = torch_op.input_shape + self.input_type = torch_op.input_type + self._kernel_list = TreeBuilder.get_total_kernels(torch_op) + self._update_kernel_fields() + + def _update_kernel_fields(self): + for kernel in self._kernel_list: + self.device_dur += kernel.device_dur + self.kernel_details += kernel.kernel_details diff --git a/profiler/compare_tools/compare_bean/operator_statistic_bean.py b/profiler/compare_tools/compare_bean/operator_statistic_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..6aab6ecfe96e11b996dd1dedd73cce5d73069320 --- /dev/null +++ b/profiler/compare_tools/compare_bean/operator_statistic_bean.py @@ -0,0 +1,36 @@ +from utils.common_func import calculate_diff_ratio +from utils.constant import Constant +from utils.excel_config import ExcelConfig +from utils.tree_builder import TreeBuilder + + +class OperatorStatisticBean: + TABLE_NAME = Constant.OPERATOR_TOP_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, name: str, base_data: list, comparison_data: list): + self._name = name + self._base_info = OperatorStatisticInfo(base_data) + self._comparison_info = OperatorStatisticInfo(comparison_data) + + @property + def row(self): + row = [None, self._name, self._base_info.device_dur_ms, self._base_info.number, + self._comparison_info.device_dur_ms, self._comparison_info.number] + diff_fields = calculate_diff_ratio(self._base_info.device_dur_ms, self._comparison_info.device_dur_ms) + row.extend(diff_fields) + return row + + +class OperatorStatisticInfo: + def __init__(self, data_list: list): + self._data_list = data_list + self.device_dur_ms = 0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for op_data in self._data_list: + kernel_list = TreeBuilder.get_total_kernels(op_data) + self.device_dur_ms += sum([kernel.device_dur 
/ Constant.US_TO_MS for kernel in kernel_list]) diff --git a/profiler/compare_tools/compare_bean/profiling_info.py b/profiler/compare_tools/compare_bean/profiling_info.py index f3143f5ca9a6f539c87d652e72b9058464117275..f7711261d12c86ab16ccad01cf5b7ed66f272227 100644 --- a/profiler/compare_tools/compare_bean/profiling_info.py +++ b/profiler/compare_tools/compare_bean/profiling_info.py @@ -86,3 +86,6 @@ class ProfilingInfo: def set_memory_used(self, memory: float): self.memory_used = memory + + def is_not_minimal_profiling(self) -> bool: + return self.profiling_type == Constant.NPU and not self.minimal_profiling diff --git a/profiler/compare_tools/generation/base_generator.py b/profiler/compare_tools/generation/base_generator.py deleted file mode 100644 index e65bf337380518497d6a50d75bd32fe58acf5e07..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/base_generator.py +++ /dev/null @@ -1,11 +0,0 @@ -from abc import ABC, abstractmethod - - -class BaseGenerator(ABC): - def __init__(self, sheet_name: str, data: any): - self.sheet_name = sheet_name - self.data = data - - @abstractmethod - def generate_data(self): - raise NotImplementedError("Function generate_data need to be implemented.") diff --git a/profiler/compare_tools/generation/communication_compare_generator.py b/profiler/compare_tools/generation/communication_compare_generator.py deleted file mode 100644 index 243d6a51506f32cc627b4046c0976b7504c4dbf4..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/communication_compare_generator.py +++ /dev/null @@ -1,50 +0,0 @@ -import math - -import pandas as pd - -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant - - -class CommunicationCompareGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.COMMUNICATION_SHEET, data) - self._base_task_data = ArgsManager().base_profiling.communication_task_data - self._comparison_task_data = ArgsManager().comparison_profiling.communication_task_data - - def generate_data(self): - result_data = [] - row_headers = ["base_op", "base_task", "base_calls", "base_total_dur", "base_avg_dur", "base_max_dur", - "base_min_dur", "com_op", "com_task", "com_calls", "com_total_dur", "com_avg_dur", "com_max_dur", - "com_min_dur"] - for row in self.data: - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - result_data.append(row + [None, None]) - else: - result_data.append(row + calculate_diff_ratio(row[row_headers.index("base_total_dur")], - row[row_headers.index("com_total_dur")])) - base_data = self._get_task_statistic(row[row_headers.index("base_op")], is_base=True) - comparison_data = self._get_task_statistic(row[row_headers.index("com_op")], is_base=False) - for index in range(max(len(base_data), len(comparison_data))): - if index >= len(base_data): - base_row = ["|"] + [None] * 6 - else: - base_row = ["|"] + base_data[index] - if index >= len(comparison_data): - comparison_row = ["|"] + [None] * 6 - else: - comparison_row = ["|"] + comparison_data[index] - result_data.append(base_row + comparison_row + [None, None]) - return result_data - - def _get_task_statistic(self, name: str, is_base: bool): - if not name: - return [] - task_list = self._base_task_data.get(name) if is_base else self._comparison_task_data.get(name) - if task_list: - data = [[data.get("name", ""), float(data.get("dur", 0))] for data in 
task_list] - df = pd.DataFrame(data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) - return df.groupby(Constant.OP_KEY).agg(["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() - return [] diff --git a/profiler/compare_tools/generation/comparison_generator.py b/profiler/compare_tools/generation/comparison_generator.py deleted file mode 100644 index 44798cb95441ba27fe53e2a3bbb19d803162be83..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/comparison_generator.py +++ /dev/null @@ -1,33 +0,0 @@ -from comparator.index_comparator import IndexComparator -from comparator.op_comparator import OpComparator -from generation.communication_compare_generator import CommunicationCompareGenerator -from generation.memory_compare_generator import MemoryCompareGenerator -from generation.memory_statistic_generator import MemoryStatisticGenerator -from generation.operator_compare_generator import OperatorCompareGenerator -from generation.operator_statistic_generator import OperatorStatisticGenerator -from view.excel_view import ExcelViewer -from utils.constant import Constant -from utils.args_manager import ArgsManager -from utils.torch_op_node import TorchOpNode -from utils.tree_builder import TreeBuilder - - -class ComparisonGenerator: - def __init__(self, args: any): - self._args = args - self._args_manager = ArgsManager() - - def run(self, file_path: str): - data_dict = {} - if self._args.enable_operator_compare or self._args.enable_memory_compare: - op_compare_result = OpComparator(self._args).compare() - if self._args.enable_communication_compare: - index_compare_result = IndexComparator(self._args).compare() - data_dict[Constant.COMMUNICATION_SHEET] = CommunicationCompareGenerator(index_compare_result).generate_data() - if self._args.enable_operator_compare: - data_dict[Constant.OPERATOR_SHEET] = OperatorCompareGenerator(op_compare_result).generate_data() - data_dict[Constant.OPERATOR_TOP_SHEET] = OperatorStatisticGenerator(op_compare_result).generate_data() - if self._args.enable_memory_compare: - data_dict[Constant.MEMORY_SHEET] = MemoryCompareGenerator(op_compare_result).generate_data() - data_dict[Constant.MEMORY_TOP_SHEET] = MemoryStatisticGenerator(op_compare_result).generate_data() - ExcelViewer(data_dict, file_path).generate_view() diff --git a/profiler/compare_tools/generation/memory_compare_generator.py b/profiler/compare_tools/generation/memory_compare_generator.py deleted file mode 100644 index 2cf919d1f658a57f893231a8a873f113601197cb..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/memory_compare_generator.py +++ /dev/null @@ -1,37 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.torch_op_node import TorchOpNode -from utils.tree_builder import TreeBuilder - - -class MemoryCompareGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.MEMORY_SHEET, data) - - def generate_data(self): - def get_row_info(torch_op_node: TorchOpNode): - if not torch_op_node: - return [None] * 4 + [0] - memory_list = TreeBuilder.get_total_memory(torch_op_node) - size = 0 - memory_details = "" - for memory in memory_list: - size += memory.size - memory_details += memory.memory_details - return [torch_op_node.name, torch_op_node.input_shape, torch_op_node.input_type, memory_details, size] - - if not self.data: - return [] - data = [None] * 
(len(self.data)) - for index, (base_op, comparison_op) in enumerate(self.data): - base_row = get_row_info(base_op) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - comparison_row = [None] * 5 - diff_ratio = [None] * 2 - else: - comparison_row = get_row_info(comparison_op) - diff_ratio = calculate_diff_ratio(base_row[-1], comparison_row[-1]) - data[index] = base_row + comparison_row + diff_ratio - return data diff --git a/profiler/compare_tools/generation/memory_statistic_generator.py b/profiler/compare_tools/generation/memory_statistic_generator.py deleted file mode 100644 index 652e73a38d2e2c07fd401f2319565d46c56a1853..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/memory_statistic_generator.py +++ /dev/null @@ -1,53 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.tree_builder import TreeBuilder - - -class MemoryStatisticGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.MEMORY_TOP_SHEET, data) - - def generate_data(self): - base_op_dict, comparison_op_dict = {}, {} - for base_op, comparison_op in self.data: - if base_op: - memory_list = TreeBuilder.get_total_memory(base_op) - size = sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) - duration = sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) - base_op_dict.setdefault(base_op.name, {}).setdefault("size", []).append(size) - base_op_dict.setdefault(base_op.name, {}).setdefault("duration", []).append(duration) - if comparison_op: - memory_list = TreeBuilder.get_total_memory(comparison_op) - size = sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) - duration = sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) - comparison_op_dict.setdefault(comparison_op.name, {}).setdefault("size", []).append(size) - comparison_op_dict.setdefault(comparison_op.name, {}).setdefault("duration", []).append(duration) - result_data = [] - for op_name, base_data in base_op_dict.items(): - base_dur = sum(base_data.get("duration", [])) - base_size = sum(base_data.get("size", [])) - base_num = len(base_data.get("size", [])) - comparison_data = comparison_op_dict.pop(op_name, None) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - result_data.append([op_name, base_dur, base_size, base_num] + [None] * 5) - elif comparison_data: - comparison_dur = sum(comparison_data.get("duration", [])) - comparison_size = sum(comparison_data.get("size", [])) - comparison_num = len(comparison_data.get("size", [])) - result_data.append( - [op_name, base_dur, base_size, base_num, comparison_dur, comparison_size, - comparison_num] + calculate_diff_ratio(base_size, comparison_size)) - else: - result_data.append( - [op_name, base_dur, base_size, base_num, 0, 0, 0] + calculate_diff_ratio(base_size, 0)) - for op_name, comparison_data_dict in comparison_op_dict.items(): - comparison_dur = sum(comparison_data_dict.get("duration", [])) - comparison_size = sum(comparison_data_dict.get("size", [])) - comparison_num = len(comparison_data_dict.get("size", [])) - result_data.append([op_name, 0, 0, 0, comparison_dur, comparison_size, comparison_num] + - calculate_diff_ratio(0, comparison_size)) - if ArgsManager().base_profiling_path != ArgsManager().comparison_profiling_path: - result_data.sort(key=lambda x: x[-2], reverse=True) - return 
result_data diff --git a/profiler/compare_tools/generation/operator_compare_generator.py b/profiler/compare_tools/generation/operator_compare_generator.py deleted file mode 100644 index 0f876a3ed834a96e9fa581bda06004905f7c4e2d..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/operator_compare_generator.py +++ /dev/null @@ -1,39 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.torch_op_node import TorchOpNode -from utils.tree_builder import TreeBuilder - - -class OperatorCompareGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.OPERATOR_SHEET, data) - - def generate_data(self): - def get_row_info(torch_op_node: TorchOpNode): - if not torch_op_node: - return [None] * 4 + [0] - kernel_list = TreeBuilder.get_total_kernels(torch_op_node) - duration = 0 - kernel_details = "" - for kernel in kernel_list: - duration += kernel.device_dur - kernel_details += kernel.kernel_details - return [torch_op_node.name, torch_op_node.input_shape, torch_op_node.input_type, kernel_details, duration] - - if not self.data: - return [] - data = [None] * (len(self.data)) - index = 0 - for base_op, comparison_op in self.data: - base_row = get_row_info(base_op) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - comparison_row = [None] * 5 - diff_ratio = [None] * 2 - else: - comparison_row = get_row_info(comparison_op) - diff_ratio = calculate_diff_ratio(base_row[-1], comparison_row[-1]) - data[index] = base_row + comparison_row + diff_ratio - index += 1 - return data diff --git a/profiler/compare_tools/generation/operator_statistic_generator.py b/profiler/compare_tools/generation/operator_statistic_generator.py deleted file mode 100644 index ec685c42f41f7e73521ef82b711a3c88fb011801..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/generation/operator_statistic_generator.py +++ /dev/null @@ -1,43 +0,0 @@ -from generation.base_generator import BaseGenerator -from utils.args_manager import ArgsManager -from utils.common_func import calculate_diff_ratio -from utils.constant import Constant -from utils.tree_builder import TreeBuilder - - -class OperatorStatisticGenerator(BaseGenerator): - def __init__(self, data: list): - super().__init__(Constant.OPERATOR_TOP_SHEET, data) - - def generate_data(self): - base_op_dict, comparison_op_dict = {}, {} - for base_op, comparison_op in self.data: - if base_op: - kernel_list = TreeBuilder.get_total_kernels(base_op) - duration = sum([kernel.device_dur / Constant.US_TO_MS for kernel in kernel_list]) - base_op_dict.setdefault(base_op.name, []).append(duration) - if comparison_op: - kernel_list = TreeBuilder.get_total_kernels(comparison_op) - duration = sum([kernel.device_dur / Constant.US_TO_MS for kernel in kernel_list]) - comparison_op_dict.setdefault(comparison_op.name, []).append(duration) - result_data = [] - for op_name, base_duration_list in base_op_dict.items(): - base_dur = sum(base_duration_list) - comparison_duration_list = comparison_op_dict.pop(op_name, None) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - result_data.append([op_name, base_dur, len(base_duration_list)] + [None] * 4) - elif comparison_duration_list: - comparison_dur = sum(comparison_duration_list) - result_data.append( - [op_name, base_dur, len(base_duration_list), comparison_dur, - 
len(comparison_duration_list)] + calculate_diff_ratio(base_dur, comparison_dur)) - else: - result_data.append( - [op_name, base_dur, len(base_duration_list), 0, 0] + calculate_diff_ratio(base_dur, 0)) - for op_name, comparison_duration_list in comparison_op_dict.items(): - comparison_dur = sum(comparison_duration_list) - result_data.append([op_name, 0, 0, comparison_dur, len(comparison_duration_list)] + - calculate_diff_ratio(0, comparison_dur)) - if ArgsManager().base_profiling_path != ArgsManager().comparison_profiling_path: - result_data.sort(key=lambda x: x[-2], reverse=True) - return result_data diff --git a/profiler/compare_tools/generation/__init__.py b/profiler/compare_tools/generator/__init__.py similarity index 100% rename from profiler/compare_tools/generation/__init__.py rename to profiler/compare_tools/generator/__init__.py diff --git a/profiler/compare_tools/generator/base_generator.py b/profiler/compare_tools/generator/base_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..c472bc9922e6febf118f62a66424056243156c07 --- /dev/null +++ b/profiler/compare_tools/generator/base_generator.py @@ -0,0 +1,22 @@ +from abc import ABC, abstractmethod +from multiprocessing import Process + + +class BaseGenerator(Process, ABC): + def __init__(self, profiling_data_dict: dict, args: any): + super(BaseGenerator, self).__init__() + self._profiling_data_dict = profiling_data_dict + self._args = args + self._result_data = {} + + def run(self): + self.compare() + self.generate_view() + + @abstractmethod + def compare(self): + raise NotImplementedError("Function compare need to be implemented.") + + @abstractmethod + def generate_view(self): + raise NotImplementedError("Function generate_view need to be implemented.") diff --git a/profiler/compare_tools/generator/comparison_generator.py b/profiler/compare_tools/generator/comparison_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..a8f8835e20c81f87154968f666cfe50831f2122f --- /dev/null +++ b/profiler/compare_tools/generator/comparison_generator.py @@ -0,0 +1,36 @@ +from generator.detail_performance_generator import DetailPerformanceGenerator +from generator.overall_performance_generator import OverallPerformanceGenerator +from profiling_parser.gpu_profiling_parser import GPUProfilingParser +from profiling_parser.npu_profiling_parser import NPUProfilingParser +from utils.constant import Constant +from utils.args_manager import ArgsManager + + +class ComparisonGenerator: + PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} + + def __init__(self): + self._args_manager = ArgsManager() + self._overall_data = None + self._details_data = None + + def run(self): + self.load_data() + self.generate_compare_result() + + def load_data(self): + base_data = self.PARSER_DICT.get(self._args_manager.base_profiling_type)( + self._args_manager.args, self._args_manager.base_path_dict).load_data() + comparison_data = self.PARSER_DICT.get(self._args_manager.comparison_profiling_type)( + self._args_manager.args, self._args_manager.comparison_path_dict).load_data() + self._overall_data = {Constant.BASE_DATA: base_data.overall_metrics, + Constant.COMPARISON_DATA: comparison_data.overall_metrics} + self._details_data = {Constant.BASE_DATA: base_data, Constant.COMPARISON_DATA: comparison_data} + + def generate_compare_result(self): + generator_list = [OverallPerformanceGenerator(self._overall_data, self._args_manager.args), + DetailPerformanceGenerator(self._details_data, 
self._args_manager.args)] + for generator in generator_list: + generator.start() + for generator in generator_list: + generator.join() diff --git a/profiler/compare_tools/comparator/op_comparator.py b/profiler/compare_tools/generator/detail_performance_generator.py similarity index 58% rename from profiler/compare_tools/comparator/op_comparator.py rename to profiler/compare_tools/generator/detail_performance_generator.py index 8ccd428ef82250266722bed0dfd59a97c6dc39c0..88394c907dca1fc9af2d963050b26a3d5faece29 100644 --- a/profiler/compare_tools/comparator/op_comparator.py +++ b/profiler/compare_tools/generator/detail_performance_generator.py @@ -1,26 +1,70 @@ +import os from collections import deque +from datetime import datetime import numpy as np -from utils.args_manager import ArgsManager +from comparator.communication_comparator import CommunicationComparator +from comparator.operator_comparator import OperatorComparator +from comparator.operator_statistic_comparator import OperatorStatisticComparator +from compare_bean.communication_bean import CommunicationBean +from compare_bean.memory_compare_bean import MemoryCompareBean +from compare_bean.memory_statistic_bean import MemoryStatisticBean +from compare_bean.operator_compare_bean import OperatorCompareBean +from compare_bean.operator_statistic_bean import OperatorStatisticBean +from generator.base_generator import BaseGenerator +from profiling_parser.base_profiling_parser import ProfilingResult +from utils.constant import Constant from utils.name_function import NameFunction from utils.torch_op_node import TorchOpNode from utils.tree_builder import TreeBuilder +from view.excel_view import ExcelView -class OpComparator: - def __init__(self, args: any): - self._args = args - self._args_manager = ArgsManager() - self._base_profiling = self._args_manager.base_profiling - self._comparison_profiling = self._args_manager.comparison_profiling +class DetailPerformanceGenerator(BaseGenerator): + def __init__(self, profiling_data_dict: dict, args: any): + super().__init__(profiling_data_dict, args) - def compare(self) -> list: - base_ops = self._get_top_layer_ops(self._base_profiling) - if self._args.base_profiling_path == self._args.comparison_profiling_path: - comparison_ops = [] - else: - comparison_ops = self._get_top_layer_ops(self._comparison_profiling) + def compare(self): + if self._args.enable_operator_compare or self._args.enable_memory_compare or \ + self._args.enable_communication_compare: + print("[INFO] Start to compare performance detail data, please wait.") + comparator_list = self._create_comparator() + for comparator in comparator_list: + self._result_data.update(comparator.generate_data()) + + def generate_view(self): + if not self._result_data: + return + dir_path = self._args.output_path if self._args.output_path else "./" + file_name = "performance_comparison_result_{}.xlsx".format(datetime.utcnow().strftime("%Y%m%d%H%M%S")) + result_file_path = os.path.realpath(os.path.join(dir_path, file_name)) + ExcelView(self._result_data, result_file_path, self._args).generate_view() + print(f"[INFO] The comparison result file has been generated: {result_file_path}") + + def _create_comparator(self): + comparator_list = [] + if self._args.enable_operator_compare or self._args.enable_memory_compare: + op_compare_result = self.match_torch_op() + + if self._args.enable_communication_compare: + communication_data = { + Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).communication_dict, + Constant.COMPARISON_DATA: 
self._profiling_data_dict.get(Constant.COMPARISON_DATA).communication_dict} + comparator_list.append(CommunicationComparator(communication_data, CommunicationBean)) + + if self._args.enable_operator_compare: + comparator_list.append(OperatorComparator(op_compare_result, OperatorCompareBean)) + comparator_list.append(OperatorStatisticComparator(op_compare_result, OperatorStatisticBean)) + + if self._args.enable_memory_compare: + comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) + comparator_list.append(OperatorStatisticComparator(op_compare_result, MemoryStatisticBean)) + return comparator_list + + def match_torch_op(self) -> list: + base_ops = self._get_top_layer_ops(self._profiling_data_dict.get(Constant.BASE_DATA)) + comparison_ops = self._get_top_layer_ops(self._profiling_data_dict.get(Constant.COMPARISON_DATA)) if not base_ops and not comparison_ops: return [] name_func = NameFunction(self._args).get_name_func() @@ -29,7 +73,6 @@ class OpComparator: compare_result_data = self._drill_down(compare_result_data, name_func) return compare_result_data - @classmethod def _matching_op(cls, base_ops: list, comparison_ops: list, name_func: any) -> list: if not comparison_ops: @@ -83,23 +126,9 @@ class OpComparator: result_data.append([None, comparison_ops[comparison_index]]) return result_data - def _get_top_layer_ops(self, profiling_instance: any) -> any: - torch_op_data = profiling_instance.torch_op_data - if not torch_op_data: - print(f"[WARNING] Can't find any torch op in the file: {profiling_instance.json_path}") - root_node = TreeBuilder.build_tree(torch_op_data) - - kernel_dict, memory_list = {}, [] - if self._args.enable_operator_compare: - kernel_dict = profiling_instance.kernel_dict - if not kernel_dict: - print(f"[WARNING] Can't find any flow event in the file: {profiling_instance.json_path}") - if self._args.enable_memory_compare: - memory_list = profiling_instance.memory_list - if not memory_list: - print(f"[WARNING] Can't find any memory event in the file: {profiling_instance.file_path}") - - TreeBuilder.update_tree_node(root_node, kernel_dict, memory_list) + def _get_top_layer_ops(self, profiling_data: ProfilingResult) -> any: + root_node = TreeBuilder.build_tree(profiling_data.torch_op_data) + TreeBuilder.update_tree_node(root_node, profiling_data.kernel_dict, profiling_data.memory_list) level1_child_nodes = root_node.child_nodes result_data = [] for level1_node in level1_child_nodes: diff --git a/profiler/compare_tools/generator/overall_performance_generator.py b/profiler/compare_tools/generator/overall_performance_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..d2aa181371b2b4c2bcd6667ebe394ab5096674c6 --- /dev/null +++ b/profiler/compare_tools/generator/overall_performance_generator.py @@ -0,0 +1,19 @@ +from comparator.overall_performance_comparator import OverallPerformanceComparator +from compare_bean.profiling_info import ProfilingInfo +from generator.base_generator import BaseGenerator +from view.screen_view import ScreenView + + +class OverallPerformanceGenerator(BaseGenerator): + def __init__(self, profiling_data_dict: dict, args: any): + super().__init__(profiling_data_dict, args) + + def compare(self): + if not self._args.enable_profiling_compare: + return + self._result_data = OverallPerformanceComparator(self._profiling_data_dict, ProfilingInfo).generate_data() + + def generate_view(self): + if not self._result_data: + return + ScreenView(self._result_data).generate_view() diff --git 
a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py index 4ab8bb898522d99cc78759496ad1d36ed6f421cd..6218c7e969850fb2a2926e9f38e0a45bd77c6770 100644 --- a/profiler/compare_tools/performance_compare.py +++ b/profiler/compare_tools/performance_compare.py @@ -3,21 +3,12 @@ import ast import datetime import os.path import sys -import time sys.path.append( os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse")) -from generation.comparison_generator import ComparisonGenerator +from generator.comparison_generator import ComparisonGenerator from utils.args_manager import ArgsManager -from profiling_analysis.profiling_parse import prof_main -from common_func.path_manager import PathManager - - -def performance_compare(args): - if not args.enable_profiling_compare: - return - prof_main() def main(): @@ -37,20 +28,7 @@ def main(): args = parser.parse_args() ArgsManager().init(args) - - try: - performance_compare(args) - except Exception: - print("[WARNING] Profiling failed to analyze.") - - if any([args.enable_operator_compare, args.enable_memory_compare, args.enable_communication_compare]): - print("[INFO] Start to compare performance data, please wait.") - dir_path = args.output_path if args.output_path else "./" - file_name = "performance_comparison_result_{}.xlsx".format( - time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) - result_file_path = PathManager.get_realpath(os.path.join(dir_path, file_name)) - ComparisonGenerator(args).run(result_file_path) - print(f"[INFO] The comparison result file has been generated: {result_file_path}") + ComparisonGenerator().run() if __name__ == "__main__": diff --git a/profiler/compare_tools/profiling_analysis/__init__.py b/profiler/compare_tools/profiling_analysis/__init__.py deleted file mode 100644 index 8400fd5ecd1246eaee795cebfccfacc80a94f08c..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/compare_tools/profiling_analysis/gpu_parser.py b/profiler/compare_tools/profiling_analysis/gpu_parser.py deleted file mode 100644 index 8f1b6d9c033683621c77d7eaf7a0cf54ab31813f..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/gpu_parser.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from collections import Counter, defaultdict -import pandas as pd - -import profiling_analysis.parser_helper as parser_helper -from utils.file_reader import FileReader -from utils.constant import Constant - - -class OpTimeWarper: - def __init__( - self, - cube_time: float = 0.0, - sdma_time: float = 0.0, - vec_time: float = 0.0, - fa_time_fwd: float = 0.0, - fa_time_bwd: float = 0.0, - all_op_time: float = 0.0, - compute_stream_dur: float = 0.0, - cube_num: int = 0, - vec_num: int = 0, - sdma_num: int = 0, - fa_num_bwd: int = 0, - fa_num_fwd: int = 0 - ): - self.cube_time = cube_time - self.sdma_time = sdma_time - self.vec_time = vec_time - self.fa_time_fwd = fa_time_fwd - self.fa_time_bwd = fa_time_bwd - self.all_op_time = all_op_time - self.compute_stream_dur = compute_stream_dur - self.cube_num = cube_num - self.vec_num = vec_num - self.sdma_num = sdma_num - self.fa_num_bwd = fa_num_bwd - self.fa_num_fwd = fa_num_fwd - - -class GpuProfilingParser: - NCCL_MARK = 'nccl' - CUBE_MARK = 'gemm' - FA_MARK_LIST = [['fmha', 'kernel'], ['flash', 'kernel']] - SDMA_MARK_LIST = ['htod', 'dtod', 'dtoh', 'memset (device)'] - - def __init__(self, gpu_path): - self.trace_events = FileReader.read_trace_file(gpu_path).get('traceEvents') - self.compute_stream_id = self.infer_compute_stream_id() - self.one_step_time = 0 - self.profiling_info = parser_helper.ProfilingInfo('GPU') - - def is_flash_attention(self, name: str): - for fa_mark in self.FA_MARK_LIST: - if not len([1 for mark in fa_mark if mark not in name.lower()]): - return True - return False - - def is_sdma_time(self, name: str): - for mark in self.SDMA_MARK_LIST: - if mark in name.lower(): - return True - return False - - def update_op_list(self, op_list, marks): - cube_time = 0.0 - all_op_time = 0.0 - fa_time_bwd = 0.0 - fa_time_fwd = 0.0 - sdma_time = 0.0 - vec_time = 0.0 - cube_num = 0 - vec_num = 0 - sdma_num = 0 - fa_num_bwd = 0 - fa_num_fwd = 0 - compute_stream_dur = 0.0 - for event in self.trace_events: - if not isinstance(event, dict): - continue - if event.get('args') and event.get('args').get('stream') == self.compute_stream_id: - compute_stream_dur += float(event.get('dur')) - if not {'name', 'cat', 'dur', 'ts'} < event.keys(): - continue - name = event.get('name') - dur = event.get('dur') - ts = event.get('ts') - cat = event.get('cat', '') - if event.get('args') and event.get('args').get('stream') == self.compute_stream_id: - if self.is_sdma_time(name): - sdma_time += float(dur) - sdma_num += 1 - continue - if cat.lower() != 'kernel': - continue - if self.NCCL_MARK in name.lower(): - for timestep in range(ts + 1, ts + dur + 1): - marks[str(timestep)] += 1 # mark this timestep in communication stream - continue - else: - for timestep in range(ts + 1, ts + dur + 1): - marks[str(timestep)] += -100 # mark this timestep in compute stream - if self.is_flash_attention(name): - if 'bwd' in name.lower(): - fa_time_bwd += float(dur) - fa_num_bwd += 1 - else: - fa_time_fwd += float(dur) - fa_num_fwd += 1 - elif self.CUBE_MARK in name.lower(): - cube_num += 1 - cube_time += float(dur) - else: - vec_num += 1 - vec_time += float(dur) - all_op_time += float(dur) - op_list.append([ts, name, cat, dur]) - time_wrapper = OpTimeWarper( - cube_time=cube_time, - sdma_time=sdma_time, - vec_time=vec_time, - fa_time_fwd=fa_time_fwd, - fa_time_bwd=fa_time_bwd, - all_op_time=all_op_time, - compute_stream_dur=compute_stream_dur, - 
cube_num=cube_num, - vec_num=vec_num, - sdma_num=sdma_num, - fa_num_bwd=fa_num_bwd, - fa_num_fwd=fa_num_fwd - ) - return time_wrapper - - def parse_events(self): - op_list = [] - marks = defaultdict(int) # mark for compute communication_not_overlapped time - - time_wrapper = self.update_op_list(op_list, marks) - cube_time = time_wrapper.cube_time - fa_time_fwd = time_wrapper.fa_time_fwd - fa_time_bwd = time_wrapper.fa_time_bwd - all_op_time = time_wrapper.all_op_time - compute_stream_dur = time_wrapper.compute_stream_dur - cube_num = time_wrapper.cube_num - vec_num = time_wrapper.vec_num - sdma_num = time_wrapper.sdma_num - sdma_time = time_wrapper.sdma_time - vec_time = time_wrapper.vec_time - - self.profiling_info.compute_time = len([_ for _, value in marks.items() if value < 0]) / 10 ** 6 - self.profiling_info.communication_not_overlapped = len([_ for _, value in marks.items() if value > 0]) / 10 ** 6 - self.profiling_info.flash_attention_time_bwd = fa_time_bwd / 10 ** 6 - self.profiling_info.flash_attention_time_fwd = fa_time_fwd / 10 ** 6 - self.profiling_info.cube_time = cube_time / 10 ** 6 - self.profiling_info.vec_time = self.profiling_info.compute_time - (cube_time + fa_time_fwd + fa_time_bwd) / 10 ** 6 - self.profiling_info.cube_num = cube_num - self.profiling_info.vec_num = vec_num - self.profiling_info.sdma_num = sdma_num - self.profiling_info.fa_num_bwd = time_wrapper.fa_num_bwd - self.profiling_info.fa_num_fwd = time_wrapper.fa_num_fwd - self.profiling_info.sdma_time = sdma_time / 10 ** 6 - self.parse_e2e_time() - - self.profiling_info.scheduling_time = self.profiling_info.e2e_time - self.profiling_info.compute_time - \ - self.profiling_info.communication_not_overlapped - if self.profiling_info.e2e_time < Constant.EPS: - self.profiling_info.scheduling_ratio = 0.0 - else: - self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time - self.parse_memory_reserved() - - def parse_e2e_time(self): - compute_events_timeline = [event for event in self.trace_events if - event.get('args') and event.get('args').get('stream')] - compute_events_timeline = sorted(compute_events_timeline, key=lambda event: event.get('ts')) - self.profiling_info.e2e_time = (compute_events_timeline[-1].get('ts') + compute_events_timeline[-1].get('dur') - - compute_events_timeline[0].get('ts')) / 10 ** 6 - - def parse_memory_reserved(self): - memories = [ - event.get('args').get('Total Reserved') for event in self.trace_events - if event.get('name', '').lower() == '[memory]' and event.get('args').get('Device Id') >= 0 - ] - if not memories: - print("[INFO] Gpu profiling data doesn't contain memory info") - return - self.profiling_info.memory_used = max(memories) / 1024 ** 3 - - def infer_compute_stream_id(self): - kernel_stream_ids = [] - for event in self.trace_events: - is_kernel_exec_event = event.get('cat', '').lower() == 'kernel' and self.NCCL_MARK not in event.get('name', '').lower() - has_stream_id_event = event.get('args') and event.get('args').get('stream') - if is_kernel_exec_event and has_stream_id_event: - kernel_stream_ids.append(event.get('args').get('stream')) - if not kernel_stream_ids: - raise RuntimeError('[ERROR] The profiling data does not contain kernel running data.') - counter = Counter(kernel_stream_ids) - return counter.most_common(1)[0][0] diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py deleted file mode 100644 index 
25b140106631e6379c7c8d1d32631cf6d23e23b9..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/npu_parser.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -from collections import defaultdict -import pandas as pd -import profiling_analysis.parser_helper as parser_helper -from utils.file_reader import FileReader -from common_func.path_manager import PathManager -from common_func.file_manager import FileManager - - -class NpuInfoWrapper: - def __init__( - self, - compute_time: int, - communication_time: int, - sdma_time: int, - sdma_num: int, - is_cluster: bool, - event_wait_sqe: dict, - ai_core_dict: dict, - event_wait_sqe_res: dict, - ai_core_res: dict, - ): - self.compute_time = compute_time - self.communication_time = communication_time - self.sdma_time = sdma_time - self.sdma_num = sdma_num - self.is_cluster = is_cluster - self.event_wait_sqe = event_wait_sqe - self.ai_core_dict = ai_core_dict - self.event_wait_sqe_res = event_wait_sqe_res - self.ai_core_res = ai_core_res - - -class NpuProfilingParser: - FLASH_ATTENTION = "flashattention" - ACLNNINPLACE_COPY = "aclnninplacecopy" - TENSORMOVE = "tensormove" - MATMUL = "matmul" - - def __init__(self, npu_step_time, npu_file_path): - self.npu_json_file = npu_file_path.get('trace_view') - self.npu_summary_file = npu_file_path.get('kernel_details') - self.npu_mem_file = npu_file_path.get('memory_record') - self.info_json = npu_file_path.get('info') - self.profiling_info = parser_helper.ProfilingInfo('NPU') - self.npu_step_time = npu_step_time - self.parallel_time = 0 - self.aicore_time = 0 - self.min_stream_ts = sys.float_info.max - self.max_stream_ts = sys.float_info.min - self.sdma_sqe = defaultdict(float) - self.sdma_num_cnt = defaultdict(int) - - def get_sdma_para(self, sdma_sqe, sdma_num_cnt, ai_core_dict, event_wait_sqe) -> (float, int): - compute_stream = [] - parallel_stream = [] - sdma_time = 0.0 - sdma_parallel_time = 0.0 - sdma_num = 0 - sdma_parallel_num = 0 - if len(ai_core_dict) == 1: - compute_stream.append(min(ai_core_dict.keys())) - elif len(ai_core_dict) == 2: # 2个ai_core,存在并行流(当前最多2条算子计算流) - compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys()) - parallel_stream = list(ai_core_dict.keys() - set(compute_stream)) - else: - print('[WARNING] Npu Compute Stream Num Error.') - if parallel_stream: - sdma_parallel_time = sdma_sqe[parallel_stream[0]] - sdma_parallel_num = sdma_num_cnt[parallel_stream[0]] - if compute_stream: - sdma_time = sdma_sqe[compute_stream[0]] + sdma_parallel_time - sdma_num = sdma_num_cnt[compute_stream[0]] + sdma_parallel_num - return sdma_time, sdma_num - - def parse_npu_json_events(self): - if not self.npu_json_file: - print('[WARNING] Npu trace json file is not available.') - return - compute_time = 0 - communication_time = 0 - min_ts = sys.float_info.max - max_ts = sys.float_info.min - is_cluster = False # 表明没有获取到compute time的耗时 - data 
= FileReader.read_trace_file(self.npu_json_file) - event_wait_sqe = defaultdict(list) - ai_core_dict = defaultdict(list) - event_wait_sqe_res = defaultdict(float) - ai_core_res = defaultdict(float) - for dic in data: - self.get_ts_by_task_type(dic, event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res) - if ('name' in dic) and (dic.get('name', '') == 'Computing'): - is_cluster = True - ts = float(dic.get('ts', 0)) - dur = dic.get('dur') - compute_time += dur - min_ts = ts if ts < min_ts else min_ts - max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts - if ('name' in dic) and (dic.get('name', '') == 'Communication(Not Overlapped)'): - is_cluster = True - ts = float(dic.get('ts')) - dur = dic.get('dur') - communication_time += dur - min_ts = ts if ts < min_ts else min_ts - max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts - sdma_time, sdma_num = self.get_sdma_para(self.sdma_sqe, self.sdma_num_cnt, ai_core_dict, event_wait_sqe) - npu_info_wrapper = NpuInfoWrapper( - compute_time, communication_time, sdma_time, sdma_num, is_cluster, - event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res) - self.update_npu_info(max_ts - min_ts, npu_info_wrapper) - - def update_npu_info(self, ts_dur, npu_info_wrapper): - compute_time = npu_info_wrapper.compute_time - communication_time = npu_info_wrapper.communication_time - is_cluster = npu_info_wrapper.is_cluster - event_wait_sqe = npu_info_wrapper.event_wait_sqe - ai_core_dict = npu_info_wrapper.ai_core_dict - event_wait_sqe_res = npu_info_wrapper.event_wait_sqe_res - ai_core_res = npu_info_wrapper.ai_core_res - sdma_time = npu_info_wrapper.sdma_time - sdma_num = npu_info_wrapper.sdma_num - # AI_CORE和EVENT_WAIT_SQE共存为计算流 - compute_stream = [] - parallel_stream = [] - if not is_cluster: - #单机单卡没有overlap analysis - if len(ai_core_dict) == 1: - compute_stream.append(min(ai_core_dict.keys())) - elif len(ai_core_dict) == 2: # 2个ai_core,存在并行流(当前最多2条算子计算流) - compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys()) - parallel_stream = list(ai_core_dict.keys() - set(compute_stream)) - else: - print('[WARNING] Npu trace json file lack of Stream info') - return - cs_event_wait_sqe_list = event_wait_sqe[compute_stream[0]] - if parallel_stream: - cs_ai_core_list = ai_core_dict[parallel_stream[0]] - sorted(cs_event_wait_sqe_list, key=lambda x: (x[0])) - sorted(cs_ai_core_list, key=lambda x: (x[0])) - self.parallel_time = self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list) - self.profiling_info.compute_time = compute_time / 10 ** 6 if is_cluster else \ - ai_core_res[compute_stream[0]] / 10 ** 6 - self.profiling_info.other_time = max(0, self.profiling_info.compute_time - self.profiling_info.cube_time - \ - self.profiling_info.flash_attention_time_fwd - self.profiling_info.flash_attention_time_bwd - \ - self.profiling_info.vec_time) - self.profiling_info.e2e_time = ts_dur / 10 ** 6 if is_cluster else \ - (self.max_stream_ts - self.min_stream_ts) / 10 ** 6 - self.profiling_info.communication_not_overlapped = communication_time / 10 ** 6 \ - if is_cluster else (event_wait_sqe_res[compute_stream[0]] - self.parallel_time) / 10 ** 6 - time_required = self.profiling_info.compute_time + self.profiling_info.communication_not_overlapped - self.profiling_info.sdma_time += sdma_time / 10 ** 6 - self.profiling_info.sdma_num += sdma_num - if self.npu_step_time: - self.profiling_info.scheduling_time = self.npu_step_time - time_required - else: - self.profiling_info.scheduling_time = self.profiling_info.e2e_time - time_required - 
self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time \ - if self.profiling_info.e2e_time != 0 else 0 - - def parse_info_json(self): - if not self.info_json: - return - json_data = FileReader.read_trace_file(self.info_json) - if not json_data: - return - if "ProfilerActivity.CPU" in json_data.get('config', {}).get('common_config', {}).get('activities', []): - return - if 'Level0' != json_data.get('config', {}).get('experimental_config', {}).get('_profiler_level', ''): - return - self.profiling_info.minimal_profiling = True - - def parse_npu_csv_events(self): - self.parse_mem_csv() - if not self.npu_summary_file: - print('[WARNING] Npu kernel details csv file is not available.') - return - PathManager.check_path_readable(self.npu_summary_file) - FileManager.check_file_size(self.npu_summary_file) - info = pd.read_csv(self.npu_summary_file, index_col=None) - cube_time = 0.0 - vec_time = 0.0 - sdma_time = 0.0 - fa_time_fwd = 0.0 - fa_time_bwd = 0.0 - cube_num = 0 - vec_num = 0 - fa_num_bwd = 0 - fa_num_fwd = 0 - sdma_num = 0 - if info.get('mac_time(us)') is None and info.get('aiv_vec_time(us)') is None: - self.profiling_info.hide_op_details = True - return - for i in range(len(info['Model ID'])): - op_type = info.loc[i, 'Type'] - name = info.loc[i, 'Name'] - aiv_vec_time = info.loc[i, 'aiv_vec_time(us)'] if info.get('aiv_vec_time(us)') is not None else None - mac_time = info.loc[i, 'mac_time(us)'] if info.get('mac_time(us)') is not None else None - if pd.isna(aiv_vec_time) and pd.isna(mac_time): - continue - task_durations = info.loc[i, 'Duration(us)'] - if self.FLASH_ATTENTION in op_type.lower(): - if 'bwd' in op_type.lower() or 'grad' in op_type.lower(): - fa_time_bwd += task_durations - fa_num_bwd += 1 - else: - fa_time_fwd += task_durations - fa_num_fwd += 1 - elif self.MATMUL in op_type.lower(): - cube_time += task_durations - cube_num += 1 - elif name.lower().startswith(self.ACLNNINPLACE_COPY) and self.TENSORMOVE in name.lower(): - sdma_time += task_durations - sdma_num += 1 - else: - is_vec = (aiv_vec_time and aiv_vec_time > 0) or (mac_time is not None and mac_time == 0) - if is_vec: - vec_time += task_durations - vec_num += 1 - else: - cube_time += task_durations - cube_num += 1 - - self.profiling_info.cube_time = cube_time / 10 ** 6 - self.profiling_info.vec_time = vec_time / 10 ** 6 - self.profiling_info.flash_attention_time_bwd = fa_time_bwd / 10 ** 6 - self.profiling_info.flash_attention_time_fwd = fa_time_fwd / 10 ** 6 - self.profiling_info.cube_num = cube_num - self.profiling_info.vec_num = vec_num - self.profiling_info.fa_num_bwd = fa_num_bwd - self.profiling_info.fa_num_fwd = fa_num_fwd - self.profiling_info.sdma_time = sdma_time / 10 ** 6 - self.profiling_info.sdma_num = sdma_num - - - def parse_mem_csv(self): - if not self.npu_mem_file: - print('[INFO] Npu op memory csv file is not available.') - return - try: - PathManager.check_path_readable(self.npu_mem_file) - FileManager.check_file_size(self.npu_mem_file) - info = pd.read_csv(self.npu_mem_file, usecols=['Total Reserved(MB)'], index_col=None) - except ValueError: - print('[ERROR] Load memory info failed.') - else: - self.profiling_info.memory_used = max(info.get('Total Reserved(MB)')) / 1024 - - @staticmethod - def interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list): - ans = 0 - i = 0 - j = 0 - while i < len(cs_event_wait_sqe_list) and j < len(cs_ai_core_list): - lo = max(cs_event_wait_sqe_list[i][0], cs_ai_core_list[j][0]) - hi = 
min(cs_event_wait_sqe_list[i][1], cs_ai_core_list[j][1]) - if lo <= hi: - ans += (hi - lo) - if cs_event_wait_sqe_list[i][1] < cs_ai_core_list[j][1]: - i += 1 - else: - j += 1 - return ans - - def get_ts_by_task_type(self, dic, event_wait_sqe, ai_core_dict, enent_wait_res, ai_core_res): - if not dic.get('args'): - return - args = dic.get('args') - if args.get('Stream Id'): - stream_id = args.get('Stream Id') - ts = float(dic.get('ts')) - dur = dic.get('dur') - if args.get('Task Type') == 'EVENT_WAIT_SQE': - enent_wait_res[stream_id] += dur - event_wait_sqe[stream_id].append([ts, ts + dur]) - elif args.get('Task Type') in ('SDMA_SQE', 'PCIE_DMA_SQE'): - self.sdma_sqe[stream_id] += dur - self.sdma_num_cnt[stream_id] += 1 - elif args.get('Task Type') in ('AI_CORE', 'MIX_AIC', 'MIX_AIV', 'AI_CPU', 'AI_VECTOR_CORE', 'FFTS_PLUS'): - ai_core_res[stream_id] += dur - ai_core_dict[stream_id].append([ts, ts + dur]) - self.min_stream_ts = ts if ts < self.min_stream_ts else self.min_stream_ts - self.max_stream_ts = (ts + dur) if (ts + dur) > self.max_stream_ts else self.max_stream_ts diff --git a/profiler/compare_tools/profiling_analysis/parser_helper.py b/profiler/compare_tools/profiling_analysis/parser_helper.py deleted file mode 100644 index caf09056ee4ee5884067abf9e5283fd1c9113c12..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/parser_helper.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os - - -class ProfilingInfo: - def __init__(self, profiling_type: str): - self.profiling_type = profiling_type - self.cube_time = 0.0 - self.other_time = 0.0 - self.vec_time = 0.0 - self.cube_num = 0 - self.vec_num = 0 - self.sdma_num = 0 - self.fa_num_fwd = 0 - self.fa_num_bwd = 0 - self.compute_time = 0.0 - self.communication_not_overlapped = 0.0 - self.scheduling_ratio = 0.0 - self.memory_used = 0.0 - self.e2e_time = 0.0 - self.sdma_time = 0.0 - self.scheduling_time = 0.0 - self.flash_attention_time_bwd = 0.0 - self.flash_attention_time_fwd = 0.0 - self.minimal_profiling = False - self.hide_op_details = False diff --git a/profiler/compare_tools/profiling_analysis/profiling_parse.py b/profiler/compare_tools/profiling_analysis/profiling_parse.py deleted file mode 100644 index adf182900f8d0e76e5904dfe0838aa31496c74ed..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/profiling_analysis/profiling_parse.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os - -from prettytable import PrettyTable - -from profiling_analysis.gpu_parser import GpuProfilingParser -from profiling_analysis.npu_parser import NpuProfilingParser -from profiling_analysis.parser_helper import ProfilingInfo -from utils.args_manager import ArgsManager -from utils.constant import Constant - - -def generate_table_info(base_profiling_info, comp_profiling_info, table): - headers = [''] - base_col = [f'{base_profiling_info.profiling_type}'] - comp_col = [f'{comp_profiling_info.profiling_type}'] - if not base_profiling_info.hide_op_details and not comp_profiling_info.hide_op_details: - headers.extend(['Cube Time(Num)', 'Vector Time(Num)']) - base_col.extend([f'{base_profiling_info.cube_time:.3f}s({base_profiling_info.cube_num})', - f'{base_profiling_info.vec_time:.3f}s({base_profiling_info.vec_num})']) - comp_col.extend([f'{comp_profiling_info.cube_time:.3f}s({comp_profiling_info.cube_num})', - f'{comp_profiling_info.vec_time:.3f}s({comp_profiling_info.vec_num})']) - if base_profiling_info.other_time or comp_profiling_info.other_time: - headers.append('Other Time') - base_col.append(f'{base_profiling_info.other_time:.3f}s') - comp_col.append(f'{comp_profiling_info.other_time:.3f}s') - if base_profiling_info.flash_attention_time_fwd or comp_profiling_info.flash_attention_time_fwd: - headers.append('Flash Attention Time(Forward)(Num)') - base_col.append(f'{base_profiling_info.flash_attention_time_fwd:.3f}s({base_profiling_info.fa_num_fwd})') - comp_col.append(f'{comp_profiling_info.flash_attention_time_fwd:.3f}s({comp_profiling_info.fa_num_fwd})') - if base_profiling_info.flash_attention_time_bwd or comp_profiling_info.flash_attention_time_bwd: - headers.append('Flash Attention Time(Backward)(Num)') - base_col.append(f'{base_profiling_info.flash_attention_time_bwd:.3f}s({base_profiling_info.fa_num_bwd})') - comp_col.append(f'{comp_profiling_info.flash_attention_time_bwd:.3f}s({comp_profiling_info.fa_num_bwd})') - headers.extend(['Computing Time']) - base_col.extend([f'{base_profiling_info.compute_time:.3f}s']) - comp_col.extend([f'{comp_profiling_info.compute_time:.3f}s']) - if base_profiling_info.memory_used or comp_profiling_info.memory_used: - headers.append('Mem Usage') - base_col.append(f'{base_profiling_info.memory_used:.2f}G') - comp_col.append(f'{comp_profiling_info.memory_used:.2f}G') - headers.extend(['Uncovered Communication Time']) - base_col.extend( - [f'{base_profiling_info.communication_not_overlapped: .3f}s']) - comp_col.extend( - [f'{comp_profiling_info.communication_not_overlapped: .3f}s']) - if base_profiling_info.sdma_time or comp_profiling_info.sdma_time: - headers.append('SDMA Time(Num)') - base_col.append(f'{base_profiling_info.sdma_time:.3f}s({base_profiling_info.sdma_num})') - comp_col.append(f'{comp_profiling_info.sdma_time:.3f}s({comp_profiling_info.sdma_num})') - cue = '' - if ((base_profiling_info.profiling_type == "NPU" and not base_profiling_info.minimal_profiling) or - (comp_profiling_info.profiling_type == "NPU" and not comp_profiling_info.minimal_profiling)): - - cue = '(Not minimal profiling)' - - 
headers.extend(['Free Time', 'E2E Time' + cue]) - base_col.extend( - [f'{base_profiling_info.scheduling_time:.3f}s', f'{base_profiling_info.e2e_time:.3f}s']) - comp_col.extend( - [f'{comp_profiling_info.scheduling_time:.3f}s', f'{comp_profiling_info.e2e_time:.3f}s']) - table.field_names = headers - table.add_row(base_col) - table.add_row(comp_col) - - -def show_table(base_profiling_info, comp_profiling_info): - table = PrettyTable() - table.title = 'Model Profiling Time Distribution' - generate_table_info(base_profiling_info, comp_profiling_info, table) - print(table) - - -def parse_gpu(gpu_path): - gpu_parser = GpuProfilingParser(gpu_path) - gpu_parser.parse_events() - return gpu_parser.profiling_info - - -def parse_npu(npu_path): - npu_dir = {'trace_view': None, 'memory_record': None, 'kernel_details': None} - for root, _, files in os.walk(npu_path): - for file in files: - if file == 'trace_view.json': - npu_dir['trace_view'] = os.path.join(root, file) - if file == 'memory_record.csv': - npu_dir['memory_record'] = os.path.join(root, file) - if 'kernel_details' in file: - npu_dir['kernel_details'] = os.path.join(root, file) - if 'profiler_info' in file: - npu_dir['info'] = os.path.join(root, file) - - npu_parser = NpuProfilingParser(0, npu_dir) - npu_parser.parse_npu_csv_events() - npu_parser.parse_info_json() - npu_parser.parse_npu_json_events() - return npu_parser.profiling_info - - -def prof_main(): - base_info = ProfilingInfo('None') - comp_info = ProfilingInfo('None') - if ArgsManager().base_profiling_type == Constant.NPU: - base_info = parse_npu(ArgsManager().base_profiling.file_path) - elif ArgsManager().base_profiling_type == Constant.GPU: - base_info = parse_gpu(ArgsManager().base_profiling.file_path) - if ArgsManager().comparison_profiling_type == Constant.NPU: - comp_info = parse_npu(ArgsManager().comparison_profiling.file_path) - elif ArgsManager().comparison_profiling_type == Constant.GPU: - comp_info = parse_gpu(ArgsManager().comparison_profiling.file_path) - - show_table(base_info, comp_info) - - -if __name__ == '__main__': - prof_main() diff --git a/profiler/compare_tools/utils/args_manager.py b/profiler/compare_tools/utils/args_manager.py index 543e8f60a8e63055ba67e91d494322ad2acb02e8..49cde24f156a172a1b51b5e660fa2863e1b54d34 100644 --- a/profiler/compare_tools/utils/args_manager.py +++ b/profiler/compare_tools/utils/args_manager.py @@ -1,9 +1,9 @@ import os.path +import re from common_func.path_manager import PathManager from utils.constant import Constant from utils.file_reader import FileReader -from utils.profiling_parser import GPUProfilingParser, NPUProfilingParser class Singleton(object): @@ -19,38 +19,55 @@ class Singleton(object): @Singleton class ArgsManager: - PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} def __init__(self): self._args = None - self._base_profiling_type = None - self._comparison_profiling_type = None - self._base_profiling = None - self._comparison_profiling = None + self._base_path_dict = {} + self._comparison_path_dict = {} + + @property + def args(self): + return self._args @property def base_profiling_type(self): - return self._base_profiling_type + return self._base_path_dict.get(Constant.PROFILING_TYPE) @property def comparison_profiling_type(self): - return self._comparison_profiling_type + return self._comparison_path_dict.get(Constant.PROFILING_TYPE) @property - def base_profiling(self): - return self._base_profiling + def base_profiling_path(self): + return self._args.base_profiling_path @property 
- def comparison_profiling(self): - return self._comparison_profiling + def comparison_profiling_path(self): + return self._args.comparison_profiling_path @property - def base_profiling_path(self): - return self._args.base_profiling_path + def base_path_dict(self): + return self._base_path_dict @property - def comparison_profiling_path(self): - return self._args.comparison_profiling_path + def comparison_path_dict(self): + return self._comparison_path_dict + + @property + def enable_profiling_compare(self): + return self._args.enable_profiling_compare + + @property + def enable_operator_compare(self): + return self._args.enable_operator_compare + + @property + def enable_memory_compare(self): + return self._args.enable_memory_compare + + @property + def enable_communication_compare(self): + return self._args.enable_communication_compare @classmethod def check_profiling_path(cls, file_path: str): @@ -77,13 +94,16 @@ class ArgsManager: ascend_output = os.path.join(file_path, "ASCEND_PROFILER_OUTPUT") profiler_output = ascend_output if os.path.isdir(ascend_output) else file_path json_path = os.path.join(profiler_output, "trace_view.json") - memory_path = os.path.join(profiler_output, "operator_memory.csv") if not os.path.isfile(json_path): msg = f"Invalid profiling path: {file_path}" raise RuntimeError(msg) - memory_path = memory_path if os.path.isfile(memory_path) else None - return {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, - Constant.TRACE_PATH: json_path, Constant.MEMORY_DATA_PATH: memory_path} + path_dict = {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, + Constant.TRACE_PATH: json_path, Constant.ASCEND_OUTPUT_PATH: profiler_output} + sub_dirs = os.listdir(file_path) + for dir_name in sub_dirs: + if dir_name == "profiler_info.json" or re.match(r"profiler_info_[0-9]+\.json", dir_name): + path_dict.update({Constant.INFO_JSON_PATH: os.path.join(file_path, dir_name)}) + return path_dict def init(self, args: any): self._args = args @@ -106,24 +126,10 @@ class ArgsManager: base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) - base_profiling_dict = self.parse_profiling_path(base_profiling_path) + self._base_path_dict = self.parse_profiling_path(base_profiling_path) comparison_profiling_path = PathManager.get_realpath(self._args.comparison_profiling_path) self.check_profiling_path(comparison_profiling_path) - comparison_profiling_dict = self.parse_profiling_path(comparison_profiling_path) + self._comparison_path_dict = self.parse_profiling_path(comparison_profiling_path) if self._args.output_path: self.check_output_path(PathManager.get_realpath(self._args.output_path)) - - Constant.BASE_PROFILING = Constant.BASE_PROFILING + self._args.base_profiling_path - self._base_profiling_type = base_profiling_dict.get(Constant.PROFILING_TYPE) - self._base_profiling = self.PARSER_DICT.get(self._base_profiling_type)(self._args, base_profiling_dict) - - if base_profiling_path == comparison_profiling_path: - Constant.COMPARISON_PROFILING = "Same To Base Profiling" - self._comparison_profiling_type = self._base_profiling_type - self._comparison_profiling = self._base_profiling - else: - Constant.COMPARISON_PROFILING = Constant.COMPARISON_PROFILING + self._args.comparison_profiling_path - self._comparison_profiling_type = comparison_profiling_dict.get(Constant.PROFILING_TYPE) - self._comparison_profiling = self.PARSER_DICT.get(self._comparison_profiling_type)(self._args, -
comparison_profiling_dict) diff --git a/profiler/compare_tools/utils/common_func.py b/profiler/compare_tools/utils/common_func.py index c9c362b74ec70e82721c2ca1f02683a2fd9c8328..2211d15d2d7aa27ce05594f2f8a2c36cdcfdf55c 100644 --- a/profiler/compare_tools/utils/common_func.py +++ b/profiler/compare_tools/utils/common_func.py @@ -9,6 +9,12 @@ def calculate_diff_ratio(base_value: float, comparison_value: float): return [comparison_value - base_value, ratio] +def update_order_id(data_list: list): + for index, data in enumerate(data_list): + if data: + data[0] = index + 1 + + def convert_to_float(data: any) -> float: try: float_value = float(data) diff --git a/profiler/compare_tools/utils/compare_event.py b/profiler/compare_tools/utils/compare_event.py deleted file mode 100644 index 31cad3e59a9001b2f16a3279f5d307154604546e..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/utils/compare_event.py +++ /dev/null @@ -1,53 +0,0 @@ -from utils.constant import Constant - - -class KernelEvent: - def __init__(self, event: dict, device_type: int): - self._event = event - self._device_type = device_type - - @property - def kernel_name(self) -> str: - return self._event.get("name", "") - - @property - def device_dur(self) -> float: - return self._event.get("dur", 0) - - @property - def task_id(self) -> int: - return self._event.get("args", {}).get("Task Id") - - @property - def task_type(self) -> str: - return self._event.get("args", {}).get("Task Type") - - @property - def kernel_details(self): - if self._device_type == Constant.GPU: - return f"{self.kernel_name} [duration: {self.device_dur}]" - return f"{self.kernel_name}, {self.task_id}, {self.task_type} [duration: {self.device_dur}]\n" - - -class MemoryEvent: - def __init__(self, event: dict, name: str): - self._event = event - self._name = name - - @property - def size(self) -> float: - return self._event.get(Constant.SIZE, 0) - - @property - def duration(self) -> float: - if not self._event.get(Constant.ALLOCATION_TIME) or not self._event.get(Constant.RELEASE_TIME): - return 0 - return float(self._event.get(Constant.RELEASE_TIME)) - self._event.get(Constant.ALLOCATION_TIME, 0) - - @property - def memory_details(self) -> str: - name = self._event.get(Constant.NAME, "") if self._event.get(Constant.NAME, "") else self._name - release_time = self._event.get(Constant.RELEASE_TIME) - allocation_time = self._event.get(Constant.ALLOCATION_TIME) - duration = float(release_time) - float(allocation_time) if release_time and allocation_time else None - return f"{name}, ({allocation_time}, {release_time}), [duration: {duration}], [size: {self.size}]\n" diff --git a/profiler/compare_tools/utils/excel_config.py b/profiler/compare_tools/utils/excel_config.py index 1783b5d81b941b4402cc00c248c2f0f050a164a7..123cf31836fbd21a8d94c2714ae7c82e40a9fc57 100644 --- a/profiler/compare_tools/utils/excel_config.py +++ b/profiler/compare_tools/utils/excel_config.py @@ -1,8 +1,22 @@ from utils.constant import Constant +class CellFormatType: + DEFAULT = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, + 'num_format': '#,##0'} # integer number format, no background color + DEFAULT_FLOAT = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, + 'num_format': '#,##0.00'} # two decimal places, no background color + DEFAULT_RATIO = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', + 'border': True, 'num_format': '0.00%'} # percentage with two decimal places, no background color + RED_RATIO = {"font_name": "Arial", 'font_size': 11, 'align': 'left',
'valign': 'vcenter', + 'border': True, 'num_format': '0.00%', "fg_color": Constant.RED_COLOR} # percentage with two decimal places, red cell background + BOLD_STR = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, + 'bold': True} # string, no background color, bold font + BLUE_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.BLUE_COLOR, 'align': 'left', + 'valign': 'vcenter', 'bold': True, 'border': True} # blue background, bold + + class ExcelConfig(object): - COL_IDS = "ABCDEFGHIJKLMNOPQRSTUVW" ORDER = "Order Id" OPERATOR_NAME = "Operator Name" INPUT_SHAPE = "Input Shape" @@ -34,67 +48,79 @@ class ExcelConfig(object): MIN_DURATION = "Min Duration(us)" HEADERS = { - Constant.OPERATOR_SHEET: [ORDER, OPERATOR_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_DETAILS, DEVICE_DURATION, - OPERATOR_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_DETAILS, DEVICE_DURATION, DIFF_DUR, - DIFF_RATIO], - Constant.MEMORY_SHEET: [ORDER, OPERATOR_NAME, INPUT_SHAPE, INPUT_TYPE, MEMORY_DETAILS, SIZE, OPERATOR_NAME, - INPUT_SHAPE, INPUT_TYPE, MEMORY_DETAILS, SIZE, DIFF_SIZE, DIFF_RATIO], - Constant.OPERATOR_TOP_SHEET: [TOP, OPERATOR_NAME, BASE_DEVICE_DURATION, BASE_OPERATOR_NUMBER, - COMPARISON_DEVICE_DURATION, COMPARISON_OPERATOR_NUMBER, DIFF_TIME, DIFF_RATIO], - Constant.MEMORY_TOP_SHEET: [TOP, OPERATOR_NAME, BASE_ALLOCATED_TIMES, BASE_ALLOCATED_MEMORY, - BASE_OPERATOR_NUMBER, COMPARISON_ALLOCATED_TIMES, COMPARISON_ALLOCATED_MEMORY, - COMPARISON_OPERATOR_NUMBER, DIFF_MEMORY, DIFF_RATIO], - Constant.COMMUNICATION_SHEET: [ORDER, COMM_OP_NAME, TASK_NAME, CALLS, TOTAL_DURATION, AVG_DURATION, - MAX_DURATION, MIN_DURATION, COMM_OP_NAME, TASK_NAME, CALLS, TOTAL_DURATION, - AVG_DURATION, MAX_DURATION, MIN_DURATION, DIFF_DUR, DIFF_RATIO] + Constant.OPERATOR_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_DUR, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.MEMORY_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": MEMORY_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": MEMORY_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_SIZE, "type": CellFormatType.DEFAULT_FLOAT,
"width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.OPERATOR_TOP_TABLE: [ + {"name": TOP, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": BASE_DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 25}, + {"name": BASE_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, + {"name": COMPARISON_DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 30}, + {"name": COMPARISON_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 30}, + {"name": DIFF_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.MEMORY_TOP_TABLE: [ + {"name": TOP, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": BASE_ALLOCATED_TIMES, "type": CellFormatType.DEFAULT_FLOAT, "width": 25}, + {"name": BASE_ALLOCATED_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 30}, + {"name": BASE_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, + {"name": COMPARISON_ALLOCATED_TIMES, "type": CellFormatType.DEFAULT_FLOAT, "width": 27}, + {"name": COMPARISON_ALLOCATED_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 33}, + {"name": COMPARISON_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, + {"name": DIFF_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ], + Constant.COMMUNICATION_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": COMM_OP_NAME, "type": CellFormatType.BOLD_STR, "width": 25}, + {"name": TASK_NAME, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": COMM_OP_NAME, "type": CellFormatType.BOLD_STR, "width": 25}, + {"name": TASK_NAME, "type": CellFormatType.DEFAULT, "width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, + {"name": DIFF_DUR, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} + ] } - COLUMNS = {ORDER: 10, OPERATOR_NAME: 30, TOP: 10, BASE_OPERATOR_NUMBER: 25, BASE_DEVICE_DURATION: 25, - COMPARISON_OPERATOR_NUMBER: 30, COMPARISON_DEVICE_DURATION: 30, BASE_ALLOCATED_TIMES: 25, - BASE_ALLOCATED_MEMORY: 30, COMPARISON_ALLOCATED_TIMES: 27, COMPARISON_ALLOCATED_MEMORY: 33, - CALLS: 10, TOTAL_DURATION: 17, AVG_DURATION: 17, MAX_DURATION: 17, MIN_DURATION: 17, COMM_OP_NAME: 25} - - OVERHEAD = {Constant.OPERATOR_SHEET: ["B1:F1", "G1:K1"], Constant.MEMORY_SHEET: ["B1:F1", "G1:K1"], - Constant.COMMUNICATION_SHEET: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_SHEET: ["C1:D1", "E1:F1"], - Constant.MEMORY_TOP_SHEET: ["C1:E1", "F1:H1"]} - - FORMAT = {"int": 
{"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'num_format': '#,##0'}, - "float": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'num_format': '#,##0.00'}, - "ratio": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', - 'border': True, 'num_format': '0.00%'}, - "ratio_red": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', - 'border': True, 'num_format': '0.00%', "fg_color": Constant.RED_COLOR}, - "str_bold": {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'bold': True}} - - FIELD_TYPE_MAP = {ORDER: "int", - OPERATOR_NAME: "str_bold", - INPUT_SHAPE: "int", - INPUT_TYPE: "str", - KERNEL_DETAILS: "int", - MEMORY_DETAILS: "int", - DEVICE_DURATION: "float", - DIFF_RATIO: "ratio", - DIFF_DUR: "float", - DIFF_SIZE: "float", - SIZE: "float", - TOP: "int", - BASE_DEVICE_DURATION: "float", - COMPARISON_DEVICE_DURATION: "float", - BASE_OPERATOR_NUMBER: "int", - COMPARISON_OPERATOR_NUMBER: "int", - DIFF_TIME: "float", - BASE_ALLOCATED_TIMES: "float", - COMPARISON_ALLOCATED_TIMES: "float", - BASE_ALLOCATED_MEMORY: "float", - COMPARISON_ALLOCATED_MEMORY: "float", - DIFF_MEMORY: "float", - COMM_OP_NAME: "str_bold", - TASK_NAME: "int", - CALLS: "int", - TOTAL_DURATION: "float", - AVG_DURATION: "float", - MAX_DURATION: "float", - MIN_DURATION: "float"} + OVERHEAD = {Constant.OPERATOR_TABLE: ["B1:F1", "G1:K1"], Constant.MEMORY_TABLE: ["B1:F1", "G1:K1"], + Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], + Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"]} diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py deleted file mode 100644 index 30dfce4ef8baf457797106a553c2c8c698eec0f3..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/utils/profiling_parser.py +++ /dev/null @@ -1,300 +0,0 @@ -from abc import abstractmethod -from math import ceil - -from utils.compare_event import KernelEvent -from utils.constant import Constant -from utils.file_reader import FileReader -from utils.trace_event_data import TraceEventData - - -class ProfilingParser: - def __init__(self, args: any, path_dict: dict): - self._args = args - self._profiling_path = path_dict.get(Constant.PROFILING_PATH) - self._torch_op_data = None - self._kernel_dict = None - self._memory_list = None - self._communication_data = None - self._communication_task_data = None - - @property - def file_path(self) -> str: - return self._profiling_path - - @property - def json_path(self) -> str: - return self._json_path - - @property - def torch_op_data(self) -> list: - if self._torch_op_data is None: - self.get_torch_op_data() - return self._torch_op_data - - @property - def kernel_dict(self) -> dict: - if self._kernel_dict is None: - self.get_kernel_dict() - return self._kernel_dict - - @property - def memory_list(self) -> list: - if self._memory_list is None: - self.get_memory_list() - return self._memory_list - - @property - def communication_data(self) -> dict: - if self._communication_data is None: - self.get_communication_data() - return self._communication_data - - @property - def communication_task_data(self) -> dict: - if self._communication_task_data is None: - self.get_communication_data() - return self._communication_task_data - - @abstractmethod - def get_torch_op_data(self): - raise NotImplementedError - - @abstractmethod - 
def get_kernel_dict(self): - raise NotImplementedError - - @abstractmethod - def get_memory_list(self): - raise NotImplementedError - - -class GPUProfilingParser(ProfilingParser): - def __init__(self, args: any, path_dict: dict): - super().__init__(args, path_dict) - self._json_path = path_dict.get(Constant.PROFILING_PATH) - - def get_torch_op_data(self): - torch_op_list = [] - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - for event in total_events: - if event.get("cat", "").lower() in ("cpu_op", "user_annotation", "cuda_runtime", "Operator"): - torch_op_list.append(event) - self._torch_op_data = torch_op_list - - def get_kernel_dict(self): - flow_kernel_dict = {} - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - flow_cat = (self._args.gpu_flow_cat,) if self._args.gpu_flow_cat else ("async_gpu", "async_cpu_to_gpu", - "ac2g", "async") - flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {} - for event in total_events: - if event.get("cat", "") in flow_cat and event.get("ph") == "s": - flow_start_dict[event.get("id")] = event - elif event.get("cat", "") in flow_cat and event.get("ph") == "f": - flow_end_dict[event.get("id")] = event - elif event.get("cat", "").lower() == "kernel" and event.get("name", "").split("_")[0].lower() != "ncclkernel": - kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), float(event.get("ts")))] = event - - for flow_id, start_flow in flow_start_dict.items(): - end_flow = flow_end_dict.get(flow_id) - if end_flow is None: - continue - kernel_event = kernel_dict.get( - "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), float(end_flow.get("ts")))) - if kernel_event is None: - continue - flow_kernel_dict.setdefault(float(start_flow.get("ts")), []).append(KernelEvent(kernel_event, Constant.GPU)) - self._kernel_dict = flow_kernel_dict - - def get_memory_list(self): - self._memory_list = [] - memory_events = [] - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - for event in total_events: - if event.get("name", "").lower() == "[memory]": - memory_events.append(event) - memory_events.sort(key=lambda x: float(x.get("ts", 0))) - addr_dict = {} - for memory_event in memory_events: - args = memory_event.get("args", {}) - if args.get("Device Type", -1) != 1: - continue - allocate_bytes = args.get("Bytes", 0) / Constant.BYTE_TO_KB - record = addr_dict.get(args.get("Addr")) - if allocate_bytes > 0: - if record: - self._memory_list.append(record) - addr_dict[args.get("Addr")] = {Constant.SIZE: allocate_bytes, - Constant.TS: float(memory_event.get("ts", 0)), - Constant.ALLOCATION_TIME: float(memory_event.get("ts", 0))} - if allocate_bytes < 0 and record: - if abs(allocate_bytes) == record.get(Constant.SIZE): - record[Constant.RELEASE_TIME] = float(memory_event.get("ts", 0)) - self._memory_list.append(record) - del addr_dict[args.get("Addr")] - - def get_communication_data(self): - self._communication_data, self._communication_task_data = [], {} - json_data = FileReader.read_trace_file(self._json_path) - total_events = json_data.get("traceEvents", []) - for data in total_events: - if data.get("cat", "").lower() == "kernel" and data.get("name", "").split("_")[0].lower() == "ncclkernel": - self._communication_data.append(data) - - -class NPUProfilingParser(ProfilingParser): - def __init__(self, args: any, path_dict: str): - super().__init__(args, path_dict) - self._json_path = 
path_dict.get(Constant.TRACE_PATH) - self._memory_data_path = path_dict.get(Constant.MEMORY_DATA_PATH) - - def get_torch_op_data(self): - torch_op_list = [] - json_data = FileReader.read_trace_file(self._json_path) - for event in json_data: - if event.get("cat", "").lower() == "cpu_op": - torch_op_list.append(event) - self._torch_op_data = torch_op_list - - def get_kernel_dict(self): - flow_kernel_dict = {} - json_data = FileReader.read_trace_file(self._json_path) - flow_cat = "async_npu" - - flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {} - for event in json_data: - if event.get("cat", "") == flow_cat and event.get("ph") == "s": - flow_start_dict[event.get("id")] = event - elif event.get("cat", "") == flow_cat and event.get("ph") == "f": - flow_end_dict[event.get("id")] = event - elif event.get("ph") == "X" and event.get("cat", "") != 'cpu_op': - kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), float(event.get("ts")))] = event - - for flow_id, start_flow in flow_start_dict.items(): - end_flow = flow_end_dict.get(flow_id) - if end_flow is None: - continue - kernel_event = kernel_dict.get( - "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), float(end_flow.get("ts")))) - if kernel_event is None: - continue - flow_kernel_dict.setdefault(float(start_flow.get("ts")), []).append(KernelEvent(kernel_event, Constant.NPU)) - self._kernel_dict = flow_kernel_dict - - def get_memory_list(self): - self._memory_list = [] - enqueue_dict, dequeue_data = {}, [] - json_data = FileReader.read_trace_file(self._json_path) - for data in json_data: - if data.get("cat", "").lower() == "enqueue": - enqueue_dict[data.get("args", {}).get("correlation_id", "")] = data - elif data.get("cat", "").lower() == "dequeue": - dequeue_data.append(data) - - if not self._memory_data_path: - return - memory_data = FileReader.read_csv_file(self._memory_data_path) - for data in memory_data: - if not data.get(Constant.ALLOCATION_TIME, 0): - continue - if "cann::" in data.get("Name", ""): - ts_time = float(data.get(Constant.ALLOCATION_TIME, 0)) - match_dequeue_data = self._match_cann_memory_data(dequeue_data, ts_time) - if match_dequeue_data is not None: - correlation_id = match_dequeue_data.get("args", {}).get("correlation_id", "") - ts = float(enqueue_dict.get(correlation_id, {}).get("ts", 0)) - self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), Constant.TS: ts, - Constant.NAME: data.get(Constant.NAME, ""), - Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)), - Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)}) - self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), - Constant.TS: float(data.get(Constant.ALLOCATION_TIME, 0)), - Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)), - Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)}) - - @classmethod - def _match_cann_memory_data(cls, dequeue_data: list, ts_time: float): - if not dequeue_data: - return None - right = len(dequeue_data) - 1 - left = 0 - while right > left: - mid = left + ceil((right - left) / 2) - if ts_time >= float(dequeue_data[mid].get("ts", 0)): - left = mid - else: - right = mid - 1 - end_time = float(dequeue_data[left].get("ts", 0)) + dequeue_data[left].get("dur", 0) - return dequeue_data[left] if end_time > ts_time else None - - def get_communication_data(self): - def get_pid(json_data): - pid = None - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_process_meta(): - continue - if 
trace_event.is_hccl_process(): - pid = trace_event.pid - break - return pid - - def get_tid_list(pid, tid_list, json_data): - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_thread_meta(): - continue - if trace_event.pid != pid: - continue - if trace_event.is_communication_op_thread(): - tid_list.append(trace_event.tid) - - def get_comm_data(pid, tid_list, json_data): - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_x_mode(): - continue - if trace_event.pid != pid: - continue - if trace_event.tid in tid_list: - self._communication_data.append(data) - - def get_comm_task_data(pid, tid_list, json_data): - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_x_mode(): - continue - if trace_event.pid != pid: - continue - if trace_event.tid in tid_list: - continue - ts = trace_event.start_time - for communication_op in self._communication_data: - comm_op_event = TraceEventData(communication_op) - if ts < comm_op_event.start_time or ts > comm_op_event.end_time: - continue - name_list = communication_op.get("name", "").split("_") - if len(name_list) >= 2: - self._communication_task_data.setdefault(name_list[1].lower(), []).append(data) - break - - self._communication_data, self._communication_task_data = [], {} - json_data = FileReader.read_trace_file(self._json_path) - - pid = get_pid(json_data) - if pid is None: - return - - tid_list = [] - get_tid_list(pid, tid_list, json_data) - if not tid_list: - return - - get_comm_data(pid, tid_list, json_data) - if not self._communication_data: - return - - get_comm_task_data(pid, tid_list, json_data) diff --git a/profiler/compare_tools/utils/torch_op_node.py b/profiler/compare_tools/utils/torch_op_node.py index c62526c766332257df3c739118545dccbe34a973..8b6f21f201f7050f772c995182a5692bbf943480 100644 --- a/profiler/compare_tools/utils/torch_op_node.py +++ b/profiler/compare_tools/utils/torch_op_node.py @@ -1,11 +1,12 @@ from math import ceil -from utils.compare_event import MemoryEvent +from compare_bean.origin_data_bean.compare_event import MemoryEvent +from compare_bean.origin_data_bean.trace_event_bean import TraceEventBean from utils.constant import Constant class TorchOpNode: - def __init__(self, event=None, parent_node=None): + def __init__(self, event=TraceEventBean, parent_node=None): self._event = event self._parent_node = parent_node self._child_nodes = [] @@ -15,31 +16,31 @@ class TorchOpNode: @property def start_time(self): - return float(self._event.get("ts", 0)) + return self._event.start_time @property def end_time(self): - return float(self._event.get("ts", 0)) + self._event.get("dur", 0) + return self._event.end_time @property def name(self): - return str(self._event.get("name", Constant.NA)) + return self._event.name @property def input_shape(self): - return str(self._event.get("args", {}).get("Input Dims", Constant.NA)) + return str(self._event.args.get("Input Dims", Constant.NA)) @property def origin_input_shape(self): - return self._event.get("args", {}).get("Input Dims", Constant.NA) + return self._event.args.get("Input Dims", Constant.NA) @property def input_type(self): - return str(self._event.get("args", {}).get("Input type", Constant.NA)) + return str(self._event.args.get("Input type", Constant.NA)) @property def call_stack(self): - return str(self._event.get("args", {}).get("Call stack", Constant.NA)) + return str(self._event.args.get("Call stack", Constant.NA)) @property def parent(self): diff --git 
a/profiler/compare_tools/utils/trace_event_data.py b/profiler/compare_tools/utils/trace_event_data.py deleted file mode 100644 index ff70b230e740c5808d201577028d8f434da3e9c5..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/utils/trace_event_data.py +++ /dev/null @@ -1,42 +0,0 @@ -class TraceEventData: - - def __init__(self, event: dict): - self._event = event - - @property - def pid(self) -> int: - return self._event.get("pid", "") - - @property - def tid(self) -> int: - return self._event.get("tid", "") - - @property - def process_name(self) -> int: - return self._event.get("args", {}).get("name", "") - - @property - def start_time(self) -> float: - return float(self._event.get("ts", 0)) - - @property - def end_time(self) -> float: - return float(self._event.get("ts", 0)) + self._event.get("dur", 0) - - def is_m_mode(self) -> bool: - return self._event.get("ph", "") == "M" - - def is_x_mode(self) -> bool: - return self._event.get("ph", "") == "X" - - def is_process_meta(self) -> bool: - return self.is_m_mode() and self._event.get("name", "") == "process_name" - - def is_thread_meta(self) -> bool: - return self.is_m_mode() and self._event.get("name", "") == "thread_name" - - def is_communication_op_thread(self) -> bool: - return self._event.get("args", {}).get("name", "").find("Communication") != -1 - - def is_hccl_process(self) -> bool: - return self.process_name == "HCCL" diff --git a/profiler/compare_tools/utils/tree_builder.py b/profiler/compare_tools/utils/tree_builder.py index fc827f0fb853f9f41b135b8826aeadbb02b27564..a2e00070b941154d24b83584edfcd32ee92c9599 100644 --- a/profiler/compare_tools/utils/tree_builder.py +++ b/profiler/compare_tools/utils/tree_builder.py @@ -9,11 +9,11 @@ class TreeBuilder: @classmethod def build_tree(cls, event_list: list) -> TorchOpNode: root_node = TorchOpNode() - event_list.sort(key=lambda x: float(x.get("ts", 0))) + event_list.sort(key=lambda x: x.start_time) last_node = root_node for event in event_list: while last_node: - if last_node == root_node or float(event.get("ts", 0)) < last_node.end_time: + if last_node == root_node or event.start_time < last_node.end_time: tree_node = TorchOpNode(event, last_node) last_node.add_child_node(tree_node) last_node = tree_node diff --git a/profiler/compare_tools/view/base_view.py b/profiler/compare_tools/view/base_view.py new file mode 100644 index 0000000000000000000000000000000000000000..d18980b7de2098b5a1015d14fbd1b5be91a23bfc --- /dev/null +++ b/profiler/compare_tools/view/base_view.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + + +class BaseView(ABC): + def __init__(self, data_dict: dict): + self._data_dict = data_dict + + @abstractmethod + def generate_view(self): + raise NotImplementedError("Function generate_view need to be implemented.") diff --git a/profiler/compare_tools/view/excel_view.py b/profiler/compare_tools/view/excel_view.py index 457012bbc8efd6f54780e591d0c6446f54a2e359..864a136a3fe55298ed5ae04722899a26af269b8d 100644 --- a/profiler/compare_tools/view/excel_view.py +++ b/profiler/compare_tools/view/excel_view.py @@ -2,19 +2,21 @@ import os from xlsxwriter import Workbook +from view.base_view import BaseView from view.work_sheet_creator import WorkSheetCreator from utils.constant import Constant -class ExcelViewer: +class ExcelView(BaseView): - def __init__(self, data_dict: dict, file_path: str): - self._data_dict = data_dict + def __init__(self, data_dict: dict, file_path: str, args: any): + super().__init__(data_dict) self._file_path = file_path + self._args 
= args def generate_view(self): workbook = Workbook(self._file_path) for sheet_name, data in self._data_dict.items(): - WorkSheetCreator(workbook, sheet_name, data).create_sheet() + WorkSheetCreator(workbook, sheet_name, data, self._args).create_sheet() workbook.close() os.chmod(self._file_path, Constant.FILE_AUTHORITY) diff --git a/profiler/compare_tools/view/screen_view.py b/profiler/compare_tools/view/screen_view.py new file mode 100644 index 0000000000000000000000000000000000000000..9c256ac3ab975f1d1ef40f48db37bc7b5bf4c4ff --- /dev/null +++ b/profiler/compare_tools/view/screen_view.py @@ -0,0 +1,19 @@ +from prettytable import PrettyTable + +from view.base_view import BaseView + + +class ScreenView(BaseView): + def __init__(self, data_dict: dict): + super().__init__(data_dict) + + def generate_view(self): + for sheet_name, data in self._data_dict.items(): + if not data.get("rows", []): + return + table = PrettyTable() + table.title = sheet_name + table.field_names = data.get("headers", []) + for row in data.get("rows", []): + table.add_row(row) + print(table) diff --git a/profiler/compare_tools/view/work_sheet_creator.py b/profiler/compare_tools/view/work_sheet_creator.py index 909d4445315b4bc6d2d2773a3e5c0ce567edf244..ef7f8deeddaaf165c8297497d8fb716cdd8e75f3 100644 --- a/profiler/compare_tools/view/work_sheet_creator.py +++ b/profiler/compare_tools/view/work_sheet_creator.py @@ -1,67 +1,52 @@ from xlsxwriter import Workbook -from utils.args_manager import ArgsManager -from utils.constant import Constant -from utils.excel_config import ExcelConfig +from utils.excel_config import ExcelConfig, CellFormatType class WorkSheetCreator: - def __init__(self, work_book: Workbook, sheet_name: str, data: list): + def __init__(self, work_book: Workbook, sheet_name: str, data: dict, args: any): self._work_book = work_book self._sheet_name = sheet_name self._data = data + self._args = args self._work_sheet = None self._row_id = 1 + self._field_format = {} + self._diff_ratio_index = None + self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" def create_sheet(self): + if not self._data.get("rows", []): + return self._work_sheet = self._work_book.add_worksheet(self._sheet_name) self._write_headers() self._write_data() def _write_headers(self): - header_format = self._work_book.add_format( - {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.BLUE_COLOR, 'align': 'left', - 'valign': 'vcenter', 'bold': True, 'border': True}) - headers = ExcelConfig.HEADERS.get(self._sheet_name, []) - overhead = ExcelConfig.OVERHEAD.get(self._sheet_name, []) + header_format = self._work_book.add_format(CellFormatType.BLUE_BOLD) + overhead = self._data.get("overhead", []) if overhead: - base_path = f"Base Profiling: {ArgsManager().base_profiling_path}" + base_path = f"Base Profiling: {self._args.base_profiling_path}" self._work_sheet.merge_range(overhead[0], base_path, header_format) - if ArgsManager().base_profiling_path == ArgsManager().comparison_profiling_path: - comparison_path = "Same To Base Profiling" - else: - comparison_path = f"Comparison Profiling: {ArgsManager().comparison_profiling_path}" + comparison_path = f"Comparison Profiling: {self._args.comparison_profiling_path}" self._work_sheet.merge_range(overhead[1], comparison_path, header_format) self._row_id += 2 - for index, header in enumerate(headers): - column_width = ExcelConfig.COLUMNS.get(header, 20) - col_id = ExcelConfig.COL_IDS[index] - self._work_sheet.set_column(f"{col_id}:{col_id}", column_width) - self._work_sheet.write(f"{col_id}{self._row_id}", 
header, header_format)
+            for index, header in enumerate(self._data.get("headers")):
+                col_id = self._col_ids[index]
+                self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width"))
+                self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format)
+                self._field_format[index] = self._work_book.add_format(header.get("type"))
+                if header.get("name") == ExcelConfig.DIFF_RATIO:
+                    self._diff_ratio_index = index
             self._row_id += 1
 
     def _write_data(self):
-        default_format = self._work_book.add_format(ExcelConfig.FORMAT.get("int"))
-        red_ratio_format = self._work_book.add_format(ExcelConfig.FORMAT.get("ratio_red"))
-        headers = ExcelConfig.HEADERS.get(self._sheet_name, [])
-        field_format = {}
-        diff_ratio_index = None
-        for index, header in enumerate(headers):
-            format_dict = ExcelConfig.FORMAT.get(ExcelConfig.FIELD_TYPE_MAP.get(header, "int"))
-            if not format_dict:
-                format_dict = ExcelConfig.FORMAT.get("int")
-            field_format[index] = self._work_book.add_format(format_dict)
-            if header == ExcelConfig.DIFF_RATIO:
-                diff_ratio_index = index
-        order_id = 1
-        for data in self._data:
-            self._work_sheet.write(f"{ExcelConfig.COL_IDS[0]}{self._row_id}", order_id, default_format)
+        red_ratio_format = self._work_book.add_format(CellFormatType.RED_RATIO)
+        for data in self._data.get("rows"):
             for index, cell_data in enumerate(data):
-                data_index = index + 1
-                cell_format = field_format.get(data_index, default_format)
-                if data_index == diff_ratio_index and cell_data and cell_data > 1:
+                cell_format = self._field_format.get(index)
+                if index == self._diff_ratio_index and cell_data and cell_data > 1:
                     cell_format = red_ratio_format
                 cell_data = "INF" if cell_data == float('inf') else cell_data
-                self._work_sheet.write(f"{ExcelConfig.COL_IDS[data_index]}{self._row_id}", cell_data, cell_format)
-                order_id += 1
+                self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format)
             self._row_id += 1
diff --git a/profiler/test/ut/compare_tools/comparator/test_communication_comparator.py b/profiler/test/ut/compare_tools/comparator/test_communication_comparator.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd40453e1922173e1a5b43f900331081d17e9f91
--- /dev/null
+++ b/profiler/test/ut/compare_tools/comparator/test_communication_comparator.py
@@ -0,0 +1,40 @@
+import unittest
+
+from comparator.communication_comparator import CommunicationComparator
+from compare_bean.communication_bean import CommunicationBean
+
+
+class TestCommunicationComparator(unittest.TestCase):
+    ORIGIN_DATA = {
+        "base_data": {
+            "allreduce": {"comm_list": [0.5, 7], "comm_task": {"Notify Wait": [1, 2, 3], "Memcpy": [5]}},
+            "allgather": {"comm_list": [1, 4], "comm_task": {}}
+        },
+        "comparison_data": {
+            "allreduce": {"comm_list": [4, 5], "comm_task": {"Notify Wait": [1, 2, 3]}},
+            "gather": {"comm_list": [1], "comm_task": {"Notify Wait": [1, 2, 3]}}
+        }
+    }
+    RESULT_DATA = [[1, 'allreduce', None, 2, 7.5, 3.75, 7, 0.5, 'allreduce', None, 2, 9, 4.5, 5, 4, 1.5, 1.2],
+                   [2, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, None, None],
+                   [3, '|', 'Memcpy', 1, 5, 5.0, 5, 5, None, None, None, 0, None, None, None, None, None],
+                   [4, 'allgather', None, 2, 5, 2.5, 4, 1, None, None, None, 0, None, None, None, -5, 0.0],
+                   [5, None, None, None, 0, None, None, None, 'gather', None, 1, 1, 1.0, 1, 1, 1, float('inf')],
+                   [6, None, None, None, 0, None, None, None, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, None, None]]
+
+    def test_compare_when_valid_data(self):
+        comm_comparator = CommunicationComparator(self.ORIGIN_DATA, CommunicationBean)
+        comm_comparator._compare()
+        self.assertEqual(comm_comparator._rows, self.RESULT_DATA)
+
+    def test_compare_when_invalid_data(self):
+        comm_comparator = CommunicationComparator({}, CommunicationBean)
+        comm_comparator._compare()
+        self.assertEqual(comm_comparator._rows, [])
+
+    def test_compare_when_invalid_base_data(self):
+        data = {"comparison_data": {"allreduce": {"comm_list": [4, 5], "comm_task": {}}}}
+        result = [[1, None, None, None, 0, None, None, None, 'allreduce', None, 2, 9, 4.5, 5, 4, 9, float('inf')]]
+        comm_comparator = CommunicationComparator(data, CommunicationBean)
+        comm_comparator._compare()
+        self.assertEqual(comm_comparator._rows, result)
diff --git a/profiler/test/ut/compare_tools/comparator/test_operator_comparator.py b/profiler/test/ut/compare_tools/comparator/test_operator_comparator.py
new file mode 100644
index 0000000000000000000000000000000000000000..c26a308ae1119f30368ad9394c7580a9df1702b0
--- /dev/null
+++ b/profiler/test/ut/compare_tools/comparator/test_operator_comparator.py
@@ -0,0 +1,32 @@
+import unittest
+
+from comparator.operator_comparator import OperatorComparator
+
+
+class MockBean:
+    TABLE_NAME = "TEST"
+    HEADERS = ["INDEX", "VALUE1", "VALUE2"]
+    OVERHEAD = []
+
+    def __init__(self, index, base_op, comparison_op):
+        self._index = index
+        self._base_op = base_op
+        self._comparison_op = comparison_op
+
+    @property
+    def row(self):
+        return [self._index + 1, 1, 1]
+
+
+class TestOperatorComparator(unittest.TestCase):
+    def test_compare_when_valid_data(self):
+        data = [[1, 1]] * 3
+        result = [[1, 1, 1], [2, 1, 1], [3, 1, 1]]
+        comparator = OperatorComparator(data, MockBean)
+        comparator._compare()
+        self.assertEqual(comparator._rows, result)
+
+    def test_compare_when_invalid_data(self):
+        comparator = OperatorComparator({}, MockBean)
+        comparator._compare()
+        self.assertEqual(comparator._rows, [])
diff --git a/profiler/test/ut/compare_tools/comparator/test_operator_statistic_comparator.py b/profiler/test/ut/compare_tools/comparator/test_operator_statistic_comparator.py
new file mode 100644
index 0000000000000000000000000000000000000000..75f2da77f6c851bb6eb27d58f0844b45fe094157
--- /dev/null
+++ b/profiler/test/ut/compare_tools/comparator/test_operator_statistic_comparator.py
@@ -0,0 +1,53 @@
+import unittest
+from unittest.mock import patch
+
+from comparator.operator_statistic_comparator import OperatorStatisticComparator
+
+
+class MockBean:
+    TABLE_NAME = "TEST"
+    HEADERS = ["INDEX", "VALUE1", "VALUE2"]
+    OVERHEAD = []
+
+    def __init__(self, name, base_data, comparison_data):
+        self._name = name
+        self._base_data = 0 if not base_data else 1
+        self._comparison_data = 0 if not comparison_data else 1
+
+    @property
+    def row(self):
+        return [self._name, self._base_data, self._comparison_data]
+
+
+class TestOperatorStatisticComparator(unittest.TestCase):
+    def test_compare_when_valid_data(self):
+        base_dict = {"add": [1], "matmul": [1]}
+        comparison_dict = {"add": [1], "reduce": [1]}
+        with patch("comparator.operator_statistic_comparator.OperatorStatisticComparator._group_by_op_name",
+                   return_value=(base_dict, comparison_dict)):
+            comparator = OperatorStatisticComparator({1: 2}, MockBean)
+            comparator._compare()
+            self.assertEqual(comparator._rows, [[1, 1, 1], [2, 1, 0], [3, 0, 1]])
+
+    def test_compare_when_invalid_data(self):
+        comparator = OperatorStatisticComparator({}, MockBean)
+        comparator._compare()
+        self.assertEqual(comparator._rows, [])
+
+    def test_group_by_op_name_when_valid_data(self):
+        class Node:
+            def __init__(self, name):
+                self.name = name
+
+        data = [[Node("add"), Node("add")], [None, Node("reduce")], [Node("matmul"), None],
+                [Node("matmul"), Node("matmul")], [Node("reduce"), Node("reduce")]]
+        comparator = OperatorStatisticComparator(data, MockBean)
+        base_dict, comparison_dict = comparator._group_by_op_name()
+        self.assertEqual(len(base_dict.get("matmul")), 2)
+        self.assertEqual(len(comparison_dict.get("reduce")), 2)
+
+    def test_group_by_op_name_when_invalid_data(self):
+        comparator = OperatorStatisticComparator([], MockBean)
+        base_dict, comparison_dict = comparator._group_by_op_name()
+        self.assertEqual(base_dict, {})
+        self.assertEqual(comparison_dict, {})
diff --git a/profiler/test/ut/compare_tools/compare_bean/test_communication_bean.py b/profiler/test/ut/compare_tools/compare_bean/test_communication_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..02605d85b18b8d46cad3323d6dfc39d3e2a1581f
--- /dev/null
+++ b/profiler/test/ut/compare_tools/compare_bean/test_communication_bean.py
@@ -0,0 +1,15 @@
+import unittest
+
+from compare_bean.communication_bean import CommunicationBean
+
+
+class TestCommunicationBean(unittest.TestCase):
+    def test_rows_when_valid_data(self):
+        base_data = {"comm_list": [0.5, 7], "comm_task": {"Notify Wait": [1, 2, 3]}}
+        comparison_data = {"comm_list": [1, 3, 5], "comm_task": {"Notify Wait": [1, 2, 3], "Memcpy": [5]}}
+        result = [[None, 'allreduce', None, 2, 7.5, 3.75, 7, 0.5, 'allreduce', None, 3, 9, 3.0, 5, 1, 1.5, 1.2],
+                  [None, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, '|', 'Notify Wait', 3, 6, 2.0, 3, 1, None, None],
+                  [None, None, None, None, 0, None, None, None, '|', 'Memcpy', 1, 5, 5.0, 5, 5, None, None]]
+
+        comm = CommunicationBean("allreduce", base_data, comparison_data)
+        self.assertEqual(comm.rows, result)
diff --git a/profiler/test/ut/compare_tools/compare_bean/test_memory_compare_bean.py b/profiler/test/ut/compare_tools/compare_bean/test_memory_compare_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..1db3dbee3d6eae3c576bba7d0649fa2ff4b7b16b
--- /dev/null
+++ b/profiler/test/ut/compare_tools/compare_bean/test_memory_compare_bean.py
@@ -0,0 +1,39 @@
+import unittest
+from unittest.mock import patch
+
+from compare_bean.memory_compare_bean import MemoryCompareBean
+
+
+class MockNode:
+    def __init__(self, name):
+        self.name = name
+        self.input_shape = None
+        self.input_type = None
+
+
+class MockMemory:
+    def __init__(self, size):
+        self.size = size
+        self.memory_details = "add"
+
+
+class TestMemoryCompareBean(unittest.TestCase):
+    name = 'aten::add'
+
+    def test_row_when_valid_data(self):
+        result = [2, self.name, None, None, 'add', 8, self.name, None, None, 'add', 8, 0, 1.0]
+        with patch("utils.tree_builder.TreeBuilder.get_total_memory", return_value=[MockMemory(8)]):
+            mem = MemoryCompareBean(1, MockNode(self.name), MockNode(self.name))
+            self.assertEqual(mem.row, result)
+
+    def test_row_when_invalid_base_data(self):
+        result = [2, None, None, None, "", 0, self.name, None, None, 'add', 8, 8, float("inf")]
+        with patch("utils.tree_builder.TreeBuilder.get_total_memory", return_value=[MockMemory(8)]):
+            mem = MemoryCompareBean(1, None, MockNode(self.name))
+            self.assertEqual(mem.row, result)
+
+    def test_row_when_invalid_comparison_data(self):
+        result = [2, self.name, None, None, 'add', 8, None, None, None, '', 0, -8, 0]
+        with patch("utils.tree_builder.TreeBuilder.get_total_memory", return_value=[MockMemory(8)]):
+            mem = MemoryCompareBean(1, MockNode(self.name), None)
+            self.assertEqual(mem.row, result)
diff --git a/profiler/test/ut/compare_tools/compare_bean/test_memory_statistic_bean.py b/profiler/test/ut/compare_tools/compare_bean/test_memory_statistic_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e5daee1bf2eede69957f9188a1c48c0f8bb537b
--- /dev/null
+++ b/profiler/test/ut/compare_tools/compare_bean/test_memory_statistic_bean.py
@@ -0,0 +1,35 @@
+import unittest
+from unittest.mock import patch
+
+from compare_bean.memory_statistic_bean import MemoryStatisticBean
+
+
+class MockMemory:
+    def __init__(self, size, duration):
+        self.size = size
+        self.duration = duration
+
+
+class TestMemoryStatisticBean(unittest.TestCase):
+    name = "matmul"
+
+    def test_row_when_valid_data(self):
+        result = [None, self.name, 8.0, 40.0, 2, 4.0, 20.0, 1, -20.0, 0.5]
+        with patch("utils.tree_builder.TreeBuilder.get_total_memory",
+                   return_value=[MockMemory(10240, 2000), MockMemory(10240, 2000)]):
+            bean = MemoryStatisticBean(self.name, [1, 1], [1])
+            self.assertEqual(bean.row, result)
+
+    def test_row_when_invalid_base_data(self):
+        result = [None, self.name, 0, 0, 0, 4.0, 20.0, 1, 20.0, float("inf")]
+        with patch("utils.tree_builder.TreeBuilder.get_total_memory",
+                   return_value=[MockMemory(10240, 2000), MockMemory(10240, 2000)]):
+            bean = MemoryStatisticBean(self.name, [], [1])
+            self.assertEqual(bean.row, result)
+
+    def test_row_when_invalid_comparison_data(self):
+        result = [None, self.name, 8.0, 40.0, 2, 0, 0, 0, -40.0, 0]
+        with patch("utils.tree_builder.TreeBuilder.get_total_memory",
+                   return_value=[MockMemory(10240, 2000), MockMemory(10240, 2000)]):
+            bean = MemoryStatisticBean(self.name, [1, 1], [])
+            self.assertEqual(bean.row, result)
diff --git a/profiler/test/ut/compare_tools/compare_bean/test_operator_compare_bean.py b/profiler/test/ut/compare_tools/compare_bean/test_operator_compare_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..b87c22cbff4c079c5821dae098a900656ef76c82
--- /dev/null
+++ b/profiler/test/ut/compare_tools/compare_bean/test_operator_compare_bean.py
@@ -0,0 +1,39 @@
+import unittest
+from unittest.mock import patch
+
+from compare_bean.operator_compare_bean import OperatorCompareBean
+
+
+class MockNode:
+    def __init__(self, name):
+        self.name = name
+        self.input_shape = None
+        self.input_type = None
+
+
+class MockKernel:
+    def __init__(self, device_dur):
+        self.device_dur = device_dur
+        self.kernel_details = "add"
+
+
+class TestOperatorCompareBean(unittest.TestCase):
+    name = 'aten::add'
+
+    def test_row_when_valid_data(self):
+        result = [2, self.name, None, None, 'add', 8, self.name, None, None, 'add', 8, 0, 1.0]
+        with patch("utils.tree_builder.TreeBuilder.get_total_kernels", return_value=[MockKernel(8)]):
+            op = OperatorCompareBean(1, MockNode(self.name), MockNode(self.name))
+            self.assertEqual(op.row, result)
+
+    def test_row_when_invalid_base_data(self):
+        result = [2, None, None, None, "", 0, self.name, None, None, 'add', 8, 8, float("inf")]
+        with patch("utils.tree_builder.TreeBuilder.get_total_kernels", return_value=[MockKernel(8)]):
+            op = OperatorCompareBean(1, None, MockNode(self.name))
+            self.assertEqual(op.row, result)
+
+    def test_row_when_invalid_comparison_data(self):
+        result = [2, self.name, None, None, 'add', 8, None, None, None, '', 0, -8, 0]
+        with patch("utils.tree_builder.TreeBuilder.get_total_kernels", return_value=[MockKernel(8)]):
+            op = OperatorCompareBean(1, MockNode(self.name), None)
+            self.assertEqual(op.row, result)
diff --git a/profiler/test/ut/compare_tools/compare_bean/test_operator_statistic_bean.py b/profiler/test/ut/compare_tools/compare_bean/test_operator_statistic_bean.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f71b6399ff613df2e862dc60757911bcc14ef7e
--- /dev/null
+++ b/profiler/test/ut/compare_tools/compare_bean/test_operator_statistic_bean.py
@@ -0,0 +1,34 @@
+import unittest
+from unittest.mock import patch
+
+from compare_bean.operator_statistic_bean import OperatorStatisticBean
+
+
+class MockKernel:
+    def __init__(self, device_dur):
+        self.device_dur = device_dur
+
+
+class TestOperatorStatisticBean(unittest.TestCase):
+    name = "matmul"
+
+    def test_row_when_valid_data(self):
+        result = [None, self.name, 8.0, 2, 4.0, 1, -4.0, 0.5]
+        with patch("utils.tree_builder.TreeBuilder.get_total_kernels",
+                   return_value=[MockKernel(2000), MockKernel(2000)]):
+            bean = OperatorStatisticBean(self.name, [1, 1], [1])
+            self.assertEqual(bean.row, result)
+
+    def test_row_when_invalid_base_data(self):
+        result = [None, self.name, 0, 0, 4.0, 1, 4.0, float("inf")]
+        with patch("utils.tree_builder.TreeBuilder.get_total_kernels",
+                   return_value=[MockKernel(2000), MockKernel(2000)]):
+            bean = OperatorStatisticBean(self.name, [], [1])
+            self.assertEqual(bean.row, result)
+
+    def test_row_when_invalid_comparison_data(self):
+        result = [None, self.name, 8.0, 2, 0, 0, -8.0, 0]
+        with patch("utils.tree_builder.TreeBuilder.get_total_kernels",
+                   return_value=[MockKernel(2000), MockKernel(2000)]):
+            bean = OperatorStatisticBean(self.name, [1, 1], [])
+            self.assertEqual(bean.row, result)
diff --git a/profiler/test/ut/compare_tools/compare_bean/test_profiling_info.py b/profiler/test/ut/compare_tools/compare_bean/test_profiling_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..49978cc9625bc7d2629e9e8647458107ce66dbe2
--- /dev/null
+++ b/profiler/test/ut/compare_tools/compare_bean/test_profiling_info.py
@@ -0,0 +1,116 @@
+import unittest
+
+from compare_bean.profiling_info import ProfilingInfo
+
+
+class TestProfilingInfo(unittest.TestCase):
+    def test_calculate_other_time(self):
+        info = ProfilingInfo("NPU")
+        info.compute_time = 10
+        info.cube_time = 1
+        info.fa_time_fwd = 2
+        info.fa_time_bwd = 2
+        info.vec_time = 3
+        info.calculate_other_time()
+        self.assertEqual(info.other_time, 2)
+        info.vec_time = 7
+        info.calculate_other_time()
+        self.assertEqual(info.other_time, 0)
+
+    def test_calculate_vec_time(self):
+        info = ProfilingInfo("NPU")
+        info.compute_time = 10
+        info.cube_time = 1
+        info.fa_time_fwd = 2
+        info.fa_time_bwd = 2
+        info.calculate_vec_time()
+        self.assertEqual(info.vec_time, 5)
+
+    def test_calculate_schedule_time(self):
+        info = ProfilingInfo("NPU")
+        info.e2e_time = 10
+        info.compute_time = 5
+        info.communication_not_overlapped = 3
+        info.calculate_schedule_time()
+        self.assertEqual(info.scheduling_time, 2)
+
+    def test_update_fa_fwd_info(self):
+        info = ProfilingInfo("NPU")
+        info.update_fa_fwd_info(5)
+        info.update_fa_fwd_info(5)
+        self.assertEqual(info.fa_time_fwd, 10)
+        self.assertEqual(info.fa_num_fwd, 2)
+
+    def test_update_fa_bwd_info(self):
+        info = ProfilingInfo("NPU")
+        info.update_fa_bwd_info(5)
+        info.update_fa_bwd_info(5)
+        self.assertEqual(info.fa_time_bwd, 10)
+        self.assertEqual(info.fa_num_bwd, 2)
+
+    def test_update_sdma_info(self):
+        info = ProfilingInfo("NPU")
+        info.update_sdma_info(5)
+        self.assertEqual(info.sdma_time, 5)
+        self.assertEqual(info.sdma_num, 1)
+        info.update_sdma_info(5, 5)
+        self.assertEqual(info.sdma_time, 10)
+        self.assertEqual(info.sdma_num, 6)
+
+    def test_update_cube_info(self):
+        info = ProfilingInfo("NPU")
+        info.update_cube_info(5)
+        info.update_cube_info(5)
+        self.assertEqual(info.cube_time, 10)
+        self.assertEqual(info.cube_num, 2)
+
+    def test_update_vec_info(self):
+        info = ProfilingInfo("NPU")
+        info.update_vec_info(5)
+        info.update_vec_info(5)
+        self.assertEqual(info.vec_time, 10)
+        self.assertEqual(info.vec_num, 2)
+
+    def test_set_compute_time(self):
+        info = ProfilingInfo("NPU")
+        info.update_compute_time(1)
+        info.set_compute_time(5)
+        self.assertEqual(info.compute_time, 5)
+
+    def test_update_compute_time(self):
+        info = ProfilingInfo("NPU")
+        info.update_compute_time(5)
+        info.update_compute_time(5)
+        self.assertEqual(info.compute_time, 10)
+
+    def test_set_e2e_time(self):
+        info = ProfilingInfo("NPU")
+        info.set_e2e_time(5)
+        self.assertEqual(info.e2e_time, 5)
+
+    def test_set_comm_not_overlap(self):
+        info = ProfilingInfo("NPU")
+        info.update_comm_not_overlap(10)
+        info.set_comm_not_overlap(5)
+        self.assertEqual(info.communication_not_overlapped, 5)
+
+    def test_update_comm_not_overlap(self):
+        info = ProfilingInfo("NPU")
+        info.update_comm_not_overlap(5)
+        info.update_comm_not_overlap(5)
+        self.assertEqual(info.communication_not_overlapped, 10)
+
+    def test_set_memory_used(self):
+        info = ProfilingInfo("NPU")
+        info.set_memory_used(10)
+        self.assertEqual(info.memory_used, 10)
+
+    def test_is_not_minimal_profiling(self):
+        info = ProfilingInfo("GPU")
+        info.minimal_profiling = False
+        self.assertFalse(info.is_not_minimal_profiling())
+        info = ProfilingInfo("NPU")
+        info.minimal_profiling = True
+        self.assertFalse(info.is_not_minimal_profiling())
+        info.minimal_profiling = False
+        self.assertTrue(info.is_not_minimal_profiling())
diff --git a/profiler/test/ut/compare_tools/view/__init__.py b/profiler/test/ut/compare_tools/view/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/profiler/test/ut/compare_tools/view/test_excel_view.py b/profiler/test/ut/compare_tools/view/test_excel_view.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfea18e3c412f3a345231c401fcfbee6d3289481
--- /dev/null
+++ b/profiler/test/ut/compare_tools/view/test_excel_view.py
@@ -0,0 +1,18 @@
+import os
+import unittest
+from unittest.mock import patch
+
+from view.excel_view import ExcelView
+
+
+class TestExcelView(unittest.TestCase):
+    file_path = "./test.xlsx"
+
+    def tearDown(self) -> None:
+        if not os.path.exists(self.file_path):
+            raise RuntimeError("ut failed.")
+        os.remove(self.file_path)
+
+    def test_generate_view(self):
+        with patch("view.work_sheet_creator.WorkSheetCreator.create_sheet"):
+            ExcelView({"table1": {}, "table2": {}}, self.file_path, {}).generate_view()
diff --git a/profiler/test/ut/compare_tools/view/test_screen_view.py b/profiler/test/ut/compare_tools/view/test_screen_view.py
new file mode 100644
index 0000000000000000000000000000000000000000..6828b20f0100d0e8363309588550aedc791bae83
--- /dev/null
+++ b/profiler/test/ut/compare_tools/view/test_screen_view.py
@@ -0,0 +1,9 @@
+import unittest
+
+from view.screen_view import ScreenView
+
+
+class TestScreenView(unittest.TestCase):
+    def test_generate_view(self):
+        data = {"table": {"headers": ["index", "value"], "rows": [[1, 1], [2, 2]]}}
+        ScreenView(data).generate_view()
diff --git a/profiler/test/ut/compare_tools/view/test_worker_sheet_creator.py b/profiler/test/ut/compare_tools/view/test_worker_sheet_creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba59e8d1c0279f98845c244d5b194b0bfc38a6d0
--- /dev/null
+++ b/profiler/test/ut/compare_tools/view/test_worker_sheet_creator.py
@@ -0,0 +1,45 @@
+import os
+import unittest
+
+import pandas as pd
+from xlsxwriter import Workbook
+
+from utils.excel_config import ExcelConfig
+from view.work_sheet_creator import WorkSheetCreator
+
+
+class TestWorkerSheetCreator(unittest.TestCase):
+    file_path = "./test.xlsx"
+    table_name = "OperatorCompareStatistic"
+
+    def tearDown(self) -> None:
+        if not os.path.exists(self.file_path):
+            raise RuntimeError("ut failed.")
+        os.remove(self.file_path)
+
+    def test_create_sheet_when_valid_data(self):
+        class Args:
+            def __init__(self, base, comparison):
+                self.base_profiling_path = base
+                self.comparison_profiling_path = comparison
+
+        data = {"headers": ExcelConfig.HEADERS.get(self.table_name),
+                "overhead": ExcelConfig.OVERHEAD.get(self.table_name),
+                "rows": [[1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, float("inf")], [1, 2, 3, 4, 5, 6, 7, 0.45],
+                         [1, 2, 3, 4, 5, 6, 7, 0]]}
+        creator = WorkSheetCreator(Workbook(self.file_path), self.table_name, data, Args("base", "comparison"))
+        creator.create_sheet()
+        creator._work_book.close()
+        data = pd.read_excel(self.file_path)
+        self.assertEqual(data.shape[0], 6)
+        self.assertEqual(data.shape[1], 8)
+
+    def test_create_sheet_when_invalid_data(self):
+        data = {"headers": ExcelConfig.HEADERS.get(self.table_name),
+                "overhead": ExcelConfig.OVERHEAD.get(self.table_name),
+                "rows": []}
+        creator = WorkSheetCreator(Workbook(self.file_path), self.table_name, data, {})
+        creator.create_sheet()
+        creator._work_book.close()
+        data = pd.read_excel(self.file_path)
+        self.assertEqual(data.shape[0], 0)