From aabc4de5c868c0192206445e87ea57ae201080ba Mon Sep 17 00:00:00 2001 From: y00836044 Date: Tue, 24 Oct 2023 19:28:27 +0800 Subject: [PATCH] =?UTF-8?q?ATT=E6=80=A7=E8=83=BD=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E9=97=AE=E9=A2=98=E6=8E=92=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../analysis/communication_analysis.py | 2 +- .../pytorch_data_preprocessor.py | 5 +- .../cluster_prof_Info_analysis.py | 45 +++-- .../cluster_analyse/common_func/constant.py | 7 + .../common_func/file_manager.py | 42 +++- .../communication_group_generator.py | 22 +- .../communication_comparison_generator.py | 180 +++++++++-------- .../generation/op_comparison_generator.py | 191 ++++++++++-------- profiler/compare_tools/performance_compare.py | 5 +- .../profiling_analysis/gpu_parser.py | 59 +++++- .../profiling_analysis/npu_parser.py | 40 +++- profiler/compare_tools/utils/constant.py | 3 + profiler/compare_tools/utils/file_reader.py | 4 +- .../compare_tools/utils/profiling_parser.py | 184 ++++++++--------- profiler/compare_tools/utils/tree_builder.py | 20 +- 15 files changed, 482 insertions(+), 327 deletions(-) diff --git a/profiler/cluster_analyse/analysis/communication_analysis.py b/profiler/cluster_analyse/analysis/communication_analysis.py index 1d036872986..a7b27913443 100644 --- a/profiler/cluster_analyse/analysis/communication_analysis.py +++ b/profiler/cluster_analyse/analysis/communication_analysis.py @@ -31,7 +31,7 @@ class BaseCommAnalysis: @staticmethod def compute_ratio(dividend: float, divisor: float): - if abs(divisor) < 1e-15: + if abs(divisor) < Constant.EPS: return 0 else: return round(dividend / divisor, 4) diff --git a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py index 8df922ea19b..5d3596093f7 100644 --- a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py +++ b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py @@ -14,8 +14,9 @@ # limitations under the License. from collections import defaultdict -from common_func.file_manager import FileManager import os +from common_func.file_manager import FileManager +from common_func.path_manager import PathManager class PytorchDataPreprocessor: @@ -23,7 +24,7 @@ class PytorchDataPreprocessor: PROFILER_INFO_EXTENSION = '.json' def __init__(self, path: str): - self.path = os.path.realpath(path) + self.path = PathManager.get_realpath(path) def get_data_map(self) -> dict: ascend_pt_dirs = [] diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py index 3b2b05676d8..c4c8bb8f303 100644 --- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py +++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py @@ -25,15 +25,21 @@ import os import warnings +from common_func.path_manager import PathManager +from common_func.file_manager import FileManager + class FormDataProcessor: def __init__(self, path, form_name): self.form_name = form_name + path = PathManager().get_realpath(path) self.files = self.get_files_with_prefix_recursive(path, form_name) def get_files_with_prefix_recursive(self, csv_path, match_str): matched_ir_files = list(Path(csv_path).rglob(match_str)) - assert len(matched_ir_files) > 0, f"Didn't find any file in folder {csv_path} that matches {match_str}" + if not matched_ir_files: + msg = f"Didn't find any file in folder {csv_path} that matches {match_str}" + raise RuntimeError(msg) return [str(item) for item in matched_ir_files] def readSummaryData(self, columns_to_keep): @@ -43,6 +49,7 @@ class FormDataProcessor: if "mindstudio_profiler_output" in f: continue # 读取CSV文件 + PathManager.check_path_readable(f) df = pd.read_csv(f) # 保留需要的列 try: @@ -62,6 +69,8 @@ class FormDataProcessor: def getChipType(self): file = self.files[0] + PathManager.check_path_readable(file) + FileManager.check_file_size(file) df = pd.read_csv(file) if 'aiv_time(us)' in df.columns: return "ASCEND_910B" @@ -129,8 +138,10 @@ class ViewInfoManager: class OpSummaryAnalyzerBase: def __init__(self, chip_type, analyzer_type, dir_path): self.chip_type = chip_type - self.result_dir = f"{dir_path}/result" - os.makedirs(self.result_dir, exist_ok=True) # 文件路径不存在则创建 + result_dir = os.path.join(dir_path, "result") + self.result_dir = PathManager.get_realpath(result_dir) + PathManager.check_path_writeable(dir_path) + PathManager.make_dir_safety(self.result_dir) view_info = ViewInfoManager(chip_type).getColumnsInfo(analyzer_type) self.columns_to_view = view_info['columns_to_view'] self.calculate_fun = view_info['calculate_fun'] @@ -152,6 +163,7 @@ class OpSummaryAnalyzerBase: view_data = summary_data.groupby(self.attrs_to_group).agg(calculate_dict).reset_index() return view_data + class TimeToCsvAnalyzer(OpSummaryAnalyzerBase): def __init__(self, chip_type, dir_path): super().__init__(chip_type, "TimeToCsvAnalyzer", dir_path) @@ -162,7 +174,9 @@ class TimeToCsvAnalyzer(OpSummaryAnalyzerBase): view_data.columns = [''.join(col) if col[1] == "" else '_'.join(col) for col in view_data.columns] for column in self.columns_to_view: view_data[column + '_range'] = view_data[column + '_max'] - view_data[column + '_min'] - view_data.to_csv(self.result_dir + "/cluster_duration_time_analysis.csv", index=False) + csv_save_path = os.path.join(self.result_dir, "cluster_duration_time_analysis.csv") + PathManager.create_file_safety(csv_save_path) + view_data.to_csv(csv_save_path, index=False) return view_data @@ -210,7 +224,9 @@ class StatisticalInfoToHtmlAnalyzer(OpSummaryAnalyzerBase): height=int(500 * row_num), width=int(rank_num * 100 * col_num), title_text="Op Performance Comparison") - plot(fig, filename=self.result_dir + "/" + column + "_Info.html") + save_path = os.path.join(self.result_dir, column + "_Info.html") + PathManager.create_file_safety(save_path) + plot(fig, filename=save_path) def getCalNum(self, rank_num): # 计算每行应该画多少个子图 @@ -219,6 +235,7 @@ class StatisticalInfoToHtmlAnalyzer(OpSummaryAnalyzerBase): else: return 1 + class DeliverableGenerator: def __init__(self, args): self.args = args @@ -255,15 +272,15 @@ class DeliverableGenerator: def main(): - # 解析命令行参数 - parser = argparse.ArgumentParser() - parser.add_argument("--dir", "-d", default=None, help="root dir of PROF_* data") - parser.add_argument("--top_n", "-n", default=10, help="how many operators to show", type=int) - parser.add_argument("--type", "-t", default='html', help="compare ratio or aicore-time", type=str) - args = parser.parse_args() - - deviverable_gen = DeliverableGenerator(args) - deviverable_gen.run() + # 解析命令行参数 + parser = argparse.ArgumentParser() + parser.add_argument("--dir", "-d", default=None, help="root dir of PROF_* data") + parser.add_argument("--top_n", "-n", default=10, help="how many operators to show", type=int) + parser.add_argument("--type", "-t", default='html', help="compare ratio or aicore-time", type=str) + args = parser.parse_args() + + deviverable_gen = DeliverableGenerator(args) + deviverable_gen.run() if __name__ == "__main__": main() diff --git a/profiler/cluster_analyse/common_func/constant.py b/profiler/cluster_analyse/common_func/constant.py index 8910099c7f8..091007cf56b 100644 --- a/profiler/cluster_analyse/common_func/constant.py +++ b/profiler/cluster_analyse/common_func/constant.py @@ -66,3 +66,10 @@ class Constant(object): # step time RANK = 'rank' STAGE = 'stage' + + # epsilon + EPS = 1e-15 + + # file suffix + JSON_SUFFIX = ".json" + CSV_SUFFIX = ".csv" diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index 063332acc09..1a905c78ba9 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -22,6 +22,8 @@ from common_func.path_manager import PathManager class FileManager: + DATA_FILE_AUTHORITY = 0o640 + DATA_DIR_AUTHORITY = 0o750 @classmethod def read_csv_file(cls, file_path: str, class_bean: any) -> list: @@ -31,7 +33,7 @@ class FileManager: return [] if file_size > Constant.MAX_CSV_SIZE: check_msg = input( - f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") if check_msg.lower() != "y": print(f"[WARNING] The user choose not to read the file: {file_path}") return [] @@ -53,7 +55,7 @@ class FileManager: return {} if file_size > Constant.MAX_JSON_SIZE: check_msg = input( - f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") if check_msg.lower() != "y": print(f"[WARNING] The user choose not to read the file: {file_path}") return [] @@ -68,12 +70,15 @@ class FileManager: def create_csv_file(cls, profiler_path: str, data: list, file_name: str, headers: list = None) -> None: if not data: return - output_path = os.path.join(profiler_path, Constant.CLUSTER_ANALYSIS_OUTPUT) + output_path = os.path.join( + profiler_path, Constant.CLUSTER_ANALYSIS_OUTPUT) output_file = os.path.join(output_path, file_name) - PathManager.create_file_safety(output_file) - PathManager.check_path_writeable(output_file) + PathManager.check_path_writeable(output_path) try: - with open(output_file, "w", newline="") as file: + with os.fdopen( + os.open(output_file, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY), + 'w', newline="" + ) as file: writer = csv.writer(file) if headers: writer.writerow(headers) @@ -87,16 +92,33 @@ class FileManager: return output_path = os.path.join(profiler_path, Constant.CLUSTER_ANALYSIS_OUTPUT) output_file = os.path.join(output_path, file_name) - PathManager.create_file_safety(output_file) - PathManager.check_path_writeable(output_file) + PathManager.check_path_writeable(output_path) try: - with open(output_file, "w") as file: + with os.fdopen( + os.open(output_file, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY), 'w' + ) as file: json.dump(data, file) except Exception: raise RuntimeError(f"Can't create the file: {output_file}") @classmethod def create_output_dir(cls, collection_path: str) -> None: - output_path = os.path.join(collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT) + output_path = os.path.join( + collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT) PathManager.remove_path_safety(output_path) PathManager.make_dir_safety(output_path) + + @classmethod + def check_file_size(cls, file_path): + suffix = os.path.splitext(file_path) + if suffix == Constant.CSV_SUFFIX: + limit_size = Constant.MAX_JSON_SIZE + else: + limit_size = Constant.MAX_CSV_SIZE + file_size = os.path.getsize(file_path) + if file_size > limit_size: + check_msg = input( + f"The file({file_path}) size exceeds the preset max value, do you want to continue? [y/n]") + if check_msg.lower() != "y": + msg = f"[ERROR] The user choose to stop." + raise RuntimeError(msg) diff --git a/profiler/cluster_analyse/communication_group/communication_group_generator.py b/profiler/cluster_analyse/communication_group/communication_group_generator.py index a367e624fb9..31576eed07c 100644 --- a/profiler/cluster_analyse/communication_group/communication_group_generator.py +++ b/profiler/cluster_analyse/communication_group/communication_group_generator.py @@ -119,14 +119,20 @@ class CommunicationGroupGenerator: return p2p_ops = ops.get(Constant.P2P, {}) for op_name, link_dict in p2p_ops.items(): - for link in link_dict: - src_rank = int(link.split('-')[0]) - dst_rank = int(link.split('-')[1]) - if src_rank != dst_rank: - rank_set = set([src_rank, dst_rank]) - if rank_set in self.p2p_link: - continue - self.p2p_link.append(rank_set) + self.append_p2p_link(op_name, link_dict) + + def append_p2p_link(self, op_name, link_dict): + for link in link_dict: + if '-' not in link: + print(f"{op_name} has an invalid link key {link}!") + break + src_rank = int(link.split('-')[0]) + dst_rank = int(link.split('-')[1]) + if src_rank != dst_rank: + rank_set = set([src_rank, dst_rank]) + if rank_set in self.p2p_link: + continue + self.p2p_link.append(rank_set) def get_collective_ops_name(self, rank_id: int, comm_op_dict: dict): for comm_op in comm_op_dict: diff --git a/profiler/compare_tools/generation/communication_comparison_generator.py b/profiler/compare_tools/generation/communication_comparison_generator.py index 7838c45aec5..1607852276d 100644 --- a/profiler/compare_tools/generation/communication_comparison_generator.py +++ b/profiler/compare_tools/generation/communication_comparison_generator.py @@ -14,18 +14,22 @@ class CommunicationComparisonGenerator: self._args = args self._args_manager = ArgsManager() self._compare_result_data = compare_result_data + self._row_index = 3 def create_sheet(self, workbook: Workbook): ws = workbook.create_sheet("CommunicationCompare", 0) ws.sheet_properties.tabColor = Constant.YELLOW_COLOR - # write headers base_headers = Constant.CMP_COMMUNICATION_HEADER comparison_headers = Constant.CMP_COMMUNICATION_HEADER headers = base_headers + comparison_headers + [Constant.DIFF] + self.set_header(ws, base_headers, comparison_headers, headers) + self.write_lines(ws, base_headers, comparison_headers, headers) + + def set_header(self, ws, base_headers, comparison_headers, headers): + # write headers base_trace_start_column = 0 comparison_trace_start_column = len(base_headers) diff_start_column = len(base_headers) + len(comparison_headers) - for col_index in range(len(headers)): ws.cell(row=1, column=col_index + 1).border = Constant.BORDER ws.cell(row=1, column=col_index + 1).font = Font(name='Arial') @@ -50,92 +54,98 @@ class CommunicationComparisonGenerator: ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF) + 1, end_row=2, end_column=headers.index(Constant.DIFF) + 1) + def write_lines(self, ws, base_headers, comparison_headers, headers): # write lines - row_index = 3 + self._row_index = 3 for _, row in self._compare_result_data.iterrows(): - # write summary lines - base_name = Constant.NA if math.isnan(row[Constant.BASE_CALLS]) else row[Constant.OP_KEY] - comparison_name = Constant.NA if math.isnan(row[Constant.COMPARISON_CALLS]) else row[Constant.OP_KEY] - if math.isnan(row[Constant.BASE_SUM]) or math.isnan(row[Constant.COMPARISON_SUM]) or row[ - Constant.BASE_SUM] == 0: - diff = Constant.NA + self.write_summary_lines(ws, row, headers) + self._row_index += 1 + self.write_detail_lines(ws, row, base_headers, comparison_headers, headers) + + def write_summary_lines(self, ws, row, headers): + # write summary lines + base_name = Constant.NA if math.isnan(row[Constant.BASE_CALLS]) else row[Constant.OP_KEY] + comparison_name = Constant.NA if math.isnan(row[Constant.COMPARISON_CALLS]) else row[Constant.OP_KEY] + if math.isnan(row[Constant.BASE_SUM]) or math.isnan(row[Constant.COMPARISON_SUM]) or row[ + Constant.BASE_SUM] == 0: + diff = Constant.NA + else: + diff = (row[Constant.COMPARISON_SUM] - row[Constant.BASE_SUM]) / row[Constant.BASE_SUM] + row_data = [base_name, Constant.NA, row[Constant.BASE_CALLS], row[Constant.BASE_SUM], + row[Constant.BASE_AVG], row[Constant.BASE_MAX], row[Constant.BASE_MIN], comparison_name, + Constant.NA, row[Constant.COMPARISON_CALLS], row[Constant.COMPARISON_SUM], + row[Constant.COMPARISON_AVG], row[Constant.COMPARISON_MAX], row[Constant.COMPARISON_MIN], diff] + for index in range(len(headers)): + if headers[index] in ( + Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, + Constant.MIN_DURATION): + ws.cell(row=self._row_index, column=index + 1).number_format = '0.00' + if headers[index] == Constant.DIFF: + ws.cell(row=self._row_index, column=index + 1).number_format = '0.00%' + if diff != Constant.NA and diff < 0: + ws.cell(row=self._row_index, column=index + 1).font = Font(name='Arial', + color=Constant.GREEN_COLOR) + elif diff != Constant.NA and diff >= 0: + ws.cell(row=self._row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) else: - diff = (row[Constant.COMPARISON_SUM] - row[Constant.BASE_SUM]) / row[Constant.BASE_SUM] - row_data = [base_name, Constant.NA, row[Constant.BASE_CALLS], row[Constant.BASE_SUM], - row[Constant.BASE_AVG], row[Constant.BASE_MAX], row[Constant.BASE_MIN], comparison_name, - Constant.NA, row[Constant.COMPARISON_CALLS], row[Constant.COMPARISON_SUM], - row[Constant.COMPARISON_AVG], row[Constant.COMPARISON_MAX], row[Constant.COMPARISON_MIN], diff] - for index in range(len(headers)): - if headers[index] in ( + bold = headers[index] == Constant.COMMUNICAT_OP + ws.cell(row=self._row_index, column=index + 1).font = Font(name='Arial', bold=bold) + value = row_data[index] + if value != Constant.NA: + ws.cell(row=self._row_index, column=index + 1).value = value + ws.cell(row=self._row_index, column=index + 1).border = Constant.BORDER + ws.cell(row=self._row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.SUMMARY_LINE_COLOR) + + def write_detail_lines(self, ws, row, base_headers, comparison_headers, headers): + # write detail lines + base_name = Constant.NA if math.isnan(row[Constant.BASE_CALLS]) else row[Constant.OP_KEY] + comparison_name = Constant.NA if math.isnan(row[Constant.COMPARISON_CALLS]) else row[Constant.OP_KEY] + base_task_list = self._args_manager.base_profiling.communication_task_data.get(base_name, []) + comparison_task_list = self._args_manager.comparison_profiling.communication_task_data.get(comparison_name, []) + if base_task_list: + base_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in base_task_list] + base_df = pd.DataFrame(base_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + base_data = base_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() + else: + base_data = [] + if comparison_task_list: + comparison_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in comparison_task_list] + comparison_df = pd.DataFrame(comparison_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + comparison_data = comparison_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() + else: + comparison_data = [] + for index in range(max(len(base_data), len(comparison_data))): + base_detail_data, comparison_detail_data = [Constant.NA] * len(base_headers), [Constant.NA] * len(comparison_headers) + base_detail_data[0] = "|" + comparison_detail_data[0] = "|" + if index < len(base_data): + total_dur = sum([data[2] for data in base_data]) + percent = 0.0 if total_dur < Constant.EPS else base_data[index][2] / total_dur + dur_percent = "%.2f%%" % (percent * 100) + base_data[index][0] = f"{base_data[index][0]} ({dur_percent})" + base_detail_data[1:] = base_data[index] + if index < len(comparison_data): + total_dur = sum([data[2] for data in comparison_data]) + percent = 0.0 if total_dur < Constant.EPS else comparison_data[index][2] / total_dur + dur_percent = "%.2f%%" % (percent * 100) + comparison_data[index][0] = f"{comparison_data[index][0]} ({dur_percent})" + comparison_detail_data[1:] = comparison_data[index] + + detail_data = base_detail_data + comparison_detail_data + [Constant.NA] + for colum_index in range(len(headers)): + if headers[colum_index] in ( Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, Constant.MIN_DURATION): - ws.cell(row=row_index, column=index + 1).number_format = '0.00' - if headers[index] == Constant.DIFF: - ws.cell(row=row_index, column=index + 1).number_format = '0.00%' - if diff != Constant.NA and diff < 0: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', - color=Constant.GREEN_COLOR) - elif diff != Constant.NA and diff >= 0: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) - else: - bold = headers[index] == Constant.COMMUNICAT_OP - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=bold) - value = row_data[index] + ws.cell(row=self._row_index, column=colum_index + 1) .number_format = '0.00' + value = detail_data[colum_index] if value != Constant.NA: - ws.cell(row=row_index, column=index + 1).value = value - ws.cell(row=row_index, column=index + 1).border = Constant.BORDER - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", - fgColor=Constant.SUMMARY_LINE_COLOR) - row_index += 1 - - # write detail lines - base_task_list = self._args_manager.base_profiling.communication_task_data.get(base_name, []) - comparison_task_list = self._args_manager.comparison_profiling.communication_task_data.get(comparison_name, - []) - if base_task_list: - base_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in base_task_list] - base_df = pd.DataFrame(base_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) - base_data = base_df.groupby(Constant.OP_KEY).agg( - ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() - else: - base_data = [] - if comparison_task_list: - comparison_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in comparison_task_list] - comparison_df = pd.DataFrame(comparison_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) - comparison_data = comparison_df.groupby(Constant.OP_KEY).agg( - ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() - else: - comparison_data = [] - - for index in range(max(len(base_data), len(comparison_data))): - base_detail_data, comparison_detail_data = [Constant.NA] * len(base_headers), \ - [Constant.NA] * len(comparison_headers) - base_detail_data[0] = "|" - comparison_detail_data[0] = "|" - if index < len(base_data): - total_dur = sum([data[2] for data in base_data]) - dur_percent = "%.2f%%" % (base_data[index][2] / total_dur * 100) - base_data[index][0] = f"{base_data[index][0]} ({dur_percent})" - base_detail_data[1:] = base_data[index] - if index < len(comparison_data): - total_dur = sum([data[2] for data in comparison_data]) - dur_percent = "%.2f%%" % (comparison_data[index][2] / total_dur * 100) - comparison_data[index][0] = f"{comparison_data[index][0]} ({dur_percent})" - comparison_detail_data[1:] = comparison_data[index] - - detail_data = base_detail_data + comparison_detail_data + [Constant.NA] - for colum_index in range(len(headers)): - if headers[colum_index] in ( - Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, - Constant.MIN_DURATION): - ws.cell(row=row_index, column=colum_index + 1) .number_format = '0.00' - value = detail_data[colum_index] - if value != Constant.NA: - ws.cell(row=row_index, column=colum_index + 1).value = value - bold = headers[colum_index] == Constant.OP_NAME - ws.cell(row=row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) - ws.cell(row=row_index, column=colum_index + 1).border = Constant.BORDER - if headers[colum_index] == Constant.COMMUNICAT_OP: - ws.cell(row=row_index, column=colum_index + 1).alignment = Alignment(horizontal="center", - vertical="center") - row_index += 1 + ws.cell(row=self._row_index, column=colum_index + 1).value = value + bold = headers[colum_index] == Constant.OP_NAME + ws.cell(row=self._row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=self._row_index, column=colum_index + 1).border = Constant.BORDER + if headers[colum_index] == Constant.COMMUNICAT_OP: + ws.cell(row=self._row_index, column=colum_index + 1).alignment = Alignment(horizontal="center", vertical="center") + self._row_index += 1 diff --git a/profiler/compare_tools/generation/op_comparison_generator.py b/profiler/compare_tools/generation/op_comparison_generator.py index 1e48d7d0b44..359376855ef 100644 --- a/profiler/compare_tools/generation/op_comparison_generator.py +++ b/profiler/compare_tools/generation/op_comparison_generator.py @@ -1,4 +1,5 @@ import copy +from collections import namedtuple from openpyxl.styles import Font, PatternFill, Alignment from openpyxl.workbook import Workbook @@ -15,6 +16,7 @@ class OpComparisonGenerator: self._compare_type = compare_type self._base_headers = [] self._comparison_headers = [] + self._row_index = 3 self.update_headers() def update_headers(self): @@ -32,10 +34,23 @@ class OpComparisonGenerator: def create_sheet(self, workbook: Workbook): ws = workbook.create_sheet(self._compare_type, 0) ws.sheet_properties.tabColor = Constant.YELLOW_COLOR - # write headers headers = self._base_headers + self._comparison_headers + [Constant.DIFF, Constant.OP_NAME_FILTER, Constant.DIFF_FILTER] + self.writer_headers(ws, headers) + # write lines + self._row_index = 3 + for data in self._compare_result_data: + base_event_list = TreeBuilder.get_total_compare_event(data[0], self._compare_type) if data[0] else [] + comparison_event_list = TreeBuilder.get_total_compare_event(data[1], self._compare_type) if data[1] else [] + diff = self.write_summary_lines(ws, headers, data, base_event_list, comparison_event_list) + self._row_index += 1 + EventListWrapper = namedtuple('EventListWrapper', ['base_event_list', 'comparison_event_list']) + event_list = EventListWrapper(base_event_list, comparison_event_list) + self.write_detail_lines(ws, headers, data, diff, event_list) + + def writer_headers(self, ws, headers): + # write headers base_trace_start_column = 0 comparison_trace_start_column = len(self._base_headers) diff_start_column = len(self._base_headers) + len(self._comparison_headers) @@ -70,90 +85,98 @@ class OpComparisonGenerator: ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF_FILTER) + 1, end_row=2, end_column=headers.index(Constant.DIFF_FILTER) + 1) - # write lines - row_index = 3 - for data in self._compare_result_data: - # write summary lines - base_event_list = TreeBuilder.get_total_compare_event(data[0], self._compare_type) if data[0] else [] - comparison_event_list = TreeBuilder.get_total_compare_event(data[1], self._compare_type) if data[1] else [] - base_summary_data, comparison_summary_data = [Constant.NA] * len(self._base_headers), \ - [Constant.NA] * len(self._comparison_headers) - if data[0]: - base_summary_data[0] = data[0].name - base_summary_data[1] = data[0].input_shape - base_summary_data[2] = data[0].input_type - base_summary_data[3] = sum( - [x.compare_index for x in base_event_list]) if base_event_list else Constant.NA - if data[1]: - comparison_summary_data[0] = data[1].name - comparison_summary_data[1] = data[1].input_shape - comparison_summary_data[2] = data[1].input_type - comparison_summary_data[3] = sum( - [x.compare_index for x in comparison_event_list]) if comparison_event_list else Constant.NA - if base_event_list and comparison_event_list and base_summary_data[3]: - diff = (comparison_summary_data[3] - base_summary_data[3]) / base_summary_data[3] - else: - diff = Constant.NA - op_name = data[0].name if data[0] else data[1].name + def write_summary_lines(self, ws, headers, data, base_event_list, comparison_event_list): + def ws_write_diff(ws, index, value): + ws.cell(row=self._row_index, column=index + 1).number_format = '0.00%' + if value != Constant.NA and value < 0: + ws.cell(row=self._row_index, column=index + 1).font = Font(name='Arial', color=Constant.GREEN_COLOR) + elif value != Constant.NA and value >= 0: + ws.cell(row=self._row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) + + def ws_write_diff_filter(ws, index, headers, diff_value): + if diff_value != Constant.NA and diff_value < 0: + ws.cell(row=self._row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.GREEN_COLOR) + elif diff_value != Constant.NA and diff_value >= 0: + ws.cell(row=self._row_index, column=index + 1).fill = PatternFill("solid", fgColor=Constant.RED_COLOR) + # write summary lines + base_summary_data, comparison_summary_data = [Constant.NA] * len(self._base_headers), \ + [Constant.NA] * len(self._comparison_headers) + if data[0]: + base_summary_data[0] = data[0].name + base_summary_data[1] = data[0].input_shape + base_summary_data[2] = data[0].input_type + base_summary_data[3] = sum( + [x.compare_index for x in base_event_list]) if base_event_list else Constant.NA + if data[1]: + comparison_summary_data[0] = data[1].name + comparison_summary_data[1] = data[1].input_shape + comparison_summary_data[2] = data[1].input_type + comparison_summary_data[3] = sum( + [x.compare_index for x in comparison_event_list]) if comparison_event_list else Constant.NA + if base_event_list and comparison_event_list and base_summary_data[3]: + diff = (comparison_summary_data[3] - base_summary_data[3]) / base_summary_data[3] + else: + diff = Constant.NA + op_name = data[0].name if data[0] else data[1].name + + summary_data = base_summary_data + comparison_summary_data + [diff, op_name, Constant.NA] + for index in range(len(headers)): + value = summary_data[index] + if headers[index] == Constant.DIFF: + ws_write_diff(ws, index, value) + if headers[index] == Constant.DIFF_FILTER: + diff_value = summary_data[headers.index(Constant.DIFF)] + ws_write_diff_filter(ws, index, headers, diff_value) + elif headers[index] != Constant.OP_NAME_FILTER: + ws.cell(row=self._row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.SUMMARY_LINE_COLOR) - summary_data = base_summary_data + comparison_summary_data + [diff, op_name, Constant.NA] - for index in range(len(headers)): - value = summary_data[index] - if headers[index] == Constant.DIFF: - ws.cell(row=row_index, column=index + 1).number_format = '0.00%' - if value != Constant.NA and value < 0: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.GREEN_COLOR) - elif value != Constant.NA and value >= 0: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) - if headers[index] == Constant.DIFF_FILTER: - diff_value = summary_data[headers.index(Constant.DIFF)] - if diff_value != Constant.NA and diff_value < 0: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", - fgColor=Constant.GREEN_COLOR) - elif diff_value != Constant.NA and diff_value >= 0: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor=Constant.RED_COLOR) - elif headers[index] != Constant.OP_NAME_FILTER: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", - fgColor=Constant.SUMMARY_LINE_COLOR) + if value != Constant.NA: + ws.cell(row=self._row_index, column=index + 1).value = value + bold = headers[index] == Constant.OP_NAME + if headers[index] != Constant.DIFF: + ws.cell(row=self._row_index, column=index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=self._row_index, column=index + 1).border = Constant.BORDER + return diff - if value != Constant.NA: - ws.cell(row=row_index, column=index + 1).value = value - bold = headers[index] == Constant.OP_NAME - if headers[index] != Constant.DIFF: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=bold) - ws.cell(row=row_index, column=index + 1).border = Constant.BORDER - row_index += 1 + def write_detail_lines(self, ws, headers, data, diff, event_list): + def ws_write_helper(ws, colum_index, value, diff, headers): + if value != Constant.NA: + ws.cell(row=self._row_index, column=colum_index + 1).value = value + bold = headers[colum_index] == Constant.OP_NAME + ws.cell(row=self._row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=self._row_index, column=colum_index + 1).border = Constant.BORDER + if headers[colum_index] == Constant.DIFF_FILTER: + if diff != Constant.NA and diff < 0: + ws.cell(row=self._row_index, column=colum_index + 1).fill = PatternFill("solid", + fgColor=Constant.GREEN_COLOR) + elif diff != Constant.NA and diff >= 0: + ws.cell(row=self._row_index, column=colum_index + 1).fill = PatternFill("solid", + fgColor=Constant.RED_COLOR) + if headers[colum_index] == Constant.OP_NAME: + ws.cell(row=self._row_index, column=colum_index + 1).alignment = Alignment(horizontal="center", + vertical="center") - # write detail lines - base_event_num, comparison_event_num = len(base_event_list), len(comparison_event_list) - for index in range(max(base_event_num, comparison_event_num)): - base_detail_data, comparison_detail_data = [Constant.NA] * len(self._base_headers), \ - [Constant.NA] * len(self._comparison_headers) - base_detail_data[0] = "|" - comparison_detail_data[0] = "|" - if index < base_event_num: - base_event = base_event_list[index] - base_detail_data[1:] = base_event.get_record() - if index < comparison_event_num: - comparison_event = comparison_event_list[index] - comparison_detail_data[1:] = comparison_event.get_record() + base_event_list = event_list.base_event_list + comparison_event_list = event_list.comparison_event_list + # write detail lines + op_name = data[0].name if data[0] else data[1].name + base_event_num, comparison_event_num = len(base_event_list), len(comparison_event_list) + for index in range(max(base_event_num, comparison_event_num)): + base_detail_data, comparison_detail_data = [Constant.NA] * len(self._base_headers), \ + [Constant.NA] * len(self._comparison_headers) + base_detail_data[0] = "|" + comparison_detail_data[0] = "|" + if index < base_event_num: + base_event = base_event_list[index] + base_detail_data[1:] = base_event.get_record() + if index < comparison_event_num: + comparison_event = comparison_event_list[index] + comparison_detail_data[1:] = comparison_event.get_record() - detail_data = base_detail_data + comparison_detail_data + [Constant.NA, op_name, Constant.NA] - for colum_index in range(len(headers)): - value = detail_data[colum_index] - if value != Constant.NA: - ws.cell(row=row_index, column=colum_index + 1).value = value - bold = headers[colum_index] == Constant.OP_NAME - ws.cell(row=row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) - ws.cell(row=row_index, column=colum_index + 1).border = Constant.BORDER - if headers[colum_index] == Constant.DIFF_FILTER: - if diff != Constant.NA and diff < 0: - ws.cell(row=row_index, column=colum_index + 1).fill = PatternFill("solid", - fgColor=Constant.GREEN_COLOR) - elif diff != Constant.NA and diff >= 0: - ws.cell(row=row_index, column=colum_index + 1).fill = PatternFill("solid", - fgColor=Constant.RED_COLOR) - if headers[colum_index] == Constant.OP_NAME: - ws.cell(row=row_index, column=colum_index + 1).alignment = Alignment(horizontal="center", - vertical="center") - row_index += 1 + detail_data = base_detail_data + comparison_detail_data + [Constant.NA, op_name, Constant.NA] + for colum_index in range(len(headers)): + value = detail_data[colum_index] + ws_write_helper(ws, colum_index, value, diff, headers) + self._row_index += 1 diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py index 2d98fce4be6..ecbcdb66cdd 100644 --- a/profiler/compare_tools/performance_compare.py +++ b/profiler/compare_tools/performance_compare.py @@ -7,9 +7,12 @@ import time sys.path.append( os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse", "common_func")) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse")) from generation.comparison_generator import ComparisonGenerator from utils.args_manager import ArgsManager from profiling_analysis.profiling_parse import prof_main +from path_manager import PathManager def performance_compare(args): @@ -46,7 +49,7 @@ def main(): dir_path = args.output_path if args.output_path else "./" file_name = "performance_comparison_result_{}.xlsx".format( time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) - result_file_path = os.path.realpath(os.path.join(dir_path, file_name)) + result_file_path = PathManager.get_realpath(os.path.join(dir_path, file_name)) ComparisonGenerator(args).create_excel(result_file_path) print(f"[INFO] The comparison result file has been generated: {result_file_path}") diff --git a/profiler/compare_tools/profiling_analysis/gpu_parser.py b/profiler/compare_tools/profiling_analysis/gpu_parser.py index b7999257894..f9dd25203b5 100644 --- a/profiler/compare_tools/profiling_analysis/gpu_parser.py +++ b/profiler/compare_tools/profiling_analysis/gpu_parser.py @@ -13,11 +13,33 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from collections import Counter, defaultdict import pandas as pd import profiling_analysis.parser_helper as parser_helper from utils.file_reader import FileReader +from path_manager import PathManager + + +class OpTimeWarper: + def __init__( + self, + cube_time: float = 0.0, + fa_time_fwd: float = 0.0, + fa_time_bwd: float = 0.0, + all_op_time: float = 0.0, + compute_stream_dur: float = 0.0, + cube_num: int = 0, + vec_num: int = 0 + ): + self.cube_time = cube_time + self.fa_time_fwd = fa_time_fwd + self.fa_time_bwd = fa_time_bwd + self.all_op_time = all_op_time + self.compute_stream_dur = compute_stream_dur + self.cube_num = cube_num + self.vec_num = vec_num class GpuProfilingParser: @@ -36,18 +58,15 @@ class GpuProfilingParser: if not len([1 for mark in fa_mark if mark not in name.lower()]): return True return False - - def parse_events(self): + + def update_op_list(self, op_list, marks): cube_time = 0.0 all_op_time = 0.0 fa_time_bwd = 0.0 fa_time_fwd = 0.0 cube_num = 0 vec_num = 0 - op_list = [] compute_stream_dur = 0.0 - marks = defaultdict(int) # mark for compute communication_not_overlapped time - for event in self.trace_events: if not isinstance(event, dict): continue @@ -80,8 +99,36 @@ class GpuProfilingParser: vec_num += 1 all_op_time += float(dur) op_list.append([ts, name, cat, dur]) + time_wrapper = OpTimeWarper( + cube_time=cube_time, + fa_time_fwd=fa_time_fwd, + fa_time_bwd=fa_time_bwd, + all_op_time=all_op_time, + compute_stream_dur=compute_stream_dur, + cube_num=cube_num, + vec_num=vec_num + ) + return time_wrapper + + def parse_events(self): + op_list = [] + marks = defaultdict(int) # mark for compute communication_not_overlapped time + + time_wrapper = self.update_op_list(op_list, marks) + cube_time = time_wrapper.cube_time + fa_time_fwd = time_wrapper.fa_time_fwd + fa_time_bwd = time_wrapper.fa_time_bwd + all_op_time = time_wrapper.all_op_time + compute_stream_dur = time_wrapper.compute_stream_dur + cube_num = time_wrapper.cube_num + vec_num = time_wrapper.vec_num + op_dataframe = pd.DataFrame(op_list, columns=['time start', 'name', 'cat', 'dur']) - op_dataframe.to_csv('gpu_perf.csv', index=False) + root_path = os.getcwd() + PathManager.check_path_writeable(root_path) + gpu_perf_path = os.path.join(root_path, 'gpu_perf.csv') + PathManager.create_file_safety(gpu_perf_path) + op_dataframe.to_csv(gpu_perf_path, index=False) self.profiling_info.compute_time = len([_ for _, value in marks.items() if value < 0]) / 10 ** 6 self.profiling_info.communication_not_overlapped = len([_ for _, value in marks.items() if value > 0]) / 10 ** 6 self.profiling_info.flash_attention_time_bwd = fa_time_bwd / 10 ** 6 diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py index 73eeb1fd00b..43ed42c5df7 100644 --- a/profiler/compare_tools/profiling_analysis/npu_parser.py +++ b/profiler/compare_tools/profiling_analysis/npu_parser.py @@ -18,6 +18,28 @@ import pandas as pd from collections import defaultdict import profiling_analysis.parser_helper as parser_helper from utils.file_reader import FileReader +from path_manager import PathManager +from file_manager import FileManager + + +class NpuInfoWrapper: + def __init__( + self, + compute_time: int, + communication_time: int, + is_cluster: bool, + event_wait_sqe: dict, + ai_core_dict: dict, + event_wait_sqe_res: dict, + ai_core_res: dict, + ): + self.compute_time = compute_time + self.communication_time = communication_time + self.is_cluster = is_cluster + self.event_wait_sqe = event_wait_sqe + self.ai_core_dict = ai_core_dict + self.event_wait_sqe_res = event_wait_sqe_res + self.ai_core_res = ai_core_res class NpuProfilingParser: @@ -65,7 +87,19 @@ class NpuProfilingParser: communication_time += dur min_ts = ts if ts < min_ts else min_ts max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts + npu_info_wrapper = NpuInfoWrapper( + compute_time, communication_time, is_cluster, + event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res) + self.update_npu_info(max_ts - min_ts, npu_info_wrapper) + def update_npu_info(self, ts_dur, npu_info_wrapper): + compute_time = npu_info_wrapper.compute_time + communication_time = npu_info_wrapper.communication_time + is_cluster = npu_info_wrapper.is_cluster + event_wait_sqe = npu_info_wrapper.event_wait_sqe + ai_core_dict = npu_info_wrapper.ai_core_dict + event_wait_sqe_res = npu_info_wrapper.event_wait_sqe_res + ai_core_res = npu_info_wrapper.ai_core_res # AI_CORE和EVENT_WAIT_SQE共存为计算流 compute_stream = [] parallel_stream = [] @@ -87,7 +121,7 @@ class NpuProfilingParser: self.parallel_time = self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list) self.profiling_info.compute_time = compute_time / 10 ** 6 if is_cluster else \ ai_core_res[compute_stream[0]] / 10 ** 6 - self.profiling_info.e2e_time = (max_ts - min_ts) / 10 ** 6 if is_cluster else \ + self.profiling_info.e2e_time = ts_dur / 10 ** 6 if is_cluster else \ (self.max_stream_ts - self.min_stream_ts) / 10 ** 6 self.profiling_info.communication_not_overlapped = communication_time / 10 ** 6 \ if is_cluster else (event_wait_sqe_res[compute_stream[0]] - self.parallel_time) / 10 ** 6 @@ -116,6 +150,8 @@ class NpuProfilingParser: if not self.npu_summary_file: print('[WARNING] Npu kernel details csv file is not available.') return + PathManager.check_path_readable(self.npu_summary_file) + FileManager.check_file_size(self.npu_summary_file) info = pd.read_csv(self.npu_summary_file, index_col=None) cube_time = 0.0 vec_time = 0.0 @@ -155,6 +191,8 @@ class NpuProfilingParser: print('[INFO] Npu op memory csv file is not available.') return try: + PathManager.check_path_readable(self.npu_mem_file) + FileManager.check_file_size(self.npu_mem_file) info = pd.read_csv(self.npu_mem_file, usecols=['Total Reserved(MB)'], index_col=None) except ValueError: print('[ERROR] Load memory info failed.') diff --git a/profiler/compare_tools/utils/constant.py b/profiler/compare_tools/utils/constant.py index 360c2ab44ae..828dc758014 100644 --- a/profiler/compare_tools/utils/constant.py +++ b/profiler/compare_tools/utils/constant.py @@ -15,6 +15,9 @@ class Constant(object): RED_COLOR = "00FF0000" SUMMARY_LINE_COLOR = "F0F8FF" + # epsilon + EPS = 1e-15 + # autority FILE_AUTHORITY = 0o640 DIR_AUTHORITY = 0o750 diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py index 37853f41f42..8957e5d9b8c 100644 --- a/profiler/compare_tools/utils/file_reader.py +++ b/profiler/compare_tools/utils/file_reader.py @@ -19,7 +19,7 @@ class FileReader: return [] if file_size > Constant.MAX_FILE_SIZE: check_msg = input( - f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") if check_msg.lower() != "y": print(f"[WARNING] The user choose not to read the file: {file_path}") return [] @@ -41,7 +41,7 @@ class FileReader: return [] if file_size > Constant.MAX_FILE_SIZE: check_msg = input( - f"The file({file_path}) size exceeds the preset max value, do you continue reading the file? [y/n]") + f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") if check_msg.lower() != "y": print(f"[WARNING] The user choose not to read the file: {file_path}") return [] diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py index b49217b844b..ceb24e6c310 100644 --- a/profiler/compare_tools/utils/profiling_parser.py +++ b/profiler/compare_tools/utils/profiling_parser.py @@ -1,4 +1,4 @@ -from abc import ABCMeta, abstractmethod +from abc import abstractmethod from math import ceil from utils.compare_event import KernelEvent @@ -7,31 +7,16 @@ from utils.file_reader import FileReader from utils.trace_event_data import TraceEventData -class ProfilingParser(metaclass=ABCMeta): - @abstractmethod - def get_torch_op_data(self): - raise NotImplementedError - - @abstractmethod - def get_kernel_dict(self): - raise NotImplementedError - - @abstractmethod - def get_memory_list(self): - raise NotImplementedError - - -class GPUProfilingParser(ProfilingParser): +class ProfilingParser: def __init__(self, args: any, path_dict: dict): self._args = args self._profiling_path = path_dict.get(Constant.PROFILING_PATH) - self._json_path = path_dict.get(Constant.PROFILING_PATH) self._torch_op_data = None self._kernel_dict = None self._memory_list = None self._communication_data = None self._communication_task_data = None - + @property def file_path(self) -> str: return self._profiling_path @@ -70,6 +55,24 @@ class GPUProfilingParser(ProfilingParser): self.get_communication_data() return self._communication_task_data + @abstractmethod + def get_torch_op_data(self): + raise NotImplementedError + + @abstractmethod + def get_kernel_dict(self): + raise NotImplementedError + + @abstractmethod + def get_memory_list(self): + raise NotImplementedError + + +class GPUProfilingParser(ProfilingParser): + def __init__(self, args: any, path_dict: dict): + super().__init__(args, path_dict) + self._json_path = path_dict.get(Constant.PROFILING_PATH) + def get_torch_op_data(self): torch_op_list = [] json_data = FileReader.read_trace_file(self._json_path) @@ -144,53 +147,9 @@ class GPUProfilingParser(ProfilingParser): class NPUProfilingParser(ProfilingParser): def __init__(self, args: any, path_dict: str): - self._args = args - self._profiling_path = path_dict.get(Constant.PROFILING_PATH) + super().__init__(args, path_dict) self._json_path = path_dict.get(Constant.TRACE_PATH) self._memory_data_path = path_dict.get(Constant.MEMORY_DATA_PATH) - self._torch_op_data = None - self._kernel_dict = None - self._memory_list = None - self._communication_data = None - self._communication_task_data = None - - @property - def file_path(self) -> str: - return self._profiling_path - - @property - def json_path(self) -> str: - return self._json_path - - @property - def torch_op_data(self) -> list: - if self._torch_op_data is None: - self.get_torch_op_data() - return self._torch_op_data - - @property - def kernel_dict(self) -> dict: - if self._kernel_dict is None: - self.get_kernel_dict() - return self._kernel_dict - - @property - def memory_list(self) -> list: - if self._memory_list is None: - self.get_memory_list() - return self._memory_list - - @property - def communication_data(self) -> dict: - if self._communication_data is None: - self.get_communication_data() - return self._communication_data - - @property - def communication_task_data(self) -> dict: - if self._communication_task_data is None: - self.get_communication_data() - return self._communication_task_data def get_torch_op_data(self): torch_op_list = [] @@ -272,55 +231,70 @@ class NPUProfilingParser(ProfilingParser): return dequeue_data[left] if end_time > ts_time else None def get_communication_data(self): + def get_pid(json_data): + pid = None + for data in json_data: + trace_event = TraceEventData(data) + if not trace_event.is_process_meta(): + continue + if trace_event.is_hccl_process(): + pid = trace_event.pid + break + return pid + + def get_tid_list(pid, tid_list, json_data): + for data in json_data: + trace_event = TraceEventData(data) + if not trace_event.is_thread_meta(): + continue + if trace_event.pid != pid: + continue + if trace_event.is_communication_op_thread(): + tid_list.append(trace_event.tid) + + def get_comm_data(pid, tid_list, json_data): + for data in json_data: + trace_event = TraceEventData(data) + if not trace_event.is_x_mode(): + continue + if trace_event.pid != pid: + continue + if trace_event.tid in tid_list: + self._communication_data.append(data) + + def get_comm_task_data(pid, tid_list, json_data): + for data in json_data: + trace_event = TraceEventData(data) + if not trace_event.is_x_mode(): + continue + if trace_event.pid != pid: + continue + if trace_event.tid in tid_list: + continue + ts = trace_event.start_time + for communication_op in self._communication_data: + comm_op_event = TraceEventData(communication_op) + if ts < comm_op_event.start_time or ts > comm_op_event.end_time: + continue + name_list = communication_op.get("name", "").split("_") + if len(name_list) >= 2: + self._communication_task_data.setdefault(name_list[1].lower(), []).append(data) + break + self._communication_data, self._communication_task_data = [], {} json_data = FileReader.read_trace_file(self._json_path) - pid = None - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_process_meta(): - continue - if trace_event.is_hccl_process(): - pid = trace_event.pid - break + + pid = get_pid(json_data) if pid is None: return - tid_list = [] - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_thread_meta(): - continue - if trace_event.pid != pid: - continue - if trace_event.is_communication_op_thread(): - tid_list.append(trace_event.tid) + tid_list = [] + get_tid_list(pid, tid_list, json_data) if not tid_list: return - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_x_mode(): - continue - if trace_event.pid != pid: - continue - if trace_event.tid in tid_list: - self._communication_data.append(data) + get_comm_data(pid, tid_list, json_data) if not self._communication_data: return - for data in json_data: - trace_event = TraceEventData(data) - if not trace_event.is_x_mode(): - continue - if trace_event.pid != pid: - continue - if trace_event.tid in tid_list: - continue - ts = trace_event.start_time - for communication_op in self._communication_data: - comm_op_event = TraceEventData(communication_op) - if ts < comm_op_event.start_time or ts > comm_op_event.end_time: - continue - name_list = communication_op.get("name", "").split("_") - if len(name_list) >= 2: - self._communication_task_data.setdefault(name_list[1].lower(), []).append(data) - break + + get_comm_task_data(pid, tid_list, json_data) diff --git a/profiler/compare_tools/utils/tree_builder.py b/profiler/compare_tools/utils/tree_builder.py index 6d765eb0d70..4010ba0c885 100644 --- a/profiler/compare_tools/utils/tree_builder.py +++ b/profiler/compare_tools/utils/tree_builder.py @@ -22,6 +22,16 @@ class TreeBuilder: @classmethod def update_tree_node(cls, root_node: TorchOpNode, flow_kernel_dict: dict = {}, memory_allocated_list: list = []): + def set_kernel_helper(node_queue, ts, kernel_num, kernel_list): + while not node_queue.empty(): + tree_node = node_queue.get() + tree_node.add_kernel_num(kernel_num) + matched_child_node = tree_node.match_child_node(ts) + if matched_child_node: + node_queue.put(matched_child_node) + else: + tree_node.set_kernel_list(kernel_list) + if flow_kernel_dict: for ts, kernel_list in flow_kernel_dict.items(): matched_child_node = root_node.match_child_node(ts) @@ -30,14 +40,8 @@ class TreeBuilder: kernel_num = len(kernel_list) node_queue = Queue() node_queue.put(matched_child_node) - while not node_queue.empty(): - tree_node = node_queue.get() - tree_node.add_kernel_num(kernel_num) - matched_child_node = tree_node.match_child_node(ts) - if matched_child_node: - node_queue.put(matched_child_node) - else: - tree_node.set_kernel_list(kernel_list) + set_kernel_helper(node_queue, ts, kernel_num, kernel_list) + for memory_allocated in memory_allocated_list: ts = memory_allocated.get(Constant.TS) matched_child_node = root_node.match_child_node(ts) -- Gitee