From acc94e8c55067b86b152d358c1db43e1f8357430 Mon Sep 17 00:00:00 2001 From: stby <295887736@qq.com> Date: Thu, 8 Aug 2024 17:16:18 +0800 Subject: [PATCH] adapt optimizing --- .../origin_data_bean/kernel_details_bean.py | 6 +++ .../compare_backend/comparison_generator.py | 8 +++- .../data_prepare/operator_data_prepare.py | 39 ++++++++++++++----- .../generator/detail_performance_generator.py | 8 +++- .../profiling_parser/base_profiling_parser.py | 6 ++- .../profiling_parser/gpu_profiling_parser.py | 4 +- .../profiling_parser/npu_profiling_parser.py | 14 +++++-- .../compare_backend/utils/args_manager.py | 25 +++++++++++- .../compare_backend/utils/compare_args.py | 6 ++- .../compare_backend/utils/torch_op_node.py | 5 +++ .../compare_interface/comparison_interface.py | 7 +++- profiler/compare_tools/performance_compare.py | 2 + .../test_base_profiling_parser.py | 1 + 13 files changed, 106 insertions(+), 25 deletions(-) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index c15396e9c..0da2321c0 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -18,6 +18,7 @@ class KernelDetailsBean: self._mac_time = 0.0 self._duration = 0.0 self._start_time = Decimal("0") + self._step_id = "" self.init() @property @@ -65,6 +66,10 @@ class KernelDetailsBean: @property def end_time(self) -> Decimal: return self.start_time + convert_to_decimal(self._duration) + + @property + def step_id(self) -> int: + return int(self._step_id) if self._step_id else -1 def is_hide_op_pmu(self): if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys(): @@ -119,4 +124,5 @@ class KernelDetailsBean: self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") self._duration = self._data.get('Duration(us)', 0) + self._step_id = self._data.get('Step Id', "") self._start_time = Decimal(self._data.get("Start Time(us)", "0")) diff --git a/profiler/compare_tools/compare_backend/comparison_generator.py b/profiler/compare_tools/compare_backend/comparison_generator.py index b4d17f88e..bfbc1bb7b 100644 --- a/profiler/compare_tools/compare_backend/comparison_generator.py +++ b/profiler/compare_tools/compare_backend/comparison_generator.py @@ -31,9 +31,13 @@ class ComparisonGenerator: def load_data(self): self._data_dict[Constant.BASE_DATA] = self.PARSER_DICT.get(self._args_manager.base_profiling_type)( - self._args_manager.args, self._args_manager.base_path_dict).load_data() + self._args_manager.args, + self._args_manager.base_path_dict, + self._args_manager.base_step).load_data() self._data_dict[Constant.COMPARISON_DATA] = self.PARSER_DICT.get(self._args_manager.comparison_profiling_type)( - self._args_manager.args, self._args_manager.comparison_path_dict).load_data() + self._args_manager.args, + self._args_manager.comparison_path_dict, + self._args_manager.comparison_step).load_data() def generate_compare_result(self): overall_data = {Constant.BASE_DATA: self._data_dict.get(Constant.BASE_DATA).overall_metrics, diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py index 59913528a..bab381b3e 100644 --- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py +++ b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py @@ -3,27 +3,46 @@ from compare_backend.utils.tree_builder import TreeBuilder class OperatorDataPrepare: - def __init__(self, profiling_data: ProfilingResult): + def __init__(self, profiling_data: ProfilingResult, specified_step_id: int = -1): self.profiling_data = profiling_data self._all_nodes = self._build_tree() self._root_node = self._all_nodes[0] + self._specified_step_id = specified_step_id def get_top_layer_ops(self) -> any: - level1_child_nodes = self._root_node.child_nodes - result_data = [] - for level1_node in level1_child_nodes: - if level1_node.is_step_profiler(): - result_data.extend(level1_node.child_nodes) - else: - result_data.append(level1_node) - return result_data + if len(self._all_nodes) < 1: + return [] + return self._get_top_layers_ops_from_root_node(self._root_node.child_nodes) def get_all_layer_ops(self) -> any: result_data = [] if len(self._all_nodes) < 1: return result_data - return list(filter(lambda x: not x.is_step_profiler(), self._all_nodes[1:])) + if self._specified_step_id == -1: + return list(filter(lambda x: not x.is_step_profiler(), self._all_nodes[1:])) + node_queue = self._get_top_layers_ops_from_root_node(self._root_node.child_nodes) + while len(node_queue) > 0: + node = node_queue.pop(0) + result_data.append(node) + if node.child_nodes: + node_queue.extend(node.child_nodes) + return result_data def _build_tree(self): return TreeBuilder.build_tree(self.profiling_data.torch_op_data, self.profiling_data.kernel_dict, self.profiling_data.memory_list) + + def _get_top_layers_ops_from_root_node(self, top_layers_nodes: list) -> list: + result_data = [] + for level1_node in top_layers_nodes: + if self._specified_step_id == -1: + if level1_node.is_step_profiler(): + result_data.extend(level1_node.child_nodes) + else: + result_data.append(level1_node) + elif level1_node.is_step_profiler() and level1_node.get_step_id() == self._specified_step_id: + result_data.extend(level1_node.child_nodes) + if not result_data and self._specified_step_id != -1: + print(f"[WARNING] There is no operator infomation for step {self._specified_step_id}, " \ + "please check whether the data contains this step.") + return result_data \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index c0da4b65b..fc59963c1 100644 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -31,6 +31,8 @@ from compare_backend.data_prepare.sequence_pre_matching import SequencePreMatchi class DetailPerformanceGenerator(BaseGenerator): def __init__(self, profiling_data_dict: dict, args: any): super().__init__(profiling_data_dict, args) + self._base_step_id = int(args.base_step) if args.base_step else -1 + self._comparison_step_id = int(args.comparison_step) if args.comparison_step else -1 def compare(self): enable_compare = [self._args.enable_operator_compare, self._args.enable_memory_compare, @@ -83,8 +85,10 @@ class DetailPerformanceGenerator(BaseGenerator): # build tree for operator_compare memory_compare and api_compare base_op_prepare, comparison_op_prepare = None, None if self._args.enable_memory_compare or self.enable_api_compare or enable_operator_compare: - base_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.BASE_DATA)) - comparison_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.COMPARISON_DATA)) + base_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.BASE_DATA), + self._base_step_id) + comparison_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.COMPARISON_DATA), + self._comparison_step_id) # 算子性能比对-operator级 op_compare_result = [] diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index a2591dd0f..011f237cc 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -55,7 +55,7 @@ class ProfilingResult: class BaseProfilingParser(ABC): - def __init__(self, args: any, path_dict: dict): + def __init__(self, args: any, path_dict: dict, step_id: int = -1): self._args = args self._profiling_type = path_dict.get(Constant.PROFILING_TYPE) self._profiling_path = path_dict.get(Constant.PROFILING_PATH) @@ -80,6 +80,7 @@ class BaseProfilingParser(ABC): self._categorize_performance_index = 0 self._cpu_cube_op = None self._bwd_tid = None + self._step_id = step_id @property def cpu_cube_op(self): @@ -120,6 +121,9 @@ class BaseProfilingParser(ABC): def load_data(self) -> ProfilingResult: self._result_data.update_bwd_tid(self._bwd_tid) + if self._step_id != -1 and self._profiling_type == Constant.GPU: + msg = "[WARNING] step id is invalid in GPU data, please use this when comparing between NPU datas." + raise RuntimeError(msg) self._dispatch_events() self._update_kernel_dict() self._update_communication_dict() diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index 91b4094c2..04c89e206 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -13,8 +13,8 @@ class GPUProfilingParser(BaseProfilingParser): FLOW_CAT = ("async_gpu", "async_cpu_to_gpu", "ac2g", "async") TORCH_OP_CAT = ("cpu_op", "user_annotation", "cuda_runtime", "operator", "runtime") - def __init__(self, args: any, path_dict: dict): - super().__init__(args, path_dict) + def __init__(self, args: any, path_dict: dict, step_id: int = -1): + super().__init__(args, path_dict, step_id) self._trace_events = [TraceEventBean(event) for event in self._trace_events.get("traceEvents", [])] self._flow_cat = (args.gpu_flow_cat,) if args.gpu_flow_cat else self.FLOW_CAT self._compute_stream_id = self._infer_compute_stream_id() diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 29e9fea8d..fd92d44d3 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -17,8 +17,8 @@ class NPUProfilingParser(BaseProfilingParser): ACTIVE_CPU = "ProfilerActivity.CPU" LEVEL_0 = "Level0" - def __init__(self, args: any, path_dict: dict): - super().__init__(args, path_dict) + def __init__(self, args: any, path_dict: dict, step_id: int = -1): + super().__init__(args, path_dict, step_id) self._operator_memory_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "operator_memory.csv") self._memory_record_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "memory_record.csv") self._kernel_detail_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "kernel_details.csv") @@ -72,11 +72,17 @@ class NPUProfilingParser(BaseProfilingParser): for kernel in kernel_details: if kernel.is_invalid(): continue + if self._step_id != -1 and kernel.step_id != self._step_id: + continue input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A' kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append( [kernel.name, kernel.duration]) - if len(kernels_dict) == 1: - print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") + if not kernels_dict: + if self._step_id != -1: + print(f"[ERROR] There is no kernel details infomation for step {self._step_id}, " \ + "please check whether the data contains this step.") + else: + print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") return self._result_data.update_kernel_details(kernels_dict) diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py index 579bf9b99..09954bc7b 100644 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ b/profiler/compare_tools/compare_backend/utils/args_manager.py @@ -24,6 +24,8 @@ class ArgsManager: self._args = args self._base_path_dict = {} self._comparison_path_dict = {} + self._base_step = -1 + self._comparison_step = -1 @property def args(self): @@ -53,6 +55,14 @@ class ArgsManager: def comparison_path_dict(self): return self._comparison_path_dict + @property + def base_step(self): + return self._base_step + + @property + def comparison_step(self): + return self._comparison_step + @property def enable_profiling_compare(self): return self._args.enable_profiling_compare @@ -88,6 +98,18 @@ class ArgsManager: PathManager.make_dir_safety(output_path) PathManager.check_path_writeable(output_path) + def get_step_args_with_validating(self): + if self._args.base_step and self._args.comparison_step: + if all([self._args.base_step.isdigit(), self._args.comparison_step.isdigit()]): + self._base_step = int(self._args.base_step) + self._comparison_step = int(self._args.comparison_step) + else: + msg = "Invalid param, base_step and comparison_step must be a number." + raise RuntimeError(msg) + elif any([self._args.base_step, self._args.comparison_step]): + msg = "Invalid param, base_step and comparison_step must be set at the same time." + raise RuntimeError(msg) + def parse_profiling_path(self, file_path: str): self.check_profiling_path(file_path) if os.path.isfile(file_path): @@ -134,7 +156,8 @@ class ArgsManager: self._args.enable_communication_compare = True self._args.enable_api_compare = True self._args.enable_kernel_compare = True - + + self.get_step_args_with_validating() base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) self._base_path_dict = self.parse_profiling_path(base_profiling_path) diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py index 9e6291e89..36199b5b0 100644 --- a/profiler/compare_tools/compare_backend/utils/compare_args.py +++ b/profiler/compare_tools/compare_backend/utils/compare_args.py @@ -12,7 +12,9 @@ class Args: max_kernel_num: int = None, op_name_map: dict = {}, use_input_shape: bool = False, - gpu_flow_cat: str = ""): + gpu_flow_cat: str = "", + base_step: str = "", + comparison_step: str = ""): self.base_profiling_path = base_profiling_path self.comparison_profiling_path = comparison_profiling_path self.enable_profiling_compare = enable_profiling_compare @@ -26,3 +28,5 @@ class Args: self.op_name_map = op_name_map self.use_input_shape = use_input_shape self.gpu_flow_cat = gpu_flow_cat + self.base_step = base_step + self.comparison_step = comparison_step \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py index bb116a60c..7d37046dc 100644 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ b/profiler/compare_tools/compare_backend/utils/torch_op_node.py @@ -100,5 +100,10 @@ class TorchOpNode: def is_step_profiler(self) -> bool: return self._event.is_step_profiler() + def get_step_id(self) -> int: + if self.is_step_profiler(): + return int(self._event.name.split("#")[1]) + return -1 + def get_op_info(self) -> list: return [self.name, self.input_shape, self.input_type, self.call_stack] diff --git a/profiler/compare_tools/compare_interface/comparison_interface.py b/profiler/compare_tools/compare_interface/comparison_interface.py index b747aae47..68bbcc026 100644 --- a/profiler/compare_tools/compare_interface/comparison_interface.py +++ b/profiler/compare_tools/compare_interface/comparison_interface.py @@ -12,11 +12,14 @@ from compare_backend.utils.constant import Constant class ComparisonInterface: - def __init__(self, base_profiling_path: str, comparison_profiling_path: str = ""): + def __init__(self, base_profiling_path: str, comparison_profiling_path: str = "", + base_step: str = "", comparison_step: str = ""): self.base_profiling_path = base_profiling_path if comparison_profiling_path: self._args = Args(base_profiling_path=base_profiling_path, - comparison_profiling_path=comparison_profiling_path) + comparison_profiling_path=comparison_profiling_path, + base_step=base_step, + comparison_step=comparison_step) def compare(self, compare_type: str) -> dict: if compare_type == Constant.OVERALL_COMPARE: diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py index 7c3fcdb6e..dff87db2f 100644 --- a/profiler/compare_tools/performance_compare.py +++ b/profiler/compare_tools/performance_compare.py @@ -27,6 +27,8 @@ def main(): help="配置GPU与NPU等价的算子名称映射关系,以字典的形式传入") parser.add_argument("--use_input_shape", default=False, action='store_true', help="开启算子的精准匹配") parser.add_argument("--gpu_flow_cat", type=str, default='', help="gpu flow event的分类标识") + parser.add_argument("--base_step", type=str, default='', help="基准性能数据指定比对step") + parser.add_argument("--comparison_step", type=str, default='', help="比较性能数据指定比对step") args = parser.parse_args() ComparisonGenerator(args).run() diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py index e84cfe048..b78c59f1f 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py @@ -27,6 +27,7 @@ class ProfilingParser(BaseProfilingParser): self._enable_kernel_compare = True self._enable_api_compare = True self._bwd_tid = 1 + self._step_id = -1 def _update_kernel_details(self): pass -- Gitee