From acc94e8c55067b86b152d358c1db43e1f8357430 Mon Sep 17 00:00:00 2001
From: stby <295887736@qq.com>
Date: Thu, 8 Aug 2024 17:16:18 +0800
Subject: [PATCH] compare_tools: support comparing a specified step via --base_step/--comparison_step

---
 .../origin_data_bean/kernel_details_bean.py   |  6 +++
 .../compare_backend/comparison_generator.py   |  8 +++-
 .../data_prepare/operator_data_prepare.py     | 39 ++++++++++++++-----
 .../generator/detail_performance_generator.py |  8 +++-
 .../profiling_parser/base_profiling_parser.py |  6 ++-
 .../profiling_parser/gpu_profiling_parser.py  |  4 +-
 .../profiling_parser/npu_profiling_parser.py  | 14 +++++--
 .../compare_backend/utils/args_manager.py     | 25 +++++++++++-
 .../compare_backend/utils/compare_args.py     |  6 ++-
 .../compare_backend/utils/torch_op_node.py    |  5 +++
 .../compare_interface/comparison_interface.py |  7 +++-
 profiler/compare_tools/performance_compare.py |  2 +
 .../test_base_profiling_parser.py             |  1 +
 13 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py
index c15396e9c..0da2321c0 100644
--- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py
+++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py
@@ -18,6 +18,7 @@ class KernelDetailsBean:
         self._mac_time = 0.0
         self._duration = 0.0
         self._start_time = Decimal("0")
+        self._step_id = ""
         self.init()
 
     @property
@@ -65,6 +66,10 @@ class KernelDetailsBean:
     @property
     def end_time(self) -> Decimal:
         return self.start_time + convert_to_decimal(self._duration)
+
+    @property
+    def step_id(self) -> int:
+        return int(self._step_id) if self._step_id else -1  # -1: row carries no "Step Id" value
 
     def is_hide_op_pmu(self):
         if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys():
@@ -119,4 +124,5 @@ class KernelDetailsBean:
         self._aicore_time = self._data.get("aicore_time(us)", "")
         self._mac_time = self._data.get('mac_time(us)', "")
         self._duration = self._data.get('Duration(us)', 0)
+        self._step_id = self._data.get('Step Id', "")
         self._start_time = Decimal(self._data.get("Start Time(us)", "0"))
diff --git a/profiler/compare_tools/compare_backend/comparison_generator.py b/profiler/compare_tools/compare_backend/comparison_generator.py
index b4d17f88e..bfbc1bb7b 100644
--- a/profiler/compare_tools/compare_backend/comparison_generator.py
+++ b/profiler/compare_tools/compare_backend/comparison_generator.py
@@ -31,9 +31,13 @@ class ComparisonGenerator:
 
     def load_data(self):
         self._data_dict[Constant.BASE_DATA] = self.PARSER_DICT.get(self._args_manager.base_profiling_type)(
-            self._args_manager.args, self._args_manager.base_path_dict).load_data()
+            self._args_manager.args,
+            self._args_manager.base_path_dict,
+            self._args_manager.base_step).load_data()
         self._data_dict[Constant.COMPARISON_DATA] = self.PARSER_DICT.get(self._args_manager.comparison_profiling_type)(
-            self._args_manager.args, self._args_manager.comparison_path_dict).load_data()
+            self._args_manager.args,
+            self._args_manager.comparison_path_dict,
+            self._args_manager.comparison_step).load_data()
 
     def generate_compare_result(self):
         overall_data = {Constant.BASE_DATA: self._data_dict.get(Constant.BASE_DATA).overall_metrics,
diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py
index 59913528a..bab381b3e 100644
--- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py
+++ b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py
@@ -3,27 +3,46 @@ from compare_backend.utils.tree_builder import TreeBuilder
 
 
 class OperatorDataPrepare:
-    def __init__(self, profiling_data: ProfilingResult):
+    def __init__(self, profiling_data: ProfilingResult, specified_step_id: int = -1):
         self.profiling_data = profiling_data
         self._all_nodes = self._build_tree()
         self._root_node = self._all_nodes[0]
+        self._specified_step_id = specified_step_id
 
     def get_top_layer_ops(self) -> any:
-        level1_child_nodes = self._root_node.child_nodes
-        result_data = []
-        for level1_node in level1_child_nodes:
-            if level1_node.is_step_profiler():
-                result_data.extend(level1_node.child_nodes)
-            else:
-                result_data.append(level1_node)
-        return result_data
+        if len(self._all_nodes) < 1:
+            return []
+        return self._get_top_layers_ops_from_root_node(self._root_node.child_nodes)
 
     def get_all_layer_ops(self) -> any:
         result_data = []
         if len(self._all_nodes) < 1:
             return result_data
-        return list(filter(lambda x: not x.is_step_profiler(), self._all_nodes[1:]))
+        if self._specified_step_id == -1:
+            return list(filter(lambda x: not x.is_step_profiler(), self._all_nodes[1:]))
+        node_queue = self._get_top_layers_ops_from_root_node(self._root_node.child_nodes)
+        while len(node_queue) > 0:
+            node = node_queue.pop(0)
+            result_data.append(node)
+            if node.child_nodes:
+                node_queue.extend(node.child_nodes)
+        return result_data
 
     def _build_tree(self):
         return TreeBuilder.build_tree(self.profiling_data.torch_op_data, self.profiling_data.kernel_dict,
                                       self.profiling_data.memory_list)
+
+    def _get_top_layers_ops_from_root_node(self, top_layers_nodes: list) -> list:
+        """Unwrap step nodes into their children; if a step is specified, keep only that step's children."""
+        result_data = []
+        for level1_node in top_layers_nodes:
+            if not level1_node.is_step_profiler():
+                if self._specified_step_id == -1:
+                    result_data.append(level1_node)
+            elif self._specified_step_id == -1 or level1_node.get_step_id() == self._specified_step_id:
+                result_data.extend(level1_node.child_nodes)
+        if not result_data and self._specified_step_id != -1:
+            # only warn: an empty list is still returned to the caller
+            print(f"[WARNING] There is no operator information for step {self._specified_step_id}, "
+                  "please check whether the data contains this step.")
+        return result_data
diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py
index c0da4b65b..fc59963c1 100644
--- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py
+++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py
@@ -31,6 +31,8 @@ from compare_backend.data_prepare.sequence_pre_matching import SequencePreMatchi
 class DetailPerformanceGenerator(BaseGenerator):
     def __init__(self, profiling_data_dict: dict, args: any):
         super().__init__(profiling_data_dict, args)
+        self._base_step_id = int(args.base_step) if args.base_step else -1
+        self._comparison_step_id = int(args.comparison_step) if args.comparison_step else -1
 
     def compare(self):
         enable_compare = [self._args.enable_operator_compare, self._args.enable_memory_compare,
@@ -83,8 +85,10 @@ class DetailPerformanceGenerator(BaseGenerator):
         # build tree for operator_compare memory_compare and api_compare
         base_op_prepare, comparison_op_prepare = None, None
         if self._args.enable_memory_compare or self.enable_api_compare or enable_operator_compare:
-            base_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.BASE_DATA))
-            comparison_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.COMPARISON_DATA))
+            base_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.BASE_DATA),
+                                                  self._base_step_id)
+            comparison_op_prepare = OperatorDataPrepare(self._profiling_data_dict.get(Constant.COMPARISON_DATA),
+                                                        self._comparison_step_id)
 
         # 算子性能比对-operator级
         op_compare_result = []
diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py
index a2591dd0f..011f237cc 100644
--- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py
+++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py
@@ -55,7 +55,7 @@ class ProfilingResult:
 
 class BaseProfilingParser(ABC):
 
-    def __init__(self, args: any, path_dict: dict):
+    def __init__(self, args: any, path_dict: dict, step_id: int = -1):
         self._args = args
         self._profiling_type = path_dict.get(Constant.PROFILING_TYPE)
         self._profiling_path = path_dict.get(Constant.PROFILING_PATH)
@@ -80,6 +80,7 @@ class BaseProfilingParser(ABC):
         self._categorize_performance_index = 0
         self._cpu_cube_op = None
         self._bwd_tid = None
+        self._step_id = step_id
 
     @property
     def cpu_cube_op(self):
@@ -120,6 +121,9 @@ class BaseProfilingParser(ABC):
 
     def load_data(self) -> ProfilingResult:
         self._result_data.update_bwd_tid(self._bwd_tid)
+        if self._step_id != -1 and self._profiling_type == Constant.GPU:
+            msg = "[ERROR] step id is not supported for GPU data, please use it only when comparing NPU data."
+            raise RuntimeError(msg)
         self._dispatch_events()
         self._update_kernel_dict()
         self._update_communication_dict()
diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py
index 91b4094c2..04c89e206 100644
--- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py
+++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py
@@ -13,8 +13,8 @@ class GPUProfilingParser(BaseProfilingParser):
     FLOW_CAT = ("async_gpu", "async_cpu_to_gpu", "ac2g", "async")
     TORCH_OP_CAT = ("cpu_op", "user_annotation", "cuda_runtime", "operator", "runtime")
 
-    def __init__(self, args: any, path_dict: dict):
-        super().__init__(args, path_dict)
+    def __init__(self, args: any, path_dict: dict, step_id: int = -1):
+        super().__init__(args, path_dict, step_id)
         self._trace_events = [TraceEventBean(event) for event in self._trace_events.get("traceEvents", [])]
         self._flow_cat = (args.gpu_flow_cat,) if args.gpu_flow_cat else self.FLOW_CAT
         self._compute_stream_id = self._infer_compute_stream_id()
diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
index 29e9fea8d..fd92d44d3 100644
--- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
+++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
@@ -17,8 +17,8 @@ class NPUProfilingParser(BaseProfilingParser):
     ACTIVE_CPU = "ProfilerActivity.CPU"
     LEVEL_0 = "Level0"
 
-    def __init__(self, args: any, path_dict: dict):
-        super().__init__(args, path_dict)
+    def __init__(self, args: any, path_dict: dict, step_id: int = -1):
+        super().__init__(args, path_dict, step_id)
         self._operator_memory_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "operator_memory.csv")
         self._memory_record_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "memory_record.csv")
         self._kernel_detail_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "kernel_details.csv")
@@ -72,11 +72,17 @@ class NPUProfilingParser(BaseProfilingParser):
         for kernel in kernel_details:
             if kernel.is_invalid():
                 continue
+            if self._step_id != -1 and kernel.step_id != self._step_id:
+                continue
             input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A'
             kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append(
                 [kernel.name, kernel.duration])
-        if len(kernels_dict) == 1:
-            print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.")
+        if not kernels_dict:
+            if self._step_id != -1:
+                print(f"[ERROR] There is no kernel details information for step {self._step_id}, " \
+                        "please check whether the data contains this step.")
+            else:
+                print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.")
             return
         self._result_data.update_kernel_details(kernels_dict)
 
diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py
index 579bf9b99..09954bc7b 100644
--- a/profiler/compare_tools/compare_backend/utils/args_manager.py
+++ b/profiler/compare_tools/compare_backend/utils/args_manager.py
@@ -24,6 +24,8 @@ class ArgsManager:
         self._args = args
         self._base_path_dict = {}
         self._comparison_path_dict = {}
+        self._base_step = -1
+        self._comparison_step = -1
 
     @property
     def args(self):
@@ -53,6 +55,14 @@ class ArgsManager:
     def comparison_path_dict(self):
         return self._comparison_path_dict
 
+    @property
+    def base_step(self) -> int:
+        return self._base_step  # -1 unless get_step_args_with_validating() stored a validated step
+
+    @property
+    def comparison_step(self) -> int:
+        return self._comparison_step  # -1 unless get_step_args_with_validating() stored a validated step
+
     @property
     def enable_profiling_compare(self):
         return self._args.enable_profiling_compare
@@ -88,6 +98,18 @@ class ArgsManager:
         PathManager.make_dir_safety(output_path)
         PathManager.check_path_writeable(output_path)
 
+    def get_step_args_with_validating(self):
+        """Validate --base_step/--comparison_step and store them as ints; raise RuntimeError if invalid."""
+        base_step, comparison_step = self._args.base_step, self._args.comparison_step
+        if not base_step and not comparison_step:
+            return  # no step requested, keep the -1 defaults
+        if not (base_step and comparison_step):
+            raise RuntimeError("Invalid param, base_step and comparison_step must be set at the same time.")
+        # isdecimal() accepts only what int() can parse; isdigit() also passes e.g. superscript digits
+        if not (base_step.isdecimal() and comparison_step.isdecimal()):
+            raise RuntimeError("Invalid param, base_step and comparison_step must be a number.")
+        self._base_step, self._comparison_step = int(base_step), int(comparison_step)
+
     def parse_profiling_path(self, file_path: str):
         self.check_profiling_path(file_path)
         if os.path.isfile(file_path):
@@ -134,7 +156,8 @@ class ArgsManager:
             self._args.enable_communication_compare = True
             self._args.enable_api_compare = True
             self._args.enable_kernel_compare = True
-
+
+        self.get_step_args_with_validating()
         base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path)
         self.check_profiling_path(base_profiling_path)
         self._base_path_dict = self.parse_profiling_path(base_profiling_path)
diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py
index 9e6291e89..36199b5b0 100644
--- a/profiler/compare_tools/compare_backend/utils/compare_args.py
+++ b/profiler/compare_tools/compare_backend/utils/compare_args.py
@@ -12,7 +12,9 @@ class Args:
                  max_kernel_num: int = None,
                  op_name_map: dict = {},
                  use_input_shape: bool = False,
-                 gpu_flow_cat: str = ""):
+                 gpu_flow_cat: str = "",
+                 base_step: str = "",
+                 comparison_step: str = ""):
         self.base_profiling_path = base_profiling_path
         self.comparison_profiling_path = comparison_profiling_path
         self.enable_profiling_compare = enable_profiling_compare
@@ -26,3 +28,5 @@ class Args:
         self.op_name_map = op_name_map
         self.use_input_shape = use_input_shape
         self.gpu_flow_cat = gpu_flow_cat
+        self.base_step = base_step
+        self.comparison_step = comparison_step
diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py
index bb116a60c..7d37046dc 100644
--- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py
+++ b/profiler/compare_tools/compare_backend/utils/torch_op_node.py
@@ -100,5 +100,10 @@ class TorchOpNode:
     def is_step_profiler(self) -> bool:
         return self._event.is_step_profiler()
 
+    def get_step_id(self) -> int:
+        """Return the step number that follows '#' in a step event's name, or -1 if there is none."""
+        step = self._event.name.partition("#")[2].partition("#")[0] if self.is_step_profiler() else ""
+        return int(step) if step.isdecimal() else -1
+
     def get_op_info(self) -> list:
         return [self.name, self.input_shape, self.input_type, self.call_stack]
diff --git a/profiler/compare_tools/compare_interface/comparison_interface.py b/profiler/compare_tools/compare_interface/comparison_interface.py
index b747aae47..68bbcc026 100644
--- a/profiler/compare_tools/compare_interface/comparison_interface.py
+++ b/profiler/compare_tools/compare_interface/comparison_interface.py
@@ -12,11 +12,14 @@ from compare_backend.utils.constant import Constant
 
 
 class ComparisonInterface:
-    def __init__(self, base_profiling_path: str, comparison_profiling_path: str = ""):
+    def __init__(self, base_profiling_path: str, comparison_profiling_path: str = "",
+                 base_step: str = "", comparison_step: str = ""):
         self.base_profiling_path = base_profiling_path
         if comparison_profiling_path:
             self._args = Args(base_profiling_path=base_profiling_path,
-                              comparison_profiling_path=comparison_profiling_path)
+                              comparison_profiling_path=comparison_profiling_path,
+                              base_step=base_step,
+                              comparison_step=comparison_step)
 
     def compare(self, compare_type: str) -> dict:
         if compare_type == Constant.OVERALL_COMPARE:
diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py
index 7c3fcdb6e..dff87db2f 100644
--- a/profiler/compare_tools/performance_compare.py
+++ b/profiler/compare_tools/performance_compare.py
@@ -27,6 +27,8 @@ def main():
                         help="配置GPU与NPU等价的算子名称映射关系，以字典的形式传入")
     parser.add_argument("--use_input_shape", default=False, action='store_true', help="开启算子的精准匹配")
     parser.add_argument("--gpu_flow_cat", type=str, default='', help="gpu flow event的分类标识")
+    parser.add_argument("--base_step", type=str, default='', help="基准性能数据指定比对step")
+    parser.add_argument("--comparison_step", type=str, default='', help="比较性能数据指定比对step")
     args = parser.parse_args()
 
     ComparisonGenerator(args).run()
diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py
index e84cfe048..b78c59f1f 100644
--- a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py
+++ b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py
@@ -27,6 +27,7 @@ class ProfilingParser(BaseProfilingParser):
         self._enable_kernel_compare = True
         self._enable_api_compare = True
         self._bwd_tid = 1
+        self._step_id = -1
 
     def _update_kernel_details(self):
         pass
-- 
Gitee