diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 4ae6ebcec26749bc4e1dfa6c0a28acb550b44154..677b0d18c2cea7e72ccdc78b2a5d2b750276e0ed 100644 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -108,6 +108,8 @@ class DetailPerformanceGenerator(BaseGenerator): self._profiling_data_dict.get(Constant.COMPARISON_DATA)).build_module_tree() for index, base_node in enumerate(base_root_node): comparison_node = comparison_root_node[index] if index < len(comparison_root_node) else None + if not base_node or not comparison_node: + continue module_compare_result.extend(self._matching_all_modules(base_node, comparison_node)) return module_compare_result diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index 77785d0261f6b6cbc15dfd55cbee3110037ac287..923854bdf73ad4d60f8b48b7eb3c83d526113b82 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -9,7 +9,7 @@ from compare_backend.utils.constant import Constant class GPUProfilingParser(BaseProfilingParser): CUBE_MARK = 'gemm' - FA_MARK_LIST = [['fmha', 'kernel'], ['flash', 'kernel']] + FA_MARK_LIST = [['fmha', 'kernel'], ['flash', 'kernel'], ['attention', 'kernel']] SDMA_MARK_LIST = ['htod', 'dtod', 'dtoh', 'memset (device)'] FLOW_CAT = ("async_gpu", "async_cpu_to_gpu", "ac2g", "async") TORCH_OP_CAT = ("cpu_op", "user_annotation", "cuda_runtime", "operator") @@ -138,7 +138,7 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_kernel_event) if self._enable_operator_compare: func_set.add(self._picking_python_function_event) - func_set .add(self._picking_fwdbwd_flow_event) + func_set.add(self._picking_fwdbwd_flow_event) if self._enable_operator_compare or self._args.max_kernel_num: func_set.add(self._picking_kernel_event) func_set.add(self._picking_flow_event)