diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md
index b7e3473c4c9102764f1750edd3124522a6cf578a..f9c81d8625d6ebe3500fe766bd771cc5eb756c0a 100644
--- a/profiler/compare_tools/README.md
+++ b/profiler/compare_tools/README.md
@@ -66,7 +66,7 @@ pytorch profiler数据目录结构如下：
 ```
 
 #### NPU性能数据采集
-通过Ascend PyTorch Profiler工具采集NPU的性能数据，采集参数配置跟GPU一致，参考链接：https://www.hiascend.com/document/detail/zh/canncommercial/63RC2/modeldevpt/ptmigr/ptmigr_0066.html
+通过Ascend PyTorch Profiler工具采集NPU的性能数据，采集参数配置跟GPU一致，参考链接：https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/devtools/auxiliarydevtool/atlasprofiling_16_0008.html
 
 将GPU的性能数据采集代码中torch.profiler替换成torch_npu.profiler
 
diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py
index d24b63e511e8ffd0eebb00c83d29f45dd7b23047..30dfce4ef8baf457797106a553c2c8c698eec0f3 100644
--- a/profiler/compare_tools/utils/profiling_parser.py
+++ b/profiler/compare_tools/utils/profiling_parser.py
@@ -16,7 +16,7 @@ class ProfilingParser:
         self._memory_list = None
         self._communication_data = None
         self._communication_task_data = None
-    
+
     @property
     def file_path(self) -> str:
         return self._profiling_path
@@ -78,7 +78,7 @@ class GPUProfilingParser(ProfilingParser):
         json_data = FileReader.read_trace_file(self._json_path)
         total_events = json_data.get("traceEvents", [])
         for event in total_events:
-            if event.get("cat", "").lower() == "cpu_op" or event.get("cat", "").lower() == "user_annotation":
+            if event.get("cat", "").lower() in ("cpu_op", "user_annotation", "cuda_runtime", "operator"):
                 torch_op_list.append(event)
         self._torch_op_data = torch_op_list
 
@@ -86,8 +86,8 @@ class GPUProfilingParser(ProfilingParser):
         flow_kernel_dict = {}
         json_data = FileReader.read_trace_file(self._json_path)
         total_events = json_data.get("traceEvents", [])
-        flow_cat = (self._args.gpu_flow_cat,) if self._args.gpu_flow_cat else ("async_gpu", "async_cpu_to_gpu", "ac2g")
-
+        flow_cat = (self._args.gpu_flow_cat,) if self._args.gpu_flow_cat else ("async_gpu", "async_cpu_to_gpu",
+                                                                               "ac2g", "async")
         flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {}
         for event in total_events:
             if event.get("cat", "") in flow_cat and event.get("ph") == "s":
@@ -241,7 +241,7 @@ class NPUProfilingParser(ProfilingParser):
                     pid = trace_event.pid
                     break
             return pid
-        
+
         def get_tid_list(pid, tid_list, json_data):
             for data in json_data:
                 trace_event = TraceEventData(data)
@@ -251,7 +251,7 @@ class NPUProfilingParser(ProfilingParser):
                     continue
                 if trace_event.is_communication_op_thread():
                     tid_list.append(trace_event.tid)
-        
+
         def get_comm_data(pid, tid_list, json_data):
             for data in json_data:
                 trace_event = TraceEventData(data)