diff --git a/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py b/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py
index 7283c17b47dea78058d0541c1332df0fa45e90d9..1c5cee43e6bb01c2142112dbd9fcf1000ceecfd1 100644
--- a/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py
+++ b/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py
@@ -12,6 +12,14 @@ class OverallPerformanceComparator(BaseComparator):
         self._headers = ['']
         base_col = [f'{base_profiling_info.profiling_type}']
         comp_col = [f'{comp_profiling_info.profiling_type}']
+        if base_profiling_info.RDMA_bandwidth or comp_profiling_info.RDMA_bandwidth:
+            self._headers.extend(['RDMA Bandwidth(GB/s)'])
+            base_col.append(f'{base_profiling_info.RDMA_bandwidth:.3f}GB/s')
+            comp_col.append(f'{comp_profiling_info.RDMA_bandwidth:.3f}GB/s')
+        if base_profiling_info.SDMA_bandwidth or comp_profiling_info.SDMA_bandwidth:
+            self._headers.extend(['SDMA Bandwidth(GB/s)'])
+            base_col.append(f'{base_profiling_info.SDMA_bandwidth:.3f}GB/s')
+            comp_col.append(f'{comp_profiling_info.SDMA_bandwidth:.3f}GB/s')
         if not base_profiling_info.hide_op_details and not comp_profiling_info.hide_op_details:
             self._headers.extend(['Cube Time(Num)', 'Vector Time(Num)'])
             base_col.extend([f'{base_profiling_info.cube_time:.3f}s({base_profiling_info.cube_num})',
diff --git a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py
index e0a80a4d30d0feda38d4290667df6620855d8562..985374bd9958ec5fa6881f49ebadbc6104ed6d50 100644
--- a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py
+++ b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py
@@ -8,8 +8,20 @@ class ProfilingInfo:
 
     def __init__(self, profiling_type: str):
         self.profiling_type = profiling_type
-        self.cube_time = 0.0
         self.other_time = 0.0
+        self.lccl_num = 0
+        self.compute_time = 0.0
+        self.communication_not_overlapped = 0.0
+        self.wait_time = 0.0
+        self.memory_used = 0.0
+        self.e2e_time = 0.0
+        self.scheduling_time = 0.0
+        self.lccl_time = 0.0
+        self.minimal_profiling = False
+        self.hide_op_details = False
+        self.is_level0 = False
+
+        self.cube_time = 0.0
         self.vec_time = 0.0
         self.cube_num = 0
         self.vec_num = 0
@@ -17,26 +29,14 @@ class ProfilingInfo:
         self.fa_num_fwd = 0
         self.fa_num_bwd = 0
         self.pa_num = 0
-        self.lccl_num = 0
         self.conv_time_fwd = 0.0
         self.conv_time_bwd = 0.0
         self.conv_num_fwd = 0
         self.conv_num_bwd = 0
-        self.compute_time = 0.0
-        self.communication_not_overlapped = 0.0
-        self.wait_time = 0.0
-        self.memory_used = 0.0
-        self.e2e_time = 0.0
         self.sdma_time = 0.0
-        self.scheduling_time = 0.0
         self.fa_time_bwd = 0.0
         self.pa_time = 0.0
-        self.lccl_time = 0.0
         self.fa_time_fwd = 0.0
-        self.minimal_profiling = False
-        self.hide_op_details = False
-        self.is_level0 = False
-
         # 性能拆解新指标
         self.fa_time_fwd_cube = 0.0
         self.fa_num_fwd_cube = 0
@@ -76,7 +76,8 @@ class ProfilingInfo:
 
         self.other_cube_time = 0.0
         self.other_cube_num = 0
-
+        self.RDMA_bandwidth = 0.0
+        self.SDMA_bandwidth = 0.0
     @property
     def e2e_time_ms(self):
         return self.e2e_time * 10 ** 3
@@ -137,22 +138,6 @@ class ProfilingInfo:
         return sum((self.vector_num_trans, self.vector_num_notrans))
 
     def trans_time_to_s(self):
-        self.cube_time = self.cube_time / 10 ** 6
-        self.other_time = self.other_time / 10 ** 6
-        self.vec_time = self.vec_time / 10 ** 6
-        self.compute_time = self.compute_time / 10 ** 6
-        self.communication_not_overlapped = self.communication_not_overlapped / 10 ** 6
-        self.wait_time = self.wait_time / 10 ** 6
-        self.e2e_time = self.e2e_time / 10 ** 6
-        self.sdma_time = self.sdma_time / 10 ** 6
-        self.scheduling_time = self.scheduling_time / 10 ** 6
-        self.fa_time_bwd = self.fa_time_bwd / 10 ** 6
-        self.fa_time_fwd = self.fa_time_fwd / 10 ** 6
-        self.pa_time = self.pa_time / 10 ** 6
-        self.lccl_time = self.lccl_time / 10 ** 6
-        self.conv_time_fwd = self.conv_time_fwd / 10 ** 6
-        self.conv_time_bwd = self.conv_time_bwd / 10 ** 6
-
         # 新指标单位为ms
         self.fa_time_fwd_cube /= 10 ** 3
         self.fa_time_bwd_cube /= 10 ** 3
@@ -171,6 +156,30 @@ class ProfilingInfo:
         self.page_attention_time /= 10 ** 3
         self.other_cube_time /= 10 ** 3
 
+        self.cube_time = (self.matmul_time_cube + self.matmul_time_vector + self.other_cube_time) / 1000
+        self.vec_time = (self.vector_time_trans + self.vector_time_notrans) / 1000
+        self.cube_num = (self.matmul_num_cube + self.matmul_num_vector + self.other_cube_num) / 1000
+        self.vec_num = (self.vector_num_trans + self.vector_num_notrans) / 1000
+        self.sdma_num = (self.sdma_num_tensor_move + self.sdma_num_stream) / 1000
+        self.fa_num_fwd = (self.fa_num_fwd_cube + self.fa_num_fwd_vector) / 1000
+        self.fa_num_bwd = (self.fa_num_bwd_cube + self.fa_num_bwd_vector) / 1000
+        self.pa_num = self.page_attention_num / 1000
+        self.conv_time_fwd = (self.conv_time_fwd_cube + self.conv_time_fwd_vector) / 1000
+        self.conv_time_bwd = (self.conv_time_bwd_cube + self.conv_time_bwd_vector) / 1000
+        self.conv_num_fwd = (self.conv_num_fwd_cube + self.conv_num_fwd_vector) / 1000
+        self.conv_num_bwd = (self.conv_num_bwd_cube + self.conv_num_bwd_vector) / 1000
+        self.sdma_time = (self.sdma_time_tensor_move + self.sdma_time_stream) / 1000
+        self.fa_time_bwd = (self.fa_time_bwd_cube + self.fa_time_bwd_vector) / 1000
+        self.pa_time = self.page_attention_time / 1000
+        self.fa_time_fwd = (self.fa_time_fwd_cube + self.fa_time_fwd_vector) / 1000
+
+        self.other_time = self.other_time / 10 ** 6
+        self.compute_time = self.compute_time / 10 ** 6
+        self.communication_not_overlapped = self.communication_not_overlapped / 10 ** 6
+        self.wait_time = self.wait_time / 10 ** 6
+        self.e2e_time = self.e2e_time / 10 ** 6
+        self.scheduling_time = self.scheduling_time / 10 ** 6
+        self.lccl_time = self.lccl_time / 10 ** 6
     def calculate_other_time(self):
         self.other_time = max(
             [0, self.compute_time - self.cube_time - self.fa_time_fwd - self.fa_time_bwd -
@@ -183,14 +192,6 @@ class ProfilingInfo:
     def calculate_schedule_time(self):
         self.scheduling_time = (self.e2e_time - self.compute_time - self.lccl_time - self.communication_not_overlapped)
 
-    def update_fa_fwd_info(self, time: float):
-        self.fa_time_fwd += time
-        self.fa_num_fwd += 1
-
-    def update_fa_bwd_info(self, time: float):
-        self.fa_time_bwd += time
-        self.fa_num_bwd += 1
-
     def update_fa_fwd_cube_info(self, time: float):
         self.fa_time_fwd_cube += time
         self.fa_num_fwd_cube += 1
@@ -215,22 +216,10 @@ class ProfilingInfo:
         self.sdma_time_stream += time
         self.sdma_num_stream += num
 
-    def update_pa_info(self, time: float):
-        self.pa_time += time
-        self.pa_num += 1
-
     def update_lccl_info(self, time: float):
         self.lccl_time += time
         self.lccl_num += 1
 
-    def update_conv_fwd_info(self, time: float):
-        self.conv_time_fwd += time
-        self.conv_num_fwd += 1
-
-    def update_conv_bwd_info(self, time: float):
-        self.conv_time_bwd += time
-        self.conv_num_bwd += 1
-
     def update_conv_bwd_cube_info(self, time: float):
         self.conv_time_bwd_cube += time
         self.conv_num_bwd_cube += 1
@@ -267,18 +256,6 @@ class ProfilingInfo:
         self.vector_time_notrans += time
         self.vector_num_notrans += 1
 
-    def update_sdma_info(self, time: float, num: int = 1):
-        self.sdma_time += time
-        self.sdma_num += num
-
-    def update_cube_info(self, time: float):
-        self.cube_time += time
-        self.cube_num += 1
-
-    def update_vec_info(self, time: float):
-        self.vec_time += time
-        self.vec_num += 1
-
     def update_other_cube_info(self, time: float):
         self.other_cube_time += time
         self.other_cube_num += 1
@@ -306,3 +283,9 @@ class ProfilingInfo:
 
     def is_not_minimal_profiling(self) -> bool:
         return self.profiling_type == Constant.NPU and not self.minimal_profiling
+
+    def set_RDMA_bandwidth(self, bandwidth: float):
+        self.RDMA_bandwidth = bandwidth
+
+    def set_SDMA_bandwidth(self, bandwidth: float):
+        self.SDMA_bandwidth = bandwidth
\ No newline at end of file
diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py
index 0aeeba83efb1ec62b0cf53ced7084dcccb7aa6c8..175b77603c8b83388c34b49843e3ff1257a2414f 100644
--- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py
+++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py
@@ -61,9 +61,9 @@ class GPUProfilingParser(BaseProfilingParser):
     def _update_overall_metrics(self):
         self._calculate_performance_time()
         self.__parse_memory_reserved()
+        self._result_data.overall_metrics.trans_time_to_s()
         self._result_data.overall_metrics.calculate_vec_time()
         self._result_data.overall_metrics.calculate_schedule_time()
-        self._result_data.overall_metrics.trans_time_to_s()
 
     def _calculate_performance_time(self):
         min_ts = sys.float_info.max
@@ -76,7 +76,6 @@ class GPUProfilingParser(BaseProfilingParser):
                 min_ts = min(event.start_time, min_ts)
                 max_ts = max(event.end_time, max_ts)
             if event.stream == self._compute_stream_id and self.__is_sdma_time(event.name):
-                self._result_data.overall_metrics.update_sdma_info(event.dur)
                 self._result_data.overall_metrics.update_sdma_stream_info(event.dur)
                 continue
             if not event.is_kernel_cat():
@@ -84,7 +83,6 @@ class GPUProfilingParser(BaseProfilingParser):
             self.__add_marks(event)
             if event.is_nccl_name():
                 continue
-            self.__add_compute_time(event, aten_events, flow_dict_new)
             self.categorize_computing_performance_data(event, flow_dict_new)
         self._aten_events = None
         self._result_data.overall_metrics.set_e2e_time(float(max_ts - min_ts))
@@ -104,23 +102,6 @@ class GPUProfilingParser(BaseProfilingParser):
             for timestep in range(int(event.start_time + 1), int(event.end_time + 1)):
                 self._marks[str(timestep)] += -100  # mark this timestep in compute stream
 
-    def __add_compute_time(self, event: TraceEventBean, aten_events: list, flow_dict_new: dict):
-        if self.__is_flash_attention(event.name):
-            if event.is_backward():
-                self._result_data.overall_metrics.update_fa_bwd_info(event.dur)
-            else:
-                self._result_data.overall_metrics.update_fa_fwd_info(event.dur)
-        elif any(cube_mark in event.lower_name for cube_mark in self.CUBE_MARK):
-            is_conv = self.__check_is_conv(event, aten_events, flow_dict_new)
-            if is_conv == "conv_fwd":
-                self._result_data.overall_metrics.update_conv_fwd_info(event.dur)
-            elif is_conv == "conv_bwd":
-                self._result_data.overall_metrics.update_conv_bwd_info(event.dur)
-            else:
-                self._result_data.overall_metrics.update_cube_info(event.dur)
-        else:
-            self._result_data.overall_metrics.update_vec_info(event.dur)
-
     def __check_is_conv(self, event: TraceEventBean, aten_events: list, flow_dict_new: dict) -> str:
         flow_start_time = flow_dict_new.get(event.start_time)
         if not flow_start_time:
diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
index cb25c252c6c825cb22fea63a4c1ecc82f9c61e57..3c3f054273a6d3b919c8a570d4c8898c47010b8f 100644
--- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
+++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
@@ -22,6 +22,7 @@ class NPUProfilingParser(BaseProfilingParser):
         self._operator_memory_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "operator_memory.csv")
         self._memory_record_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "memory_record.csv")
         self._kernel_detail_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "kernel_details.csv")
+        self._communication_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "communication.json")
         self._info_json_path = path_dict.get(Constant.INFO_JSON_PATH, "")
         self._trace_events = [TraceEventBean(event) for event in self._trace_events]
         self._hccl_pid = None
@@ -78,7 +79,6 @@ class NPUProfilingParser(BaseProfilingParser):
             print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.")
             return
         self._result_data.update_kernel_details(kernels_dict)
-
     def _update_memory_list(self):
         try:
             memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean)
@@ -121,6 +121,35 @@ class NPUProfilingParser(BaseProfilingParser):
         return self._dequeue_data[left].corr_id if self._dequeue_data[left].start_time <= ts_time <= \
                                                    self._dequeue_data[left].end_time else Constant.INVALID_VALUE
 
+    def _update_bandwidth(self):
+        try:
+            communication_json = FileReader.read_json_file(self._communication_path)
+        except FileNotFoundError:
+            print("[WARNING] The file communication.json does not exist.")
+        except Exception:
+            print("[ERROR] Failed to read communication.json.")
+            return
+        if not communication_json:
+            print("[WARNING] The JSON file is empty.")
+            return
+        for _, group_dict in communication_json.items():
+            step_dict = group_dict.get("collective")
+            total_op_info = step_dict.get("Total Op Info", {})
+            rdma_size_mb = rdma_time_ms = sdma_size_mb = sdma_time_ms = 0
+            if "Communication Bandwidth Info" in total_op_info:
+                bandwidth_info = total_op_info["Communication Bandwidth Info"]
+                if "RDMA" in bandwidth_info:
+                    rdma_info = bandwidth_info["RDMA"]
+                    rdma_size_mb += rdma_info.get("Transit Size(MB)", 0)  # 单位为 MB
+                    rdma_time_ms += rdma_info.get("Transit Time(ms)", 0)  # 单位为 MS
+                if "SDMA" in bandwidth_info:
+                    sdma_info = bandwidth_info["SDMA"]
+                    sdma_size_mb += sdma_info.get("Transit Size(MB)", 0)  # 单位为 MB
+                    sdma_time_ms += sdma_info.get("Transit Time(ms)", 0)  # 单位为 MS
+                rdma_bandwidth = (rdma_size_mb / 1024) / (rdma_time_ms / 1000) if rdma_time_ms > 0 else 0
+                sdma_bandwidth = (sdma_size_mb / 1024) / (sdma_time_ms / 1000) if sdma_time_ms > 0 else 0
+        self._result_data.overall_metrics.set_RDMA_bandwidth(rdma_bandwidth)
+        self._result_data.overall_metrics.set_SDMA_bandwidth(sdma_bandwidth)
     def _update_overall_metrics(self):
         self.__parse_info_json()
         self.__parse_mem_csv()
@@ -130,10 +159,11 @@ class NPUProfilingParser(BaseProfilingParser):
         self.__add_overlap_analysis_time()
         self._picking_notify_wait_event_and_not_overlap_event()
         self.__add_overlap_wait_time()
+        self._result_data.overall_metrics.trans_time_to_s()
         self._result_data.overall_metrics.calculate_other_time()
         self._result_data.overall_metrics.calculate_schedule_time()
-        self._result_data.overall_metrics.trans_time_to_s()
 
+        self._update_bandwidth()
     def _picking_notify_wait_event_and_not_overlap_event(self):
         self.notify_event_cache = []
         self._not_overlaped_commu_event = []
@@ -271,28 +301,6 @@ class NPUProfilingParser(BaseProfilingParser):
                 self._result_data.overall_metrics.update_lccl_info(event.dur)
 
     def __parse_kernel_csv(self):
-        def __screen_data(kernel: KernelDetailsBean):
-            if kernel.is_flash_attention():
-                if kernel.is_fa_bwd():
-                    self._result_data.overall_metrics.update_fa_bwd_info(kernel.duration)
-                else:
-                    self._result_data.overall_metrics.update_fa_fwd_info(kernel.duration)
-            elif kernel.is_conv():
-                if kernel.is_conv_bwd():
-                    self._result_data.overall_metrics.update_conv_bwd_info(kernel.duration)
-                else:
-                    self._result_data.overall_metrics.update_conv_fwd_info(kernel.duration)
-            elif kernel.is_matmul():
-                self._result_data.overall_metrics.update_cube_info(kernel.duration)
-            elif kernel.is_sdma():
-                self._result_data.overall_metrics.update_sdma_info(kernel.duration)
-            elif kernel.is_page_attention():
-                self._result_data.overall_metrics.update_pa_info(kernel.duration)
-            elif kernel.is_vector():
-                self._result_data.overall_metrics.update_vec_info(kernel.duration)
-            else:
-                self._result_data.overall_metrics.update_cube_info(kernel.duration)
-
         try:
             kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean)
         except Exception:
@@ -306,7 +314,6 @@ class NPUProfilingParser(BaseProfilingParser):
         for kernel in kernel_details:
             if kernel.is_invalid():
                 continue
-            __screen_data(kernel)
             self.categorize_computing_performance_data(kernel, flow_dict_new)
 
     def __parse_mem_csv(self):
@@ -353,5 +360,4 @@ class NPUProfilingParser(BaseProfilingParser):
         compute_stream = event_wait_stream & ai_core_stream if event_wait_stream else ai_core_stream
         for stream in compute_stream:
             dur_list = sdma_dict.get(stream, [])
-            self._result_data.overall_metrics.update_sdma_info(sum(dur_list), len(dur_list))
             self._result_data.overall_metrics.update_sdma_stream_info(sum(dur_list), len(dur_list))
diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py
index 252aa536e1c73d58f86071bbefab5286004bb6f9..80d7d5ee4f9a0834831e8c81bc61e299caf42bee 100644
--- a/profiler/compare_tools/compare_backend/utils/constant.py
+++ b/profiler/compare_tools/compare_backend/utils/constant.py
@@ -6,6 +6,7 @@ class Constant(object):
     MAX_PATH_LENGTH = 4096
     MAX_FLOW_CAT_LEN = 20
     MAX_FILE_SIZE = 1024 * 1024 * 1024 * 5
+    MAX_JSON_SIZE = 1024 * 1024 * 1024 * 10
     BYTE_TO_KB = 1024
     YELLOW_COLOR = "FFFF00"
     GREEN_COLOR = "00FF00"
diff --git a/profiler/compare_tools/compare_backend/utils/file_reader.py b/profiler/compare_tools/compare_backend/utils/file_reader.py
index b4ae786388b2f1bed6ad50cfb39ac8621c1ea1f1..99358368cb1d45991da755d115d7542361c24a54 100644
--- a/profiler/compare_tools/compare_backend/utils/file_reader.py
+++ b/profiler/compare_tools/compare_backend/utils/file_reader.py
@@ -7,7 +7,28 @@ from compare_backend.utils.constant import Constant
 
 
 class FileReader:
-
+    @classmethod
+    def read_json_file(cls, file_path: str, bean_class: any = None) -> any:
+        PathManager.check_path_readable(file_path)
+        if not os.path.isfile(file_path):
+            raise FileNotFoundError("File not exists.")
+        file_size = os.path.getsize(file_path)
+        if file_size <= 0:
+            return []
+        if file_size > Constant.MAX_JSON_SIZE:
+            check_msg = input(
+                f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]")
+            if check_msg.lower() != "y":
+                print(f"[WARNING] The user choose not to read the file: {file_path}")
+                return []
+        result_data = []
+        try:
+            with open(file_path, "r") as json_file:
+                result_data = json.loads(json_file.read())
+        except Exception as e:
+            msg = f"Failed to read the file: {file_path}"
+            raise RuntimeError(msg) from e
+        return result_data
     @classmethod
     def read_trace_file(cls, file_path: str) -> any:
         PathManager.check_path_readable(file_path)