diff --git a/profiler/compare_tools/comparator/__init__.py b/profiler/compare_tools/comparator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/compare_tools/comparator/index_comparator.py b/profiler/compare_tools/comparator/index_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..d122e3ea3c1e0600c44b2c97949d247a4e6b92d2 --- /dev/null +++ b/profiler/compare_tools/comparator/index_comparator.py @@ -0,0 +1,44 @@ +import pandas as pd + +from utils.args_manager import ArgsManager +from utils.constant import Constant + + +class IndexComparator: + def __init__(self, args: any): + self._args = args + self._args_manager = ArgsManager() + self._base_profiling = self._args_manager.base_profiling + self._comparison_profiling = self._args_manager.comparison_profiling + + def compare(self) -> list: + base_data, comparison_data = [], [] + if not self._base_profiling.communication_data: + print(f"[warning] Can't find any communication op in the file: {self._base_profiling.json_path}") + for data in self._base_profiling.communication_data: + name_list = data.get("name", "").split("_") + if len(name_list) >= 2: + base_data.append([name_list[1].lower(), float(data.get("dur", 0))]) + if not base_data: + base_data = pd.DataFrame(base_data, columns=Constant.COLUMNS) + else: + base_df = pd.DataFrame(base_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + base_data = base_df.groupby(Constant.OP_KEY).agg(["count", "sum", "mean", "max", "min"]).reset_index() + base_data.columns = Constant.COLUMNS + if self._args.base_profiling_path == self._args.comparison_profiling_path: + comparison_data = [] + else: + if not self._comparison_profiling.communication_data: + print(f"[warning] Can't find any communication op in the file: {self._comparison_profiling.json_path}") + for data in self._comparison_profiling.communication_data: + name_list = data.get("name", 
"").split("_") + if len(name_list) >= 2: + comparison_data.append([name_list[1].lower(), float(data.get("dur", 0))]) + if not comparison_data: + comparison_data = pd.DataFrame(comparison_data, columns=Constant.COLUMNS) + else: + comparison_df = pd.DataFrame(comparison_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + comparison_data = comparison_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index() + comparison_data.columns = Constant.COLUMNS + return pd.merge(base_data, comparison_data, how="outer", on=Constant.OP_KEY) diff --git a/profiler/compare_tools/comparator/op_comparator.py b/profiler/compare_tools/comparator/op_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..f299463fe55660fb3334f6de792d887c44837e29 --- /dev/null +++ b/profiler/compare_tools/comparator/op_comparator.py @@ -0,0 +1,129 @@ +from collections import deque + +import numpy as np + +from utils.args_manager import ArgsManager +from utils.name_function import NameFunction +from utils.torch_op_node import TorchOpNode +from utils.tree_builder import TreeBuilder + + +class OpComparator: + def __init__(self, args: any): + self._args = args + self._args_manager = ArgsManager() + self._base_profiling = self._args_manager.base_profiling + self._comparison_profiling = self._args_manager.comparison_profiling + + def compare(self) -> list: + base_ops = self._get_top_layer_ops(self._base_profiling) + if self._args.base_profiling_path == self._args.comparison_profiling_path: + comparison_ops = [] + else: + comparison_ops = self._get_top_layer_ops(self._comparison_profiling) + if not base_ops and not comparison_ops: + return [] + name_func = NameFunction(self._args).get_name_func() + compare_result_data = self._matching_op(base_ops, comparison_ops, name_func) + if self._args.max_kernel_num is not None: + compare_result_data = self._drill_down(compare_result_data, name_func) + return compare_result_data + + + @classmethod + def 
_matching_op(cls, base_ops: list, comparison_ops: list, name_func: any) -> list: + if not comparison_ops: + result_data = [None] * len(base_ops) + for index in range(len(base_ops)): + result_data[index] = [base_ops[index], None] + return result_data + + result_data = [] + comparison_len, base_len = len(comparison_ops), len(base_ops) + dp = [[0] * (base_len + 1) for _ in range(comparison_len + 1)] + for comparison_index in range(1, comparison_len + 1): + for base_index in range(1, base_len + 1): + if name_func(base_ops[base_index - 1]) == name_func( + comparison_ops[comparison_index - 1]): + dp[comparison_index][base_index] = dp[comparison_index - 1][base_index - 1] + 1 + else: + dp[comparison_index][base_index] = max(dp[comparison_index][base_index - 1], + dp[comparison_index - 1][base_index]) + matched_op = [] + comparison_index, base_index = comparison_len, base_len + while comparison_index > 0 and base_index > 0: + if name_func(base_ops[base_index - 1]) == name_func( + comparison_ops[comparison_index - 1]): + matched_op.append([comparison_index - 1, base_index - 1]) + comparison_index -= 1 + base_index -= 1 + continue + if dp[comparison_index][base_index - 1] > dp[comparison_index - 1][base_index]: + base_index -= 1 + else: + comparison_index -= 1 + if not matched_op: + matched_base_index_list = [] + else: + matched_op.reverse() + matched_op = np.array(matched_op) + matched_base_index_list = list(matched_op[:, 1]) + curr_comparison_index = 0 + for base_index, base_api_node in enumerate(base_ops): + if base_index not in matched_base_index_list: + result_data.append([base_api_node, None]) + continue + matched_comparison_index = matched_op[matched_base_index_list.index(base_index), 0] + for comparison_index in range(curr_comparison_index, matched_comparison_index): + result_data.append([None, comparison_ops[comparison_index]]) + result_data.append([base_api_node, comparison_ops[matched_comparison_index]]) + curr_comparison_index = matched_comparison_index + 1 + if 
curr_comparison_index < len(comparison_ops): + for comparison_index in range(curr_comparison_index, len(comparison_ops)): + result_data.append([None, comparison_ops[comparison_index]]) + return result_data + + def _get_top_layer_ops(self, profiling_instance: any) -> any: + torch_op_data = profiling_instance.torch_op_data + if not torch_op_data: + print(f"[warning] Can't find any torch op in the file: {profiling_instance.json_path}") + root_node = TreeBuilder.build_tree(torch_op_data) + + kernel_dict, memory_list = {}, [] + if not self._args.disable_operator_compare: + kernel_dict = profiling_instance.kernel_dict + if not kernel_dict: + print(f"[warning] Can't find any flow event in the file: {profiling_instance.json_path}") + if not self._args.disable_memory_compare: + memory_list = profiling_instance.memory_list + if not memory_list: + print(f"[warning] Can't find any memory event in the file: {profiling_instance.file_path}") + + TreeBuilder.update_tree_node(root_node, kernel_dict, memory_list) + level1_child_nodes = root_node.child_nodes + result_data = [] + for level1_node in level1_child_nodes: + if level1_node.is_step_profiler(): + result_data.extend(level1_node.child_nodes) + else: + result_data.append(level1_node) + return result_data + + def _drill_down(self, compare_result_data: list, name_func: any) -> list: + drill_down_result = [] + compare_result_data.reverse() + op_deque = deque(compare_result_data) + while op_deque: + match_data = op_deque.pop() + base_op = match_data[0] if match_data[0] else TorchOpNode() + comparison_op = match_data[1] if match_data[1] else TorchOpNode() + if not base_op.child_nodes or not comparison_op.child_nodes: + drill_down_result.append(match_data) + continue + if max(base_op.kernel_num, comparison_op.kernel_num) <= self._args.max_kernel_num: + drill_down_result.append(match_data) + continue + match_list = self._matching_op(base_op.child_nodes, comparison_op.child_nodes, name_func) + match_list.reverse() + for data in 
match_list: + op_deque.append(data) diff --git a/profiler/compare_tools/generation/__init__.py b/profiler/compare_tools/generation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/compare_tools/generation/abstract_cmp.py b/profiler/compare_tools/generation/abstract_cmp.py new file mode 100644 index 0000000000000000000000000000000000000000..20070600d20de390915ab4d25d46dc89089bed94 --- /dev/null +++ b/profiler/compare_tools/generation/abstract_cmp.py @@ -0,0 +1,7 @@ +from abc import ABCMeta, abstractmethod + + +class AbstractCMP(metaclass=ABCMeta): + @abstractmethod + def create_sheet(self): + raise NotImplementedError diff --git a/profiler/compare_tools/generation/communication_comparison_generator.py b/profiler/compare_tools/generation/communication_comparison_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..7838c45aec59e6b794ec26d12f65336a22ee681c --- /dev/null +++ b/profiler/compare_tools/generation/communication_comparison_generator.py @@ -0,0 +1,141 @@ +import math +import pandas as pd + +from openpyxl.styles import Font, PatternFill, Alignment +from openpyxl.workbook import Workbook +from pandas import DataFrame + +from utils.args_manager import ArgsManager +from utils.constant import Constant + + +class CommunicationComparisonGenerator: + def __init__(self, args: any, compare_result_data: DataFrame): + self._args = args + self._args_manager = ArgsManager() + self._compare_result_data = compare_result_data + + def create_sheet(self, workbook: Workbook): + ws = workbook.create_sheet("CommunicationCompare", 0) + ws.sheet_properties.tabColor = Constant.YELLOW_COLOR + # write headers + base_headers = Constant.CMP_COMMUNICATION_HEADER + comparison_headers = Constant.CMP_COMMUNICATION_HEADER + headers = base_headers + comparison_headers + [Constant.DIFF] + base_trace_start_column = 0 + comparison_trace_start_column = len(base_headers) + 
diff_start_column = len(base_headers) + len(comparison_headers) + + for col_index in range(len(headers)): + ws.cell(row=1, column=col_index + 1).border = Constant.BORDER + ws.cell(row=1, column=col_index + 1).font = Font(name='Arial') + ws.cell(row=1, column=col_index + 1).fill = Constant.HEADERS_FILL + ws.cell(row=2, column=col_index + 1).border = Constant.BORDER + ws.cell(row=2, column=col_index + 1).font = Font(name='Arial', bold=True) + ws.cell(row=2, column=col_index + 1).fill = Constant.HEADERS_FILL + header_name = headers[col_index] + if col_index < comparison_trace_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.BASE_PROFILING + elif col_index < diff_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.COMPARISON_PROFILING + else: + ws.cell(row=1, column=col_index + 1).value = header_name + ws.cell(row=2, column=col_index + 1).value = header_name + dim = ws.cell(row=2, column=col_index + 1).coordinate + ws.column_dimensions[dim[0]].width = Constant.COLUMN_WIDTH_CLL.get(header_name) + ws.merge_cells(start_row=1, start_column=base_trace_start_column + 1, + end_row=1, end_column=comparison_trace_start_column) + ws.merge_cells(start_row=1, start_column=comparison_trace_start_column + 1, + end_row=1, end_column=diff_start_column) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF) + 1, + end_row=2, end_column=headers.index(Constant.DIFF) + 1) + + # write lines + row_index = 3 + for _, row in self._compare_result_data.iterrows(): + # write summary lines + base_name = Constant.NA if math.isnan(row[Constant.BASE_CALLS]) else row[Constant.OP_KEY] + comparison_name = Constant.NA if math.isnan(row[Constant.COMPARISON_CALLS]) else row[Constant.OP_KEY] + if math.isnan(row[Constant.BASE_SUM]) or math.isnan(row[Constant.COMPARISON_SUM]) or row[ + Constant.BASE_SUM] == 0: + diff = Constant.NA + else: + diff = (row[Constant.COMPARISON_SUM] - row[Constant.BASE_SUM]) / row[Constant.BASE_SUM] + row_data = [base_name, 
Constant.NA, row[Constant.BASE_CALLS], row[Constant.BASE_SUM], + row[Constant.BASE_AVG], row[Constant.BASE_MAX], row[Constant.BASE_MIN], comparison_name, + Constant.NA, row[Constant.COMPARISON_CALLS], row[Constant.COMPARISON_SUM], + row[Constant.COMPARISON_AVG], row[Constant.COMPARISON_MAX], row[Constant.COMPARISON_MIN], diff] + for index in range(len(headers)): + if headers[index] in ( + Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, + Constant.MIN_DURATION): + ws.cell(row=row_index, column=index + 1).number_format = '0.00' + if headers[index] == Constant.DIFF: + ws.cell(row=row_index, column=index + 1).number_format = '0.00%' + if diff != Constant.NA and diff < 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', + color=Constant.GREEN_COLOR) + elif diff != Constant.NA and diff >= 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) + else: + bold = headers[index] == Constant.COMMUNICAT_OP + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=bold) + value = row_data[index] + if value != Constant.NA: + ws.cell(row=row_index, column=index + 1).value = value + ws.cell(row=row_index, column=index + 1).border = Constant.BORDER + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.SUMMARY_LINE_COLOR) + row_index += 1 + + # write detail lines + base_task_list = self._args_manager.base_profiling.communication_task_data.get(base_name, []) + comparison_task_list = self._args_manager.comparison_profiling.communication_task_data.get(comparison_name, + []) + if base_task_list: + base_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in base_task_list] + base_df = pd.DataFrame(base_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + base_data = base_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() + else: + base_data = [] + if 
comparison_task_list: + comparison_data = [[data.get("name", ""), float(data.get("dur", 0))] for data in comparison_task_list] + comparison_df = pd.DataFrame(comparison_data, columns=[Constant.OP_KEY, Constant.DEVICE_DUR]) + comparison_data = comparison_df.groupby(Constant.OP_KEY).agg( + ["count", "sum", "mean", "max", "min"]).reset_index().values.tolist() + else: + comparison_data = [] + + for index in range(max(len(base_data), len(comparison_data))): + base_detail_data, comparison_detail_data = [Constant.NA] * len(base_headers), \ + [Constant.NA] * len(comparison_headers) + base_detail_data[0] = "|" + comparison_detail_data[0] = "|" + if index < len(base_data): + total_dur = sum([data[2] for data in base_data]) + dur_percent = "%.2f%%" % (base_data[index][2] / total_dur * 100) + base_data[index][0] = f"{base_data[index][0]} ({dur_percent})" + base_detail_data[1:] = base_data[index] + if index < len(comparison_data): + total_dur = sum([data[2] for data in comparison_data]) + dur_percent = "%.2f%%" % (comparison_data[index][2] / total_dur * 100) + comparison_data[index][0] = f"{comparison_data[index][0]} ({dur_percent})" + comparison_detail_data[1:] = comparison_data[index] + + detail_data = base_detail_data + comparison_detail_data + [Constant.NA] + for colum_index in range(len(headers)): + if headers[colum_index] in ( + Constant.CALLS, Constant.TOTAL_DURATION, Constant.AVG_DURATION, Constant.MAX_DURATION, + Constant.MIN_DURATION): + ws.cell(row=row_index, column=colum_index + 1) .number_format = '0.00' + value = detail_data[colum_index] + if value != Constant.NA: + ws.cell(row=row_index, column=colum_index + 1).value = value + bold = headers[colum_index] == Constant.OP_NAME + ws.cell(row=row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=row_index, column=colum_index + 1).border = Constant.BORDER + if headers[colum_index] == Constant.COMMUNICAT_OP: + ws.cell(row=row_index, column=colum_index + 1).alignment = 
Alignment(horizontal="center", + vertical="center") + row_index += 1 diff --git a/profiler/compare_tools/generation/comparison_generator.py b/profiler/compare_tools/generation/comparison_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..f415262cd239cc282603520f5caaaf3c4819e2bd --- /dev/null +++ b/profiler/compare_tools/generation/comparison_generator.py @@ -0,0 +1,35 @@ +import os + +from openpyxl.workbook import Workbook + +from comparator.index_comparator import IndexComparator +from comparator.op_comparator import OpComparator +from generation.communication_comparison_generator import CommunicationComparisonGenerator +from generation.op_comparison_generator import OpComparisonGenerator +from utils.constant import Constant +from utils.args_manager import ArgsManager + + +class ComparisonGenerator: + def __init__(self, args: any): + self._args = args + self._args_manager = ArgsManager() + + def create_excel(self, file_path: str): + wb = Workbook() + if not self._args.disable_operator_compare or not self._args.disable_memory_compare: + op_compare_result = OpComparator(self._args).compare() + if op_compare_result: + if not self._args.disable_operator_compare: + OpComparisonGenerator(self._args, op_compare_result, Constant.OPERATOR_COMPARE).create_sheet(wb) + if not self._args.disable_memory_compare: + OpComparisonGenerator(self._args, op_compare_result, Constant.MEMORY_COMPARE).create_sheet(wb) + + if not self._args.disable_communication_compare: + index_compare_result = IndexComparator(self._args).compare() + if not index_compare_result.empty: + CommunicationComparisonGenerator(self._args, index_compare_result).create_sheet(wb) + + wb.save(file_path) + wb.close() + os.chmod(file_path, Constant.FILE_AUTHORITY) diff --git a/profiler/compare_tools/generation/op_comparison_generator.py b/profiler/compare_tools/generation/op_comparison_generator.py new file mode 100644 index 
0000000000000000000000000000000000000000..980b683896f79253e12de15c45975f30048c5e7f --- /dev/null +++ b/profiler/compare_tools/generation/op_comparison_generator.py @@ -0,0 +1,158 @@ +import copy + +from openpyxl.styles import Font, PatternFill, Alignment +from openpyxl.workbook import Workbook + +from utils.args_manager import ArgsManager +from utils.constant import Constant +from utils.tree_builder import TreeBuilder + + +class OpComparisonGenerator: + def __init__(self, args: any, compare_result_data: list, compare_type: str): + self._args = args + self._compare_result_data = compare_result_data + self._compare_type = compare_type + self._base_headers = [] + self._comparison_headers = [] + self.update_headers() + + def update_headers(self): + base_profiling_type = ArgsManager().base_profiling_type + comparison_profiling_type = ArgsManager().comparison_profiling_type + if self._compare_type == Constant.MEMORY_COMPARE: + self._base_headers = Constant.CMP_MEMORY_HEADER + self._comparison_headers = Constant.CMP_MEMORY_HEADER + elif self._compare_type == Constant.OPERATOR_COMPARE: + self._base_headers = Constant.GPU_CMP_KERNEL_HEADER if base_profiling_type == Constant.GPU else \ + Constant.NPU_CMP_KERNEL_HEADER + self._comparison_headers = Constant.GPU_CMP_KERNEL_HEADER if comparison_profiling_type == Constant.GPU \ + else Constant.NPU_CMP_KERNEL_HEADER + + def create_sheet(self, workbook: Workbook): + ws = workbook.create_sheet(self._compare_type, 0) + ws.sheet_properties.tabColor = Constant.YELLOW_COLOR + # write headers + headers = self._base_headers + self._comparison_headers + [Constant.DIFF, Constant.OP_NAME_FILTER, + Constant.DIFF_FILTER] + + base_trace_start_column = 0 + comparison_trace_start_column = len(self._base_headers) + diff_start_column = len(self._base_headers) + len(self._comparison_headers) + + for col_index in range(len(headers)): + ws.cell(row=1, column=col_index + 1).border = Constant.BORDER + ws.cell(row=1, column=col_index + 1).font = 
Font(name='Arial') + ws.cell(row=1, column=col_index + 1).fill = Constant.HEADERS_FILL + ws.cell(row=2, column=col_index + 1).border = Constant.BORDER + ws.cell(row=2, column=col_index + 1).font = Font(name='Arial', bold=True) + ws.cell(row=2, column=col_index + 1).fill = Constant.HEADERS_FILL + header_name = headers[col_index] + if col_index < comparison_trace_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.BASE_PROFILING + elif col_index < diff_start_column: + ws.cell(row=1, column=col_index + 1).value = Constant.COMPARISON_PROFILING + else: + ws.cell(row=1, column=col_index + 1).value = header_name + ws.cell(row=2, column=col_index + 1).value = header_name + dim = ws.cell(row=2, column=col_index + 1).coordinate + width = Constant.COLUMN_WIDTH.get(header_name) if Constant.COLUMN_WIDTH.get( + header_name) else Constant.DEFAULT_WIDTH + ws.column_dimensions[dim[0]].width = width + ws.merge_cells(start_row=1, start_column=base_trace_start_column + 1, + end_row=1, end_column=comparison_trace_start_column) + ws.merge_cells(start_row=1, start_column=comparison_trace_start_column + 1, + end_row=1, end_column=diff_start_column) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF) + 1, + end_row=2, end_column=headers.index(Constant.DIFF) + 1) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.OP_NAME_FILTER) + 1, + end_row=2, end_column=headers.index(Constant.OP_NAME_FILTER) + 1) + ws.merge_cells(start_row=1, start_column=headers.index(Constant.DIFF_FILTER) + 1, + end_row=2, end_column=headers.index(Constant.DIFF_FILTER) + 1) + + # write lines + row_index = 3 + for data in self._compare_result_data: + # write summary lines + base_event_list = TreeBuilder.get_total_compare_event(data[0], self._compare_type) if data[0] else [] + comparison_event_list = TreeBuilder.get_total_compare_event(data[1], self._compare_type) if data[1] else [] + base_summary_data, comparison_summary_data = [Constant.NA] * 
len(self._base_headers), \ + [Constant.NA] * len(self._comparison_headers) + if data[0]: + base_summary_data[0] = data[0].name + base_summary_data[1] = data[0].input_shape + base_summary_data[2] = data[0].input_type + base_summary_data[3] = sum( + [x.compare_index for x in base_event_list]) if base_event_list else Constant.NA + if data[1]: + comparison_summary_data[0] = data[1].name + comparison_summary_data[1] = data[1].input_shape + comparison_summary_data[2] = data[1].input_type + comparison_summary_data[3] = sum( + [x.compare_index for x in comparison_event_list]) if comparison_event_list else Constant.NA + if base_event_list and comparison_event_list and base_summary_data[3]: + diff = (comparison_summary_data[3] - base_summary_data[3]) / base_summary_data[3] + else: + diff = Constant.NA + op_name = data[0].name if data[0] else data[1].name + + summary_data = base_summary_data + comparison_summary_data + [diff, op_name, diff] + for index in range(len(headers)): + value = summary_data[index] + if headers[index] == Constant.DIFF: + ws.cell(row=row_index, column=index + 1).number_format = '0.00%' + if value != Constant.NA and value < 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.GREEN_COLOR) + elif value != Constant.NA and value >= 0: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', color=Constant.RED_COLOR) + if headers[index] == Constant.DIFF_FILTER: + if value != Constant.NA and value < 0: + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.GREEN_COLOR) + elif value != Constant.NA and value >= 0: + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor=Constant.RED_COLOR) + elif headers[index] != Constant.OP_NAME_FILTER: + ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", + fgColor=Constant.SUMMARY_LINE_COLOR) + + if value != Constant.NA: + ws.cell(row=row_index, column=index + 1).value = value + bold = headers[index] == 
Constant.OP_NAME + if headers[index] != Constant.DIFF: + ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=row_index, column=index + 1).border = Constant.BORDER + row_index += 1 + + # write detail lines + base_event_num, comparison_event_num = len(base_event_list), len(comparison_event_list) + for index in range(max(base_event_num, comparison_event_num)): + base_detail_data, comparison_detail_data = [Constant.NA] * len(self._base_headers), \ + [Constant.NA] * len(self._comparison_headers) + base_detail_data[0] = "|" + comparison_detail_data[0] = "|" + if index < base_event_num: + base_event = base_event_list[index] + base_detail_data[1:] = base_event.get_record() + if index < comparison_event_num: + comparison_event = comparison_event_list[index] + comparison_detail_data[1:] = comparison_event.get_record() + + detail_data = base_detail_data + comparison_detail_data + [Constant.NA, op_name, Constant.NA] + for colum_index in range(len(headers)): + value = detail_data[colum_index] + if value != Constant.NA: + ws.cell(row=row_index, column=colum_index + 1).value = value + bold = headers[colum_index] == Constant.OP_NAME + ws.cell(row=row_index, column=colum_index + 1).font = Font(name='Arial', bold=bold) + ws.cell(row=row_index, column=colum_index + 1).border = Constant.BORDER + if headers[colum_index] == Constant.DIFF_FILTER: + if diff != Constant.NA and diff < 0: + ws.cell(row=row_index, column=colum_index + 1).fill = PatternFill("solid", + fgColor=Constant.GREEN_COLOR) + elif diff != Constant.NA and diff >= 0: + ws.cell(row=row_index, column=colum_index + 1).fill = PatternFill("solid", + fgColor=Constant.RED_COLOR) + if headers[colum_index] == Constant.OP_NAME: + ws.cell(row=row_index, column=colum_index + 1).alignment = Alignment(horizontal="center", + vertical="center") + row_index += 1 diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py new file mode 100644 index 
0000000000000000000000000000000000000000..e2c47ef6c58129c2ba3db723e99cb073c47af11b --- /dev/null +++ b/profiler/compare_tools/performance_compare.py @@ -0,0 +1,43 @@ +import argparse +import ast +import datetime +import os.path +import sys +import time + +from generation.comparison_generator import ComparisonGenerator +from utils.args_manager import ArgsManager + + +def main(): + sys.path.append(os.path.dirname(__file__)) + parser = argparse.ArgumentParser(description="Compare trace of GPU and NPU") + parser.add_argument("base_profiling_path", type=str, default='', help="base profiling file path") + parser.add_argument("comparison_profiling_path", type=str, default='', help="comparison profiling file path") + parser.add_argument("--disable_operator_compare", default=False, action='store_true', + help="do not compare operator execution time") + parser.add_argument("--disable_memory_compare", default=False, action='store_true', + help="do not compare memory usage by operator dimensions") + parser.add_argument("--disable_communication_compare", default=False, action='store_true', + help="do not compare communication operator execution time") + parser.add_argument("--output_path", type=str, default='', help="性能数据比对结果的存放路径") + parser.add_argument("--max_kernel_num", type=int, help="每个torch op的kernel数量限制") + parser.add_argument("--op_name_map", type=ast.literal_eval, default={}, + help="配置GPU OP与NPU OP等价的名称映射关系,以字典的形式传入") + parser.add_argument("--use_input_shape", default=False, action='store_true', help="使用input shape作为匹配信息") + parser.add_argument("--gpu_flow_cat", type=str, default='', help="gpu flow event的分类标识") + args = parser.parse_args() + + ArgsManager().init(args) + dir_path = args.output_path if args.output_path else "./" + file_name = "performance_comparison_result_{}.xlsx".format(time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) + result_file_path = os.path.join(dir_path, file_name) + + ComparisonGenerator(args).create_excel(result_file_path) + + +if 
__name__ == "__main__": + start_time = datetime.datetime.now() + main() + end_time = datetime.datetime.now() + print(f'The comparison task has been completed in a total time of {end_time - start_time}') diff --git a/profiler/compare_tools/torch_op_compare.py b/profiler/compare_tools/torch_op_compare.py deleted file mode 100644 index fbbcba95e3859eb0a01746f3e888751705d9f514..0000000000000000000000000000000000000000 --- a/profiler/compare_tools/torch_op_compare.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import ast -import copy -import json -import os.path -import time -from queue import Queue -import numpy as np - -from openpyxl.styles import PatternFill, Font, Border, Side -from openpyxl.workbook import Workbook - -GPU = 0 -NPU = 1 -NA = 'N/A' -LIMIT_KERNEL = 3 -OP_NAME = 'Operator Name' -INPUT_SHAPE = 'Input Shape' -INPUT_TYPE = 'Input Type' -KERNEL_NAME = 'Kernel Name' -DEVICE_DUR = 'Device Duration(us)' -TASK_ID = 'Task Id' -KERNEL_TYPE = 'Kernel Type' -DIFF = 'DIFF: (sum(Trace2 Duration)-sum(Trace1 Duration))/sum(Trace1 Duration)' -OP_NAME_FILTER = 'Operator Name Filter' -DIFF_FILTER = 'DIFF Filter' -BASE_TRACE = 'Base Trace' -COMPARISON_TRACE = 'Comparison Trace' -BASE_TRACE_TYPE = None -COMPARISON_TRACE_TYPE = None -BASE_TYPE = 1 -COMPARISON_TYPE = 2 -GPU_HEADER = [OP_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_NAME, DEVICE_DUR] -NPU_HEADER = [OP_NAME, INPUT_SHAPE, INPUT_TYPE, KERNEL_NAME, TASK_ID, KERNEL_TYPE, DEVICE_DUR] -FILL_DICT = { - BASE_TYPE: PatternFill("solid", fgColor='003366FF'), COMPARISON_TYPE: PatternFill("solid", fgColor='0033CCCC'), - DIFF: PatternFill("solid", fgColor='00FF0000'), OP_NAME_FILTER: PatternFill("solid", fgColor='00FFFF00'), - DIFF_FILTER: PatternFill("solid", fgColor='00FFFF00') -} -COLUMN_WIDTH = {OP_NAME: 50, INPUT_SHAPE: 25, INPUT_TYPE: 25, KERNEL_NAME: 25, DEVICE_DUR: 25, - TASK_ID: 20, KERNEL_TYPE: 25, DIFF: 25, OP_NAME_FILTER: 25, DIFF_FILTER: 25} -BORDER = Border(top=Side(border_style="thin", color='00000000'), - left=Side(border_style="thin", color='00000000'), - right=Side(border_style="thin", color='00000000'), - bottom=Side(border_style="thin", color='00000000')) - - -class TorchOpNode: - def __init__(self, event=None, parent_node=None): - self._event = event - self._parent_node = parent_node - self._child_nodes = [] - self._kernel_list = [] - self._kernel_num = 0 - - @property - def start_time(self): - return self._event.get("ts", 0) - - @property - def end_time(self): - return self._event.get("ts", 
0) + self._event.get("dur", 0) - - @property - def name(self): - return str(self._event.get("name", NA)) - - @property - def input_shape(self): - return str(self._event.get("args", {}).get("Input Dims", NA)) - - @property - def input_type(self): - return str(self._event.get("args", {}).get("Input type", NA)) - - @property - def parent(self): - return self._parent_node - - @property - def child_nodes(self): - return self._child_nodes - - @property - def kernel_list(self): - return self._kernel_list - - @property - def kernel_num(self): - return self._kernel_num - - def add_child_node(self, child_node): - self._child_nodes.append(child_node) - - def set_kernel_list(self, kernel_list: list): - self._kernel_list = kernel_list - - def add_kernel_num(self, kernel_num: int): - self._kernel_num += kernel_num - - def is_step_profiler(self) -> bool: - return self.name.find("ProfilerStep#") != -1 - - -class TreeBuilder: - @classmethod - def build_tree(cls, event_list: list, flow_kernel_dict: dict) -> TorchOpNode: - root_node = TorchOpNode() - event_list.sort(key=lambda x: x.get("ts", 0)) - last_node = root_node - for event in event_list: - kernel_list = flow_kernel_dict.get(event.get("ts", 0), []) - while last_node: - if last_node == root_node or event.get("ts", 0) < last_node.end_time: - tree_node = TorchOpNode(event, last_node) - last_node.add_child_node(tree_node) - if kernel_list: - tree_node.set_kernel_list(kernel_list) - last_node = tree_node - break - last_node = last_node.parent - return root_node - - @classmethod - def mark_kernel_num(cls, root_node: TorchOpNode, flow_kernel_dict: dict): - for ts, kernel_list in flow_kernel_dict.items(): - curr_node = root_node - while curr_node.child_nodes: - for node in curr_node.child_nodes: - if node.start_time <= ts <= node.end_time: - node.add_kernel_num(len(kernel_list)) - curr_node = node - break - - @classmethod - def get_total_kernels(cls, root_node: TorchOpNode) -> list: - result_list = [] - node_queue = Queue() - for 
child_node in root_node.child_nodes: - node_queue.put(child_node) - while not node_queue.empty(): - tree_node = node_queue.get() - result_list.extend(tree_node.kernel_list) - for child_node in tree_node.child_nodes: - node_queue.put(child_node) - return result_list - - -def read_json_file(file_path: str, trace_type: int) -> any: - event_list = [] - flow_kernel_dict = {} - if not os.path.isfile(file_path): - raise RuntimeError(f"File not exists: {file_path}") - try: - with open(file_path, "rt") as file: - json_data = json.loads(file.read()) - except Exception: - raise RuntimeError(f"Can't read file: {file_path}") - flow_start_dict, flow_end_dict, event_dict = {}, {}, {} - flow_cat = ("async_gpu", "ac2g", "async_npu") - if trace_type == BASE_TYPE: - global BASE_TRACE_TYPE - BASE_TRACE_TYPE = GPU if isinstance(json_data, dict) else NPU - _type = BASE_TRACE_TYPE - else: - global COMPARISON_TRACE_TYPE - COMPARISON_TRACE_TYPE = GPU if isinstance(json_data, dict) else NPU - _type = COMPARISON_TRACE_TYPE - total_events = json_data.get("traceEvents", []) if _type == GPU else json_data - for event in total_events: - if event.get("cat") == "cpu_op" or event.get("cat") in ("Runtime", "cuda_runtime"): - event_list.append(event) - elif event.get("cat") in flow_cat and event.get("ph") == "s": - flow_start_dict[event.get("id")] = event - elif event.get("cat") in flow_cat and event.get("ph") == "f": - flow_end_dict[event.get("id")] = event - elif _type == GPU and event.get("cat", "").capitalize() == "Kernel".capitalize(): - event_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event - elif _type == NPU and event.get("ph") != "f": - event_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event - - for flow_id, start_flow in flow_start_dict.items(): - end_flow = flow_end_dict.get(flow_id) - if end_flow is None: - continue - kernel_event = event_dict.get("{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), 
end_flow.get("ts"))) - if kernel_event is None: - continue - flow_kernel_dict.setdefault(start_flow.get("ts"), []).append(kernel_event) - return event_list, flow_kernel_dict - - -def get_top_layer_apis(file_path: str, trace_type: int, max_kernel_num: int) -> any: - event_list, flow_kernel_dict = read_json_file(file_path, trace_type) - root_node = TreeBuilder.build_tree(event_list, flow_kernel_dict) - if max_kernel_num is not None: - TreeBuilder.mark_kernel_num(root_node, flow_kernel_dict) - level1_child_nodes = root_node.child_nodes - if not level1_child_nodes: - raise RuntimeError(f"Can't find any torch op in the file: {file_path}") - result_data = [] - for level1_node in level1_child_nodes: - if level1_node.is_step_profiler(): - result_data.extend(level1_node.child_nodes) - else: - result_data.append(level1_node) - return result_data - - -def compare(base_top_layer_apis: list, comparison_top_layer_apis: list, op_name_map: dict) -> list: - result_data = [] - comparison_len, base_len = len(comparison_top_layer_apis), len(base_top_layer_apis) - dp = [[0] * (base_len + 1) for _ in range(comparison_len + 1)] - for comparison_index in range(1, comparison_len + 1): - for base_index in range(1, base_len + 1): - base_name = base_top_layer_apis[base_index - 1].name - comparison_name = comparison_top_layer_apis[comparison_index - 1].name - if op_name_map.get(comparison_name, comparison_name) == op_name_map.get(base_name, base_name): - dp[comparison_index][base_index] = dp[comparison_index - 1][base_index - 1] + 1 - else: - dp[comparison_index][base_index] = max(dp[comparison_index][base_index - 1], - dp[comparison_index - 1][base_index]) - matched_op = [] - comparison_index, base_index = comparison_len, base_len - while comparison_index > 0 and base_index > 0: - base_name = base_top_layer_apis[base_index - 1].name - comparison_name = comparison_top_layer_apis[comparison_index - 1].name - if op_name_map.get(comparison_name, comparison_name) == op_name_map.get(base_name, 
base_name): - matched_op.append([comparison_index - 1, base_index - 1]) - comparison_index -= 1 - base_index -= 1 - continue - if dp[comparison_index][base_index - 1] > dp[comparison_index - 1][base_index]: - base_index -= 1 - else: - comparison_index -= 1 - if not matched_op: - matched_base_index_list = [] - else: - matched_op.reverse() - matched_op = np.array(matched_op) - matched_base_index_list = list(matched_op[:, 1]) - curr_comparison_index = 0 - for base_index, base_api_node in enumerate(base_top_layer_apis): - if base_index not in matched_base_index_list: - result_data.append([base_api_node, None]) - continue - matched_comparison_index = matched_op[matched_base_index_list.index(base_index), 0] - for comparison_index in range(curr_comparison_index, matched_comparison_index): - result_data.append([None, comparison_top_layer_apis[comparison_index]]) - result_data.append([base_api_node, comparison_top_layer_apis[matched_comparison_index]]) - curr_comparison_index = matched_comparison_index + 1 - if curr_comparison_index < len(comparison_top_layer_apis): - for comparison_index in range(curr_comparison_index, len(comparison_top_layer_apis)): - result_data.append([None, comparison_top_layer_apis[comparison_index]]) - return result_data - - -def create_data(base_api_node: TorchOpNode, comparison_api_node: TorchOpNode) -> list: - result_data = [] - base_kernel_list = TreeBuilder.get_total_kernels(base_api_node) if base_api_node else [] - comparison_kernel_list = TreeBuilder.get_total_kernels(comparison_api_node) if comparison_api_node else [] - if not base_kernel_list or not comparison_kernel_list: - diff = NA - else: - base_total_dur = sum([kernel.get("dur", 0) for kernel in base_kernel_list]) - comparison_total_dur = sum([kernel.get("dur", 0) for kernel in comparison_kernel_list]) - diff = (comparison_total_dur - base_total_dur) / base_total_dur - op_name = base_api_node.name if base_api_node else comparison_api_node.name - base_kernel_num, comparison_kernel_num = 
len(base_kernel_list), len(comparison_kernel_list) - base_data = [NA] * len(GPU_HEADER) if BASE_TRACE_TYPE == GPU else [NA] * len(NPU_HEADER) - if base_api_node: - base_data[0] = base_api_node.name - base_data[1] = base_api_node.input_shape - base_data[2] = base_api_node.input_type - comparison_data = [NA] * len(GPU_HEADER) if COMPARISON_TRACE_TYPE == GPU else [NA] * len(NPU_HEADER) - if comparison_api_node: - comparison_data[0] = comparison_api_node.name - comparison_data[1] = comparison_api_node.input_shape - comparison_data[2] = comparison_api_node.input_type - if base_kernel_num == 0 and comparison_kernel_num == 0: - data = base_data + comparison_data + [diff, op_name] - result_data.append(data) - return result_data - for index in range(max(base_kernel_num, comparison_kernel_num)): - base_row_data, comparison_row_data = copy.deepcopy(base_data), copy.deepcopy(comparison_data) - if index < base_kernel_num: - base_kernel = base_kernel_list[index] - if BASE_TRACE_TYPE == GPU: - base_row_data[3] = base_kernel.get("name") - base_row_data[4] = base_kernel.get("dur") - else: - base_row_data[3] = base_kernel.get("name") - base_row_data[4] = base_kernel.get("args", {}).get("Task Id") - base_row_data[5] = base_kernel.get("args", {}).get("Task Type") - base_row_data[6] = base_kernel.get("dur") - if index < comparison_kernel_num: - comparison_kernel = comparison_kernel_list[index] - if COMPARISON_TRACE_TYPE == GPU: - comparison_row_data[3] = comparison_kernel.get("name") - comparison_row_data[4] = comparison_kernel.get("dur") - else: - comparison_row_data[3] = comparison_kernel.get("name") - comparison_row_data[4] = comparison_kernel.get("args", {}).get("Task Id") - comparison_row_data[5] = comparison_kernel.get("args", {}).get("Task Type") - comparison_row_data[6] = comparison_kernel.get("dur") - data = base_row_data + comparison_row_data + [diff, op_name] - result_data.append(data) - return result_data - - -def drill_down(compare_result_data: list, max_kernel_num: int, 
op_name_map: dict) -> list: - result_data = [] - for data in compare_result_data: - base_api = data[0] if data[0] else TorchOpNode() - comparison_api = data[1] if data[1] else TorchOpNode() - if max(base_api.kernel_num, comparison_api.kernel_num) <= max_kernel_num: - result_data.append(data) - continue - result_data.extend(compare(base_api.child_nodes, comparison_api.child_nodes, op_name_map)) - return result_data - - -def have_to_drill_down(compare_result_data: list, max_kernel_num: int) -> bool: - for data in compare_result_data: - base_api = data[0] if data[0] else TorchOpNode() - comparison_api = data[1] if data[1] else TorchOpNode() - if max(base_api.kernel_num, comparison_api.kernel_num) > max_kernel_num: - return True - return False - - -def main(): - global BASE_TRACE, COMPARISON_TRACE - parser = argparse.ArgumentParser(description="Compare trace of GPU and NPU") - parser.add_argument("base_trace_path", help="base trace file path") - parser.add_argument("comparison_trace_path", help="comparison trace file path") - parser.add_argument("--output_path", help="性能数据比对结果的存放路径") - parser.add_argument("--max_kernel_num", type=int, help="每个torch op的kernel数量限制") - parser.add_argument("--op_name_map", type=ast.literal_eval, default={}, - help="配置GPU OP与NPU OP等价的名称映射关系,以字典的形式传入") - args = parser.parse_args() - if args.max_kernel_num is not None and args.max_kernel_num <= LIMIT_KERNEL: - raise RuntimeError(f"Invalid param, --max_kernel_num has to be greater than {LIMIT_KERNEL}") - if not isinstance(args.op_name_map, dict): - raise RuntimeError("Invalid param, --op_name_map must be dict, for example: --op_name_map={'name1':'name2'}") - base_top_layer_apis = get_top_layer_apis(args.base_trace_path, BASE_TYPE, args.max_kernel_num) - if BASE_TRACE_TYPE == GPU: - BASE_TRACE += ' [GPU] : ' + os.path.basename(args.base_trace_path) - else: - BASE_TRACE += ' [NPU] : ' + os.path.basename(args.base_trace_path) - comparison_top_layer_apis = 
get_top_layer_apis(args.comparison_trace_path, COMPARISON_TYPE, args.max_kernel_num) - if COMPARISON_TRACE_TYPE == GPU: - COMPARISON_TRACE += ' [GPU] : ' + os.path.basename(args.comparison_trace_path) - else: - COMPARISON_TRACE += ' [NPU] : ' + os.path.basename(args.comparison_trace_path) - compare_result_data = compare(base_top_layer_apis, comparison_top_layer_apis, args.op_name_map) - - if args.max_kernel_num is not None: - while have_to_drill_down(compare_result_data, args.max_kernel_num): - compare_result_data = drill_down(compare_result_data, args.max_kernel_num, args.op_name_map) - - dir_path = args.output_path if args.output_path else "./" - file_name = "torch_op_compare_{}.xlsx".format(time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))) - result_file_path = os.path.join(dir_path, file_name) - - wb = Workbook() - ws = wb.create_sheet("CompareResult", 0) - ws.sheet_properties.tabColor = '00CED1' - # write headers - base_trace_headers = GPU_HEADER if BASE_TRACE_TYPE == GPU else NPU_HEADER - comparison_trace_headers = GPU_HEADER if COMPARISON_TRACE_TYPE == GPU else NPU_HEADER - headers = base_trace_headers + comparison_trace_headers + [DIFF, OP_NAME_FILTER, DIFF_FILTER] - base_trace_start_column = 0 - comparison_trace_start_column = len(base_trace_headers) - diff_start_column = len(base_trace_headers) + len(comparison_trace_headers) - - for col_index in range(len(headers)): - ws.cell(row=1, column=col_index + 1).border = BORDER - ws.cell(row=1, column=col_index + 1).font = Font(name='Arial', bold=True) - ws.cell(row=2, column=col_index + 1).border = BORDER - ws.cell(row=2, column=col_index + 1).font = Font(name='Arial', bold=True) - header_name = headers[col_index] - if col_index < comparison_trace_start_column: - ws.cell(row=1, column=col_index + 1).value = BASE_TRACE - ws.cell(row=1, column=col_index + 1).fill = FILL_DICT.get(BASE_TYPE) - ws.cell(row=2, column=col_index + 1).fill = FILL_DICT.get(BASE_TYPE) - elif col_index < diff_start_column: - 
ws.cell(row=1, column=col_index + 1).value = COMPARISON_TRACE - ws.cell(row=1, column=col_index + 1).fill = FILL_DICT.get(COMPARISON_TYPE) - ws.cell(row=2, column=col_index + 1).fill = FILL_DICT.get(COMPARISON_TYPE) - else: - ws.cell(row=1, column=col_index + 1).value = header_name - ws.cell(row=1, column=col_index + 1).fill = FILL_DICT.get(header_name) - ws.cell(row=2, column=col_index + 1).value = header_name - dim = ws.cell(row=2, column=col_index + 1).coordinate - ws.column_dimensions[dim[0]].width = COLUMN_WIDTH.get(header_name) - ws.merge_cells(start_row=1, start_column=base_trace_start_column + 1, - end_row=1, end_column=comparison_trace_start_column) - ws.merge_cells(start_row=1, start_column=comparison_trace_start_column + 1, - end_row=1, end_column=diff_start_column) - ws.merge_cells(start_row=1, start_column=headers.index(DIFF) + 1, - end_row=2, end_column=headers.index(DIFF) + 1) - ws.merge_cells(start_row=1, start_column=headers.index(OP_NAME_FILTER) + 1, - end_row=2, end_column=headers.index(OP_NAME_FILTER) + 1) - ws.merge_cells(start_row=1, start_column=headers.index(DIFF_FILTER) + 1, - end_row=2, end_column=headers.index(DIFF_FILTER) + 1) - - # write lines - start_row_index = 3 - for data in compare_result_data: - rows = create_data(data[0], data[1]) - row_number = 0 - for row in rows: - row_index = start_row_index + row_number - ws.cell(row=row_index, column=len(row) + 1).border = BORDER - for index, value in enumerate(row): - if index == headers.index(DIFF): - ws.cell(row=row_index, column=index + 1).number_format = '0.00%' - if value != NA and value < 0: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor='0000FF00') - ws.cell(row=row_index, column=index + 3).fill = PatternFill("solid", fgColor='0000FF00') - if value != NA and value >= 0: - ws.cell(row=row_index, column=index + 1).fill = PatternFill("solid", fgColor='00FF0000') - ws.cell(row=row_index, column=index + 3).fill = PatternFill("solid", fgColor='00FF0000') - 
if index in [key for key, value in enumerate(headers) if value == OP_NAME]: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial', bold=True) - else: - ws.cell(row=row_index, column=index + 1).font = Font(name='Arial') - ws.cell(row=row_index, column=index + 1).value = value - ws.cell(row=row_index, column=index + 1).border = BORDER - row_number += 1 - if row_number > 1: - # 合并单元格 - merged_index = set( - [key for key, value in enumerate(headers) if value in (OP_NAME, INPUT_SHAPE, INPUT_TYPE, DIFF)]) - for col_index in merged_index: - ws.merge_cells(start_row=start_row_index, start_column=col_index + 1, - end_row=start_row_index + row_number - 1, end_column=col_index + 1) - start_row_index = start_row_index + row_number - - wb.save(result_file_path) - wb.close() - - -if __name__ == "__main__": - main() diff --git a/profiler/compare_tools/utils/__init__.py b/profiler/compare_tools/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/compare_tools/utils/args_manager.py b/profiler/compare_tools/utils/args_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..eba55d72e362123011048fbcaa5cdc6977c176be --- /dev/null +++ b/profiler/compare_tools/utils/args_manager.py @@ -0,0 +1,133 @@ +import os.path + +from utils.constant import Constant +from utils.file_reader import FileReader +from utils.profiling_parser import GPUProfilingParser, NPUProfilingParser + + +class Singleton(object): + def __init__(self, cls): + self._cls = cls + self._instance = {} + + def __call__(self): + if self._cls not in self._instance: + self._instance[self._cls] = self._cls() + return self._instance[self._cls] + + +@Singleton +class ArgsManager: + PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} + + def __init__(self): + self._args = None + self._base_profiling_type = None + self._comparison_profiling_type = None + 
self._base_profiling = None + self._comparison_profiling = None + + @property + def base_profiling_type(self): + return self._base_profiling_type + + @property + def comparison_profiling_type(self): + return self._comparison_profiling_type + + @property + def base_profiling(self): + return self._base_profiling + + @property + def comparison_profiling(self): + return self._comparison_profiling + + @classmethod + def check_profiling_path(cls, file_path: str): + if len(file_path) > Constant.MAX_PATH_LENGTH: + msg = f"The length of file path exceeded the maximum value {Constant.MAX_PATH_LENGTH}: {file_path}" + raise RuntimeError(msg) + if not os.path.exists(file_path): + msg = f"Invalid profiling path: {file_path}" + raise RuntimeError(msg) + if os.path.islink(file_path): + msg = f"Invalid profiling path is soft link: {file_path}" + raise RuntimeError(msg) + if not os.access(file_path, os.R_OK): + msg = f"The file path has no read permission: {file_path}" + raise RuntimeError(msg) + + @classmethod + def check_output_path(cls, output_path: str): + if len(output_path) > Constant.MAX_PATH_LENGTH: + msg = f"Invalid param, the length of output_path exceeded the maximum value {Constant.MAX_PATH_LENGTH}" + raise RuntimeError(msg) + if os.path.islink(output_path): + raise RuntimeError("Invalid param, the output_path is soft link") + if not os.path.exists(output_path): + try: + os.makedirs(output_path, mode=Constant.DIR_AUTHORITY) + except Exception: + msg = f"Can't create directory: {output_path}" + raise RuntimeError(msg) + if not os.path.isdir(output_path): + msg = f"Invalid output_path: {output_path}" + raise RuntimeError(msg) + if not os.access(output_path, os.W_OK): + msg = f"The output path has no write permission: {output_path}" + raise RuntimeError(msg) + + def parse_profiling_path(self, file_path: str): + self.check_profiling_path(file_path) + if os.path.isfile(file_path): + (split_file_path, split_file_name) = os.path.split(file_path) + (shot_name, extension) = 
os.path.splitext(split_file_name) + if extension != ".json": + msg = f"Invalid profiling path suffix: {file_path}" + raise RuntimeError(msg) + json_type = FileReader.check_json_type(file_path) + return {Constant.PROFILING_TYPE: json_type, Constant.PROFILING_PATH: file_path, + Constant.TRACE_PATH: file_path} + ascend_output = os.path.join(file_path, "ASCEND_PROFILER_OUTPUT") + profiler_output = ascend_output if os.path.isdir(ascend_output) else file_path + json_path = os.path.join(profiler_output, "trace_view.json") + memory_path = os.path.join(profiler_output, "operator_memory.csv") + if not os.path.isfile(json_path): + msg = f"Invalid profiling path: {file_path}" + raise RuntimeError(msg) + memory_path = memory_path if os.path.isfile(memory_path) else None + return {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, + Constant.TRACE_PATH: json_path, Constant.MEMORY_DATA_PATH: memory_path} + + def init(self, args: any): + self._args = args + if self._args.max_kernel_num is not None and self._args.max_kernel_num <= Constant.LIMIT_KERNEL: + msg = f"Invalid param, --max_kernel_num has to be greater than {Constant.LIMIT_KERNEL}" + raise RuntimeError(msg) + if not isinstance(self._args.op_name_map, dict): + raise RuntimeError( + "Invalid param, --op_name_map must be dict, for example: --op_name_map={'name1':'name2'}") + if self._args.gpu_flow_cat and len(self._args.gpu_flow_cat) > Constant.MAX_FLOW_CAT_LEN: + msg = f"Invalid param, --gpu_flow_cat exceeded the maximum value {Constant.MAX_FLOW_CAT_LEN}" + raise RuntimeError(msg) + + base_profiling_dict = self.parse_profiling_path(self._args.base_profiling_path) + comparison_profiling_dict = self.parse_profiling_path(self._args.comparison_profiling_path) + + if self._args.output_path: + self.check_output_path(self._args.output_path) + + Constant.BASE_PROFILING = Constant.BASE_PROFILING + self._args.base_profiling_path + self._base_profiling_type = base_profiling_dict.get(Constant.PROFILING_TYPE) + 
self._base_profiling = self.PARSER_DICT.get(self._base_profiling_type)(self._args, base_profiling_dict) + + if self._args.base_profiling_path == self._args.comparison_profiling_path: + Constant.COMPARISON_PROFILING = "Same To Base Profiling" + self._comparison_profiling_type = self._base_profiling_type + self._comparison_profiling = self._base_profiling + else: + Constant.COMPARISON_PROFILING = Constant.COMPARISON_PROFILING + self._args.comparison_profiling_path + self._comparison_profiling_type = comparison_profiling_dict.get(Constant.PROFILING_TYPE) + self._comparison_profiling = self.PARSER_DICT.get(self._comparison_profiling_type)(self._args, + comparison_profiling_dict) diff --git a/profiler/compare_tools/utils/compare_event.py b/profiler/compare_tools/utils/compare_event.py new file mode 100644 index 0000000000000000000000000000000000000000..1ce2d820d7fae4e6d9d779e6e99a30882b513f9c --- /dev/null +++ b/profiler/compare_tools/utils/compare_event.py @@ -0,0 +1,50 @@ +from utils.constant import Constant + + +class KernelEvent: + def __init__(self, event: dict, device_type: int): + self._event = event + self._device_type = device_type + + @property + def kernel_name(self) -> str: + return self._event.get("name", "") + + @property + def device_dur(self) -> float: + return self._event.get("dur", 0) + + @property + def task_id(self) -> int: + return self._event.get("args", {}).get("Task Id") + + @property + def task_type(self) -> str: + return self._event.get("args", {}).get("Task Type") + + @property + def compare_index(self) -> float: + return self.device_dur + + def get_record(self) -> list: + if self._device_type == Constant.GPU: + return [self.kernel_name, Constant.NA, self.device_dur] + return [self.kernel_name, f"{self.task_id}, {self.task_type}", self.device_dur] + + +class MemoryEvent: + def __init__(self, event: dict, name: str): + self._event = event + self._name = name + + @property + def compare_index(self) -> float: + return 
self._event.get(Constant.SIZE, 0) + + def get_record(self) -> list: + if self._event.get(Constant.RELEASE_TIME): + duration = float(self._event.get(Constant.RELEASE_TIME)) - self._event.get(Constant.ALLOCATION_TIME, 0) + else: + duration = Constant.NA + name = self._event.get(Constant.NAME, "") if self._event.get(Constant.NAME, "") else self._name + return [name, self._event.get(Constant.SIZE, 0), duration] diff --git a/profiler/compare_tools/utils/constant.py b/profiler/compare_tools/utils/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..ff39744b6471315b18a6bed386a0c21baecc0f86 --- /dev/null +++ b/profiler/compare_tools/utils/constant.py @@ -0,0 +1,94 @@ +from openpyxl.styles import PatternFill, Border, Side + + +class Constant(object): + GPU = 0 + NPU = 1 + NA = 'N/A' + LIMIT_KERNEL = 3 + MAX_PATH_LENGTH = 4096 + MAX_FLOW_CAT_LEN = 20 + MAX_FILE_SIZE = 1024 * 1024 * 1024 * 5 + BYTE_TO_KB = 1024 + YELLOW_COLOR = "FFFF00" + GREEN_COLOR = "0000FF00" + RED_COLOR = "00FF0000" + SUMMARY_LINE_COLOR = "F0F8FF" + + # autority + FILE_AUTHORITY = 0o640 + DIR_AUTHORITY = 0o750 + + PROFILING_TYPE = "profiling type" + ASCEND_OUTPUT_PATH = "ascend output" + # path + PROFILING_PATH = "profiling_path" + TRACE_PATH = "trace_path" + MEMORY_DATA_PATH = "memory_data_path" + + # excel headers + BASE_PROFILING = 'Base Profiling: ' + COMPARISON_PROFILING = 'Comparison Profiling: ' + + OP_NAME = 'Operator Name' + INPUT_SHAPE = 'Input Shape' + INPUT_TYPE = 'Input Type' + + DIFF = 'DIFF: (sum(comparison)-sum(base))/sum(base)' + OP_NAME_FILTER = 'Operator Name Filter' + DIFF_FILTER = 'DIFF Filter' + + HEADERS_FILL = PatternFill("solid", fgColor='00BFFF') # 1E90FF + + BORDER = Border(top=Side(border_style="thin", color='00000000'), + left=Side(border_style="thin", color='00000000'), + right=Side(border_style="thin", color='00000000'), + bottom=Side(border_style="thin", color='00000000')) + + # kernel + KERNEL_NAME = 'Kernel Name' + DEVICE_DUR = 'Device 
Duration(us)' + TASK_INFO = 'Task Info' + GPU_CMP_KERNEL_HEADER = [OP_NAME, INPUT_SHAPE + " / " + KERNEL_NAME, INPUT_TYPE, DEVICE_DUR] + NPU_CMP_KERNEL_HEADER = [OP_NAME, INPUT_SHAPE + " / " + KERNEL_NAME, INPUT_TYPE + " / " + TASK_INFO, DEVICE_DUR] + + # memory + SIZE = "Size(KB)" + TS = "ts" + ALLOCATION_TIME = "Allocation Time(us)" + RELEASE_TIME = "Release Time(us)" + MEMORY_OP_NAME = 'OP Name' + NAME = "Name" + CMP_MEMORY_HEADER = [OP_NAME, INPUT_SHAPE + " / " + MEMORY_OP_NAME, INPUT_TYPE + " / " + RELEASE_TIME, SIZE] + + # compare type + OPERATOR_COMPARE = "OperatorCompare" + MEMORY_COMPARE = "MemoryCompare" + + DEFAULT_WIDTH = 25 + COLUMN_WIDTH = {OP_NAME: 45, INPUT_SHAPE + " / " + MEMORY_OP_NAME: 30, INPUT_SHAPE + " / " + KERNEL_NAME: 30} + + # communication + COMMUNICAT_OP = "Communication OP Name" + TASK_NAME = "Task Name" + CALLS = "Calls" + TOTAL_DURATION = "Total Duration(us)" + AVG_DURATION = "Avg Duration(us)" + MAX_DURATION = "Max Duration(us)" + MIN_DURATION = "Min Duration(us)" + OP_KEY = COMMUNICAT_OP + BASE_CALLS = CALLS + "_x" + BASE_SUM = TOTAL_DURATION + "_x" + BASE_AVG = AVG_DURATION + "_x" + BASE_MAX = MAX_DURATION + "_x" + BASE_MIN = MIN_DURATION + "_x" + COMPARISON_CALLS = CALLS + "_y" + COMPARISON_SUM = TOTAL_DURATION + "_y" + COMPARISON_AVG = AVG_DURATION + "_y" + COMPARISON_MAX = MAX_DURATION + "_y" + COMPARISON_MIN = MIN_DURATION + "_y" + CMP_COMMUNICATION_HEADER = [COMMUNICAT_OP, TASK_NAME, CALLS, TOTAL_DURATION, AVG_DURATION, MAX_DURATION, + MIN_DURATION] + COLUMNS = [COMMUNICAT_OP, CALLS, TOTAL_DURATION, AVG_DURATION, MAX_DURATION, MIN_DURATION] + COLUMN_WIDTH_CLL = {COMMUNICAT_OP: 25, TASK_NAME: 22, CALLS: 10, TOTAL_DURATION: 20, AVG_DURATION: 20, + MAX_DURATION: 20, MIN_DURATION: 20, DIFF: 20} diff --git a/profiler/compare_tools/utils/file_reader.py b/profiler/compare_tools/utils/file_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..b536fce0f7c34e064d96baca0687e369441625b8 --- /dev/null +++ 
b/profiler/compare_tools/utils/file_reader.py @@ -0,0 +1,57 @@ +import csv +import json +import os + +from utils.constant import Constant + + +class FileReader: + + @classmethod + def read_trace_file(cls, file_path: str) -> any: + if not os.path.isfile(file_path): + msg = f"File not exists: {file_path}" + raise RuntimeError(msg) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_FILE_SIZE: + print(f"The file size exceeds the preset value {Constant.MAX_FILE_SIZE / 1024 / 1024}MB, " + f"please check the file: {file_path}") + return [] + try: + with open(file_path, "rt") as file: + json_data = json.loads(file.read()) + except Exception: + msg = f"Can't read file: {file_path}" + raise RuntimeError(msg) + return json_data + + @classmethod + def read_csv_file(cls, file_path: str) -> any: + if not os.path.isfile(file_path): + return [] + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_FILE_SIZE: + print(f"[WARN] The file size exceeds the preset value {Constant.MAX_FILE_SIZE / 1024 / 1024}MB, " + f"please check the file: {file_path}") + return [] + result_data = [] + try: + with open(file_path, newline="") as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + result_data.append(row) + except Exception: + msg = f"Failed to read the file: {file_path}" + raise RuntimeError(msg) + return result_data + + @classmethod + def check_json_type(cls, file_path: str) -> str: + json_data = cls.read_trace_file(file_path) + if isinstance(json_data, dict): + return Constant.GPU + return Constant.NPU diff --git a/profiler/compare_tools/utils/name_function.py b/profiler/compare_tools/utils/name_function.py new file mode 100644 index 0000000000000000000000000000000000000000..b6a0d05f49108e90aa514f772d8872e90c7d563f --- /dev/null +++ b/profiler/compare_tools/utils/name_function.py @@ -0,0 +1,43 @@ +from utils.torch_op_node import TorchOpNode + + +class NameFunction: + 
def __init__(self, args: any): + self.args = args + + @classmethod + def get_name(cls, op_node: TorchOpNode) -> str: + return op_node.name + + @classmethod + def get_full_name(cls, op_node: TorchOpNode) -> str: + if isinstance(op_node.origin_input_shape, list): + data = [] + for dim in op_node.origin_input_shape: + data.append(','.join([str(x) for x in dim])) + input_shape = ';\r\n'.join(data) + return f'{op_node.name}{input_shape}' + return f'{op_node.name}{op_node.input_shape}' + + def get_name_func(self): + if not self.args.op_name_map and not self.args.use_input_shape: + name_func = self.get_name + elif self.args.op_name_map and not self.args.use_input_shape: + name_func = self.get_map_name + elif self.args.op_name_map and not self.args.use_input_shape: + name_func = self.get_full_name + else: + name_func = self.get_full_map_name + return name_func + + def get_map_name(self, op_node: TorchOpNode) -> str: + return self.args.op_name_map.get(op_node.name, op_node.name) + + def get_full_map_name(self, op_node: TorchOpNode) -> str: + if isinstance(op_node.origin_input_shape, list): + data = [] + for dim in op_node.origin_input_shape: + data.append(','.join([str(x) for x in dim])) + input_shape = ';\r\n'.join(data) + return f'{self.args.op_name_map.get(op_node.name, op_node.name)}{input_shape}' + return f'{self.args.op_name_map.get(op_node.name, op_node.name)}{op_node.input_shape}' diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..231f91f2b78c3ab68bf535947181c474bfa7fd62 --- /dev/null +++ b/profiler/compare_tools/utils/profiling_parser.py @@ -0,0 +1,298 @@ +from abc import ABCMeta, abstractmethod +from math import ceil + +from utils.compare_event import KernelEvent +from utils.constant import Constant +from utils.file_reader import FileReader + + +class ProfilingParser(metaclass=ABCMeta): + @abstractmethod + def get_torch_op_data(self): 
class GPUProfilingParser(ProfilingParser):
    """Parses a GPU (chrome-trace style) profiling json into comparable structures.

    Parsing is lazy: each public property parses on first access and caches the
    result in the matching underscore attribute.
    """

    def __init__(self, args: any, path_dict: dict):
        self._args = args
        self._profiling_path = path_dict.get(Constant.PROFILING_PATH)
        # For GPU data the profiling path is the trace json file itself.
        self._json_path = path_dict.get(Constant.PROFILING_PATH)
        self._torch_op_data = None
        self._kernel_dict = None
        self._memory_list = None
        self._communication_data = None
        self._communication_task_data = None

    @property
    def file_path(self) -> str:
        return self._profiling_path

    @property
    def json_path(self) -> str:
        return self._json_path

    @property
    def torch_op_data(self) -> list:
        # Lazily parsed list of cpu_op events.
        if self._torch_op_data is None:
            self.get_torch_op_data()
        return self._torch_op_data

    @property
    def kernel_dict(self) -> dict:
        # Lazily parsed mapping: launcher ts -> [KernelEvent, ...].
        if self._kernel_dict is None:
            self.get_kernel_dict()
        return self._kernel_dict

    @property
    def memory_list(self) -> list:
        # Fixed annotation: this is a list of memory records, not a dict.
        if self._memory_list is None:
            self.get_memory_list()
        return self._memory_list

    @property
    def communication_data(self) -> list:
        # Fixed annotation: this is a list of nccl kernel events, not a dict.
        if self._communication_data is None:
            self.get_communication_data()
        return self._communication_data

    @property
    def communication_task_data(self) -> dict:
        # Always {} for GPU traces; kept for interface parity with the NPU parser.
        if self._communication_task_data is None:
            self.get_communication_data()
        return self._communication_task_data

    def get_torch_op_data(self):
        """Collect all framework-side ops (cat == "cpu_op") from the trace."""
        torch_op_list = []
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        for event in total_events:
            if event.get("cat") == "cpu_op":
                torch_op_list.append(event)
        self._torch_op_data = torch_op_list

    def get_kernel_dict(self):
        """Map each flow-start ts to the device kernels it launched.

        Flow start ("s") events carry the cpu-side ts; flow end ("f") events
        land on the kernel, matched by the (pid, tid, ts) triple.
        """
        flow_kernel_dict = {}
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        flow_cat = self._args.gpu_flow_cat if self._args.gpu_flow_cat else "async_gpu"

        flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {}
        for event in total_events:
            if event.get("cat") == flow_cat and event.get("ph") == "s":
                flow_start_dict[event.get("id")] = event
            elif event.get("cat") == flow_cat and event.get("ph") == "f":
                flow_end_dict[event.get("id")] = event
            elif event.get("cat", "").capitalize() == "Kernel".capitalize():
                # capitalize() on both sides makes the match case-insensitive.
                kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event

        for flow_id, start_flow in flow_start_dict.items():
            end_flow = flow_end_dict.get(flow_id)
            if end_flow is None:
                continue
            kernel_event = kernel_dict.get(
                "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), end_flow.get("ts")))
            if kernel_event is None:
                continue
            flow_kernel_dict.setdefault(start_flow.get("ts"), []).append(KernelEvent(kernel_event, Constant.GPU))
        self._kernel_dict = flow_kernel_dict

    def get_memory_list(self):
        """Pair [memory] allocation/release events per address on device 1.

        A record is emitted when its allocation is released (with release time)
        or when its address is re-allocated before release (without one).
        """
        self._memory_list = []
        memory_events = []
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        for event in total_events:
            if event.get("name", "") == "[memory]":
                memory_events.append(event)
        memory_events.sort(key=lambda x: x.get("ts", 0))
        addr_dict = {}
        for memory_event in memory_events:
            args = memory_event.get("args", {})
            if args.get("Device Type", -1) != 1:
                continue
            allocate_bytes = args.get("Bytes", 0) / Constant.BYTE_TO_KB
            record = addr_dict.get(args.get("Addr"))
            if allocate_bytes > 0:
                if record:
                    # Address re-used before a matching release: flush the old record.
                    self._memory_list.append(record)
                addr_dict[args.get("Addr")] = {Constant.SIZE: allocate_bytes,
                                               Constant.TS: memory_event.get("ts", 0),
                                               Constant.ALLOCATION_TIME: memory_event.get("ts", 0)}
            if allocate_bytes < 0 and record:
                if abs(allocate_bytes) == record.get(Constant.SIZE):
                    record[Constant.RELEASE_TIME] = memory_event.get("ts", 0)
                    self._memory_list.append(record)
                    del addr_dict[args.get("Addr")]

    def get_communication_data(self):
        """Collect nccl communication kernels; GPU traces have no per-op task data."""
        self._communication_data, self._communication_task_data = [], {}
        json_data = FileReader.read_trace_file(self._json_path)
        total_events = json_data.get("traceEvents", [])
        for data in total_events:
            # NOTE(review): exact-case "Kernel" here vs the case-insensitive
            # match in get_kernel_dict -- confirm which traces this must cover.
            if data.get("cat", "") == "Kernel" and data.get("name", "").split("_")[0] == "ncclKernel":
                self._communication_data.append(data)
class NPUProfilingParser(ProfilingParser):
    """Parses an NPU profiling output (trace json + memory csv) into comparable structures.

    Parsing is lazy: each public property parses on first access and caches the
    result in the matching underscore attribute.
    """

    def __init__(self, args: any, path_dict: dict):
        # Fixed annotation: path_dict is a dict of paths, not a str.
        self._args = args
        self._profiling_path = path_dict.get(Constant.PROFILING_PATH)
        self._json_path = path_dict.get(Constant.TRACE_PATH)
        self._memory_data_path = path_dict.get(Constant.MEMORY_DATA_PATH)
        self._torch_op_data = None
        self._kernel_dict = None
        self._memory_list = None
        self._communication_data = None
        self._communication_task_data = None

    @property
    def file_path(self) -> str:
        return self._profiling_path

    @property
    def json_path(self) -> str:
        return self._json_path

    @property
    def torch_op_data(self) -> list:
        # Lazily parsed list of cpu_op events.
        if self._torch_op_data is None:
            self.get_torch_op_data()
        return self._torch_op_data

    @property
    def kernel_dict(self) -> dict:
        # Lazily parsed mapping: launcher ts -> [KernelEvent, ...].
        if self._kernel_dict is None:
            self.get_kernel_dict()
        return self._kernel_dict

    @property
    def memory_list(self) -> list:
        # Fixed annotation: this is a list of memory records, not a dict.
        if self._memory_list is None:
            self.get_memory_list()
        return self._memory_list

    @property
    def communication_data(self) -> list:
        # Fixed annotation: this is a list of communication op events, not a dict.
        if self._communication_data is None:
            self.get_communication_data()
        return self._communication_data

    @property
    def communication_task_data(self) -> dict:
        # Mapping: lowered op token (e.g. "allreduce") -> [task events].
        if self._communication_task_data is None:
            self.get_communication_data()
        return self._communication_task_data

    def get_torch_op_data(self):
        """Collect all framework-side ops (cat == "cpu_op") from the trace."""
        torch_op_list = []
        json_data = FileReader.read_trace_file(self._json_path)
        for event in json_data:
            if event.get("cat") == "cpu_op":
                torch_op_list.append(event)
        self._torch_op_data = torch_op_list

    def get_kernel_dict(self):
        """Map each flow-start ts to the device kernels it launched.

        Flow start ("s") events carry the cpu-side ts; flow end ("f") events
        land on the kernel, matched by the (pid, tid, ts) triple. Any complete
        ("X") non-cpu_op event is considered a device-side kernel candidate.
        """
        flow_kernel_dict = {}
        json_data = FileReader.read_trace_file(self._json_path)
        flow_cat = "async_npu"

        flow_start_dict, flow_end_dict, kernel_dict = {}, {}, {}
        for event in json_data:
            if event.get("cat") == flow_cat and event.get("ph") == "s":
                flow_start_dict[event.get("id")] = event
            elif event.get("cat") == flow_cat and event.get("ph") == "f":
                flow_end_dict[event.get("id")] = event
            elif event.get("ph") == "X" and event.get("cat") != 'cpu_op':
                kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event

        for flow_id, start_flow in flow_start_dict.items():
            end_flow = flow_end_dict.get(flow_id)
            if end_flow is None:
                continue
            kernel_event = kernel_dict.get(
                "{}-{}-{}".format(end_flow.get("pid"), end_flow.get("tid"), end_flow.get("ts")))
            if kernel_event is None:
                continue
            flow_kernel_dict.setdefault(start_flow.get("ts"), []).append(KernelEvent(kernel_event, Constant.NPU))
        self._kernel_dict = flow_kernel_dict

    def get_memory_list(self):
        """Build op-memory records by joining the memory csv with trace events.

        cann-side rows are re-timestamped to the enqueue ts recovered through
        the matching dequeue event; other rows keep their allocation time.

        Fixed: the original tested truthiness of data.get("cat", "enqueue") /
        data.get("cat", "dequeue"), which matched almost every event, left
        dequeue_data empty, and made the cann matching dead code.
        """
        self._memory_list = []
        enqueue_dict, dequeue_data = {}, []
        json_data = FileReader.read_trace_file(self._json_path)
        for data in json_data:
            if data.get("cat") == "enqueue":
                enqueue_dict[data.get("args", {}).get("correlation_id", "")] = data
            elif data.get("cat") == "dequeue":
                dequeue_data.append(data)

        if not self._memory_data_path:
            return
        memory_data = FileReader.read_csv_file(self._memory_data_path)
        for data in memory_data:
            if "cann::" in data.get("Name", ""):
                ts_time = float(data.get(Constant.ALLOCATION_TIME, 0))
                match_dequeue_data = self._match_cann_memory_data(dequeue_data, ts_time)
                if match_dequeue_data is not None:
                    correlation_id = match_dequeue_data.get("args", {}).get("correlation_id", "")
                    # .get guards against a dequeue whose enqueue is missing (was a KeyError).
                    ts = enqueue_dict.get(correlation_id, {}).get("ts", 0)
                    self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)), Constant.TS: ts,
                                              Constant.NAME: data.get(Constant.NAME, ""),
                                              Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)),
                                              Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)})
            else:
                # NOTE(review): source indentation was ambiguous here; treated
                # as the non-cann branch (rows recorded at allocation time) --
                # confirm against the original layout.
                self._memory_list.append({Constant.SIZE: float(data.get(Constant.SIZE, 0)),
                                          Constant.TS: float(data.get(Constant.ALLOCATION_TIME, 0)),
                                          Constant.ALLOCATION_TIME: float(data.get(Constant.ALLOCATION_TIME, 0)),
                                          Constant.RELEASE_TIME: data.get(Constant.RELEASE_TIME, 0)})

    @classmethod
    def _match_cann_memory_data(cls, dequeue_data: list, ts_time: float):
        """Return the dequeue event whose [ts, ts+dur) span covers ts_time, else None.

        Upper-biased binary search for the last event starting at or before
        ts_time; dequeue_data must be sorted by ts.
        """
        if not dequeue_data:
            return None
        right = len(dequeue_data) - 1
        left = 0
        while right > left:
            mid = left + ceil((right - left) / 2)
            if ts_time >= dequeue_data[mid].get("ts", 0):
                left = mid
            else:
                right = mid - 1
        end_time = dequeue_data[left].get("ts", 0) + dequeue_data[left].get("dur", 0)
        return dequeue_data[left] if end_time > ts_time else None

    def get_communication_data(self):
        """Collect communication op events and group their device tasks.

        Finds the "Communication OP" thread via its thread_name metadata event,
        takes that thread's "X" events as communication ops, then attributes
        every time-overlapping "X" event from other threads of the same pid to
        the op's lowered name token (second "_"-separated field).
        """
        self._communication_data, self._communication_task_data = [], {}
        pid, tid = None, None
        json_data = FileReader.read_trace_file(self._json_path)
        for data in json_data:
            if data.get("ph", "") == "M" and data.get("name", "") == "thread_name" \
                    and data.get("args", {}).get("name", "") == "Communication OP":
                pid = data.get("pid", "")
                tid = data.get("tid", "")
        if not pid or not tid:
            return
        for data in json_data:
            if data.get("ph", "") == "X" and data.get("pid", "") == pid and data.get("tid", "") == tid:
                self._communication_data.append(data)
        if not self._communication_data:
            return
        for data in json_data:
            # Skip non-"X" events, other pids, and the communication thread itself.
            if data.get("ph", "") != "X" or data.get("pid", "") != pid or data.get("tid", "") == tid:
                continue
            ts = data.get("ts", 0)
            for communication_op in self._communication_data:
                if ts < communication_op.get("ts", 0) or ts - communication_op.get("ts", 0) > communication_op.get(
                        "dur", 0):
                    continue
                name_list = communication_op.get("name", "").split("_")
                if len(name_list) >= 2:
                    self._communication_task_data.setdefault(name_list[1].lower(), []).append(data)
                break
file mode 100644 index 0000000000000000000000000000000000000000..8995dafc1e48d71226903912b243c294e192784a --- /dev/null +++ b/profiler/compare_tools/utils/torch_op_node.py @@ -0,0 +1,93 @@ +from math import ceil + +from utils.compare_event import MemoryEvent +from utils.constant import Constant + + +class TorchOpNode: + def __init__(self, event=None, parent_node=None): + self._event = event + self._parent_node = parent_node + self._child_nodes = [] + self._kernel_list = [] + self._kernel_num = 0 + self._memory_allocated_list = [] + + @property + def start_time(self): + return self._event.get("ts", 0) + + @property + def end_time(self): + return self._event.get("ts", 0) + self._event.get("dur", 0) + + @property + def name(self): + return str(self._event.get("name", Constant.NA)) + + @property + def input_shape(self): + return str(self._event.get("args", {}).get("Input Dims", Constant.NA)) + + @property + def origin_input_shape(self): + return self._event.get("args", {}).get("Input Dims", Constant.NA) + + @property + def input_type(self): + return str(self._event.get("args", {}).get("Input type", Constant.NA)) + + @property + def call_stack(self): + return str(self._event.get("args", {}).get("Call stack", Constant.NA)) + + @property + def parent(self): + return self._parent_node + + @property + def child_nodes(self): + return self._child_nodes + + @property + def kernel_list(self): + return self._kernel_list + + @property + def kernel_num(self): + return self._kernel_num + + @property + def memory_allocated(self): + return self._memory_allocated_list + + def add_child_node(self, child_node): + self._child_nodes.append(child_node) + + def set_kernel_list(self, kernel_list: list): + self._kernel_list = kernel_list + + def add_kernel_num(self, kernel_num: int): + self._kernel_num += kernel_num + + def set_memory_allocated(self, memory_allocated: dict): + self._memory_allocated_list.append(MemoryEvent(memory_allocated, self.name)) + + def is_step_profiler(self) -> bool: 
class TreeBuilder:
    """Builds the torch-op call tree and attaches kernels / memory records to it."""

    @classmethod
    def build_tree(cls, event_list: list) -> TorchOpNode:
        """Build a containment tree from cpu_op events sorted by start time.

        An event becomes a child of the deepest currently-open op whose time
        span still covers its start; otherwise we unwind toward the root.
        Note: sorts event_list in place.
        """
        root_node = TorchOpNode()
        event_list.sort(key=lambda x: x.get("ts", 0))
        last_node = root_node
        for event in event_list:
            while last_node:
                if last_node == root_node or event.get("ts", 0) < last_node.end_time:
                    tree_node = TorchOpNode(event, last_node)
                    last_node.add_child_node(tree_node)
                    last_node = tree_node
                    break
                last_node = last_node.parent
        return root_node

    @classmethod
    def update_tree_node(cls, root_node: TorchOpNode, flow_kernel_dict: dict = None,
                         memory_allocated_list: list = None):
        """Attach kernels (keyed by launch ts) and memory records to the deepest covering op.

        Each kernel count is added to every op on the path; the kernel list and
        each memory event land on the deepest matching node.

        Fixed: mutable default arguments replaced with None sentinels, and a
        kernel ts with no matching child now skips that entry (continue)
        instead of aborting the whole update (return), matching the memory
        branch below.
        """
        flow_kernel_dict = flow_kernel_dict or {}
        memory_allocated_list = memory_allocated_list or []
        for ts, kernel_list in flow_kernel_dict.items():
            matched_child_node = root_node.match_child_node(ts)
            if not matched_child_node:
                continue
            kernel_num = len(kernel_list)
            node_queue = Queue()
            node_queue.put(matched_child_node)
            while not node_queue.empty():
                tree_node = node_queue.get()
                tree_node.add_kernel_num(kernel_num)
                matched_child_node = tree_node.match_child_node(ts)
                if matched_child_node:
                    node_queue.put(matched_child_node)
                else:
                    # Deepest covering node: this op owns the kernel list.
                    tree_node.set_kernel_list(kernel_list)
        for memory_allocated in memory_allocated_list:
            ts = memory_allocated.get(Constant.TS)
            matched_child_node = root_node.match_child_node(ts)
            if not matched_child_node:
                continue
            node_queue = Queue()
            node_queue.put(matched_child_node)
            while not node_queue.empty():
                tree_node = node_queue.get()
                matched_child_node = tree_node.match_child_node(ts)
                if matched_child_node:
                    node_queue.put(matched_child_node)
                else:
                    # Deepest covering node: this op owns the memory record.
                    tree_node.set_memory_allocated(memory_allocated)

    @classmethod
    def get_total_compare_event(cls, root_node: TorchOpNode, compare_type: str) -> list:
        """Flatten all attached events of the requested kind below root_node.

        Returns None implicitly for an unknown compare_type (callers pass only
        the two known constants).
        """
        if compare_type == Constant.MEMORY_COMPARE:
            return cls._get_total_memory(root_node)
        elif compare_type == Constant.OPERATOR_COMPARE:
            return cls._get_total_kernels(root_node)

    @classmethod
    def _get_total_kernels(cls, root_node: TorchOpNode) -> list:
        """Breadth-first collection of every node's kernel_list."""
        result_list = []
        result_list.extend(root_node.kernel_list)
        node_queue = Queue()
        for child_node in root_node.child_nodes:
            node_queue.put(child_node)
        while not node_queue.empty():
            tree_node = node_queue.get()
            result_list.extend(tree_node.kernel_list)
            for child_node in tree_node.child_nodes:
                node_queue.put(child_node)
        return result_list

    @classmethod
    def _get_total_memory(cls, root_node: TorchOpNode) -> list:
        """Breadth-first collection of every node's memory_allocated events."""
        result_list = []
        result_list.extend(root_node.memory_allocated)
        node_queue = Queue()
        for child_node in root_node.child_nodes:
            node_queue.put(child_node)
        while not node_queue.empty():
            tree_node = node_queue.get()
            result_list.extend(tree_node.memory_allocated)
            for child_node in tree_node.child_nodes:
                node_queue.put(child_node)
        return result_list