From cc3d23e2c24d3a8058b83f8b39e606de37bb5d1b Mon Sep 17 00:00:00 2001
From: kdaqiu <kdaqiu@163.com>
Date: Fri, 1 Dec 2023 16:57:44 +0800
Subject: [PATCH] Add Compute Advice.

---
 .../advice_factory/compute_advice_factory.py  |  40 +++++
 .../common_func_advisor/constant.py           |  52 ++++++
 .../common_func_advisor/utils.py              |  77 +++++++++
 .../compute_advice/__init__.py                |  14 ++
 .../compute_advice/compute_advice_base.py     |  68 ++++++++
 .../compute_advice/npu_fused/__init__.py      |  14 ++
 .../compute_advice/npu_fused/analyser.py      |  80 +++++++++
 .../compute_advice/npu_fused/op_perf.py       | 155 ++++++++++++++++++
 .../compute_advice/npu_fused_advice.py        |  49 ++++++
 profiler/advisor/advisor_backend/interface.py |   4 +-
 profiler/advisor/compute_perf_analysis.ipynb  | 120 ++++++++++++++
 11 files changed, 672 insertions(+), 1 deletion(-)
 create mode 100644 profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py
 create mode 100644 profiler/advisor/advisor_backend/common_func_advisor/utils.py
 create mode 100644 profiler/advisor/advisor_backend/compute_advice/__init__.py
 create mode 100644 profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py
 create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py
 create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/analyser.py
 create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py
 create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py
 create mode 100644 profiler/advisor/compute_perf_analysis.ipynb

diff --git a/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py
new file mode 100644
index 00000000000..88e50d660d0
--- /dev/null
+++ b/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from common_func.path_manager import PathManager
+from common_func_advisor.constant import Constant
+from advice_factory.advice_factory import AdviceFactory
+from compute_advice.npu_fused_advice import NpuFusedAdvice
+
+
+class ComputeAdviceFactory(AdviceFactory):
+    ADVICE_LIB = {  # advice name -> advice class instantiated by produce_advice()
+        Constant.NPU_FUSED: NpuFusedAdvice,
+    }
+
+    def __init__(self, collection_path: str):
+        super().__init__(collection_path)
+
+    def path_check(self):
+        """
+        Validate self.collection_path via PathManager.check_input_directory_path.
+        """
+        PathManager.check_input_directory_path(self.collection_path)
+
+    def produce_advice(self, advice: str):
+        """
+        Check the advice name, then build its ADVICE_LIB class on collection_path and return run()'s result.
+        """
+        self.advice_check(advice)
+        return self.ADVICE_LIB.get(advice)(self.collection_path).run()
\ No newline at end of file
diff --git a/profiler/advisor/advisor_backend/common_func_advisor/constant.py b/profiler/advisor/advisor_backend/common_func_advisor/constant.py
index 9cf2080acf8..4cdfb908f88 100644
--- a/profiler/advisor/advisor_backend/common_func_advisor/constant.py
+++ b/profiler/advisor/advisor_backend/common_func_advisor/constant.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from enum import Enum
 
 class Constant:
     MAX_INPUT_MODE_LEN = 30
@@ -26,8 +27,59 @@ class Constant:
     SLOW_RANK = "slow rank"
     SLOW_LINK = "slow link"
     KERNEL = "kernel"
+    
+    # compute
+    NPU_FUSED = "npu_fused"
 
     COLLECTION_PATH = "collection_path"
     CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output"
     CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv"
     CLUSTER_COMM_JSON = "cluster_communication.json"
+    
+    DTYPE_SIZE_MAP = {"int8": 1, "uint8": 1,
+                      "int16": 2, "uint16": 2,
+                      "int32": 4, "uint32": 4,
+                      "int64": 8, "uint64": 8,
+                      "float16": 2,
+                      "bfloat16": 2,
+                      "bf16": 2,
+                      "dt_bf16": 2,
+                      "float32": 4,
+                      "float": 4,
+                      "float64": 8,
+                      "complex64": 8,
+                      "complex128": 16,
+                      "bool": 1}
+    TP_THRESHOLD = 1150
+    # pattern_dict key: pattern, value: pattern name
+    PATTERN_DICT = {("Add", "DropOutDoMask", "Add"): "bias_dropout_add",
+                    ("BatchMatMul", "Mul", "Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast", "DropOutDoMask",
+                     "AsStrided", "BatchMatMul", "Transpose"): "FA",
+                    ("Transpose", "Transpose", "Transpose", "Mul", "Transpose", "BatchMatMulV2", "MaskedFill",
+                     "Cast", "SoftmaxV2", "Cast", "DropOutDoMask", "BatchMatMulV2", "Transpose"): "FA",
+                    ("Transpose", "BatchMatMulV2", "Transpose", "Transpose", "BatchMatMulV2", "ZerosLike",
+                     "DropOutDoMask", "Cast", "SoftmaxGrad", "Cast", "MaskedFill", "BatchMatMulV2",
+                     "BatchMatMulV2", "Mul"): "FA",
+                    ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Cast", "Cast", "Mul", "Cast", "Cast",
+                     "Mul", "Cast"): "RMSNORM",
+                    ("Cast", "LayerNorm", "Cast"): "LayerNorm",
+                    ("Add", "LayerNorm"): "AddLayerNorm",
+                    ("Add", "LayerNormV3"): "AddLayerNorm",
+                    ("Gelu", "Add"): "GeluAdd",
+                    ("Cast", "Square", "MemSet", "ReduceMean", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "RMSNorm",
+                    ("BatchMatMul", "RealDiv", "Add", "Maximum", "SoftmaxV2", "Cast", "BatchMatMul"): "FA",
+                    ("BatchMatMulV2", "RealDiv", "Add", "Cast", "Maximum", "Cast", "SoftmaxV2", "AsStrided",
+                     "BatchMatMulV2"): "FA",
+                    ("BatchMatMulV2", "RealDiv", "Add", "Cast", "SoftmaxV2", "Cast", "BroadcastTo",
+                     "BatchMatMulV2"): "FA",
+                    ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Cast", "Mul", "Add"): "RotaryMul",
+                    ("Mul", "AsStrided", "Neg", "AsStrided", "ConcatD", "Mul", "Add"): "RotaryMul",
+                    ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul", "Add"): "RotaryMul",
+                    ("MatMulV2", "Swish", "MatMulV2", "Mul", "MatMulV2"): "FFN",
+                    ("Transpose", "Transpose", "GatherElement", "Transpose"): "GatherElement"}
+    
+class PerfColor(Enum):
+    WHITE = 0
+    GREEN = 1
+    YELLOW = 2
+    RED = 3
diff --git a/profiler/advisor/advisor_backend/common_func_advisor/utils.py b/profiler/advisor/advisor_backend/common_func_advisor/utils.py
new file mode 100644
index 00000000000..c1fe8e007b5
--- /dev/null
+++ b/profiler/advisor/advisor_backend/common_func_advisor/utils.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import os
+
+from .constant import Constant
+
+
+class Utils:
+    @staticmethod
+    def get_dtype_size(dtype_str):
+        """
+        Returns the size in bytes of one element of the named data type (0 if the name is unknown).
+        """
+        return Constant.DTYPE_SIZE_MAP.get(dtype_str.lower(), 0)
+
+    @staticmethod
+    def get_element_count(shape):
+        """
+        Returns the product of the dimensions in shape; an empty shape counts as one element.
+        """
+        # The initial value 1 avoids a TypeError on an empty shape and sends every dimension through int().
+        return functools.reduce(lambda x, y: int(x) * int(y), shape, 1)
+
+    @staticmethod
+    def shape_str_to_tuple(ori_str):
+        """
+        Parses a shape string such as '"1,2;3"' into ((1, 2), (3,)); empty dimensions become 0.
+        Returns [] when ori_str is not a string or holds no shape.
+        """
+        if not isinstance(ori_str, str):
+            return []
+        s = ori_str.strip('"').strip(';')
+        if not s:
+            return []
+        tuple_result = []
+        for pair in s.split(';'):
+            elements = pair.split(',')
+            tuple_result.append(tuple(int(e) if e != "" else 0 for e in elements))
+        return tuple(tuple_result)
+
+    @staticmethod
+    def dtype_str_to_tuple(ori_str):
+        """
+        Splits a ';'-separated dtype string into a tuple of names; returns [] for non-string or empty input.
+        """
+        if not isinstance(ori_str, str):
+            return []
+        s = ori_str.strip('"').strip(';')
+        if not s:
+            return []
+        return tuple(s.split(';'))
+
+    @staticmethod
+    def str_to_float(s):
+        """
+        Converts a string to float; returns 0.0 for non-string input or an unparsable string.
+        """
+        if not isinstance(s, str):
+            return 0.0
+        try:
+            return float(s)
+        except ValueError:
+            return 0.0
diff --git a/profiler/advisor/advisor_backend/compute_advice/__init__.py b/profiler/advisor/advisor_backend/compute_advice/__init__.py
new file mode 100644
index 00000000000..8400fd5ecd1
--- /dev/null
+++ b/profiler/advisor/advisor_backend/compute_advice/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py
new file mode 100644
index 00000000000..25b48d1b48c
--- /dev/null
+++ b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from advice_base import AdviceBase
+from abc import abstractmethod
+import os
+import csv
+from collections import defaultdict
+
+class ComputeAdviceBase(AdviceBase):
+    """Base of compute advice: resolves the kernel_details.csv to analyse and fills output_format_data."""
+    def __init__(self, collection_path: str):
+        super().__init__(collection_path)
+        self.kernel_details_path = ""
+        self.has_preparse = False
+        self.preparse_data = defaultdict(list)
+
+    def path_check(self):
+        """
+        check whether input path is valid and resolve self.kernel_details_path; return True if it is usable
+        """
+        if not os.path.exists(self.collection_path):
+            print("[ERROR] Path: {} is not exist.".format(self.collection_path))
+            return False
+        # normpath drops a trailing separator, so ".../xxx_ascend_pt/" is still accepted as a directory input
+        if os.path.isdir(self.collection_path) and os.path.normpath(self.collection_path).endswith("ascend_pt"):
+            self.kernel_details_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", "kernel_details.csv")
+            if not os.path.exists(self.kernel_details_path):
+                print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format(os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT")))
+                return False
+        elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path).endswith(".csv"):
+            self.kernel_details_path = self.collection_path
+        else:
+            print("[ERROR] Please input ascend_pt or kernel_details.csv")
+            return False
+        print("[INFO] Start to analyse the target file: {}".format(self.kernel_details_path))
+        self.preparse()
+        return True
+
+    @abstractmethod
+    def run(self):
+        """analyze profiling data and advice"""
+
+    @abstractmethod
+    def output(self):
+        """
+        copy cur_data, cur_bottleneck and cur_advice into output_format_data
+        """
+        self.output_format_data[self.DATA] = self.cur_data
+        self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck
+        self.output_format_data[self.ADVICE] = self.cur_advice
+
+    def preparse(self):
+        """placeholder called by path_check(): nothing is parsed here yet, it only honours has_preparse"""
+        if self.has_preparse:
+            return
diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py
new file mode 100644
index 00000000000..8400fd5ecd1
--- /dev/null
+++ b/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/analyser.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/analyser.py
new file mode 100644
index 00000000000..c43a7b42080
--- /dev/null
+++ b/profiler/advisor/advisor_backend/compute_advice/npu_fused/analyser.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+import multiprocessing
+
+from .op_perf import OpPerfFactory
+from common_func_advisor.constant import Constant
+
+class Analyser:
+    def __init__(self, path) -> None:
+        self._path = path
+
+    def process(self):
+        """Pre-parse the csv rows in parallel and return one summary row per pattern in Constant.PATTERN_DICT."""
+        df = pd.read_csv(self._path)
+        # Data pre-parsing; the with-block releases the worker processes once map() has returned.
+        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
+            result = pool.map(self.update_op_row, df.iterrows())
+
+        preparse_df = pd.DataFrame(result)
+        # Check whether fusable operator sequences exist.
+        op_type_list = preparse_df["Type"].tolist()
+        duration_list = preparse_df["Duration(us)"].tolist()
+        result_list = []
+        for pattern in Constant.PATTERN_DICT:
+            result_list.extend(self.find_all_sub_lists(op_type_list, duration_list, pattern))
+        data_frame = pd.DataFrame(result_list)
+        data_frame.columns = ["pattern_name", "pattern", "len", "count", "duration sum(us)", "op durations(us)",
+                              "index"]
+        return data_frame
+
+    @staticmethod
+    def update_op_row(row):
+        """Build the OpPerf for one (index, row) pair from iterrows() and return its updated row."""
+        return OpPerfFactory.build(row[1]).update()
+
+    @staticmethod
+    def find_all_sub_lists(op_type_list, duration_list, expect_sub_list):
+        """Return one row [pattern_name, pattern, len, count, duration sum, per-op duration sums, start
+        indexes] for expect_sub_list; a pattern that never occurs yields 0 in the last five fields."""
+        len_sub_list = len(expect_sub_list)
+        expect_sub_list = tuple(expect_sub_list)
+        # Maps the matched pattern to [count, start indexes, duration sum, per-op duration sums, length].
+        sublist_dict = {}
+        # Slide a window of the pattern length over the op list and compare at every position.
+        for i in range(len(op_type_list) - len_sub_list + 1):
+            sublist = tuple(op_type_list[i:i + len_sub_list])
+            if sublist != expect_sub_list:
+                continue
+            if sublist in sublist_dict:
+                sublist_dict[sublist][0] += 1
+                sublist_dict[sublist][1].append(i)
+                sublist_dict[sublist][2] += sum(duration_list[i:i + len_sub_list])
+                sublist_dict[sublist][3] = [a + b for a, b in
+                                            zip(sublist_dict[sublist][3], duration_list[i:i + len_sub_list])]
+            else:
+                sublist_dict[sublist] = [1, [i], sum(duration_list[i:i + len_sub_list]),
+                                         duration_list[i:i + len_sub_list], len_sub_list]
+        repeated_sublists = []
+        for sublist, (count, index, duration_sum, op_durations, sublist_len) in sublist_dict.items():
+            pattern_name = Constant.PATTERN_DICT.get(sublist, "unknown")
+            op_durations = [round(num, 2) for num in op_durations]
+            repeated_sublists.append([pattern_name, sublist, sublist_len, count, duration_sum, op_durations, index])
+        if not sublist_dict:
+            pattern_name = Constant.PATTERN_DICT.get(expect_sub_list, "unknown")
+            repeated_sublists.append([pattern_name, expect_sub_list, 0, 0, 0, 0, 0])
+        return repeated_sublists
\ No newline at end of file
diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py
new file mode 100644
index 00000000000..160d88f6741
--- /dev/null
+++ b/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict
+from common_func_advisor.constant import Constant, PerfColor
+from common_func_advisor.utils import Utils
+
+class OpPerfFactory:
+    @classmethod
+    def build(cls, op_row: Dict):
+        """Wrap op_row in the OpPerf class selected by its "Accelerator Core" value."""
+        core_type = op_row.get("Accelerator Core")
+        if core_type == "AI_CORE":
+            return CubeOpPerf(op_row)
+        is_vector = core_type in ("AI_VECTOR_CORE", "MIX_AIV")
+        return VecOpPerf(op_row) if is_vector else OpPerf(op_row)
+        
+class OpPerf:
+    """View of one kernel_details row; update() adds size, throughput and color columns to it."""
+    def __init__(self, op_row: Dict):
+        self.row = op_row
+        self.model_name = op_row.get("Model Name")
+        self.model_id = op_row.get("Model ID")
+        self.task_id = op_row.get("Task ID")
+        self.stream_id = op_row.get("Stream ID")
+        self.infer_id = op_row.get("Infer ID")
+        self.op_name = op_row.get("Name")
+        self.op_type = op_row.get("Type")
+        self.task_type = op_row.get("Accelerator Core")
+        self.task_start_time = op_row.get("Start Time(us)")
+        self.task_duration = op_row.get("Duration(us)")
+        self.task_wait_time = op_row.get("Wait Time(us)")
+        self.block_dim = op_row.get("Block Dim")
+        self.mix_block_dim = op_row.get("Mix Block Dim")
+
+        self.hf32_eligible = op_row.get("HF32 Eligible")
+        self.input_shapes = op_row.get("Input Shapes")
+        self.input_data_types = op_row.get("Input Data Types")
+        self.input_formats = op_row.get("Input Formats")
+        self.output_shapes = op_row.get("Output Shapes")
+        self.output_data_types = op_row.get("Output Data Types")
+        self.output_formats = op_row.get("Output Formats")
+        self.context_id = op_row.get("Context ID")
+        self.aicore_time = op_row.get("aicore_time(us)")
+        self.aic_total_cycles = op_row.get("aic_total_cycles")
+
+        self.aic_mac_time = op_row.get("aic_mac_time(us)")
+        self.aic_mac_ratio = op_row.get("aic_mac_ratio")
+        self.aic_scalar_time = op_row.get("aic_scalar_time(us)")
+        self.aic_scalar_ratio = op_row.get("aic_scalar_ratio")
+        self.aic_mte1_time = op_row.get("aic_mte1_time(us)")
+        self.aic_mte1_ratio = op_row.get("aic_mte1_ratio")
+        self.aic_mte2_time = op_row.get("aic_mte2_time(us)")
+        self.aic_mte2_ratio = op_row.get("aic_mte2_ratio")
+        self.aic_fixpipe_time = op_row.get("aic_fixpipe_time(us)")
+        self.aic_fixpipe_ratio = op_row.get("aic_fixpipe_ratio")
+        self.aic_icache_miss_rate = op_row.get("aic_icache_miss_rate")
+        self.aiv_time = op_row.get("aiv_time(us)")
+        self.aiv_total_cycles = op_row.get("aiv_total_cycles")
+        self.aiv_vec_time = op_row.get("aiv_vec_time(us)")
+        self.aiv_vec_ratio = op_row.get("aiv_vec_ratio")
+        self.aiv_scalar_time = op_row.get("aiv_scalar_time(us)")
+        self.aiv_scalar_ratio = op_row.get("aiv_scalar_ratio")
+        self.aiv_mte2_time = op_row.get("aiv_mte2_time(us)")
+        self.aiv_mte2_ratio = op_row.get("aiv_mte2_ratio")
+        self.aiv_mte3_time = op_row.get("aiv_mte3_time(us)")
+        self.aiv_mte3_ratio = op_row.get("aiv_mte3_ratio")
+        self.aiv_icache_miss_rate = op_row.get("aiv_icache_miss_rate")
+        self.cube_utilization = op_row.get("cube_utilization( %)")
+
+    def get_mac_ratio(self):
+        return self.aic_mac_ratio
+
+    def get_size(self, shapes_str, dtypes_str):
+        """Return the total byte size of the tensors described by the shape and dtype strings."""
+        shapes = Utils.shape_str_to_tuple(shapes_str)
+        dtypes = Utils.dtype_str_to_tuple(dtypes_str)
+        # More shapes than dtypes cannot be sized, so the whole group counts as 0 bytes.
+        if len(shapes) > len(dtypes):
+            return 0
+        # Dtypes without a shape are treated as single-element tensors.
+        if len(shapes) < len(dtypes):
+            shapes = list(shapes)
+            shapes.extend([(1,)] * (len(dtypes) - len(shapes)))
+        all_size = 0
+        for shape, dtype in zip(shapes, dtypes):
+            all_size += Utils.get_element_count(shape) * Utils.get_dtype_size(dtype)
+        return all_size
+
+    def get_calc_size(self):  # input + output tensor bytes divided by 1e6 (stored as "size(MB)")
+        input_size = self.get_size(self.input_shapes, self.input_data_types)
+        output_size = self.get_size(self.output_shapes, self.output_data_types)
+        return (input_size + output_size) / 1e6
+
+    def get_throughput(self):  # size per "Duration(us)" scaled by 1e3 (stored as "throughput(GB/s)")
+        return self.get_calc_size() / self.task_duration * 1e3
+
+    def is_vector_bound(self):
+        return Utils.str_to_float(self.aiv_vec_ratio) > 0.8
+
+    def is_mte_bound(self):
+        return Utils.str_to_float(self.aiv_mte2_ratio) + Utils.str_to_float(self.aiv_mte3_ratio) > 0.8
+
+    def get_perf_color(self):
+        return PerfColor.WHITE
+
+    def update(self):
+        """Add "size(MB)", "throughput(GB/s)" and "color" to the row and return the row."""
+        try:
+            self.row["size(MB)"] = self.get_calc_size()
+            self.row["throughput(GB/s)"] = self.get_throughput()
+            self.row["color"] = self.get_perf_color().name
+        except Exception as e:
+            # Best effort: a row that cannot be analysed is reported and returned as far as it got.
+            # No return inside finally, so KeyboardInterrupt / SystemExit are no longer swallowed.
+            print(f"[ERROR]分析失败: {self.op_name} \n失败详情:\n", e, flush=True)
+        return self.row
+
+
+class VecOpPerf(OpPerf):
+    """OpPerf whose color is derived from throughput against Constant.TP_THRESHOLD."""
+    def get_perf_color(self) -> PerfColor:
+        throughput = self.get_throughput()
+        half_threshold = Constant.TP_THRESHOLD / 2
+        # RED: below half the threshold and longer than 20us; YELLOW: [half, threshold), midpoint included.
+        if throughput < half_threshold and self.task_duration > 20:
+            return PerfColor.RED
+        if half_threshold <= throughput < Constant.TP_THRESHOLD:
+            return PerfColor.YELLOW
+        return PerfColor.GREEN
+
+
+class CubeOpPerf(OpPerf):
+    def get_perf_color(self) -> PerfColor:
+        """Color by aic_mac_ratio; a missing ratio (None, 0 or NaN, which is truthy) stays WHITE."""
+        ratio = self.get_mac_ratio()
+        if not ratio or ratio != ratio:
+            return PerfColor.WHITE
+        if ratio < 0.6:
+            return PerfColor.RED
+        if ratio < 0.8:
+            return PerfColor.YELLOW
+        return PerfColor.GREEN
\ No newline at end of file
diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py
new file mode 100644
index 00000000000..f5ed851d31c
--- /dev/null
+++ b/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2023, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from compute_advice.compute_advice_base import ComputeAdviceBase
+from compute_advice.npu_fused.analyser import Analyser
+
+class NpuFusedAdvice(ComputeAdviceBase):
+    """Advice that lists op sequences which can be replaced by NPU fused operators."""
+    def __init__(self, collection_path: str):
+        super().__init__(collection_path)
+        self.cur_data = dict()
+        self.cur_bottleneck = str()
+        self.cur_advice = str()
+
+    def run(self):
+        """Check the input path, analyse it and return output_format_data (unfilled if the path is invalid)."""
+        if not self.path_check():
+            return self.output_format_data
+        self.process()
+        self.output()
+        return self.output_format_data
+
+    def process(self):
+        """Fill cur_data with the pattern table and build the bottleneck and advice texts."""
+        # path_check() resolved the csv to read; collection_path itself may be an ascend_pt directory.
+        analyser = Analyser(self.kernel_details_path)
+        self.cur_data = analyser.process()
+        filter_data = self.cur_data[self.cur_data["duration sum(us)"] > 0]
+        self.cur_advice = "Advice:\n"
+        if len(filter_data.index) == 0:
+            return
+        # The summed column is in microseconds, so the saving is reported in us (rounded like op durations).
+        op_dur = round(filter_data["duration sum(us)"].sum(), 2)
+        self.cur_bottleneck = f"You can choose NPUFused op to replace the current ops to reduce about {op_dur} us."
+        self.cur_advice += "\n".join(
+            f"Replace [{', '.join(row.loc['pattern'])}] with {row.loc['pattern_name']}."
+            for _, row in filter_data.iterrows())
diff --git a/profiler/advisor/advisor_backend/interface.py b/profiler/advisor/advisor_backend/interface.py
index e4d3bf5639d..159a364a521 100644
--- a/profiler/advisor/advisor_backend/interface.py
+++ b/profiler/advisor/advisor_backend/interface.py
@@ -21,6 +21,7 @@ sys.path.append(
     os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "cluster_analyse"))
 from common_func_advisor.constant import Constant
 from advisor_backend.advice_factory.cluster_advice_factory import ClusterAdviceFactory
+from advisor_backend.advice_factory.compute_advice_factory import ComputeAdviceFactory
 
 
 class Interface:
@@ -38,7 +39,8 @@ class Interface:
 
 class FactoryController:
     FACTORY_LIB = {
-        Constant.CLUSTER: ClusterAdviceFactory
+        Constant.CLUSTER: ClusterAdviceFactory,
+        Constant.COMPUTE: ComputeAdviceFactory
     }
 
     def __init__(self, collection_path: str):
diff --git a/profiler/advisor/compute_perf_analysis.ipynb b/profiler/advisor/compute_perf_analysis.ipynb
new file mode 100644
index 00000000000..697005ba7a7
--- /dev/null
+++ b/profiler/advisor/compute_perf_analysis.ipynb
@@ -0,0 +1,120 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from advisor_backend.interface import Interface\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 算子调优分析\n",
+    "## 1. 算子分析的数据准备\n",
+    "当前算子分析工具支持分析Ascend PyTorch Profiler方式生成的ascend_pt目录以及ascend_pt/ASCEND_PROFILER_OUTPUT/kernel_details.csv文件\n",
+    "## 2. 算子分析解决的问题\n",
+    "当前支持分析模型中存在可融合的小算子，并给出优化建议。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[INFO] Start to analyse the target file: C:\\Users\\h00848544\\Desktop\\qkd\\ascend_pt\\ASCEND_PROFILER_OUTPUT\\kernel_details.csv\n",
+      "        pattern_name                                            pattern  len  \\\n",
+      "0   bias_dropout_add                          (Add, DropOutDoMask, Add)    3   \n",
+      "1                 FA  (BatchMatMul, Mul, Cast, Mul, MaskedFill, Soft...    0   \n",
+      "2                 FA  (Transpose, Transpose, Transpose, Mul, Transpo...    0   \n",
+      "3                 FA  (Transpose, BatchMatMulV2, Transpose, Transpos...    0   \n",
+      "4            RMSNORM  (Cast, Square, ReduceMeanD, Add, Rsqrt, Cast, ...    0   \n",
+      "5          LayerNorm                            (Cast, LayerNorm, Cast)    0   \n",
+      "6       AddLayerNorm                                   (Add, LayerNorm)    0   \n",
+      "7       AddLayerNorm                                 (Add, LayerNormV3)    2   \n",
+      "8            GeluAdd                                        (Gelu, Add)    0   \n",
+      "9            RMSNorm  (Cast, Square, MemSet, ReduceMean, Add, Rsqrt,...    0   \n",
+      "10                FA  (BatchMatMul, RealDiv, Add, Maximum, SoftmaxV2...    0   \n",
+      "11                FA  (BatchMatMulV2, RealDiv, Add, Cast, Maximum, C...    0   \n",
+      "12                FA  (BatchMatMulV2, RealDiv, Add, Cast, SoftmaxV2,...    0   \n",
+      "13         RotaryMul  (Mul, Slice, Neg, Slice, ConcatD, Cast, Mul, Add)    0   \n",
+      "14         RotaryMul  (Mul, AsStrided, Neg, AsStrided, ConcatD, Mul,...    0   \n",
+      "15         RotaryMul        (Mul, Slice, Neg, Slice, ConcatD, Mul, Add)    0   \n",
+      "16               FFN         (MatMulV2, Swish, MatMulV2, Mul, MatMulV2)    0   \n",
+      "17     GatherElement   (Transpose, Transpose, GatherElement, Transpose)    0   \n",
+      "\n",
+      "    count  duration sum(us)          op durations(us)              index  \n",
+      "0       4           2178.16  [839.64, 464.04, 874.48]   [52, 64, 87, 99]  \n",
+      "1       0              0.00                         0                  0  \n",
+      "2       0              0.00                         0                  0  \n",
+      "3       0              0.00                         0                  0  \n",
+      "4       0              0.00                         0                  0  \n",
+      "5       0              0.00                         0                  0  \n",
+      "6       0              0.00                         0                  0  \n",
+      "7       4           2154.98          [874.48, 1280.5]  [54, 66, 89, 101]  \n",
+      "8       0              0.00                         0                  0  \n",
+      "9       0              0.00                         0                  0  \n",
+      "10      0              0.00                         0                  0  \n",
+      "11      0              0.00                         0                  0  \n",
+      "12      0              0.00                         0                  0  \n",
+      "13      0              0.00                         0                  0  \n",
+      "14      0              0.00                         0                  0  \n",
+      "15      0              0.00                         0                  0  \n",
+      "16      0              0.00                         0                  0  \n",
+      "17      0              0.00                         0                  0  \n",
+      "You can choose NPUFused op to replace the current ops to reduce about 4333.139999999999 ms.\n",
+      "Advice:\n",
+      "Replace [Add, DropOutDoMask, Add] with bias_dropout_add.\n",
+      "Replace [Add, LayerNormV3] with AddLayerNorm.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# EDIT THE PROFILING DATA PATH\n",
+    "compute_path = \"C:\\\\Users\\\\h00848544\\\\Desktop\\\\qkd\\\\ascend_pt\\\\ASCEND_PROFILER_OUTPUT\\\\kernel_details.csv\"\n",
+    "interface = Interface(compute_path)\n",
+    "data = interface.get_data('compute', 'npu_fused')\n",
+    "\n",
+    "print(data['data'])\n",
+    "print(data['bottleneck'])\n",
+    "print(data['advice'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "qkd",
+   "language": "python",
+   "name": "qkd"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
Gitee