From cc3d23e2c24d3a8058b83f8b39e606de37bb5d1b Mon Sep 17 00:00:00 2001 From: kdaqiu Date: Fri, 1 Dec 2023 16:57:44 +0800 Subject: [PATCH] Add Compute Advice. --- .../advice_factory/compute_advice_factory.py | 40 +++++ .../common_func_advisor/constant.py | 52 ++++++ .../common_func_advisor/utils.py | 77 +++++++++ .../compute_advice/__init__.py | 14 ++ .../compute_advice/compute_advice_base.py | 68 ++++++++ .../compute_advice/npu_fused/__init__.py | 14 ++ .../compute_advice/npu_fused/analyser.py | 80 +++++++++ .../compute_advice/npu_fused/op_perf.py | 155 ++++++++++++++++++ .../compute_advice/npu_fused_advice.py | 49 ++++++ profiler/advisor/advisor_backend/interface.py | 4 +- profiler/advisor/compute_perf_analysis.ipynb | 120 ++++++++++++++ 11 files changed, 672 insertions(+), 1 deletion(-) create mode 100644 profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py create mode 100644 profiler/advisor/advisor_backend/common_func_advisor/utils.py create mode 100644 profiler/advisor/advisor_backend/compute_advice/__init__.py create mode 100644 profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/analyser.py create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py create mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py create mode 100644 profiler/advisor/compute_perf_analysis.ipynb diff --git a/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py new file mode 100644 index 00000000000..88e50d660d0 --- /dev/null +++ b/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py @@ -0,0 +1,40 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. 
class ComputeAdviceFactory(AdviceFactory):
    """Factory that turns a compute-advice name into a finished advice result."""

    # Advice name -> advice class that implements it.
    ADVICE_LIB = {
        Constant.NPU_FUSED: NpuFusedAdvice,
    }

    def __init__(self, collection_path: str):
        super().__init__(collection_path)

    def path_check(self):
        """
        Validate self.collection_path with PathManager.check_input_directory_path.
        """
        PathManager.check_input_directory_path(self.collection_path)

    def produce_advice(self, advice: str):
        """
        Build the advice object registered under `advice`, run it and return
        whatever its run() returns.

        advice_check() is called first so the name is vetted before the lookup.
        """
        self.advice_check(advice)
        advice_cls = self.ADVICE_LIB.get(advice)
        advisor = advice_cls(self.collection_path)
        return advisor.run()
+from enum import Enum class Constant: MAX_INPUT_MODE_LEN = 30 @@ -26,8 +27,59 @@ class Constant: SLOW_RANK = "slow rank" SLOW_LINK = "slow link" KERNEL = "kernel" + + # compute + NPU_FUSED = "npu_fused" COLLECTION_PATH = "collection_path" CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" CLUSTER_COMM_JSON = "cluster_communication.json" + + DTYPE_SIZE_MAP = {"int8": 1, "uint8": 1, + "int16": 2, "uint16": 2, + "int32": 4, "uint32": 4, + "int64": 8, "uint64": 8, + "float16": 2, + "bfloat16": 2, + "bf16": 2, + "dt_bf16": 2, + "float32": 4, + "float": 4, + "float64": 8, + "complex64": 8, + "complex128": 16, + "bool": 1} + TP_THRESHOLD = 1150 + # pattern_dict key: pattern, value: pattern name + PATTERN_DICT = {("Add", "DropOutDoMask", "Add"): "bias_dropout_add", + ("BatchMatMul", "Mul", "Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast", "DropOutDoMask", + "AsStrided", "BatchMatMul", "Transpose"): "FA", + ("Transpose", "Transpose", "Transpose", "Mul", "Transpose", "BatchMatMulV2", "MaskedFill", + "Cast", "SoftmaxV2", "Cast", "DropOutDoMask", "BatchMatMulV2", "Transpose"): "FA", + ("Transpose", "BatchMatMulV2", "Transpose", "Transpose", "BatchMatMulV2", "ZerosLike", + "DropOutDoMask", "Cast", "SoftmaxGrad", "Cast", "MaskedFill", "BatchMatMulV2", + "BatchMatMulV2", "Mul"): "FA", + ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Cast", "Cast", "Mul", "Cast", "Cast", + "Mul", "Cast"): "RMSNORM", + ("Cast", "LayerNorm", "Cast"): "LayerNorm", + ("Add", "LayerNorm"): "AddLayerNorm", + ("Add", "LayerNormV3"): "AddLayerNorm", + ("Gelu", "Add"): "GeluAdd", + ("Cast", "Square", "MemSet", "ReduceMean", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "RMSNorm", + ("BatchMatMul", "RealDiv", "Add", "Maximum", "SoftmaxV2", "Cast", "BatchMatMul"): "FA", + ("BatchMatMulV2", "RealDiv", "Add", "Cast", "Maximum", "Cast", "SoftmaxV2", "AsStrided", + "BatchMatMulV2"): "FA", + ("BatchMatMulV2", "RealDiv", "Add", "Cast", "SoftmaxV2", "Cast", 
class Utils:
    """Stateless helpers for parsing shape, dtype and numeric cells of a kernel row."""

    @staticmethod
    def get_dtype_size(dtype_str):
        """
        Return the size in bytes of one element of `dtype_str`.

        The lookup in Constant.DTYPE_SIZE_MAP is case-insensitive. Unknown
        names and non-string values (e.g. an empty CSV cell) yield 0.
        """
        if not isinstance(dtype_str, str):
            return 0
        return Constant.DTYPE_SIZE_MAP.get(dtype_str.lower(), 0)

    @staticmethod
    def get_element_count(shape):
        """
        Return the number of elements described by `shape`, i.e. the product
        of its dimensions (each dimension is converted with int()).

        The result is always an int; an empty shape yields 1.
        """
        # The explicit initial value makes one-dimension shapes go through
        # int() too and keeps an empty shape from raising.
        return functools.reduce(lambda total, dim: total * int(dim), shape, 1)

    @staticmethod
    def shape_str_to_tuple(ori_str):
        """
        Parse a shapes cell such as '"1,2;3"' into ((1, 2), (3,)).

        Surrounding double quotes and semicolons are stripped, ';' separates
        shapes and ',' separates dimensions. An empty dimension becomes 0.
        Returns an empty list when `ori_str` is not a string or is empty
        after stripping.

        Raises:
            ValueError: if a dimension is not an integer literal.
        """
        if not isinstance(ori_str, str):
            return []
        stripped = ori_str.strip('"').strip(';')
        if not stripped:
            return []
        return tuple(
            tuple(int(dim) if dim != "" else 0 for dim in shape.split(','))
            for shape in stripped.split(';')
        )

    @staticmethod
    def dtype_str_to_tuple(ori_str):
        """
        Split a data-types cell such as "FLOAT;INT8" into ("FLOAT", "INT8").

        Surrounding double quotes and semicolons are stripped first. Returns
        an empty list when `ori_str` is not a string or is empty after
        stripping.
        """
        if not isinstance(ori_str, str):
            return []
        stripped = ori_str.strip('"').strip(';')
        if not stripped:
            return []
        return tuple(stripped.split(';'))

    @staticmethod
    def str_to_float(s):
        """
        Convert a cell value to float, returning 0.0 when it is not a usable
        number.

        Real numbers are accepted as well as strings: a CSV reader such as
        pandas hands numeric columns over as int/float, and rejecting those
        would turn every measured ratio into 0.0.
        """
        try:
            value = float(s)
        except (TypeError, ValueError):
            return 0.0
        # NaN is the only float that differs from itself; treat it as
        # "no value", like any other unparsable cell.
        return value if value == value else 0.0
class ComputeAdviceBase(AdviceBase):
    """Base class for advice that is computed from a kernel_details.csv file."""

    def __init__(self, collection_path: str):
        super().__init__(collection_path)
        # Filled in by path_check(): the kernel_details.csv that will be analysed.
        self.kernel_details_path = ""
        self.has_preparse = False
        self.preparse_data = defaultdict(list)

    def path_check(self):
        """
        Check whether the input path is valid and resolve kernel_details_path.

        Accepted inputs are a directory whose path ends with "ascend_pt"
        (the file is then expected at ASCEND_PROFILER_OUTPUT/kernel_details.csv)
        or a path to an existing ".csv" file.

        Returns:
            True when the path was resolved (preparse() has then been called),
            False otherwise; the reason is printed.
        """
        if not os.path.exists(self.collection_path):
            print("[ERROR] Path: {} is not exist.".format(self.collection_path))
            return False
        if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"):
            output_dir = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT")
            self.kernel_details_path = os.path.join(output_dir, "kernel_details.csv")
            if not os.path.exists(self.kernel_details_path):
                # Name the file that is actually being looked for.
                print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format(output_dir))
                return False
        elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path).endswith(".csv"):
            self.kernel_details_path = self.collection_path
        else:
            print("[ERROR] Please input ascend_pt or kernel_details.csv")
            return False
        print("[INFO] Start to analyse the target file: {}".format(self.kernel_details_path))
        self.preparse()
        return True

    @abstractmethod
    def run(self):
        """
        analyze profiling data and advice
        """

    # Deliberately not abstract: this is the shared implementation, and
    # subclasses may call it without overriding it. Marking it abstract would
    # make such subclasses uninstantiable under an ABC metaclass.
    def output(self):
        """
        Copy cur_data, cur_bottleneck and cur_advice into output_format_data
        under the DATA, BOTTLENECK and ADVICE keys.

        NOTE(review): those keys and output_format_data are not defined in this
        class; presumably they come from AdviceBase - confirm.
        """
        self.output_format_data[self.DATA] = self.cur_data
        self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck
        self.output_format_data[self.ADVICE] = self.cur_advice

    def preparse(self):
        """
        Placeholder hook called by path_check().

        It returns immediately when has_preparse is set and currently does
        nothing otherwise.
        """
        if self.has_preparse:
            return
class Analyser:
    """Search a kernel_details CSV for operator sequences that have a fused replacement."""

    def __init__(self, path) -> None:
        self._path = path

    def process(self):
        """
        Read the CSV at the configured path and report every known pattern.

        Each row is first passed through update_op_row() in a worker pool.
        The "Type" and "Duration(us)" columns of the result are then scanned
        once per key of Constant.PATTERN_DICT.

        Returns:
            A DataFrame with one row per pattern and the columns
            "pattern_name", "pattern", "len", "count", "duration sum(us)",
            "op durations(us)" and "index".
        """
        df = pd.read_csv(self._path)
        # Pre-parse the rows in parallel. The context manager releases the
        # worker processes even when map() raises.
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            result = pool.map(self.update_op_row, df.iterrows())

        preparse_df = pd.DataFrame(result)
        # Look for operator sequences that can be fused.
        op_type_list = preparse_df["Type"].tolist()
        duration_list = preparse_df["Duration(us)"].tolist()
        result_list = []
        for pattern in Constant.PATTERN_DICT:
            result_list.extend(self.find_all_sub_lists(op_type_list, duration_list, pattern))
        return pd.DataFrame(
            result_list,
            columns=["pattern_name", "pattern", "len", "count", "duration sum(us)",
                     "op durations(us)", "index"],
        )

    @staticmethod
    def update_op_row(row):
        """
        Enrich one (index, row) pair produced by DataFrame.iterrows().

        Returns the row after OpPerfFactory.build(row).update() has run on it.
        """
        return OpPerfFactory.build(row[1]).update()

    @staticmethod
    def find_all_sub_lists(op_type_list, duration_list, expect_sub_list):
        """
        Find every occurrence of `expect_sub_list` as a contiguous run in
        `op_type_list` and accumulate the matching durations.

        Args:
            op_type_list: operator types in execution order.
            duration_list: durations aligned index-by-index with op_type_list.
            expect_sub_list: the operator-type sequence to look for.

        Returns:
            A list holding exactly one record
            [pattern_name, pattern, len, count, duration_sum, op_durations, index].
            op_durations is the per-position sum over all matches, rounded to
            2 decimals, and index lists the start positions. When nothing
            matches, every field after the pattern is 0.
        """
        expected = tuple(expect_sub_list)
        window = len(expected)
        pattern_name = Constant.PATTERN_DICT.get(expected, "unknown")

        start_indices = []
        duration_sum = 0
        op_durations = [0] * window
        # Slide a window of the pattern's length over the operator list.
        # NOTE(review): overlapping matches are all counted, so one operator
        # can contribute to several matches - confirm this is intended.
        for start in range(len(op_type_list) - window + 1):
            if tuple(op_type_list[start:start + window]) != expected:
                continue
            matched_durations = duration_list[start:start + window]
            start_indices.append(start)
            duration_sum += sum(matched_durations)
            op_durations = [acc + cur for acc, cur in zip(op_durations, matched_durations)]

        if not start_indices:
            # Keep a placeholder record so every pattern shows up in the report.
            return [[pattern_name, expected, 0, 0, 0, 0, 0]]
        rounded_durations = [round(num, 2) for num in op_durations]
        return [[pattern_name, expected, window, len(start_indices), duration_sum,
                 rounded_durations, start_indices]]
class OpPerfFactory:
    """Chooses the OpPerf subclass that matches a row's "Accelerator Core" value."""

    @classmethod
    def build(cls, op_row: Dict):
        """
        Wrap `op_row` in a performance model.

        "AI_VECTOR_CORE" and "MIX_AIV" rows become VecOpPerf, "AI_CORE" rows
        become CubeOpPerf and everything else a plain OpPerf. `op_row` only
        needs to support .get() and item assignment.
        """
        core = op_row.get("Accelerator Core")
        if core in ("AI_VECTOR_CORE", "MIX_AIV"):
            return VecOpPerf(op_row)
        if core == "AI_CORE":
            return CubeOpPerf(op_row)
        return OpPerf(op_row)


class OpPerf:
    """Performance view of one kernel row; missing columns are stored as None."""

    def __init__(self, op_row: Dict):
        self.row = op_row
        self.model_name = op_row.get("Model Name")
        self.model_id = op_row.get("Model ID")
        self.task_id = op_row.get("Task ID")
        self.stream_id = op_row.get("Stream ID")
        self.infer_id = op_row.get("Infer ID")
        self.op_name = op_row.get("Name")
        self.op_type = op_row.get("Type")
        self.task_type = op_row.get("Accelerator Core")
        self.task_start_time = op_row.get("Start Time(us)")
        self.task_duration = op_row.get("Duration(us)")
        self.task_wait_time = op_row.get("Wait Time(us)")
        self.block_dim = op_row.get("Block Dim")
        self.mix_block_dim = op_row.get("Mix Block Dim")

        self.hf32_eligible = op_row.get("HF32 Eligible")
        self.input_shapes = op_row.get("Input Shapes")
        self.input_data_types = op_row.get("Input Data Types")
        self.input_formats = op_row.get("Input Formats")
        self.output_shapes = op_row.get("Output Shapes")
        self.output_data_types = op_row.get("Output Data Types")
        self.output_formats = op_row.get("Output Formats")
        self.context_id = op_row.get("Context ID")
        self.aicore_time = op_row.get("aicore_time(us)")
        self.aic_total_cycles = op_row.get("aic_total_cycles")

        self.aic_mac_time = op_row.get("aic_mac_time(us)")
        self.aic_mac_ratio = op_row.get("aic_mac_ratio")
        self.aic_scalar_time = op_row.get("aic_scalar_time(us)")
        self.aic_scalar_ratio = op_row.get("aic_scalar_ratio")
        self.aic_mte1_time = op_row.get("aic_mte1_time(us)")
        self.aic_mte1_ratio = op_row.get("aic_mte1_ratio")
        self.aic_mte2_time = op_row.get("aic_mte2_time(us)")
        self.aic_mte2_ratio = op_row.get("aic_mte2_ratio")
        self.aic_fixpipe_time = op_row.get("aic_fixpipe_time(us)")
        self.aic_fixpipe_ratio = op_row.get("aic_fixpipe_ratio")
        self.aic_icache_miss_rate = op_row.get("aic_icache_miss_rate")
        self.aiv_time = op_row.get("aiv_time(us)")
        self.aiv_total_cycles = op_row.get("aiv_total_cycles")
        self.aiv_vec_time = op_row.get("aiv_vec_time(us)")
        self.aiv_vec_ratio = op_row.get("aiv_vec_ratio")
        self.aiv_scalar_time = op_row.get("aiv_scalar_time(us)")
        self.aiv_scalar_ratio = op_row.get("aiv_scalar_ratio")
        self.aiv_mte2_time = op_row.get("aiv_mte2_time(us)")

        self.aiv_mte2_ratio = op_row.get("aiv_mte2_ratio")
        self.aiv_mte3_time = op_row.get("aiv_mte3_time(us)")
        self.aiv_mte3_ratio = op_row.get("aiv_mte3_ratio")
        self.aiv_icache_miss_rate = op_row.get("aiv_icache_miss_rate")
        self.cube_utilization = op_row.get("cube_utilization( %)")

    def get_mac_ratio(self):
        """Return the raw "aic_mac_ratio" cell of the row."""
        return self.aic_mac_ratio

    def get_size(self, shapes_str, dtypes_str):
        """
        Return the total byte size of the tensors described by a shapes cell
        and the matching data-types cell.

        If there are more shapes than dtypes the size cannot be determined
        and 0 is returned. If there are fewer, the missing shapes are treated
        as single-element tensors.
        """
        shapes = Utils.shape_str_to_tuple(shapes_str)
        dtypes = Utils.dtype_str_to_tuple(dtypes_str)

        if len(shapes) > len(dtypes):
            return 0
        if len(shapes) < len(dtypes):
            shapes = list(shapes)
            shapes.extend([(1,)] * (len(dtypes) - len(shapes)))
        all_size = 0
        for index, shape in enumerate(shapes):
            element_count = Utils.get_element_count(shape)
            dtype_size = Utils.get_dtype_size(dtypes[index])
            all_size += element_count * dtype_size
        return all_size

    def get_calc_size(self):
        """Return the combined input and output size, divided by 1e6 (the "size(MB)" column)."""
        input_size = self.get_size(self.input_shapes, self.input_data_types)
        output_size = self.get_size(self.output_shapes, self.output_data_types)
        return (input_size + output_size) / 1e6

    def get_throughput(self):
        """
        Return size / duration * 1e3 (the "throughput(GB/s)" column).

        Raises whatever the division raises when "Duration(us)" is 0 or not a
        number; update() reports that per row.
        """
        return self.get_calc_size() / self.task_duration * 1e3

    def is_vector_bound(self):
        """Return True when aiv_vec_ratio is above 0.8."""
        return Utils.str_to_float(self.aiv_vec_ratio) > 0.8

    def is_mte_bound(self):
        """Return True when aiv_mte2_ratio plus aiv_mte3_ratio is above 0.8."""
        return Utils.str_to_float(self.aiv_mte2_ratio) + Utils.str_to_float(self.aiv_mte3_ratio) > 0.8

    def get_perf_color(self):
        """Return the rating of this operator; the base class does not rate (WHITE)."""
        return PerfColor.WHITE

    def update(self):
        """
        Add "size(MB)", "throughput(GB/s)" and "color" to the row and return it.

        This is best effort: when a value cannot be computed the error is
        printed and the row is returned with whatever was set so far, so one
        malformed row does not abort the analysis of the others.
        """
        try:
            self.row["size(MB)"] = self.get_calc_size()
            self.row["throughput(GB/s)"] = self.get_throughput()
            self.row["color"] = self.get_perf_color().name
        except Exception as e:
            # The operator name lives in the "Name" column (see __init__).
            print(f"[ERROR]分析失败: {self.row.get('Name')} \n失败详情:\n", e, flush=True)
        # Returned outside any `finally`, so KeyboardInterrupt and SystemExit
        # still propagate.
        return self.row


class VecOpPerf(OpPerf):
    """Rates vector-core operators by their throughput."""

    def get_perf_color(self) -> "PerfColor":
        """
        Rate the operator against Constant.TP_THRESHOLD.

        RED: throughput below half the threshold and duration above 20 us.
        YELLOW: throughput from half the threshold (inclusive) up to the
        threshold (exclusive).
        GREEN: everything else, including slow operators of 20 us or less.
        """
        throughput = self.get_throughput()
        half_threshold = Constant.TP_THRESHOLD / 2
        if throughput < half_threshold and self.task_duration > 20:
            return PerfColor.RED
        # The lower bound is inclusive so a throughput of exactly half the
        # threshold is not reported as GREEN.
        if half_threshold <= throughput < Constant.TP_THRESHOLD:
            return PerfColor.YELLOW
        return PerfColor.GREEN


class CubeOpPerf(OpPerf):
    """Rates AI_CORE operators by their aic_mac_ratio."""

    def get_perf_color(self) -> "PerfColor":
        """
        Rate the operator by its MAC ratio.

        WHITE: the ratio is missing, zero or not a number.
        RED: below 0.6. YELLOW: from 0.6 up to (not including) 0.8.
        GREEN: 0.8 and above.
        """
        try:
            # The cell may arrive as a string, so convert before comparing.
            mac_ratio = float(self.get_mac_ratio())
        except (TypeError, ValueError):
            return PerfColor.WHITE
        # `mac_ratio != mac_ratio` is true only for NaN (an empty cell).
        if not mac_ratio or mac_ratio != mac_ratio:
            return PerfColor.WHITE
        if mac_ratio < 0.6:
            return PerfColor.RED
        if mac_ratio < 0.8:
            return PerfColor.YELLOW
        return PerfColor.GREEN
class NpuFusedAdvice(ComputeAdviceBase):
    """Advises replacing known operator sequences with their fused operator."""

    def __init__(self, collection_path: str):
        super().__init__(collection_path)
        self.cur_data = dict()
        self.cur_bottleneck = str()
        self.cur_advice = str()

    def run(self):
        """
        Validate the input path, analyse it and return output_format_data.

        When path_check() fails, output_format_data is returned as it is,
        without running the analysis.
        """
        if not self.path_check():
            return self.output_format_data
        self.process()
        self.output()
        return self.output_format_data

    def process(self):
        """
        Run the Analyser and fill cur_data, cur_bottleneck and cur_advice.

        cur_data is the Analyser's full result. The advice lists one
        "Replace ... with ..." line for every pattern whose
        "duration sum(us)" is greater than 0.
        """
        # Use the CSV that path_check() resolved: collection_path may be the
        # ascend_pt directory, which cannot be read as a CSV.
        analyser = Analyser(self.kernel_details_path)
        self.cur_data = analyser.process()
        matched = self.cur_data[self.cur_data["duration sum(us)"] > 0]
        op_dur = matched["duration sum(us)"].sum()
        self.cur_advice = "Advice:\n"
        if len(matched.index) == 0:
            return
        # The summed column is in microseconds, so report it as such.
        self.cur_bottleneck = f"You can choose NPUFused op to replace the current ops to reduce about {op_dur} us."
        replacements = []
        for _, row in matched.iterrows():
            cur_op = "[" + ", ".join(row.loc["pattern"]) + "]"
            npu_fused_op = row.loc["pattern_name"]
            replacements.append(f"Replace {cur_op} with {npu_fused_op}.")
        self.cur_advice += "\n".join(replacements)
算子分析解决的问题\n", + "当前支持分析模型中存在可融合的小算子,并给出优化建议。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] Start to analyse the target file: C:\\Users\\h00848544\\Desktop\\qkd\\ascend_pt\\ASCEND_PROFILER_OUTPUT\\kernel_details.csv\n", + " pattern_name pattern len \\\n", + "0 bias_dropout_add (Add, DropOutDoMask, Add) 3 \n", + "1 FA (BatchMatMul, Mul, Cast, Mul, MaskedFill, Soft... 0 \n", + "2 FA (Transpose, Transpose, Transpose, Mul, Transpo... 0 \n", + "3 FA (Transpose, BatchMatMulV2, Transpose, Transpos... 0 \n", + "4 RMSNORM (Cast, Square, ReduceMeanD, Add, Rsqrt, Cast, ... 0 \n", + "5 LayerNorm (Cast, LayerNorm, Cast) 0 \n", + "6 AddLayerNorm (Add, LayerNorm) 0 \n", + "7 AddLayerNorm (Add, LayerNormV3) 2 \n", + "8 GeluAdd (Gelu, Add) 0 \n", + "9 RMSNorm (Cast, Square, MemSet, ReduceMean, Add, Rsqrt,... 0 \n", + "10 FA (BatchMatMul, RealDiv, Add, Maximum, SoftmaxV2... 0 \n", + "11 FA (BatchMatMulV2, RealDiv, Add, Cast, Maximum, C... 0 \n", + "12 FA (BatchMatMulV2, RealDiv, Add, Cast, SoftmaxV2,... 0 \n", + "13 RotaryMul (Mul, Slice, Neg, Slice, ConcatD, Cast, Mul, Add) 0 \n", + "14 RotaryMul (Mul, AsStrided, Neg, AsStrided, ConcatD, Mul,... 
0 \n", + "15 RotaryMul (Mul, Slice, Neg, Slice, ConcatD, Mul, Add) 0 \n", + "16 FFN (MatMulV2, Swish, MatMulV2, Mul, MatMulV2) 0 \n", + "17 GatherElement (Transpose, Transpose, GatherElement, Transpose) 0 \n", + "\n", + " count duration sum(us) op durations(us) index \n", + "0 4 2178.16 [839.64, 464.04, 874.48] [52, 64, 87, 99] \n", + "1 0 0.00 0 0 \n", + "2 0 0.00 0 0 \n", + "3 0 0.00 0 0 \n", + "4 0 0.00 0 0 \n", + "5 0 0.00 0 0 \n", + "6 0 0.00 0 0 \n", + "7 4 2154.98 [874.48, 1280.5] [54, 66, 89, 101] \n", + "8 0 0.00 0 0 \n", + "9 0 0.00 0 0 \n", + "10 0 0.00 0 0 \n", + "11 0 0.00 0 0 \n", + "12 0 0.00 0 0 \n", + "13 0 0.00 0 0 \n", + "14 0 0.00 0 0 \n", + "15 0 0.00 0 0 \n", + "16 0 0.00 0 0 \n", + "17 0 0.00 0 0 \n", + "You can choose NPUFused op to replace the current ops to reduce about 4333.139999999999 ms.\n", + "Advice:\n", + "Replace [Add, DropOutDoMask, Add] with bias_dropout_add.\n", + "Replace [Add, LayerNormV3] with AddLayerNorm.\n" + ] + } + ], + "source": [ + "# EDIT THE PROFILING DATA PATH\n", + "compute_path = \"C:\\\\Users\\\\h00848544\\\\Desktop\\\\qkd\\\\ascend_pt\\\\ASCEND_PROFILER_OUTPUT\\\\kernel_details.csv\"\n", + "interface = Interface(compute_path)\n", + "data = interface.get_data('compute', 'npu_fused')\n", + "\n", + "print(data['data'])\n", + "print(data['bottleneck'])\n", + "print(data['advice'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "qkd", + "language": "python", + "name": "qkd" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- Gitee