From af5f2bdd4c180d69760b7c1bf11e79105560c37d Mon Sep 17 00:00:00 2001 From: kongdeshuo <1670690897@qq.com> Date: Mon, 15 Jul 2024 16:47:58 +0800 Subject: [PATCH 001/106] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E6=89=B9=E9=87=8F=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/advisor/analyzer/base_analyzer.py | 14 ++++++++++++++ .../analyzer/cluster/slow_link_analyser.py | 18 +++++++++++------- .../analyzer/cluster/slow_rank_analyser.py | 6 +++--- .../computation/bound/block_dim_checker.py | 1 - profiler/advisor/common/timeline/event.py | 5 +++-- .../advisor/dataset/cluster/cluster_dataset.py | 14 +++++++------- .../advisor/dataset/timeline_event_dataset.py | 15 ++++++++------- profiler/advisor/display/html/render.py | 5 ++--- 8 files changed, 48 insertions(+), 30 deletions(-) diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index 5f4bd3202c..e0e17320b3 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import logging from functools import wraps from typing import Dict, List, Union diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py index 846b79a50f..0b585cbc7c 100644 --- a/profiler/advisor/analyzer/cluster/slow_link_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -19,7 +19,7 @@ from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataSet +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataset class SlowLinkAnalyzer(BaseAnalyzer): @@ -35,11 +35,11 @@ class SlowLinkAnalyzer(BaseAnalyzer): SDMA = "SDMA" RDMA = "RDMA" SLOW_LINK_ANALYSIS = "slow_link_analysis" - dataset_cls_list = [ClusterCommunicationDataSet] + dataset_cls_list = [ClusterCommunicationDataset] def __init__(self, collection_path, n_processes: int = 1, **kwargs): super().__init__(collection_path, n_processes, **kwargs) - key = ClusterCommunicationDataSet.get_key() + key = ClusterCommunicationDataset.get_key() self.communication_data_class = self.get_first_data_by_key(self.dataset_list, key) self.rank_bw_dict = self.communication_data_class.get_data() self.result = OptimizeResult() @@ -49,8 +49,9 @@ class SlowLinkAnalyzer(BaseAnalyzer): def optimize(self, **kwargs): if self.rank_bw_dict is None: - print("slow_link 分析失败,原因是数据加载失败,请检查你的cluster_analysis_outpu文件夹, \ - 如不关心这类数据请忽略") + print("Slow link analysis failed due to data loading failure. \ + Please check your cluster_analysis_output folder. 
\ + If you are not concerned about this type of data, please ignore this message.") return self.result self.process() self.format_datas = self.format_details() @@ -65,8 +66,11 @@ class SlowLinkAnalyzer(BaseAnalyzer): def produce_bottleneck(self, link_type: str): data_list = [rank_dict.get(link_type, 0) for rank_id, rank_dict in self.rank_bw_dict.items()] - avg_bw = round(sum(data_list) / len(data_list), 3) - if avg_bw == 0: + if len(data_list) > 0: + avg_bw = round(sum(data_list) / len(data_list), 3) + else: + print("The slow link (identified bottleneck) cannot provide a bottleneck \ + because the analysis data is missing bandwidth information.") return self.bottelneck += f'{link_type}: \n' \ f' The average is {avg_bw}, \n' \ diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py index aa0ddad507..f439b31f77 100644 --- a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -19,7 +19,7 @@ from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataset class SlowRankAnalyzer(BaseAnalyzer): @@ -27,11 +27,11 @@ class SlowRankAnalyzer(BaseAnalyzer): RANK = "rank" RATIO_THRESHOLD = 0.05 BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] - dataset_cls_list = [ClusterStepTraceTimeDataSet] + dataset_cls_list = [ClusterStepTraceTimeDataset] def __init__(self, collection_path, n_processes: int = 1, **kwargs): super().__init__(collection_path, n_processes, **kwargs) - key = ClusterStepTraceTimeDataSet.get_key() + key = ClusterStepTraceTimeDataset.get_key() self.step_trace_class = 
self.get_first_data_by_key(self.dataset_list, key) self.step_trace_dict = self.step_trace_class.get_data() self.result = OptimizeResult() diff --git a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py index a7d7ddd93c..7a873c6563 100644 --- a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py +++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py @@ -1,5 +1,4 @@ import logging - from typing import List from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker diff --git a/profiler/advisor/common/timeline/event.py b/profiler/advisor/common/timeline/event.py index 6001ac8872..e24d983a02 100644 --- a/profiler/advisor/common/timeline/event.py +++ b/profiler/advisor/common/timeline/event.py @@ -1,3 +1,4 @@ +from decimal import Decimal class AdvisorDict(dict): def __getstate__(self): return self.__dict__ @@ -18,6 +19,6 @@ class AdvisorDict(dict): class TimelineEvent(AdvisorDict): def ts_include(self, event): - - return float(self.ts) <= float(event.ts) and float(self.ts) + float(self.dur) >= float(event.ts) + float( + return Decimal(self.ts) <= Decimal(event.ts) and Decimal(self.ts) + Decimal(self.dur) >= Decimal( + event.ts) + Decimal( event.dur) \ No newline at end of file diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index 09fda2d4dc..e1163f1cdd 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -25,9 +25,9 @@ class ClusterDataset(Dataset): """ for file in os.listdir(self.collection_path): if file == 'cluster_analysis_output': - print("[INFO]Cluster has been analyzed " - "because of the existence of cluster analysis output directory.") - print("[INFO]Skip Cluster analyze backend.") + logger.info("[INFO]Cluster has been analyzed " + "because of the existence of cluster analysis output 
directory.") + logger.info("[INFO]Skip Cluster analyze backend.") return True return False @@ -62,7 +62,7 @@ class ClusterDataset(Dataset): @singleton -class ClusterStepTraceTimeDataSet(ClusterDataset): +class ClusterStepTraceTimeDataset(ClusterDataset): RANK = "rank" def __init__(self, collection_path: str, data: dict, **kwargs): @@ -77,10 +77,10 @@ class ClusterStepTraceTimeDataSet(ClusterDataset): print("捕获到异常:", e) self._step_dict = None return False - self._step_dict = self.formate_data(step_data) + self._step_dict = self.format_data(step_data) return True - def formate_data(self, step_data: list): + def format_data(self, step_data: list): step_dict = defaultdict(lambda: [0, 0, 0]) for step_bean in step_data: if step_bean.type == self.RANK: @@ -94,7 +94,7 @@ class ClusterStepTraceTimeDataSet(ClusterDataset): @singleton -class ClusterCommunicationDataSet(ClusterDataset): +class ClusterCommunicationDataset(ClusterDataset): RDMA_TIME_MS = "RDMA time(ms)" RDMA_SIZE_MB = "RDMA size(mb)" SDMA_TIME_MS = "SDMA time(ms)" diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index 94b6fdfef7..d3889e4458 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ b/profiler/advisor/dataset/timeline_event_dataset.py @@ -9,6 +9,7 @@ from profiler.advisor.common import constant as const from profiler.advisor.common.timeline.event import TimelineEvent from profiler.advisor.utils.utils import get_file_path_from_directory from profiler.advisor.utils.utils import singleton +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -121,13 +122,13 @@ class TimelineEventDataset(Dataset): def parse_data_with_generator(self, func): result = [] try: - with open(self.timeline_data_list[0], "r") as f: - for i, event in tqdm(enumerate(ijson.items(f, "item")), - leave=False, ncols=100, desc="Building dataset for timeline analysis", - total=self.dataset_len): - func_res 
= func(index=i, event=event) - if func_res is not None: - result.append(func_res) + json_content = FileManager.read_json_file(self.timeline_data_list[0]) + for i, event in tqdm(enumerate(json_content), leave=False, ncols=100, + desc="Building dataset for timeline analysis", + total=self.dataset_len): + func_res = func(index=i, event=event) + if func_res: + result.append(func_res) except Exception as e: logger.warning("Error %s while parsing file %s, continue to timeline analysis", e, self.timeline_data_list[0]) diff --git a/profiler/advisor/display/html/render.py b/profiler/advisor/display/html/render.py index 8ea7c9e0fc..3984fa8f34 100644 --- a/profiler/advisor/display/html/render.py +++ b/profiler/advisor/display/html/render.py @@ -1,6 +1,7 @@ import os import logging from typing import List, Dict +from collections import defaultdict from jinja2 import Environment, FileSystemLoader from profiler.advisor.common import constant @@ -15,7 +16,7 @@ logger = logging.getLogger() class HTMLRender: def __init__(self): self.html = "" - self.render_list: Dict[str, List] = {} + self.render_list = defaultdict(list) def render_html(self, template_dir: str = "templates", template_name: str = "main.html", template_header=constant.DEFAULT_TEMPLATE_HEADER): @@ -30,8 +31,6 @@ class HTMLRender: autoescape=True) template = env.get_template(template_name) rendered_html = template.render(**kwargs) - if key not in self.render_list: - self.render_list[key] = [] self.render_list[key].append(rendered_html) return rendered_html -- Gitee From 6e03367c0506a7a744d905dc1ec07eb12b39324c Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Tue, 16 Jul 2024 14:47:07 +0800 Subject: [PATCH 002/106] pynative grad tool --- .../grad_tool/common/constant.py | 8 ++ .../grad_tool/grad_ms/global_context.py | 8 ++ .../grad_tool/grad_ms/grad_stat_csv.py | 130 ++++++++++++++++++ .../accuracy_tools/grad_tool/grad_ms/hook.py | 84 ++++++++--- .../accuracy_tools/grad_tool/grad_ms/utils.py | 39 ++++++ 5 files 
changed, 248 insertions(+), 21 deletions(-) create mode 100644 debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py create mode 100644 debug/accuracy_tools/grad_tool/grad_ms/utils.py diff --git a/debug/accuracy_tools/grad_tool/common/constant.py b/debug/accuracy_tools/grad_tool/common/constant.py index 902f54f5e6..d569d47c16 100644 --- a/debug/accuracy_tools/grad_tool/common/constant.py +++ b/debug/accuracy_tools/grad_tool/common/constant.py @@ -46,3 +46,11 @@ class GradConst: STEP_FINISH = "step_finish" SUMMARY = "summary" + + # csv header entry + MD5 = "MD5" + DISTRIBUTION = "distribution" + SHAPE = "shape" + MAX = "max" + MIN = "min" + NORM = "norm" \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index 02d1f74454..233bfb9864 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -72,5 +72,13 @@ class GlobalContext: else: print_warn_log(f"{name} is None or not a list with valid items, use default value.") + def step_need_dump(self, step): + dump_step_list = self.get_context(GradConst.STEP) + return (not dump_step_list) or (step in dump_step_list) + + def rank_need_dump(self, rank): + dump_rank_list = self.get_context(GradConst.RANK) + return (not dump_rank_list) or (rank in dump_rank_list) + grad_context = GlobalContext() diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py new file mode 100644 index 0000000000..791553bb11 --- /dev/null +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py @@ -0,0 +1,130 @@ +from abc import ABC, abstractmethod +import hashlib + +import mindspore +from mindspore import ops, Tensor +from grad_tool.common.constant import GradConst + + +class CsvInput: + def __init__(self, param_name, grad, bounds): + self.param_name = param_name + self.grad = grad + self.bounds = 
bounds + +class GradStatCsv: + csv = {} + + @staticmethod + def generate_csv_header(level, csv_input): + header = ["param_name"] + for key in level["header"]: + header.extend(GradStatCsv.csv[key].generate_csv_header(csv_input)) + return header + + @staticmethod + def generate_csv_line(level, csv_input): + line = [csv_input.param_name] + for key in level["header"]: + line.extend(GradStatCsv.csv[key].generate_csv_content(csv_input)) + return line + + +def register_csv_item(key, cls=None): + if cls is None: + # 无参数时,返回装饰器函数 + return lambda cls: register_csv_item(key, cls) + GradStatCsv.csv[key] = cls + return cls + + +class CsvItem(ABC): + @staticmethod + @abstractmethod + def generate_csv_header(csv_input): + pass + + @staticmethod + @abstractmethod + def generate_csv_content(csv_input): + pass + + +@register_csv_item(GradConst.MD5) +class CsvMd5(CsvItem): + def generate_csv_header(csv_input): + return ["MD5"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + tensor_bytes = grad.float().numpy().tobytes() + md5_hash = hashlib.md5(tensor_bytes) + return [md5_hash.hexdigest()] + + +@register_csv_item(GradConst.DISTRIBUTION) +class CsvDistribution(CsvItem): + def generate_csv_header(csv_input): + bounds = csv_input.bounds + intervals = [] + for i, _ in enumerate(bounds): + if i == 0: + intervals.append(f"(-inf, {bounds[i]}]") + else: + intervals.append(f"({bounds[i-1]}, {bounds[i]}]") + intervals.extend([f"({bounds[-1]}, inf)", "=0"]) + return intervals + + def generate_csv_content(csv_input): + grad = csv_input.grad + bounds = csv_input.bounds + if grad.dtype == mindspore.bfloat16: + grad = grad.to(mindspore.float32) + element_num = grad.numel() + grad_equal_0_num = (grad == 0).sum().item() + bound = Tensor(bounds) + bucketsize_result = ops.bucketize(grad, bound) + interval_nums = [(bucketsize_result == i).sum().item() for i in range(len(bound) + 1)] + interval_nums.append(grad_equal_0_num) + return_list = [x / element_num if element_num != 0 else 0 for 
x in interval_nums] + return return_list + + +@register_csv_item(GradConst.MAX) +class CsvMax(CsvItem): + def generate_csv_header(csv_input): + return ["max"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [ops.amax(grad).float().numpy().tolist()] + + +@register_csv_item(GradConst.MIN) +class CsvMin(CsvItem): + def generate_csv_header(csv_input): + return ["min"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [ops.amin(grad).float().numpy().tolist()] + + +@register_csv_item(GradConst.NORM) +class CsvNorm(CsvItem): + def generate_csv_header(csv_input): + return ["norm"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [ops.norm(grad).float().numpy().tolist()] + + +@register_csv_item(GradConst.SHAPE) +class CsvShape(CsvItem): + def generate_csv_header(csv_input): + return ["shape"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [list(grad.shape)] \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/hook.py b/debug/accuracy_tools/grad_tool/grad_ms/hook.py index ceadfee614..e1f78d7dbf 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/hook.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/hook.py @@ -1,4 +1,4 @@ -from functools import wraps + import os import shutil @@ -10,38 +10,80 @@ from mindspore.common.parameter import Parameter from mindspore.common.initializer import initializer from grad_tool.common.constant import GradConst -from grad_tool.common.utils import print_warn_log +from grad_tool.common.utils import print_warn_log, write_csv from grad_tool.grad_ms.global_context import grad_context from grad_tool.grad_ms.grad_analyzer import grad_dump, get_rank_id from grad_tool.grad_ms.grad_analyzer import csv_generator +from grad_tool.grad_ms.grad_stat_csv import GradStatCsv, CsvInput +from grad_tool.grad_ms.utils import save_grad_direction, get_adapted_level +class HookInput: + def __init__(self, opt) -> None: + self.func = 
opt.construct + self.g_names = [param.name for param in opt._parameters] + self.param_list = grad_context.get_context(GradConst.PARAM_LIST) + self.rank_id = get_rank_id() + output_path = grad_context.get_context(GradConst.OUTPUT_PATH) + self.dump_dir = f"{output_path}/rank_{self.rank_id}/Dump/" + self.save_dir = f"{output_path}/rank_{self.rank_id}/" + self.step_finish_flag = f"{output_path}/rank_{self.rank_id}/Dump/{GradConst.STEP_FINISH}" + if os.path.exists(self.save_dir): + print_warn_log(f"Delete existing path {self.save_dir}.") + shutil.rmtree(self.save_dir) + self.level = grad_context.get_context(GradConst.LEVEL) + self.bounds = grad_context.get_context(GradConst.BOUNDS) + self.mode = mindspore.get_context("mode") -def hook_optimizer(opt: Optimizer): - func = opt.construct - g_names = [param.name for param in opt._parameters] - param_list = grad_context.get_context(GradConst.PARAM_LIST) - rank_id = get_rank_id() - output_path = grad_context.get_context(GradConst.OUTPUT_PATH) - dump_dir = f"{output_path}/rank_{rank_id}/Dump/" - save_dir = f"{output_path}/rank_{rank_id}/" - step_finish_flag = f"{output_path}/rank_{rank_id}/Dump/{GradConst.STEP_FINISH}" - if os.path.exists(save_dir): - print_warn_log(f"Delete existing path {save_dir}.") - shutil.rmtree(save_dir) - level = grad_context.get_context(GradConst.LEVEL) - bounds = grad_context.get_context(GradConst.BOUNDS) - +def hook_graph_mode_optimizer(opt, hook_input): @jit def new_construct(self, gradients): for index, grad_value in enumerate(gradients): - if param_list and g_names[index] not in param_list: + if hook_input.param_list and hook_input.g_names[index] not in hook_input.param_list: continue - grad_dump(dump_dir, g_names[index], self.dump_step, grad_value, level, bounds) - ms.ops.TensorDump()(step_finish_flag, self.dump_step) + grad_dump(hook_input.dump_dir, hook_input.g_names[index], self.dump_step, + grad_value, hook_input.level, hook_input.bounds) + ms.ops.TensorDump()(hook_input.step_finish_flag, 
self.dump_step) self.assignadd(self.dump_step, self.global_step_increase_tensor) - out = func(gradients) + out = hook_input.func(gradients) return out opt.dump_step = Parameter(initializer(0, [1], ms.int32), name="dump_step") opt.construct = new_construct.__get__(opt, type(opt)) csv_generator.start() + +def hook_pynative_optimizer(opt, hook_input): + level_adapted = get_adapted_level(hook_input.level) + + def new_construct(self, gradients): + cur_step = self.dump_step + if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id): + output_lines = [] + for index, grad_value in enumerate(gradients): + param_name = hook_input.g_names[index] + if hook_input.param_list and param_name not in hook_input.param_list: + continue + csv_input = CsvInput(param_name, grad_value, hook_input.bounds) + grad_info = GradStatCsv.generate_csv_line(level_adapted, csv_input) + output_lines.append(grad_info) + if level_adapted["have_grad_direction"]: + save_grad_direction(param_name, grad_value, os.path.join(hook_input.save_dir, f'step_{cur_step}')) + output_csv_path = os.path.join(hook_input.save_dir, f"grad_summary_{cur_step}.csv") + dummy_csv_input = CsvInput(None, None, hook_input.bounds) + write_csv(output_csv_path, output_lines, + GradStatCsv.generate_csv_header(level_adapted, dummy_csv_input)) + + self.assignadd(self.dump_step, self.global_step_increase_tensor) + out = hook_input.func(gradients) + return out + + opt.dump_step = Parameter(initializer(0, [1], ms.int32), name="dump_step") + opt.construct = new_construct.__get__(opt, type(opt)) + + +def hook_optimizer(opt: Optimizer): + hook_input = HookInput(opt) + + if hook_input.mode == mindspore.GRAPH_MODE: + hook_graph_mode_optimizer(opt, hook_input) + else: + hook_pynative_optimizer(opt, hook_input) \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py new file mode 100644 index 0000000000..16d312039f --- 
/dev/null +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -0,0 +1,39 @@ +import os + +import numpy as np + +from grad_tool.common.constant import GradConst +from grad_tool.common.utils import print_warn_log, create_directory, change_mode + +level_adp = { + "L0": { + "header": [GradConst.md5, GradConst.max, GradConst.min, GradConst.norm, GradConst.shape], + "have_grad_direction": False + }, + "L1": { + "header": [GradConst.max, GradConst.min, GradConst.norm, GradConst.shape], + "have_grad_direction": True + }, + "L2": { + "header": [GradConst.distribution, GradConst.max, GradConst.min, GradConst.norm, GradConst.shape], + "have_grad_direction": True + }, + } + +def save_grad_direction(param_name, grad, save_path): + if not os.path.exists(save_path): + create_directory(save_path) + param_grad = grad.clone().detach() + grad_direction_tensor = param_grad > 0 + grad_direction_ndarray = grad_direction_tensor.numpy() + + save_filepath = os.path.join(save_path, f"{param_name}.npy") + np.save(save_filepath, grad_direction_ndarray) + change_mode(save_filepath, 0o640) + +def get_adapted_level(level: str): + if level == GradConst.LEVEL3: + print_warn_log(f"In mindpsore pynative mode, only 'L0', 'L1' and 'L2' are supported, use L0 instead") + level = GradConst.LEVEL0 + level_adapted = level_adp[level] + return level_adapted \ No newline at end of file -- Gitee From a82aeb6af587c054c48a048a221d2cafe7295e6e Mon Sep 17 00:00:00 2001 From: pxp1 <958876660@qq.com> Date: Tue, 16 Jul 2024 15:21:47 +0800 Subject: [PATCH 003/106] =?UTF-8?q?kj600=E4=BB=A3=E7=A0=81=E5=90=8C?= =?UTF-8?q?=E6=AD=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/kj600/README.md | 2 +- .../kj600/distributed/wrap_distributed.py | 25 +- .../accuracy_tools/kj600/kj600/module_hook.py | 1 + .../kj600/kj600/unittest/cc_utils.py | 83 ++++++ .../unittest/config_basic_functions.json | 17 ++ .../kj600/kj600/unittest/config_cc.json | 7 + 
.../unittest/config_cc_codeline_ranks.json | 8 + .../kj600/unittest/config_cc_logonly.json | 8 + .../kj600/kj600/unittest/expected_cc_log.json | 20 ++ .../kj600/unittest/test_anomaly_inform.py | 26 ++ .../kj600/unittest/test_basic_functions.py | 149 ++++++++++ .../kj600/kj600/unittest/test_cc.py | 260 ++++++++++++++++++ .../kj600/unittest/test_cc_codeline_ranks.py | 52 ++++ .../kj600/kj600/unittest/test_cc_log_only.py | 55 ++++ .../kj600/kj600/unittest/test_database.py | 42 +++ .../kj600/kj600/unittest/test_features.py | 33 +++ .../kj600/kj600/unittest/test_module_hook.py | 84 ++++++ 17 files changed, 869 insertions(+), 3 deletions(-) create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/cc_utils.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/config_basic_functions.json create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/config_cc.json create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/config_cc_codeline_ranks.json create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/config_cc_logonly.json create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/expected_cc_log.json create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_anomaly_inform.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_basic_functions.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_cc.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_cc_codeline_ranks.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_cc_log_only.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_database.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_features.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_module_hook.py diff --git a/debug/accuracy_tools/kj600/README.md b/debug/accuracy_tools/kj600/README.md index bd97acf6dc..1782e58bec 100644 --- a/debug/accuracy_tools/kj600/README.md +++ b/debug/accuracy_tools/kj600/README.md @@ 
-79,7 +79,7 @@ pip install . |"mv_distribution"| 可选 | 若为true则会监控指定模块中的参数的优化器状态, 默认为false。需要在TrainerMon构造函数正确指定opt_ty. 目前只支持megatron的混合精度优化器以及megatron的分布式优化器。 Deepspeed的分布式优化器实现暂不支持。 | |"wg_distribution"| 可选 | 若为true则会监控指定模块的参数梯度, 默认为false。 | |"alert"| 必选 | · "rules": 指定自动报警的异常检测机制及其相应的阈值。目前实现的异常检测是AnomalyTurbulence。 如果统计标量超出历史均值的指定浮动范围(threshold指定, 0.5意味着上浮或者下浮50%)则在控制台打印报警信息。
· "inform": 自动报警需要的配置,若想关闭自动报警删掉inform的配置即可。其中"recipient"指定自动报警的通知方式,可选值为"database"或"email",默认为"database"。
- 若"recipient"为"database",则需要指定"connection_str"字段,即数据库的连接URL,默认为{"recipient":"database", "connection_str": "mysql+pymysql://username:password@host:port/database"},若有特殊字符需要转义。
- 若"recipient"为"email",则需要指定"send_email_address"-发送方邮箱地址,"receive_email_address"-接收方邮箱地址,"send_email_username"-发送方邮箱用户名,"send_email_password"-发送方邮箱密码,"smtp_server"-发送方邮箱对应的SMTP服务器,"smtp_port"-发送方邮箱对应的SMTP端口号。默认为:
{"recipient":"email", send_email_address": "sender@huawei.com", "receive_email_address": "receiver@huawei.com", "send_email_username": "username", "send_email_password": "******", "smtp_server": "smtpscn.huawei.com", "smtp_port": "587"}| -|"cc_distribution"| 可选 | 其中“enable”字段控制开关;需要监控通信算子时,务必尽量早地实例化`TrainerMon`, 因为监控通过劫持原始func后挂hook实现,部分加速库初始化时会保存原始function,避免监控失效。“cc_codeline”字段指定监控的代码行,如:`train.py\\[23\\]`,默认为空列表,不特别指定;"cc_pre_hook"字段控制是否监控通信前的数据; "cc_log_only"为true时,仅记录调用到的算子及其调用栈, 不监控通信的输入输出| +|"cc_distribution"| 可选 | 其中"enable"字段控制通信监控模块的开关;需要监控通信算子时,务必尽量早地实例化`TrainerMon`, 因为监控通过劫持原始func后挂hook实现,部分加速库初始化时会保存原始function,避免监控失效。"cc_codeline"字段指定监控的代码行,如:`train.py\\[23\\]`,默认为空列表,不特别指定;"cc_pre_hook"字段控制是否监控通信前的数据; 模块会在第二个optimize.step之前打印通信日志,包括通信api的调用栈、输入dtype、通信group。 "cc_log_only"为true时,仅打印日志,不监控通信的输入输出,并在打印后中断训练。可以根据通信日志设置"cc_codeline",规避与训练过程不相关的通信,比如一些时间、metrics的同步。| |"ops"| 可选 |与ur_distribution、xy_distribution、mv_distribution、wg_distribution、mg_direction、cc_distribution配合,监控所选张量的min、max、norm、zeros值。其中,zeros代表监控所选张量的元素小于eps的比例,id代表监控所选的非张量本身,默认为[]。| |"eps"| 可选 |若ops里包含"zeros"则需要配置,默认为1e-8。| diff --git a/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py b/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py index fad007fe35..4e2d5e175e 100644 --- a/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py +++ b/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py @@ -180,12 +180,33 @@ def create_async_callback_func(context, ops, args, prefix): catch_data(context, ops, args, prefix) return store_data +def get_tensor_dtype(args): + dtypes = [] + for arg in args: + if isinstance(arg, torch.Tensor): + dtypes.append(arg.dtype) + else: + dtypes.append(None) + return dtypes + +def get_group_members(args): + group = None + for arg in args: + if isinstance(arg, dist.ProcessGroup): + group = arg + if group is None: + group = dist.GroupMember.WORLD + return dist.get_process_group_ranks(group) + def create_hooks(context, 
monitor): def cc_log_hook(module, args, kwargs): + all_args = args + tuple(kwargs.values()) + dtypes = '|'.join([str(i) if i else '' for i in get_tensor_dtype(all_args)]) stack = ';'.join(get_callstack()) - monitor.cc_logged_stack[module.op_name_].add(stack) + group_members = '|'.join([str(i) for i in get_group_members(all_args)]) + monitor.cc_logged_stack[module.op_name_].add(';'.join([dtypes, group_members, stack])) return def cc_pre_hook(module, args, kwargs): @@ -214,8 +235,8 @@ def create_hooks(context, monitor): if (dist.is_initialized() and dist.get_rank() not in monitor.module_rank_list and monitor.module_rank_list != []): return [pre_hooks, hooks] + pre_hooks.append(cc_log_hook) if monitor.cc_log_only: - pre_hooks.append(cc_log_hook) return [pre_hooks, hooks] if monitor.cc_pre_hook: diff --git a/debug/accuracy_tools/kj600/kj600/module_hook.py b/debug/accuracy_tools/kj600/kj600/module_hook.py index 8043c5671c..3b600b2b7f 100644 --- a/debug/accuracy_tools/kj600/kj600/module_hook.py +++ b/debug/accuracy_tools/kj600/kj600/module_hook.py @@ -253,6 +253,7 @@ class TrainerMon: if self.print_struct and not all(value == {} for value in self.module_struct.values()) and not self.struct_printed: self._smallest_rank_print("> module struct:") self._smallest_rank_print(json.dumps(self.module_struct, indent=4)) + self.struct_printed = True if not self.cc_log_only: raise Exception("exit after first step when print model struct") if self.cc_log_only and context.step > 0: diff --git a/debug/accuracy_tools/kj600/kj600/unittest/cc_utils.py b/debug/accuracy_tools/kj600/kj600/unittest/cc_utils.py new file mode 100644 index 0000000000..aa1ff688ec --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/cc_utils.py @@ -0,0 +1,83 @@ +import os +from functools import partial +import torch +from torch import distributed as dist +from torch import nn +try: + import torch_npu + BACKEND = 'hccl' + DEVICE = 'npu' +except: + BACKEND = 'nccl' + DEVICE = 'cuda' + +from kj600.features 
import square_sum, get_max, get_min, get_zeros +from kj600.module_hook import CommunicationContext + + +OP_FUNCS = { + "min": get_min, + "max": get_max, + "norm": square_sum, + "zeros": partial(get_zeros, eps=1e-8) +} + +def ddp_setup(rank, world_size): + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12346" + dist.init_process_group(backend=BACKEND, rank=rank, world_size=world_size) + +def reset_context(context): + if isinstance(context, CommunicationContext): + context.reset() + elif isinstance(context, dict): + for op, v in context.items(): + v.reset() + +def wrap_reset(func): + def reset_and_test(*args, **kwargs): + print(f"testing {func.__name__}") + reset_context(args[0]) + res = func(*args, **kwargs) + return res + + return reset_and_test + +def assert_empty(data): + assert len(data) == 0, f'data is not empty as expected' + +def assert_nonempty(data): + assert len(data) != 0, f'data is empty' + +def assert_equal(a, b, rank, op_name=None, tag=None): + if a.dim() == 0: + assert a==b, f'inequal in rank {rank}: {a}, {b}, {op_name}, {tag}' + else: + assert torch.equal(a,b), f'inequal in rank {rank}: {a},{b}' + +def assert_inequal(a, b, rank): + if a.dim() == 0: + assert a!=b, f'equal in rank {rank}: {a},{b}' + else: + assert not torch.equal(a,b), f'equal in rank {rank}: {a},{b}' + +def assert_context(data, src, rank): + if len(src) == 0: + assert_empty(data) + else: + assert_nonempty(data) + + for op_name, tensors in data.items(): + for tag, tensor in tensors.items(): + prefix, idx = tag.split('_') + idx = int(idx) + assert_equal(tensor, OP_FUNCS[op_name](src[prefix][idx]), rank, op_name, tag) + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + self.layer = nn.Linear(2,2) + + def forward(self, x): + return self.layer(x) \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/config_basic_functions.json b/debug/accuracy_tools/kj600/kj600/unittest/config_basic_functions.json new 
file mode 100644 index 0000000000..6ce01d653d --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/config_basic_functions.json @@ -0,0 +1,17 @@ +{ + "targets": { + "fc": {"input": "tuple[1]:0", "output": "tensor", "input_grad": "tuple[1]:0", "output_grad": "tuple[1]:0"} + }, + "module_ranks": [], + "ur_distribution": true, + "xy_distribution": true, + "mv_distribution": true, + "wg_distribution": true, + "mg_direction": true, + "cc_distribution": {"enable":true, "cc_codeline":[]}, + "alert": { + "rules": [{"rule_name": "AnomalyTurbulence", "args": {"threshold": 0.5}}] + }, + "eps": 1e-8, + "ops": ["min", "max", "norm", "zeros", "id"] +} \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/config_cc.json b/debug/accuracy_tools/kj600/kj600/unittest/config_cc.json new file mode 100644 index 0000000000..a4667ce6fe --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/config_cc.json @@ -0,0 +1,7 @@ +{ + "targets": { + "foo": {} + }, + "cc_distribution": {"enable": true, "cc_pre_hook":true}, + "ops":["max","min","norm","zeros"] +} \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/config_cc_codeline_ranks.json b/debug/accuracy_tools/kj600/kj600/unittest/config_cc_codeline_ranks.json new file mode 100644 index 0000000000..720fbb9dd0 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/config_cc_codeline_ranks.json @@ -0,0 +1,8 @@ +{ + "targets": { + "foo": {} + }, + "cc_distribution": {"enable": true, "cc_codeline":["kj600/unittest/test_cc_codeline_ranks.py\\[19\\]"]}, + "module_ranks": [1], + "ops":["max","min","norm","zeros"] +} \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/config_cc_logonly.json b/debug/accuracy_tools/kj600/kj600/unittest/config_cc_logonly.json new file mode 100644 index 0000000000..51e619fc2d --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/config_cc_logonly.json @@ -0,0 +1,8 @@ +{ + "targets": { + "foo": {} + }, + 
"cc_distribution": {"enable": true, "cc_log_only": true}, + "module_ranks": [0,1], + "ops":["max","min","norm","zeros"] +} \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/expected_cc_log.json b/debug/accuracy_tools/kj600/kj600/unittest/expected_cc_log.json new file mode 100644 index 0000000000..8f2edd7ecd --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/expected_cc_log.json @@ -0,0 +1,20 @@ +{ + "all_gather": [ + [ + "|torch.float32||", + "0|1", + "/home/jovyan/workspace/kj_dev/kj600/unittest/test_cc_log_only.py[18] test_all_gather", + "/home/jovyan/workspace/kj_dev/kj600/unittest/test_cc_log_only.py[40] main", + "[1] " + ] + ], + "all_reduce": [ + [ + "torch.float32|||", + "0|1", + "/home/jovyan/workspace/kj_dev/kj600/unittest/test_cc_log_only.py[23] test_all_reduce", + "/home/jovyan/workspace/kj_dev/kj600/unittest/test_cc_log_only.py[41] main", + "[1] " + ] + ] +} \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_anomaly_inform.py b/debug/accuracy_tools/kj600/kj600/unittest/test_anomaly_inform.py new file mode 100644 index 0000000000..1ad76b919e --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_anomaly_inform.py @@ -0,0 +1,26 @@ +import uuid +import unittest + +from kj600.anomaly_inform import AnomalyInformFactory + + +class TestAnomalyInform(unittest.TestCase): + def test_database_inform(self): + inform_args = {"inform": {"recipient": "database", "connection_str": "mysql+pymysql://username:password@host:port/database"}} + anomaly_inform = AnomalyInformFactory.create_informer(**inform_args["inform"]) + exception_message = '\x1b[93m> Rule AnomalyTurbulence reports anomaly signal in language_model.encoder.layers.0.self_attention.query_key_value.weight/0/exp_avg_sq_min at step 49.\x1b[0m' + job_id = str(uuid.uuid4()) + anomaly_inform.run(exception_message, job_id) + + def test_email_inform(self): + inform_args = {"inform": {"recipient": "email", 
"send_email_address": "xueyuqing@huawei.com", "receive_email_address": "xueyuqing@huawei.com", + "send_email_username": "x30021831", "send_email_password": "********", + "smtp_server": "smtpscn.huawei.com", "smtp_port": "587"}} + anomaly_inform = AnomalyInformFactory.create_informer(**inform_args["inform"]) + exception_message = '\x1b[93m> Rule AnomalyTurbulence reports anomaly signal in language_model.encoder.layers.0.self_attention.query_key_value.weight/0/exp_avg_sq_min at step 49.\x1b[0m' + job_id = str(uuid.uuid4()) + anomaly_inform.run(exception_message, job_id) + + +if __name__ == "__main__": + unittest.main() diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_basic_functions.py b/debug/accuracy_tools/kj600/kj600/unittest/test_basic_functions.py new file mode 100644 index 0000000000..b7cdd3385b --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_basic_functions.py @@ -0,0 +1,149 @@ +import unittest +import shutil +import torch +import json +import os +try: + import torch_npu + device = torch.device('npu:0') +except ModuleNotFoundError: + device = torch.device('cpu') +from kj600.module_hook import TrainerMon + +from tensorboard.backend.event_processing.event_accumulator import EventAccumulator + +class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.fc = torch.nn.Linear(784, 2) + self.relu = torch.nn.ReLU() + + def forward(self, x): + return self.relu(self.fc(x)) + +class ToyDataset(torch.utils.data.Dataset): + def __init__(self): + self.data = torch.randn(16, 784, requires_grad=True) + self.labels = torch.randint(low=0, high=8, size=(16,)) + def __len__(self): + return len(self.labels) + def __getitem__(self, idx): + return self.data[idx].to(device), self.labels[idx].to(device) +def get_file_path(): + output_dir = os.environ.get("KJ600_OUTPUT_DIR") + for root1, dirs, files in os.walk(output_dir): + for root2, dir, file in os.walk(os.path.join(root1, dirs[-1])): + return os.path.join(root2, file[0]) + +def 
get_config(): + os.environ["KJ600_OUTPUT_DIR"] = "./test_kj600_output" + with open("config_basic_functions.json", 'r') as file: + config_test = json.load(file) + return config_test +def get_tensorbaord(event_file_path): + tensorboard = EventAccumulator(event_file_path) + tensorboard.Reload() + tags = tensorboard.Tags() + scalers_tag = [] + for tag in tags['scalars']: + tag = tag.split('/') + scalers_tag.append(tag[1]) + images_tag = [] + for tag in tags['images']: + tag = tag.split('/') + images_tag.append(tag[1]) + return scalers_tag, images_tag + +def clean_output(): + folder_path = os.environ.get("KJ600_OUTPUT_DIR") + if os.path.exists(folder_path): + shutil.rmtree(folder_path) + +def train(): + model = Model().to(device=device) + hooker = TrainerMon('config_basic_functions.json', False, + opt_ty="Megatron_Float16OptimizerWithFloat16Params") # or opt_ty=Megatron_DistributedOptimizer + hooker.hook_modules(model=model, grad_acc_steps=1) + + train_ds = ToyDataset() + train_loader = torch.utils.data.DataLoader(train_ds, shuffle=True, batch_size=2) + + optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001) + + for (inputs, targets) in train_loader: + optimizer.zero_grad() + # inputs and param torch.float32 -> torch.float16 + inputs = inputs.half() + for param in model.parameters(): + param.data = param.data.half() + # outputs torch.float32 + outputs = model(inputs) + output = outputs[0] + targets = targets.float() + # loss torch.float16 -> torch.float32 + loss = torch.nn.functional.cross_entropy(output, targets) + + loss.backward() + optimizer.step() + +class TestKj600(unittest.TestCase): + def __init__(self, method_name: str) -> None: + super(TestKj600, self).__init__(method_name) + self.config_test = get_config() + self.event_file_path = None + self.scalers_tag = None + self.images_tag = None + + @classmethod + def setUpClass(cls): + train() + + def setUp(self): + self.config_test = get_config() + self.event_file_path = get_file_path() + self.scalers_tag, 
self.images_tag = get_tensorbaord(self.event_file_path) + + def test_ops(self): + if self.config_test["ops"]: + for op in self.config_test.get("ops"): + if op == "id": + assert any(op in item for item in self.scalers_tag) == self.config_test.get('mg_direction'), f"{op} in ops did not take effect" + else: + assert any(op in item for item in self.scalers_tag), f"{op} in ops did not take effect" + print("ops has taken effect") + + def test_ur_distribution(self): + if self.config_test.get("ur_distribution"): + assert any('adam_update' in item for item in self.images_tag) and any( + 'adam_ratio' in item for item in self.images_tag), "ur_distribution did not take effect" + print("ur_distribution has taken effect") + + def test_xy_distribution(self): + if self.config_test.get("xy_distribution"): + assert any('input' in item for item in self.scalers_tag) and any( + 'output' in item for item in self.scalers_tag), "xy_distribution did not take effect" + print("xy_distribution has taken effect") + + def test_mv_distribution(self): + if self.config_test.get("mv_distribution"): + assert any('exp_avg' in item for item in self.scalers_tag) and any( + 'exp_avg_sq' in item for item in self.scalers_tag), "mv_distribution did not take effect" + print("mv_distribution has taken effect") + + def test_mg_direction(self): + if self.config_test.get("mg_direction"): + assert any('mg_direction' in item for item in self.scalers_tag), "mg_direction did not take effect" + print("mg_direction has taken effect") + + def test_wg_distribution(self): + if self.config_test.get("wg_distribution"): + assert any('weight' in item for item in self.scalers_tag), "wg_distribution did not take effect" + print("wg_distribution has taken effect") + + @classmethod + def tearDownClass(cls) -> None: + clean_output() + + +if __name__ == "__main__": + unittest.main() diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_cc.py b/debug/accuracy_tools/kj600/kj600/unittest/test_cc.py new file mode 100644 index 
0000000000..b5e92417a4 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_cc.py @@ -0,0 +1,260 @@ +import sys +sys.path.append(".") +import time +import torch +from torch import nn +from torch import distributed as dist +import torch.multiprocessing as mp +from kj600.module_hook import TrainerMon +from kj600.unittest.cc_utils import * + +DEBUG = False +DIM = 2 +DTYPE = torch.float16 + +# 采集数据正确 +# 通信结果正确 + +def test_broadcast(context, rank, async_op): + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + local_a = a.clone() + src = 0 + work = dist.broadcast(a, src, dist.group.WORLD, async_op) + if work: + work.wait() + context.aggregate() + if rank == src: + assert_context(context.data, {'pre':[local_a], 'post':[a]}, rank) + assert torch.equal(local_a, a), f"{local_a}, {a}" + else: + src_tensor = torch.tensor([src+1, src+1], dtype=DTYPE, device=f'{DEVICE}:{rank}') + assert_context(context.data, {'pre': [local_a], 'post':[src_tensor]}, rank) + assert_equal(src_tensor, a, rank) + +@wrap_reset +def test_gather(context, rank, world_size, async_op): + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + dst = 0 + if rank == dst: + data = [torch.zeros_like(a) for _ in range(world_size)] + else: + data = None + work = dist.gather(a, data, dst, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + if rank == dst: + assert_context(context.data, {'pre':[a, torch.zeros(world_size, 2, dtype=DTYPE)], 'post':[a, torch.stack(data)]}, rank) + for i in range(world_size): + local_a = torch.tensor([i+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + assert_equal(data[i], local_a, rank) + + +@wrap_reset +def test_all_gather(context, rank, world_size, async_op): + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + data = [torch.zeros_like(a, dtype=DTYPE) for _ in range(world_size)] + work = dist.all_gather(data, a, group=dist.group.WORLD, async_op=async_op) + if 
work: + work.wait() + context.aggregate() + assert_context(context.data, {'pre':[torch.zeros(world_size, DIM, dtype=DTYPE), a], 'post':[torch.stack(data), a]}, rank) + assert_equal(data[rank], a, rank) + +@wrap_reset +def test_all_gather_into_tensor(context, rank, world_size, async_op): + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + # concatenation + data = torch.zeros(world_size * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + res = torch.tensor([[i+1] for i in range(world_size)], dtype=DTYPE, device=f'{DEVICE}:{rank}').repeat(1, DIM) + work = dist.all_gather_into_tensor(data, a, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + assert_context(context.data, {'pre': [torch.zeros(world_size * DIM, dtype=DTYPE), a], 'post': [data, a]}, rank) + assert_equal(data, res.flatten(), rank) + + context.reset() + # concatenation + data = torch.zeros(world_size, DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + work = dist.all_gather_into_tensor(data, a, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + + context.aggregate() + assert_context(context.data, {'pre': [torch.zeros(world_size, DIM, dtype=DTYPE), a], 'post': [data, a]}, rank) + assert_equal(data, res, rank) + +@wrap_reset +def test_reduce(context, rank, world_size, async_op): + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + local_a = a.clone() + dst = 0 + work = dist.reduce(a, dst, op=dist.ReduceOp.SUM, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + total = sum([i+1 for i in range(world_size)]) + res = torch.tensor([total] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + if rank == dst: + assert_context(context.data, {'pre':[local_a], 'post':[res]}, rank) + assert_equal(res, a, rank) + else: + assert_context(context.data, {'pre':[a], 'post':[a]}, rank) + assert_equal(local_a, a, rank) + +@wrap_reset +def test_all_reduce(context, rank, world_size, async_op): 
+ repeat = 2 + for _ in range(repeat): # test aggregate + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + local_a = a.clone() + if rank == 0: + time.sleep(6) + work = dist.all_reduce(a, op=dist.ReduceOp.SUM, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + total = sum([i+1 for i in range(world_size)]) + res = torch.tensor([total] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + assert_context(context.data, {'pre': [local_a.repeat(repeat)],'post': [res.repeat(repeat)]}, rank) + assert_equal(res, a, rank) + + +@wrap_reset +def test_reduce_scatter(context, rank, world_size, async_op): + a = torch.tensor([rank+1, rank+1], dtype=DTYPE, device=f'{DEVICE}:{rank}') + output = torch.zeros_like(a) + data = [a*(i+1) for i in range(world_size)] + work = dist.reduce_scatter(output, data, op=dist.ReduceOp.SUM, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + total = sum([i+1 for i in range(world_size)]) + res = (rank+1) * torch.tensor([total, total], dtype=DTYPE, device=f'{DEVICE}:{rank}') + assert_context(context.data,{'pre': [torch.zeros_like(a), torch.stack(data)], 'post':[output, torch.stack(data)]}, rank) + assert_equal(res, output, rank) + + +@wrap_reset +def test_reduce_scatter_tensor(context, rank, world_size, async_op): + a = torch.tensor([rank+1] * DIM * world_size, dtype=DTYPE, device=f'{DEVICE}:{rank}') + output = torch.zeros(DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + work = dist.reduce_scatter_tensor(output, a, op=dist.ReduceOp.SUM, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + total = sum([i+1 for i in range(world_size)]) + res = torch.tensor([total] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + assert_context(context.data,{'pre': [torch.zeros_like(a, dtype=DTYPE, device=f'{DEVICE}:{rank}'), a], 'post':[output, a]}, rank) + assert_equal(res, output, rank) + +@wrap_reset +def test_scatter(context, rank, 
world_size, async_op): + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + local_a = a.clone() + src = 0 + if rank == src: + scatter_list = [10*torch.tensor([i+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') for i in range(world_size)] + else: + scatter_list = None + work = dist.scatter(a, scatter_list, src, group=dist.group.WORLD, async_op=async_op) + if work: + work.wait() + context.aggregate() + if rank == src: + assert_context(context.data, {'pre': [local_a, torch.stack(scatter_list)], 'post': [a, torch.stack(scatter_list)]}, rank) + else: + assert_context(context.data, {'pre': [local_a], 'post': [a]}, rank) + assert_equal(a, 10*torch.tensor([(rank+1)] * DIM ,dtype=DTYPE, device=f'{DEVICE}:{rank}'), rank) + +## point2point +@wrap_reset +def test_send_recv(context, rank, world_size, async_op): + """send from rank 0 to rank world_size-1""" + if world_size<2: + return + a = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + local_a = a.clone() + src = 0 + dst = world_size-1 + if rank == src: + dist.send(a, dst, group=dist.group. + WORLD) + context['send'].aggregate() + assert_context(context['send'].data, {'pre': [local_a], 'post': [a]}, rank) + assert_equal(a, local_a, rank) + if rank == dst: + src_tensor = torch.tensor([src+1, src+1], dtype=DTYPE, device=f'{DEVICE}:{rank}') + dist.recv(a, src, group=dist.group. 
+ WORLD) + context['recv'].aggregate() + assert_context(context['recv'].data, {'pre':[local_a], 'post': [a]}, rank) + assert_equal(a, src_tensor, rank) + +@wrap_reset +def test_batch_isend_irecv(context, rank, world_size, async_op): + send_tensor = torch.tensor([rank+1] * DIM, dtype=DTYPE, device=f'{DEVICE}:{rank}') + recv_tensor = torch.zeros_like(send_tensor) + send_op = dist.P2POp(dist.isend, send_tensor, (rank + 1)%world_size) + recv_op = dist.P2POp(dist.irecv, recv_tensor, (rank - 1 + world_size)%world_size) + reqs = dist.batch_isend_irecv([send_op, recv_op]) + for req in reqs: + req.wait() + context.aggregate() + assert_context(context.data, {'pre': [torch.stack([send_tensor, torch.zeros_like(send_tensor)])], 'post':[torch.stack([send_tensor, recv_tensor])]}, rank) + assert_equal( recv_tensor, torch.tensor([(rank - 1 + world_size)%world_size + 1] * DIM, device=f'{DEVICE}:{rank}'), rank) + +def test_all(monitor, rank, world_size, async_op): + cc_context = monitor.cc_context + + test_send_recv(cc_context, rank, world_size, async_op) + test_broadcast(cc_context['broadcast'], rank, async_op) + test_gather(cc_context['gather'], rank, world_size, async_op) + test_all_gather(cc_context['all_gather'], rank, world_size, async_op) + test_all_gather_into_tensor(cc_context['all_gather_into_tensor'], rank, world_size, async_op) + test_reduce(cc_context['reduce'], rank, world_size, async_op) + test_all_reduce(cc_context['all_reduce'], rank, world_size, async_op) + test_reduce_scatter(cc_context['reduce_scatter'], rank, world_size, async_op) + test_reduce_scatter_tensor(cc_context['reduce_scatter_tensor'], rank, world_size, async_op) + test_scatter(cc_context['scatter'], rank, world_size, async_op) + test_batch_isend_irecv(cc_context['batch_isend_irecv'], rank, world_size, async_op) + + +def main(rank, world_size): + + ddp_setup(rank, world_size) + if rank == 0 and DEBUG: + import debugpy + debugpy.listen(5678) + debugpy.wait_for_client() + steps = 2 + + net = Model() + 
monitor = TrainerMon("kj600/unittest/config_cc.json", opt_ty="Megatron_Float16OptimizerWithFloat16Params") + # monitor = None + # monitor.hook_optimizer() # to enable tb + optimizer = torch.optim.Adam(net.parameters()) + for step in range(steps): + print('setp: ', step) + test_all(monitor, rank, world_size, False) + test_all(monitor, rank, world_size, True) + optimizer.step() + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + self.layer = nn.Linear(2,2) + + def forward(self, x): + return self.layer(x) + +if __name__ == '__main__': + if len(sys.argv)>1: + DEBUG = sys.argv[1] + world_size=4 + torch.manual_seed(1234) + mp.spawn(main, args=(world_size,), nprocs=world_size) + + \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_cc_codeline_ranks.py b/debug/accuracy_tools/kj600/kj600/unittest/test_cc_codeline_ranks.py new file mode 100644 index 0000000000..d635441e15 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_cc_codeline_ranks.py @@ -0,0 +1,52 @@ +import sys +sys.path.append(".") +import torch +from torch import distributed as dist +import torch.multiprocessing as mp +from kj600.module_hook import TrainerMon +from kj600.unittest.cc_utils import * + +@wrap_reset +def test_all_gather(context, rank, target_rank, world_size, async_op): + a = torch.tensor([rank+1, rank+1], dtype=torch.float32, device=f'{DEVICE}:{rank}') + data = [torch.empty_like(a) for _ in range(world_size)] + dist.all_gather(data, a, group=dist.group.WORLD, async_op=async_op) + assert_context(context.data, {}, rank) + +@wrap_reset +def test_all_reduce(context, rank, target_rank, world_size, async_op): + a = torch.tensor([rank+1, rank+1], dtype=torch.float32, device=f'{DEVICE}:{rank}') + dist.all_reduce(a, op=dist.ReduceOp.SUM, group=dist.group.WORLD, async_op=async_op) + total = sum([i+1 for i in range(world_size)]) + sum_reduced = torch.tensor([total, total], dtype=torch.float32, device=f'{DEVICE}:{rank}') + 
context.aggregate() + if rank in target_rank: + assert_context(context.data, {"post": [sum_reduced]}, rank) + else: + assert_context(context.data, {}, rank) + +def main(rank, world_size): + + ddp_setup(rank, world_size) + steps = 2 + async_op = False + + net = Model() + monitor = TrainerMon("kj600/unittest/config_cc_codeline_ranks.json") + target_rank = monitor.module_rank_list + # monitor = None + # monitor.hook_optimizer() # to enable tb + optimizer = torch.optim.Adam(net.parameters()) + cc_context = monitor.cc_context + for step in range(steps): + print('setp: ', step) + test_all_gather(cc_context['all_gather'], rank, target_rank, world_size, async_op) + test_all_reduce(cc_context['all_reduce'], rank, target_rank, world_size, async_op) + optimizer.step() + +if __name__ == '__main__': + world_size=2 + torch.manual_seed(1234) + mp.spawn(main, args=(world_size,), nprocs=world_size) + + \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_cc_log_only.py b/debug/accuracy_tools/kj600/kj600/unittest/test_cc_log_only.py new file mode 100644 index 0000000000..d7508d4af5 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_cc_log_only.py @@ -0,0 +1,55 @@ +import os +import sys +sys.path.append(".") +import json +import torch +from torch import distributed as dist +import torch.multiprocessing as mp +from kj600.module_hook import TrainerMon +from kj600.unittest.cc_utils import * + + +with open(os.path.join(os.path.dirname(__file__), 'expected_cc_log.json')) as f: + EXPECTED = json.load(f) + +def test_all_gather(context, rank, world_size, async_op): + a = torch.tensor([rank+1, rank+1], dtype=torch.float32, device=f'{DEVICE}:{rank}') + data = [torch.empty_like(a) for _ in range(world_size)] + dist.all_gather(data, a, group=dist.group.WORLD, async_op=async_op) + assert_context(context.data, {}, rank) + +def test_all_reduce(context, rank, world_size, async_op): + a = torch.tensor([rank+1, rank+1], dtype=torch.float32, 
device=f'{DEVICE}:{rank}') + dist.all_reduce(a, op=dist.ReduceOp.SUM, group=dist.group.WORLD, async_op=async_op) + assert_context(context.data, {}, rank) + + +def main(rank, world_size): + ddp_setup(rank, world_size) + steps = 3 + async_op = False + + net = Model() + monitor = TrainerMon("kj600/unittest/config_cc_logonly.json") + monitor.hook_optimizer() # to enable tb + optimizer = torch.optim.Adam(net.parameters()) + cc_context = monitor.cc_context + try: + for step in range(steps): + print('step: ', step) + test_all_gather(cc_context['all_gather'], rank, world_size, async_op) + test_all_reduce(cc_context['all_reduce'], rank, world_size, async_op) + optimizer.step() + except Exception as e: + assert step == 1 + assert e.__str__() == "exit after first step when print cc stack", e + for k in EXPECTED.keys(): + assert [';'.join(stack) for stack in EXPECTED[k]] == list(monitor.cc_logged_stack[k]) + + +if __name__ == '__main__': + world_size=2 + torch.manual_seed(1234) + mp.spawn(main, args=(world_size,), nprocs=world_size) + + \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_database.py b/debug/accuracy_tools/kj600/kj600/unittest/test_database.py new file mode 100644 index 0000000000..a9046d9c07 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_database.py @@ -0,0 +1,42 @@ +import unittest +import uuid +from datetime import datetime +from unittest import TestCase + +from sqlalchemy import inspect + +from kj600.database import Database, ExceptionMessage + + +class TestDatabase(TestCase): + def __init__(self, method_name: str): + super(TestDatabase, self).__init__(method_name) + self.db = Database('mysql+pymysql://username:password@host:port/database') + + def test_create_table(self): + self.db.create_table() + inspect_ = inspect(self.db.engine) + table_names = inspect_.get_table_names() + print(table_names) + self.assertIn("exception_message", table_names) + + def test_insert_batch(self): + self.db.create_table() 
+ job_id = str(uuid.uuid4()) + print(job_id) + save_list = [] + exception_message_list = [ + '> Rule AnomalyTurbulence reports anomaly signal in language_model.encoder.layers.0/1/input_zeros at step 1.', + '> Rule AnomalyTurbulence reports anomaly signal in language_model.encoder.layers.0.input_norm.weight/0/exp_avg_min at step 2.', + '> Rule AnomalyTurbulence reports anomaly signal in language_model.encoder.layers.0.input_norm.weight/1/exp_avg_min at step 2.'] + for exception_message in exception_message_list: + item = {'job_id': job_id, 'message': exception_message, 'create_time': datetime.now()} + save_list.append(ExceptionMessage(**item)) + self.db.insert_batch(save_list) + find_by_job_id = self.db.find_by_job_id(job_id) + exception_messages = [item.message for item in find_by_job_id] + self.assertEqual(exception_messages, exception_message_list) + + +if __name__ == '__main__': + unittest.main() diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_features.py b/debug/accuracy_tools/kj600/kj600/unittest/test_features.py new file mode 100644 index 0000000000..bc8c6dd71a --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_features.py @@ -0,0 +1,33 @@ +import unittest +import torch +import torch.nn as nn +import torch_npu +from kj600.features import eff_rank + + +class TestFeatureCalculation(unittest.TestCase): + def test_effective_rank(self): + param = torch.randn(10, 10).npu() + rank = eff_rank(param) + self.assertTrue(rank.item() >= 1) + + def test_lambda_max(self): + pass + # input_dim = 10 + # hidden_dim = 100 + # output_dim = 1 + # num_samples = 100 + # X = torch.randn(num_samples, input_dim) + # network = nn.Sequential( + # nn.Linear(input_dim, hidden_dim), + # nn.ReLU(), + # nn.Linear(hidden_dim, output_dim) + # ) + # Y = network(X) + # Y.backward() + # for name, param in network.named_parameters(): + # lm = lambda_max(param) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git 
a/debug/accuracy_tools/kj600/kj600/unittest/test_module_hook.py b/debug/accuracy_tools/kj600/kj600/unittest/test_module_hook.py new file mode 100644 index 0000000000..f81312691d --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_module_hook.py @@ -0,0 +1,84 @@ +import sys +sys.path.append('./') +import argparse +import torch +try: + import torch_npu + device = torch.device('npu:0') +except ModuleNotFoundError: + device = torch.device('cpu') +import torch.nn.functional as F +from kj600.module_hook import TrainerMon # Modify PYTHONPATH to import TrainerMon +#from hook_api import reg_grad_hook, reg_grad_one_hook, reg_module_backward_hook, reg_module_forward_hook +#from torch.cuda.amp import GradScaler + +# from torch.npu.amp import GradScaler + + +# from ptdbg_ascend import PrecisionDebugger as PD +# from monitor import GradientMonitor + +# print(torch_npu.__version__) + +#debugger = PD(dump_path="./dump/", hook_name="dump", step=[1, 2, 3], enable_dataloader=False) +#debugger.configure_hook(mode="list", scope=["optim_Adam_step"], ) + +parser = argparse.ArgumentParser(prog="kj600 debug", description="kj600 sample code", epilog="") +parser.add_argument("-o", "--out_dir", type=str, default=".") +args = parser.parse_args() +DTYPE = torch.float32 + + +class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.fc = torch.nn.Linear(784, 10, dtype=DTYPE) + self.relu = torch.nn.ReLU() + + def forward(self, x): + return self.relu(self.fc(x).type(DTYPE)) + + +net = Model().to(device=device) + +config = { + "targets": { + "fc": {"input": "tuple[2]:0", "output": "tensor::"}, + "relu": {"input": "..", "output": ".."} + } +} +# reg_grad_hook(net, hook_factory=hook_factory, config=config) +# reg_grad_one_hook(net, hook=monitor_hook, config=config) +# net.fc.register_forward_hook(get_actv_hook("fc")) +# reg_module_forward_hook(net, module_fwd_hook, config) +# reg_module_backward_hook(net, module_bwd_hook, config) +optimizer = 
torch.optim.Adam(net.parameters(), lr=0.0001) + +hooker = TrainerMon('./kj600/unittest/config_1.json', opt_ty = 'Megatron_Float16OptimizerWithFloat16Params') +hooker.hook_modules(model=net, global_batch_size=2, dp=1, micro_batch_size=2, fwd_or_bkd=0, params_have_main_grad=False) +# hooker.hook_optimizer(optimizer) + + +class ToyDataset(torch.utils.data.Dataset): + def __init__(self): + self.data = torch.randn(16, 784, dtype=DTYPE, requires_grad=True) + self.labels = torch.randint(low=0, high=9, size=(16,)) + + def __len__(self): + return len(self.labels) + + def __getitem__(self, idx): + return self.data[idx].to(device), self.labels[idx].to(device) + +train_ds = ToyDataset() +train_loader = torch.utils.data.DataLoader(train_ds, shuffle=True, batch_size=2) + + +# scaler = GradScaler() +for (inputs, labels) in train_loader: + optimizer.zero_grad() + outputs = net(inputs) + loss = F.cross_entropy(outputs, labels) + + loss.backward() + optimizer.step() -- Gitee From cc37a3437777027ee3c0ea42f7b0f5efaad11c68 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Wed, 17 Jul 2024 16:38:50 +0800 Subject: [PATCH 004/106] add step&rank, fix md5..., fix save_grad_direction --- debug/accuracy_tools/grad_tool/grad_ms/global_context.py | 4 ++++ debug/accuracy_tools/grad_tool/grad_ms/hook.py | 4 ++-- debug/accuracy_tools/grad_tool/grad_ms/utils.py | 9 ++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index 233bfb9864..c2242ffe7c 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -14,6 +14,8 @@ class GlobalContext: _setting = { GradConst.LEVEL: GradConst.LEVEL0, GradConst.PARAM_LIST: None, + GradConst.STEP: None, + GradConst.RANK: None, GradConst.CURRENT_STEP: 0, GradConst.BOUNDS: [-1., 0., 1.], GradConst.OUTPUT_PATH: "./grad_stat" @@ -33,6 +35,8 @@ class 
GlobalContext: print_warn_log("Invalid level set in config yaml file, use L0 instead.") self._set_input_list(config_dict, GradConst.PARAM_LIST, str) self._set_input_list(config_dict, GradConst.BOUNDS, float) + self._set_input_list(config_dict, GradConst.STEP, int) + self._set_input_list(config_dict, GradConst.RANK, int) output_path = config_dict.get(GradConst.OUTPUT_PATH) if output_path: try: diff --git a/debug/accuracy_tools/grad_tool/grad_ms/hook.py b/debug/accuracy_tools/grad_tool/grad_ms/hook.py index e1f78d7dbf..bc68064117 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/hook.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/hook.py @@ -55,7 +55,7 @@ def hook_pynative_optimizer(opt, hook_input): level_adapted = get_adapted_level(hook_input.level) def new_construct(self, gradients): - cur_step = self.dump_step + cur_step = self.dump_step.value()[0] if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id): output_lines = [] for index, grad_value in enumerate(gradients): @@ -72,7 +72,7 @@ def hook_pynative_optimizer(opt, hook_input): write_csv(output_csv_path, output_lines, GradStatCsv.generate_csv_header(level_adapted, dummy_csv_input)) - self.assignadd(self.dump_step, self.global_step_increase_tensor) + self.assignadd(self.dump_step, self.global_step_increase_tensor) out = hook_input.func(gradients) return out diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py index 16d312039f..a24384268a 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/utils.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -7,15 +7,15 @@ from grad_tool.common.utils import print_warn_log, create_directory, change_mode level_adp = { "L0": { - "header": [GradConst.md5, GradConst.max, GradConst.min, GradConst.norm, GradConst.shape], + "header": [GradConst.MD5, GradConst.MAX, GradConst.MIN, GradConst.NORM, GradConst.SHAPE], "have_grad_direction": False }, "L1": { - "header": 
[GradConst.max, GradConst.min, GradConst.norm, GradConst.shape], + "header": [GradConst.MAX, GradConst.MIN, GradConst.NORM, GradConst.SHAPE], "have_grad_direction": True }, "L2": { - "header": [GradConst.distribution, GradConst.max, GradConst.min, GradConst.norm, GradConst.shape], + "header": [GradConst.DISTRIBUTION, GradConst.MAX, GradConst.MIN, GradConst.NORM, GradConst.SHAPE], "have_grad_direction": True }, } @@ -23,8 +23,7 @@ level_adp = { def save_grad_direction(param_name, grad, save_path): if not os.path.exists(save_path): create_directory(save_path) - param_grad = grad.clone().detach() - grad_direction_tensor = param_grad > 0 + grad_direction_tensor = grad > 0 grad_direction_ndarray = grad_direction_tensor.numpy() save_filepath = os.path.join(save_path, f"{param_name}.npy") -- Gitee From 93eb67e85fce23f43abcff7bf6dbc6b236e2124d Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 18 Jul 2024 10:54:54 +0800 Subject: [PATCH 005/106] codecheck fix --- .../api_accuracy_checker/common/config.py | 15 +++++++------ .../atat/pytorch/compare/acc_compare.py | 9 +++++--- .../atat/pytorch/debugger/debugger_config.py | 5 ++--- .../pytorch/debugger/precision_debugger.py | 16 +++++++------- .../compare/single_benchmark.py | 14 +++++++----- .../test/pytorch_ut/advisor/test_advisor.py | 11 +++++----- .../common/test_common_utils.py | 22 ++++++++++++++----- 7 files changed, 55 insertions(+), 37 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/config.py index 0aceb691b2..f92d6fd16a 100644 --- a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/config.py @@ -14,7 +14,14 @@ class Config: config = yaml.safe_load(file) self.config = {key: self.validate(key, value) for key, value in config.items()} - def validate(self, key, value): + def __getattr__(self, item): + return 
self.config[item] + + def __str__(self): + return '\n'.join(f"{key}={value}" for key, value in self.config.items()) + + @staticmethod + def validate(key, value): validators = { 'white_list': list, 'error_data_path': str, @@ -37,12 +44,6 @@ class Config: f"{', '.join(invalid_api)} is not in support_wrap_ops.yaml, please check the white_list") return value - def __getattr__(self, item): - return self.config[item] - - def __str__(self): - return '\n'.join(f"{key}={value}" for key, value in self.config.items()) - cur_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) yaml_path = os.path.join(cur_path, "config.yaml") diff --git a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py index 2d7bdcfff3..d042c853fb 100644 --- a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py @@ -29,7 +29,8 @@ from dataclasses import dataclass from atat.pytorch.compare.match import graph_mapping from atat.pytorch.compare.highlight import HighlightRules, get_header_index -from atat.pytorch.compare.npy_compare import compare_ops_apply, get_error_type, reshape_value, get_relative_err, get_error_message +from atat.pytorch.compare.npy_compare import compare_ops_apply, get_error_type, reshape_value, get_relative_err, \ + get_error_message from atat.pytorch.advisor.advisor import Advisor from atat.pytorch.common.log import logger from atat.core.common.utils import check_compare_param, add_time_with_xlsx, CompareException, CompareConst, \ @@ -799,8 +800,10 @@ def op_item_parse(item, op_name, index, item_list=[], top_bool=True): else: resolve_api_special_parameters(item, full_op_name, item_list) else: - for j in range(len(item)): - op_item_parse(item[j], full_op_name, j, item_list=item_list, top_bool=False) + # for j in range(len(item)): + # op_item_parse(item[j], full_op_name, j, item_list=item_list, top_bool=False) + for j, item_spec in 
enumerate(item): + op_item_parse(item_spec, full_op_name, j, item_list=item_list, top_bool=False) return item_list diff --git a/debug/accuracy_tools/atat/pytorch/debugger/debugger_config.py b/debug/accuracy_tools/atat/pytorch/debugger/debugger_config.py index 6f2bfe8551..b28d3b6e1b 100644 --- a/debug/accuracy_tools/atat/pytorch/debugger/debugger_config.py +++ b/debug/accuracy_tools/atat/pytorch/debugger/debugger_config.py @@ -46,9 +46,8 @@ class DebuggerConfig: raise ValueError("backward_input must be configured when scope contains 'backward'") if Const.BACKWARD in self.scope[0]: self.is_forward_acl_dump = False - for index in range(len(self.scope)): - # Do this replace operation to let the acl backward dump can be done in forward hook. - self.scope[index] = self.scope[index].replace(Const.BACKWARD, Const.FORWARD) + for index, scope_spec in enumerate(self.scope): + self.scope[index] = scope_spec.replace(Const.BACKWARD, Const.FORWARD) self.backward_input[self.scope[index]] = self.backward_input_list[index] seed_all(self.seed, self.is_deterministic) diff --git a/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py index 140d829bed..d84d535b29 100644 --- a/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py @@ -46,6 +46,14 @@ class PrecisionDebugger: def instance(self): return self._instance + @staticmethod + def check_model_valid(model): + if not model or isinstance(model, torch.nn.Module): + return model + raise MsaccException( + MsaccException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" + ) + @classmethod def start(cls): instance = cls._instance @@ -72,14 +80,6 @@ class PrecisionDebugger: raise Exception("PrecisionDebugger instance is not created.") cls._instance.service.step() - @staticmethod - def check_model_valid(model): - if not model or isinstance(model, torch.nn.Module): - return model - 
raise MsaccException( - MsaccException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" - ) - def iter_tracer(func): def func_wrapper(*args, **kwargs): diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/compare/single_benchmark.py b/debug/accuracy_tools/atat/pytorch/free_benchmark/compare/single_benchmark.py index 85aa68f13b..e4fddba987 100644 --- a/debug/accuracy_tools/atat/pytorch/free_benchmark/compare/single_benchmark.py +++ b/debug/accuracy_tools/atat/pytorch/free_benchmark/compare/single_benchmark.py @@ -28,6 +28,14 @@ class SingleCompare: tensor[inf_or_nan_mask] = 1 return tensor + @staticmethod + def compare_float_seq(actual, golden): + return math.isclose(actual, golden) + + @staticmethod + def compare_other_seq(actual, golden): + return actual == golden + def compare_dict_seq(self, actual, golden): if len(actual) != len(golden): return False @@ -76,12 +84,6 @@ class SingleCompare: return False return True - def compare_float_seq(self, actual, golden): - return math.isclose(actual, golden) - - def compare_other_seq(self, actual, golden): - return actual == golden - def _cal_compare_metrics(self, actual, golden): diff_value = TorchC.subtract(actual, golden) diff_abs = TorchC.abs(diff_value) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/advisor/test_advisor.py b/debug/accuracy_tools/atat/test/pytorch_ut/advisor/test_advisor.py index 78e5b489e7..b3e9658a0d 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/advisor/test_advisor.py +++ b/debug/accuracy_tools/atat/test/pytorch_ut/advisor/test_advisor.py @@ -2,6 +2,7 @@ import difflib import os import shutil import unittest +import logging from unittest.mock import patch import pandas @@ -70,11 +71,11 @@ class TestAdvisor(unittest.TestCase): output_content = out_file.read().splitlines() result = list(difflib.unified_diff(standard_content, output_content, n=0)) if result: - print('\n\n-------------------------------------------------------------------------', flush=True) - 
print(f'[ERROR] {output_file.replace(self.output_path, "")} advisor summary are inconsistent.', - flush=True) - print('\n'.join(result), flush=True) - print('-------------------------------------------------------------------------', flush=True) + logging.basicConfig(level=logging.INFO) + logging.info('\n\n-------------------------------------------------------------------------') + logging.error(f'[ERROR] {output_file.replace(self.output_path, "")} advisor summary are inconsistent.') + logging.error('\n'.join(result)) + logging.info('\n\n-------------------------------------------------------------------------') self.has_error = True diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py index 16d0c0bc12..86ac58b2a5 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +++ b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py @@ -1,7 +1,13 @@ +import os +import json +import csv import unittest from unittest.mock import patch -from atat.pytorch.api_accuracy_checker.common.utils import * +from atat.core.common.utils import CompareException +from atat.core.common.file_check import create_directory +from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents, write_csv, check_need_convert, \ + check_object_type, check_file_or_directory_path, get_file_content_bytes, api_info_preprocess class TestUtils(unittest.TestCase): @@ -18,8 +24,11 @@ class TestUtils(unittest.TestCase): file_name = 'test.json' fd = os.open(file_name, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o644) - with os.fdopen(fd, 'w') as f: - json.dump(test_dict, f) + try: + with os.fdopen(fd, 'w') as f: + json.dump(test_dict, f) + finally: + os.close(fd) self.assertEqual(get_json_contents(file_name), test_dict) os.remove(file_name) @@ -57,8 +66,11 @@ class 
TestUtils(unittest.TestCase): def test_get_file_content_bytes(self): fd = os.open('test.txt', os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o644) - with os.fdopen(fd, 'w') as f: - f.write("Hello, World!") + try: + with os.fdopen(fd, 'w') as f: + f.write("Hello, World!") + finally: + os.close(fd) self.assertEqual(get_file_content_bytes('test.txt'), b"Hello, World!") os.remove('test.txt') -- Gitee From c4148dcc2f0098840ed22b36c05c84a32fac5695 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 18 Jul 2024 10:58:24 +0800 Subject: [PATCH 006/106] comment deleted --- debug/accuracy_tools/atat/pytorch/compare/acc_compare.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py index d042c853fb..8940bf8e37 100644 --- a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py @@ -800,8 +800,6 @@ def op_item_parse(item, op_name, index, item_list=[], top_bool=True): else: resolve_api_special_parameters(item, full_op_name, item_list) else: - # for j in range(len(item)): - # op_item_parse(item[j], full_op_name, j, item_list=item_list, top_bool=False) for j, item_spec in enumerate(item): op_item_parse(item_spec, full_op_name, j, item_list=item_list, top_bool=False) return item_list -- Gitee From d53ddae2caa9b1575c5f12ecd273da27e82dcde4 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 18 Jul 2024 11:24:34 +0800 Subject: [PATCH 007/106] utbug fix --- .../common/test_common_utils.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py index 86ac58b2a5..e49367fd1a 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +++ 
b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py @@ -24,11 +24,8 @@ class TestUtils(unittest.TestCase): file_name = 'test.json' fd = os.open(file_name, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o644) - try: - with os.fdopen(fd, 'w') as f: - json.dump(test_dict, f) - finally: - os.close(fd) + with os.fdopen(fd, 'w') as f: + json.dump(test_dict, f) self.assertEqual(get_json_contents(file_name), test_dict) os.remove(file_name) @@ -66,11 +63,8 @@ class TestUtils(unittest.TestCase): def test_get_file_content_bytes(self): fd = os.open('test.txt', os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o644) - try: - with os.fdopen(fd, 'w') as f: - f.write("Hello, World!") - finally: - os.close(fd) + with os.fdopen(fd, 'w') as f: + f.write("Hello, World!") self.assertEqual(get_file_content_bytes('test.txt'), b"Hello, World!") os.remove('test.txt') -- Gitee From cb99129e293c9a6669337380245929bc62264ed1 Mon Sep 17 00:00:00 2001 From: l30036321 Date: Fri, 19 Jul 2024 09:33:50 +0800 Subject: [PATCH 008/106] fix analyze torch size bug --- .../atat/core/data_dump/data_processor/base.py | 4 +++- .../core/data_dump/data_processor/pytorch_processor.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py b/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py index 208c053192..ba9bfaab3d 100644 --- a/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py @@ -142,9 +142,11 @@ class BaseDataProcessor: resutl_dict[k] = cls.recursive_apply_transform(arg, transform) cls._recursive_key_stack.pop() return resutl_dict - else: + elif args is not None: logger.warning(f"Data type {type(args)} is not supported.") return None + else: + return None def if_return_forward_new_output(self): return self._return_forward_new_output diff --git 
a/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py index cf3c5ebe58..6d84d9e1f4 100644 --- a/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py @@ -76,6 +76,10 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_stat.mean = torch._C._VariableFunctionsClass.mean(data_clone).item() tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item() return tensor_stat + + @staticmethod + def _analyze_torch_size(arg): + return {"type": "torch.Size", "value": list(arg)} @classmethod def get_special_types(cls): @@ -98,9 +102,6 @@ class PytorchDataProcessor(BaseDataProcessor): def analyze_element(self, element): return self.recursive_apply_transform(element, self.analyze_single_element) - def _analyze_torch_size(arg): - return {"type": "torch.Size", "value": list(arg)} - def _analyze_tensor(self, tensor, suffix): tensor_stat = self.get_stat_info(tensor) tensor_json = {} -- Gitee From 15bfd41bd434943d25bebd0cc037d406d6b734fa Mon Sep 17 00:00:00 2001 From: curry3 <485078529@qq.com> Date: Mon, 15 Jul 2024 20:55:07 +0800 Subject: [PATCH 009/106] =?UTF-8?q?=E3=80=90=E4=BC=98=E5=8C=96=E3=80=91?= =?UTF-8?q?=E8=A1=A5=E5=85=85atat=E7=9A=84ut=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/core/common/exceptions.py | 1 + .../test/core_ut/{ => common}/test_utils.py | 0 .../core_ut/data_dump/test_data_collector.py | 47 ++++++ .../atat/test/core_ut/data_dump/test_scope.py | 151 ++++++++++++++++++ .../pytorch_ut/functional/test_dump_module.py | 15 ++ .../atat/test/pytorch_ut/test_pt_config.py | 33 +++- .../atat/test/pytorch_ut/test_service.py | 59 +++++++ 7 files changed, 305 insertions(+), 1 deletion(-) rename debug/accuracy_tools/atat/test/core_ut/{ => common}/test_utils.py (100%) 
create mode 100644 debug/accuracy_tools/atat/test/core_ut/data_dump/test_data_collector.py create mode 100644 debug/accuracy_tools/atat/test/core_ut/data_dump/test_scope.py create mode 100644 debug/accuracy_tools/atat/test/pytorch_ut/functional/test_dump_module.py create mode 100644 debug/accuracy_tools/atat/test/pytorch_ut/test_service.py diff --git a/debug/accuracy_tools/atat/core/common/exceptions.py b/debug/accuracy_tools/atat/core/common/exceptions.py index f6b7c19ba3..df89699ce8 100644 --- a/debug/accuracy_tools/atat/core/common/exceptions.py +++ b/debug/accuracy_tools/atat/core/common/exceptions.py @@ -1,6 +1,7 @@ class CodedException(Exception): def __init__(self, code, error_info=''): super().__init__() + self.code = code self.error_info = self.err_strs.get(code) + error_info def __str__(self): diff --git a/debug/accuracy_tools/atat/test/core_ut/test_utils.py b/debug/accuracy_tools/atat/test/core_ut/common/test_utils.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/test_utils.py rename to debug/accuracy_tools/atat/test/core_ut/common/test_utils.py diff --git a/debug/accuracy_tools/atat/test/core_ut/data_dump/test_data_collector.py b/debug/accuracy_tools/atat/test/core_ut/data_dump/test_data_collector.py new file mode 100644 index 0000000000..625ab1337e --- /dev/null +++ b/debug/accuracy_tools/atat/test/core_ut/data_dump/test_data_collector.py @@ -0,0 +1,47 @@ +import unittest +from unittest.mock import patch, mock_open, MagicMock + +from atat.core.common.utils import Const +from atat.core.data_dump.data_collector import DataCollector +from atat.pytorch.debugger.debugger_config import DebuggerConfig +from atat.pytorch.pt_config import parse_json_config + + +class TestDataCollector(unittest.TestCase): + def setUp(self): + mock_json_data = { + "dump_path": "./ut_dump", + } + with patch("atat.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ + patch("atat.pytorch.pt_config.json.load", return_value=mock_json_data): + 
common_config, task_config = parse_json_config("./config.json", Const.STATISTICS) + config = DebuggerConfig(common_config, task_config, Const.STATISTICS, "./ut_dump", "L1") + self.data_collector = DataCollector(config) + + def test_update_data(self): + self.data_collector.config.task = Const.OVERFLOW_CHECK + self.data_collector.data_processor.has_overflow = True + with patch("atat.core.data_dump.json_writer.DataWriter.update_data", return_value=None): + result1 = self.data_collector.update_data("test message", "test1:") + self.assertEqual(result1, "test1:Overflow detected.") + + self.data_collector.data_processor.has_overflow = False + result2 = self.data_collector.update_data("test message", "test2:") + self.assertEqual(result2, "test2:No Overflow, OK.") + + self.data_collector.config.task = Const.STATISTICS + self.data_collector.data_processor.has_overflow = True + with patch("atat.core.data_dump.json_writer.DataWriter.update_data", return_value=None): + result3 = self.data_collector.update_data("test message", "test3") + self.assertEqual(result3, "test3") + + def test_pre_forward_data_collect(self): + self.data_collector.check_scope_and_pid = MagicMock(return_value=False) + self.data_collector.is_inplace = MagicMock(return_value=False) + self.data_collector.data_processor.analyze_pre_forward = MagicMock() + name = "TestModule.forward" + pid = 123 + + self.data_collector.pre_forward_data_collect(name, None, pid, None) + self.data_collector.check_scope_and_pid.assert_called_once_with( + self.data_collector.scope, "TestModule.backward", 123) diff --git a/debug/accuracy_tools/atat/test/core_ut/data_dump/test_scope.py b/debug/accuracy_tools/atat/test/core_ut/data_dump/test_scope.py new file mode 100644 index 0000000000..3ef7bd8c7a --- /dev/null +++ b/debug/accuracy_tools/atat/test/core_ut/data_dump/test_scope.py @@ -0,0 +1,151 @@ +import unittest +from unittest.mock import MagicMock + +from atat.core.common.exceptions import ScopeException +from 
atat.core.data_dump.scope import ( + build_scope, + build_range_scope_according_to_scope_name, + BaseScope, + ListScope, + RangeScope, + APIRangeScope, + ModuleRangeScope +) + + +class TestBuildScope(unittest.TestCase): + def test_build_scope(self): + scope_class = MagicMock() + result1 = build_scope(scope_class, None, None) + self.assertEqual(result1, None) + + api_list = ['api1', 'api2'] + result2 = build_scope(scope_class, None, api_list) + self.assertEqual(result2, scope_class.return_value) + + def test_build_range_scope_according_to_scope_name(self): + result = build_range_scope_according_to_scope_name([], []) + self.assertIsInstance(result, APIRangeScope) + + +class TestBaseScope(unittest.TestCase): + def test_rectify_args(self): + scope = [] + api_list = "invalid_api_list" + with self.assertRaises(ScopeException) as context: + BaseScope.rectify_args(scope, api_list) + self.assertEqual(context.exception.code, ScopeException.InvalidApiStr) + + api_list = [1, 2, 3] + with self.assertRaises(ScopeException) as context: + BaseScope.rectify_args(scope, api_list) + self.assertEqual(context.exception.code, ScopeException.InvalidApiStr) + + scope = "module1" + api_list = [] + + expected_scope = ["module1"] + expected_api_list = [] + result_scope, result_api_list = BaseScope.rectify_args(scope, api_list) + self.assertEqual(result_scope, expected_scope) + self.assertEqual(result_api_list, expected_api_list) + + scope = 123 + api_list = [] + with self.assertRaises(ScopeException) as context: + BaseScope.rectify_args(scope, api_list) + self.assertEqual(context.exception.code, ScopeException.InvalidScope) + + scope = ["module1", 2, "module3"] + api_list = [] + with self.assertRaises(ScopeException) as context: + BaseScope.rectify_args(scope, api_list) + self.assertEqual(context.exception.code, ScopeException.InvalidScope) + + +class TestListScope(unittest.TestCase): + def test_rectify_args(self): + scope = ["module1"] + api_list = ["api1"] + with 
self.assertRaises(ScopeException) as context: + ListScope.rectify_args(scope, api_list) + self.assertEqual(context.exception.code, ScopeException.ArgConflict) + + def test_check(self): + list_scope = ListScope([], []) + module_name = "module1" + result = list_scope.check(module_name) + self.assertTrue(result) + + list_scope = ListScope(["module1"], []) + module_name = "module1" + result = list_scope.check(module_name) + self.assertTrue(result) + + list_scope = ListScope(["module1"], []) + module_name = "module2" + result = list_scope.check(module_name) + self.assertFalse(result) + + +class TestRangeScope(unittest.TestCase): + def test_rectify_args(self): + scope = ["module1", "module2", "module3"] + with self.assertRaises(ScopeException) as context: + RangeScope.rectify_args(scope, []) + self.assertEqual(context.exception.code, ScopeException.InvalidScope) + + scope = ["module1"] + expected_scope = ["module1", "module1"] + result_scope, result_api_list = RangeScope.rectify_args(scope, []) + self.assertEqual(result_scope, expected_scope) + + +class TestAPIRangeScope(unittest.TestCase): + def test_check_scope_is_valid(self): + api_range_scope = APIRangeScope([], []) + result = api_range_scope.check_scope_is_valid() + self.assertTrue(result) + + def test_check(self): + api_range_scope = APIRangeScope([], []) + api_name = "api1" + result = api_range_scope.check(api_name) + self.assertTrue(result) + + +class TestModuleRangeScope(unittest.TestCase): + def test_check_scope_is_valid(self): + module_range_scope = ModuleRangeScope([], []) + result = module_range_scope.check_scope_is_valid() + self.assertTrue(result) + + def test_begin_module(self): + module_range_scope = ModuleRangeScope(["module1", "module2"], []) + module_name = "module1" + module_range_scope.begin_module(module_name) + self.assertTrue(module_range_scope.in_scope) + + module_range_scope = ModuleRangeScope(["module1", "module2"], []) + module_name = "module3" + module_range_scope.begin_module(module_name) + 
self.assertFalse(module_range_scope.in_scope) + + def test_end_module(self): + module_range_scope = ModuleRangeScope(["module1", "module2"], []) + module_name = "module2" + module_range_scope.in_scope = True + module_range_scope.end_module(module_name) + self.assertFalse(module_range_scope.in_scope) + + module_range_scope = ModuleRangeScope(["module1", "module2"], []) + module_name = "module3" + module_range_scope.in_scope = True + module_range_scope.end_module(module_name) + self.assertTrue(module_range_scope.in_scope) + + def test_check(self): + module_range_scope = ModuleRangeScope([], []) + module_name = "module1" + result = module_range_scope.check(module_name) + self.assertTrue(result) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/functional/test_dump_module.py b/debug/accuracy_tools/atat/test/pytorch_ut/functional/test_dump_module.py new file mode 100644 index 0000000000..78b8b6bcc5 --- /dev/null +++ b/debug/accuracy_tools/atat/test/pytorch_ut/functional/test_dump_module.py @@ -0,0 +1,15 @@ +import unittest + +import torch.nn as nn +from atat.pytorch import PrecisionDebugger +from atat.pytorch.functional.dump_module import module_dump, module_count + + +class TestDumpModule(unittest.TestCase): + def setUp(self): + self.module = nn.Linear(in_features=8, out_features=4) + + def test_module_dump(self): + PrecisionDebugger(dump_path="./dump") + module_dump(self.module, "TestModule") + self.assertTrue("TestModule" in module_count) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/atat/test/pytorch_ut/test_pt_config.py index fa52fe0e1b..f36e015bfe 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/atat/test/pytorch_ut/test_pt_config.py @@ -2,7 +2,7 @@ from unittest import TestCase from unittest.mock import patch, mock_open from atat.core.common.const import Const -from atat.pytorch.pt_config import parse_json_config +from atat.pytorch.pt_config import 
parse_json_config, parse_task_config class TestPtConfig(TestCase): @@ -36,3 +36,34 @@ class TestPtConfig(TestCase): common_config, task_config = parse_json_config(None, Const.TENSOR) self.assertEqual(common_config.task, Const.STATISTICS) self.assertEqual(task_config.file_format, "npy") + + def test_parse_task_config(self): + overflow_check_config = { + "overflow_check": { + "overflow_nums": 1, + "check_mode": "all" + } + } + result = parse_task_config(Const.OVERFLOW_CHECK, overflow_check_config) + self.assertEqual(result.overflow_num, 1) + self.assertEqual(result.check_mode, "all") + + free_benchmark_config = { + "free_benchmark": { + "scope": [], + "list": ["conv2d"], + "fuzz_device": "npu", + "pert_mode": "improve_precision", + "handler_type": "check", + "fuzz_level": "L1", + "fuzz_stage": "forward", + "if_preheat": False, + "preheat_step": 15, + "max_sample": 20 + } + } + result = parse_task_config(Const.FREE_BENCHMARK, free_benchmark_config) + self.assertEqual(result.pert_mode, "improve_precision") + self.assertEqual(result.handler_type, "check") + self.assertEqual(result.preheat_step, 15) + self.assertEqual(result.max_sample, 20) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/test_service.py b/debug/accuracy_tools/atat/test/pytorch_ut/test_service.py new file mode 100644 index 0000000000..b36be15b85 --- /dev/null +++ b/debug/accuracy_tools/atat/test/pytorch_ut/test_service.py @@ -0,0 +1,59 @@ +import unittest +from unittest.mock import patch, mock_open + +import torch.nn as nn +from atat.core.common.utils import Const +from atat.pytorch.debugger.debugger_config import DebuggerConfig +from atat.pytorch.pt_config import parse_json_config +from atat.pytorch.service import Service + + +class TestService(unittest.TestCase): + def setUp(self): + mock_json_data = { + "dump_path": "./dump/", + } + with patch("atat.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ + patch("atat.pytorch.pt_config.json.load", return_value=mock_json_data): + common_config, 
task_config = parse_json_config("./config.json", Const.STATISTICS) + self.config = DebuggerConfig(common_config, task_config, Const.STATISTICS, "./ut_dump", "L1") + self.service = Service(self.config) + + def test_start(self): + with patch("atat.pytorch.service.get_rank_if_initialized", return_value=0), \ + patch("atat.pytorch.service.Service.create_dirs", return_value=None): + self.service.start(None) + self.assertEqual(self.service.current_rank, 0) + + def test_stop_and_step(self): + with patch("atat.core.data_dump.data_collector.DataCollector.write_json", return_value=None): + self.service.stop() + self.assertFalse(self.service.switch) + + self.service.step() + self.assertEqual(self.service.current_iter, 1) + + def test_register_hook_new(self): + class TestModule(nn.Module): + def __init__(self) -> None: + super().__init__() + self.linear = nn.Linear(in_features=8, out_features=4) + + def forward(self, x): + x = self.linear(x) + return x + + self.service.model = TestModule() + self.config.level = "L0" + with patch("atat.pytorch.service.logger.info_on_rank_0") as mock_logger, \ + patch("atat.pytorch.service.remove_dropout", return_value=None): + self.service.register_hook_new() + self.assertEqual(mock_logger.call_count, 2) + + def test_create_dirs(self): + with patch("atat.pytorch.service.Path.mkdir", return_value=None), \ + patch("atat.core.common.file_check.FileChecker.common_check", return_value=None), \ + patch("atat.core.data_dump.data_collector.DataCollector.update_dump_paths", + return_value=None): + self.service.create_dirs() + self.assertEqual(self.service.dump_iter_dir, "./ut_dump/step0") -- Gitee From 5d93a4ecb4d544a248f085802899b20c6a91b20a Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 11:24:32 +0800 Subject: [PATCH 010/106] owners update --- OWNERS | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/OWNERS b/OWNERS index 7b721dd643..2e949debf1 100644 --- a/OWNERS +++ b/OWNERS @@ 
-13,35 +13,14 @@ approvers: - kun_8 - binghamhuang reviewers: -- leo920320 -- wo-wenjie -- ma-dongfang -- wuyulong11 -- alysongirl -- wangchao285 -- brightlyking -- chenhao_1209 -- feng123www -- zhang-mingyu-0813 -- snowflakephoenix -- Seanesmhxocism -- augboost -- fanxiaotong1995 -- sunboquan -- kun_8 -- Martin-M -- ly-qianxiao -- yang-minghai22 -- hu-xiao-bo - lv-kaimeng - litian_drinksnow -- blian -- cycoe -- machj -- zhengweifeng6 -- gong-siwei -- uniteone - binghamhuang -- wjchuee -- zhou-xianqi -- stby11 \ No newline at end of file +- wo-wenjie +- ly-qianxiao +- leo920320 +- sunboquan +- stby +- Seanesmhxocism +- TAJh +- czr9775 \ No newline at end of file -- Gitee From f5e368dffe3039ecc0d1b6ce8ea4bf3b723adee2 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Fri, 19 Jul 2024 11:26:46 +0800 Subject: [PATCH 011/106] fix generate_csv_content, use register_forward_pre_hook --- .../grad_tool/grad_ms/grad_stat_csv.py | 6 +++--- debug/accuracy_tools/grad_tool/grad_ms/hook.py | 13 +++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py index 791553bb11..1ebaa65827 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py @@ -82,9 +82,9 @@ class CsvDistribution(CsvItem): grad = grad.to(mindspore.float32) element_num = grad.numel() grad_equal_0_num = (grad == 0).sum().item() - bound = Tensor(bounds) - bucketsize_result = ops.bucketize(grad, bound) - interval_nums = [(bucketsize_result == i).sum().item() for i in range(len(bound) + 1)] + bucketsize_result = ops.bucketize(grad.float(), bounds) + bucketsize_result = bucketsize_result.astype(mindspore.int8) + interval_nums = [(bucketsize_result == i).sum().item() for i in range(len(bounds) + 1)] interval_nums.append(grad_equal_0_num) return_list = [x / element_num if element_num != 0 else 0 for x in interval_nums] 
return return_list diff --git a/debug/accuracy_tools/grad_tool/grad_ms/hook.py b/debug/accuracy_tools/grad_tool/grad_ms/hook.py index bc68064117..2d6034c589 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/hook.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/hook.py @@ -54,8 +54,9 @@ def hook_graph_mode_optimizer(opt, hook_input): def hook_pynative_optimizer(opt, hook_input): level_adapted = get_adapted_level(hook_input.level) - def new_construct(self, gradients): - cur_step = self.dump_step.value()[0] + def hook_fn(cell, input): + gradients, = input + cur_step = grad_context.get_context(GradConst.CURRENT_STEP) if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id): output_lines = [] for index, grad_value in enumerate(gradients): @@ -71,13 +72,9 @@ def hook_pynative_optimizer(opt, hook_input): dummy_csv_input = CsvInput(None, None, hook_input.bounds) write_csv(output_csv_path, output_lines, GradStatCsv.generate_csv_header(level_adapted, dummy_csv_input)) + grad_context.update_step() - self.assignadd(self.dump_step, self.global_step_increase_tensor) - out = hook_input.func(gradients) - return out - - opt.dump_step = Parameter(initializer(0, [1], ms.int32), name="dump_step") - opt.construct = new_construct.__get__(opt, type(opt)) + opt.register_forward_pre_hook(hook_fn) def hook_optimizer(opt: Optimizer): -- Gitee From 7abdd558646168a7acef2926217fcc6d46c66678 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 14:49:36 +0800 Subject: [PATCH 012/106] import fix --- .../api_accuracy_checker/common/test_common_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py index e49367fd1a..11696a0d36 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +++ 
b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py @@ -4,10 +4,7 @@ import csv import unittest from unittest.mock import patch -from atat.core.common.utils import CompareException -from atat.core.common.file_check import create_directory -from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents, write_csv, check_need_convert, \ - check_object_type, check_file_or_directory_path, get_file_content_bytes, api_info_preprocess +from atat.pytorch.api_accuracy_checker.common.utils import * class TestUtils(unittest.TestCase): -- Gitee From 72cc089b6a7de949058e61e91334d1b911588e68 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 14:50:41 +0800 Subject: [PATCH 013/106] import fix --- .../api_accuracy_checker/common/test_common_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py index 11696a0d36..16d0c0bc12 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +++ b/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py @@ -1,6 +1,3 @@ -import os -import json -import csv import unittest from unittest.mock import patch -- Gitee From e324262755c29ebf48b0a68fd3fe0e70167e1483 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Fri, 19 Jul 2024 16:34:19 +0800 Subject: [PATCH 014/106] fix cleancode --- .../grad_tool/grad_ms/global_context.py | 17 ++++++++--------- debug/accuracy_tools/grad_tool/grad_ms/utils.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index c2242ffe7c..d44bea52c7 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ 
b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -59,6 +59,14 @@ class GlobalContext: def update_step(self): self._setting[GradConst.CURRENT_STEP] += 1 + def step_need_dump(self, step): + dump_step_list = self.get_context(GradConst.STEP) + return (not dump_step_list) or (step in dump_step_list) + + def rank_need_dump(self, rank): + dump_rank_list = self.get_context(GradConst.RANK) + return (not dump_rank_list) or (rank in dump_rank_list) + def _set_input_list(self, config_dict: Dict, name: str, dtype: Union[int, str, float]): value = config_dict.get(name) if dtype == int: @@ -76,13 +84,4 @@ class GlobalContext: else: print_warn_log(f"{name} is None or not a list with valid items, use default value.") - def step_need_dump(self, step): - dump_step_list = self.get_context(GradConst.STEP) - return (not dump_step_list) or (step in dump_step_list) - - def rank_need_dump(self, rank): - dump_rank_list = self.get_context(GradConst.RANK) - return (not dump_rank_list) or (rank in dump_rank_list) - - grad_context = GlobalContext() diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py index a24384268a..41bd54b505 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/utils.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -34,5 +34,5 @@ def get_adapted_level(level: str): if level == GradConst.LEVEL3: print_warn_log(f"In mindpsore pynative mode, only 'L0', 'L1' and 'L2' are supported, use L0 instead") level = GradConst.LEVEL0 - level_adapted = level_adp[level] + level_adapted = level_adp.get(level) return level_adapted \ No newline at end of file -- Gitee From 65b1bf145d0f4ce77483f744e6cf5fb2c1dfc387 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 18:24:47 +0800 Subject: [PATCH 015/106] atat to msprobe1 --- .../pytorch/debugger/precision_debugger.py | 95 ------------------ .../accuracy_tools/{atat => msprobe}/atat.py | 0 .../config/img/free_benchmark.png | Bin 
.../core/data_dump/json_writer.py | 0 .../mindspore/debugger/precision_debugger.py | 32 ++++++ .../{atat => msprobe}/mindspore/doc/dump.md | 0 .../mindspore/dump/api_kbk_dump.py | 0 .../mindspore/dump/dump_tool_factory.py | 0 .../overflow_check_tool_factory.py | 0 .../pytorch/advisor/advisor_result.py | 0 .../pytorch/api_accuracy_checker/.keep | 0 .../pytorch/api_accuracy_checker/common/.keep | 0 .../api_accuracy_checker/compare/algorithm.py | 0 .../compare/api_precision_compare.py | 0 .../compare/api_precision_standard.yaml | 0 .../api_accuracy_checker/compare/compare.py | 0 .../pytorch/api_accuracy_checker/run_ut/.keep | 0 .../run_ut/run_overflow_check.py | 0 .../api_accuracy_checker/run_ut/run_ut.py | 0 .../run_ut/torch_ut_setting.json | 0 .../{atat => msprobe}/pytorch/common/log.py | 0 .../pytorch/common/parse_json.py | 0 .../pytorch/compare/mapping.yaml | 0 .../pytorch/compare/match.py | 0 .../pytorch/compare/npy_compare.py | 0 .../pytorch/doc/img/BLOOM-7B_1.png | Bin .../pytorch/doc/img/BLOOM-7B_2.png | Bin .../pytorch/doc/img/BLOOM-7B_3.png | Bin .../pytorch/doc/img/BLOOM-7B_4.png | Bin .../pytorch/doc/img/GPT-3_2.png | Bin .../pytorch/doc/img/GPT-3_4.png | Bin .../pytorch/doc/img/GPT-3_8.png | Bin .../pytorch/doc/img/YOLOV5S_1.png | Bin .../pytorch/doc/img/YOLOV5S_2.png | Bin .../pytorch/doc/parse_tool.md | 0 .../pytorch/doc/ptdbg_ascend_overview.md | 0 .../pytorch/doc/ptdbg_ascend_quickstart.md | 0 .../pytorch/doc/run_overflow_check.md | 0 ...76\345\272\246\346\257\224\345\257\271.md" | 0 .../pytorch/free_benchmark/common/enums.py | 0 .../pytorch/free_benchmark/common/params.py | 0 .../perturbed_layers/base_layer.py | 0 .../perturbed_layers/layer_factory.py | 0 .../perturbed_layers/npu/add_noise.py | 0 .../perturbed_layers/npu/change_value.py | 0 .../perturbed_layers/npu/improve_precision.py | 0 .../perturbed_layers/npu/no_change.py | 0 .../perturbed_layers/npu/npu_base_layser.py | 0 .../result_handlers/base_handler.py | 0 
.../result_handlers/check_handler.py | 0 .../result_handlers/handler_factory.py | 0 .../result_handlers/preheat_handler.py | 0 .../pytorch/functional/dump_module.py | 0 .../pytorch/hook_module/api_registry.py | 0 .../pytorch/hook_module/hook_module.py | 0 .../pytorch/hook_module/wrap_aten.py | 0 .../pytorch/hook_module/wrap_distributed.py | 0 .../pytorch/hook_module/wrap_tensor.py | 0 .../pytorch/hook_module/wrap_torch.py | 0 .../pytorch/online_dispatch/compare.py | 0 .../pytorch/online_dispatch/single_compare.py | 0 .../online_dispatch/torch_ops_config.yaml | 0 .../pytorch/parse_tool/cli.py | 0 .../{atat => msprobe}/pytorch/pt_config.py | 0 .../test/core_ut/common/test_utils.py | 0 .../core_ut/data_dump/test_json_writer.py | 0 .../test/core_ut/data_dump/test_scope.py | 0 .../test/core_ut/test_common_config.py | 0 .../test/core_ut/test_log.py | 0 .../test/mindspore_ut/test_debugger_config.py | 0 .../mindspore_ut/test_kernel_graph_dump.py | 0 .../mindspore_ut/test_task_handler_factory.py | 0 .../compare/test_algorithm.py | 0 .../compare/test_compare_column.py | 0 .../compare/test_compare_utils.py | 0 .../api_accuracy_checker/run_ut/forward.json | 0 .../run_ut/test_multi_run_ut.py | 0 .../run_ut/test_run_ut.py | 0 .../result_handlers/test_result_handler.py | 0 .../hook_module/test_wrap_distributed.py | 0 .../hook_module/test_wrap_functional.py | 0 .../hook_module/test_wrap_tensor.py | 0 .../pytorch_ut/hook_module/test_wrap_torch.py | 0 .../test/pytorch_ut/test_pt_config.py | 0 .../test/pytorch_ut/test_service.py | 0 .../{atat => msprobe}/test/run_test.sh | 0 .../{atat => msprobe}/test/run_ut.py | 0 87 files changed, 32 insertions(+), 95 deletions(-) delete mode 100644 debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py rename debug/accuracy_tools/{atat => msprobe}/atat.py (100%) rename debug/accuracy_tools/{atat => msprobe}/config/img/free_benchmark.png (100%) rename debug/accuracy_tools/{atat => msprobe}/core/data_dump/json_writer.py (100%) create mode 
100644 debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py rename debug/accuracy_tools/{atat => msprobe}/mindspore/doc/dump.md (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/dump/api_kbk_dump.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/dump/dump_tool_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/overflow_check/overflow_check_tool_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/advisor/advisor_result.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/.keep (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/common/.keep (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/algorithm.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/api_precision_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/.keep (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/run_ut.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/common/log.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/common/parse_json.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/compare/mapping.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/compare/match.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/compare/npy_compare.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/doc/img/BLOOM-7B_1.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/BLOOM-7B_2.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/BLOOM-7B_3.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/BLOOM-7B_4.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_2.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_4.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_8.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/YOLOV5S_1.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/YOLOV5S_2.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/parse_tool.md (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/ptdbg_ascend_overview.md (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/ptdbg_ascend_quickstart.md (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/run_overflow_check.md (100%) rename "debug/accuracy_tools/atat/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" => "debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/common/enums.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/common/params.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/base_layer.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/layer_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/npu/change_value.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/npu/no_change.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/result_handlers/base_handler.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/result_handlers/check_handler.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/result_handlers/handler_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/result_handlers/preheat_handler.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/functional/dump_module.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/api_registry.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/hook_module.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_aten.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_distributed.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_tensor.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_torch.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/single_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/torch_ops_config.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/cli.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/pt_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/core_ut/common/test_utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/core_ut/data_dump/test_json_writer.py (100%) rename 
debug/accuracy_tools/{atat => msprobe}/test/core_ut/data_dump/test_scope.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/core_ut/test_common_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/core_ut/test_log.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_debugger_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_kernel_graph_dump.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_task_handler_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_wrap_distributed.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_wrap_functional.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_wrap_tensor.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_wrap_torch.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/test_pt_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/test_service.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/test/run_test.sh (100%) rename debug/accuracy_tools/{atat => msprobe}/test/run_ut.py (100%) diff --git a/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py deleted file mode 100644 index d84d535b29..0000000000 --- a/debug/accuracy_tools/atat/pytorch/debugger/precision_debugger.py +++ /dev/null @@ -1,95 +0,0 @@ -import torch -from torch.utils.data import dataloader -from atat.pytorch.debugger.debugger_config import DebuggerConfig -from atat.pytorch.service import Service -from atat.pytorch.common.log import logger -from atat.pytorch.pt_config import parse_json_config -from atat.core.common.exceptions import MsaccException - - -class PrecisionDebugger: - _instance = None - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super(PrecisionDebugger, cls).__new__(cls) - cls._instance.config = None - cls._instance.enable_dataloader = False - return cls._instance - - def __init__( - self, - config_path=None, - task=None, - dump_path=None, - level=None, - model=None, - step=None, - ): - if not hasattr(self, "initialized"): - self.initialized = True - self.model = self.check_model_valid(model) - common_config, task_config = parse_json_config(config_path, task) - if step: - common_config.step = step - self.config = DebuggerConfig( - common_config, task_config, task, dump_path, level - ) - self.config.check_model(self.model) - self.service = Service(self.config) - self.enable_dataloader = self.config.enable_dataloader - if self.enable_dataloader: - logger.warning_on_rank_0("The enable_dataloader feature will be deprecated in the future.") - dataloader._BaseDataLoaderIter.__next__ = iter_tracer(dataloader._BaseDataLoaderIter.__next__) - - @property - def instance(self): - return self._instance - - @staticmethod - def check_model_valid(model): - if not model or isinstance(model, torch.nn.Module): - return model - raise MsaccException( - 
MsaccException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" - ) - - @classmethod - def start(cls): - instance = cls._instance - if not instance: - raise Exception("No instance of PrecisionDebugger found.") - if instance.enable_dataloader: - logger.warning_on_rank_0("DataLoader is enabled, start() skipped.") - else: - instance.service.start(instance.model) - - @classmethod - def stop(cls): - instance = cls._instance - if not instance: - raise Exception("PrecisionDebugger instance is not created.") - if instance.enable_dataloader: - logger.warning_on_rank_0("DataLoader is enabled, stop() skipped.") - else: - instance.service.stop() - - @classmethod - def step(cls): - if not cls._instance: - raise Exception("PrecisionDebugger instance is not created.") - cls._instance.service.step() - - -def iter_tracer(func): - def func_wrapper(*args, **kwargs): - debugger_instance = PrecisionDebugger.instance - debugger_instance.enable_dataloader = False - if not debugger_instance.service.first_start: - debugger_instance.stop() - debugger_instance.step() - result = func(*args, **kwargs) - debugger_instance.start() - debugger_instance.enable_dataloader = True - return result - return func_wrapper diff --git a/debug/accuracy_tools/atat/atat.py b/debug/accuracy_tools/msprobe/atat.py similarity index 100% rename from debug/accuracy_tools/atat/atat.py rename to debug/accuracy_tools/msprobe/atat.py diff --git a/debug/accuracy_tools/atat/config/img/free_benchmark.png b/debug/accuracy_tools/msprobe/config/img/free_benchmark.png similarity index 100% rename from debug/accuracy_tools/atat/config/img/free_benchmark.png rename to debug/accuracy_tools/msprobe/config/img/free_benchmark.png diff --git a/debug/accuracy_tools/atat/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py similarity index 100% rename from debug/accuracy_tools/atat/core/data_dump/json_writer.py rename to debug/accuracy_tools/msprobe/core/data_dump/json_writer.py diff --git 
a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py new file mode 100644 index 0000000000..0099074762 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -0,0 +1,32 @@ +import os +from atat.mindspore.ms_config import parse_json_config +from atat.mindspore.debugger.debugger_config import DebuggerConfig +from atat.mindspore.task_handler_factory import TaskHandlerFactory + + +class PrecisionDebugger: + _instance = None + + def __new__(cls, config_path=None): + if not cls._instance: + cls._instance = super().__new__(cls) + cls._instance.initialized = False + cls._instance.config = None + return cls._instance + + def __init__(self, config_path=None): + if self.initialized: + return + if not config_path: + config_path = os.path.join(os.path.dirname(__file__), "../../config/config.json") + common_config, task_config = parse_json_config(config_path) + self.config = DebuggerConfig(common_config, task_config) + self.initialized = True + + @classmethod + def start(cls, target=None): + instance = cls._instance + if not instance: + raise Exception("No instance of PrecisionDebugger found.") + handler = TaskHandlerFactory.create(instance.config) + handler.handle() diff --git a/debug/accuracy_tools/atat/mindspore/doc/dump.md b/debug/accuracy_tools/msprobe/mindspore/doc/dump.md similarity index 100% rename from debug/accuracy_tools/atat/mindspore/doc/dump.md rename to debug/accuracy_tools/msprobe/mindspore/doc/dump.md diff --git a/debug/accuracy_tools/atat/mindspore/dump/api_kbk_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/api_kbk_dump.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/dump/api_kbk_dump.py rename to debug/accuracy_tools/msprobe/mindspore/dump/api_kbk_dump.py diff --git a/debug/accuracy_tools/atat/mindspore/dump/dump_tool_factory.py b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py 
similarity index 100% rename from debug/accuracy_tools/atat/mindspore/dump/dump_tool_factory.py rename to debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py diff --git a/debug/accuracy_tools/atat/mindspore/overflow_check/overflow_check_tool_factory.py b/debug/accuracy_tools/msprobe/mindspore/overflow_check/overflow_check_tool_factory.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/overflow_check/overflow_check_tool_factory.py rename to debug/accuracy_tools/msprobe/mindspore/overflow_check/overflow_check_tool_factory.py diff --git a/debug/accuracy_tools/atat/pytorch/advisor/advisor_result.py b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor_result.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/advisor/advisor_result.py rename to debug/accuracy_tools/msprobe/pytorch/advisor/advisor_result.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/.keep b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/.keep similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/.keep rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/.keep diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/.keep b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/.keep similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/.keep rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/.keep diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/algorithm.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/api_precision_compare.py 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/api_precision_compare.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/compare.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/.keep b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/.keep similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/.keep rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/.keep diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/run_ut.py 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/run_ut.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json diff --git a/debug/accuracy_tools/atat/pytorch/common/log.py b/debug/accuracy_tools/msprobe/pytorch/common/log.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/common/log.py rename to debug/accuracy_tools/msprobe/pytorch/common/log.py diff --git a/debug/accuracy_tools/atat/pytorch/common/parse_json.py b/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/common/parse_json.py rename to debug/accuracy_tools/msprobe/pytorch/common/parse_json.py diff --git a/debug/accuracy_tools/atat/pytorch/compare/mapping.yaml b/debug/accuracy_tools/msprobe/pytorch/compare/mapping.yaml similarity index 100% rename from debug/accuracy_tools/atat/pytorch/compare/mapping.yaml rename to debug/accuracy_tools/msprobe/pytorch/compare/mapping.yaml diff --git a/debug/accuracy_tools/atat/pytorch/compare/match.py b/debug/accuracy_tools/msprobe/pytorch/compare/match.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/compare/match.py rename to debug/accuracy_tools/msprobe/pytorch/compare/match.py diff --git a/debug/accuracy_tools/atat/pytorch/compare/npy_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/npy_compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/compare/npy_compare.py rename to 
debug/accuracy_tools/msprobe/pytorch/compare/npy_compare.py diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_1.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_1.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_1.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_1.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_2.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_2.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_2.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_2.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_3.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_3.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_3.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_3.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_4.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_4.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/BLOOM-7B_4.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/BLOOM-7B_4.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_2.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_2.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_2.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_2.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_4.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_4.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_4.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_4.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_8.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_8.png similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_8.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_8.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/YOLOV5S_1.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/YOLOV5S_1.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/YOLOV5S_1.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/YOLOV5S_1.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/YOLOV5S_2.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/YOLOV5S_2.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/YOLOV5S_2.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/YOLOV5S_2.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/parse_tool.md b/debug/accuracy_tools/msprobe/pytorch/doc/parse_tool.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/parse_tool.md rename to debug/accuracy_tools/msprobe/pytorch/doc/parse_tool.md diff --git a/debug/accuracy_tools/atat/pytorch/doc/ptdbg_ascend_overview.md b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_overview.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/ptdbg_ascend_overview.md rename to debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_overview.md diff --git a/debug/accuracy_tools/atat/pytorch/doc/ptdbg_ascend_quickstart.md b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_quickstart.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/ptdbg_ascend_quickstart.md rename to debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_quickstart.md diff --git a/debug/accuracy_tools/atat/pytorch/doc/run_overflow_check.md b/debug/accuracy_tools/msprobe/pytorch/doc/run_overflow_check.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/run_overflow_check.md rename to debug/accuracy_tools/msprobe/pytorch/doc/run_overflow_check.md diff --git 
"a/debug/accuracy_tools/atat/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" "b/debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" similarity index 100% rename from "debug/accuracy_tools/atat/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" rename to "debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/enums.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/enums.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/common/enums.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/enums.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/params.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/params.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/common/params.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/params.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/base_layer.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/base_layer.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/layer_factory.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/layer_factory.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py diff --git 
a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/change_value.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/change_value.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/no_change.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/no_change.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py rename to 
debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/base_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/base_handler.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/check_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/check_handler.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/check_handler.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/check_handler.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/handler_factory.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/handler_factory.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/preheat_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/preheat_handler.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py diff --git a/debug/accuracy_tools/atat/pytorch/functional/dump_module.py b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/functional/dump_module.py rename to debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py diff --git 
a/debug/accuracy_tools/atat/pytorch/hook_module/api_registry.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/api_registry.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/hook_module.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py 
similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/compare.py rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/single_compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/single_compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/single_compare.py rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/single_compare.py diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/torch_ops_config.yaml b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/torch_ops_config.yaml similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/torch_ops_config.yaml rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/torch_ops_config.yaml diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/cli.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/cli.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/cli.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/cli.py diff --git a/debug/accuracy_tools/atat/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/pt_config.py rename to debug/accuracy_tools/msprobe/pytorch/pt_config.py diff --git a/debug/accuracy_tools/atat/test/core_ut/common/test_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/common/test_utils.py rename to debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py diff --git a/debug/accuracy_tools/atat/test/core_ut/data_dump/test_json_writer.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_json_writer.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/data_dump/test_json_writer.py rename to 
debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_json_writer.py diff --git a/debug/accuracy_tools/atat/test/core_ut/data_dump/test_scope.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_scope.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/data_dump/test_scope.py rename to debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_scope.py diff --git a/debug/accuracy_tools/atat/test/core_ut/test_common_config.py b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/test_common_config.py rename to debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py diff --git a/debug/accuracy_tools/atat/test/core_ut/test_log.py b/debug/accuracy_tools/msprobe/test/core_ut/test_log.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/test_log.py rename to debug/accuracy_tools/msprobe/test/core_ut/test_log.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_debugger_config.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_debugger_config.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_kernel_graph_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_dump.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_kernel_graph_dump.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_dump.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_task_handler_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_task_handler_factory.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_task_handler_factory.py rename to 
debug/accuracy_tools/msprobe/test/mindspore_ut/test_task_handler_factory.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py similarity index 100% 
rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_distributed.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_distributed.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_functional.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_functional.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_tensor.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py similarity 
index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_tensor.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_torch.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_torch.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/test_pt_config.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/test_service.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/test_service.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py diff --git a/debug/accuracy_tools/atat/test/run_test.sh b/debug/accuracy_tools/msprobe/test/run_test.sh similarity index 100% rename from debug/accuracy_tools/atat/test/run_test.sh rename to debug/accuracy_tools/msprobe/test/run_test.sh diff --git a/debug/accuracy_tools/atat/test/run_ut.py b/debug/accuracy_tools/msprobe/test/run_ut.py similarity index 100% rename from debug/accuracy_tools/atat/test/run_ut.py rename to debug/accuracy_tools/msprobe/test/run_ut.py -- Gitee From 65b23a0ded381a587865367158d334beac914a5f Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 18:32:40 +0800 Subject: [PATCH 016/106] atat to msprobe2 --- .../mindspore/debugger/precision_debugger.py | 32 ------ .../{atat => msprobe}/README.md | 0 .../{atat => msprobe}/config/README.md | 0 .../{atat => msprobe}/config/config.json | 0 .../{atat => 
msprobe}/core/common/const.py | 0 .../core/common/exceptions.py | 0 .../core/common/file_check.py | 0 .../{atat => msprobe}/core/common/log.py | 0 .../{atat => msprobe}/core/common_config.py | 0 .../core/data_dump/data_collector.py | 0 .../core/data_dump/data_processor/base.py | 0 .../core/data_dump/data_processor/factory.py | 0 .../data_processor/pytorch_processor.py | 0 .../{atat => msprobe}/core/data_dump/scope.py | 0 .../mindspore/debugger/debugger_config.py | 0 .../mindspore/dump/kernel_graph_dump.py | 0 .../kernel_graph_overflow_check.py | 0 .../mindspore/task_handler_factory.py | 0 .../pytorch/advisor/advisor.py | 0 .../pytorch/advisor/advisor_const.py | 0 .../api_accuracy_checker/common/config.py | 0 .../compare/api_precision_threshold.yaml | 0 .../compare/compare_column.py | 0 .../compare/compare_utils.py | 0 .../pytorch/api_accuracy_checker/config.yaml | 0 .../run_ut/data_generate.py | 0 .../run_ut/run_ut_utils.py | 0 .../pytorch/common/compare_script.template | 0 .../pytorch/compare/acc_compare.py | 0 .../pytorch/compare/distributed_compare.py | 0 .../pytorch/compare/highlight.py | 0 .../pytorch/debugger/debugger_config.py | 0 .../pytorch/debugger/precision_debugger.py | 95 ++++++++++++++++++ .../{atat => msprobe}/pytorch/doc/FAQ.md | 0 .../pytorch/doc/api_accuracy_checker.md | 0 ...72\347\272\277\346\212\245\345\221\212.md" | 0 .../{atat => msprobe}/pytorch/doc/dump.md | 0 .../pytorch/doc/img/GPT-3_1.png | Bin .../pytorch/doc/img/GPT-3_3.png | Bin .../pytorch/doc/img/GPT-3_5.png | Bin .../pytorch/doc/img/GPT-3_6.png | Bin .../pytorch/doc/img/GPT-3_7.png | Bin .../doc/img/accuracy_checking_details.png | Bin .../doc/img/accuracy_checking_result.png | Bin .../doc/img/api_precision_compare_details.png | Bin .../doc/img/api_precision_compare_result.png | Bin .../pytorch/doc/img/auto_analyze_log.png | Bin .../pytorch/doc/img/compare_result_pkl.png | Bin .../doc/img/compare_result_pkl_md5.png.png | Bin .../pytorch/doc/img/cpu_info.png | Bin 
.../pytorch/doc/ptdbg_ascend_compare.md | 0 .../pytorch/free_benchmark/common/constant.py | 0 .../pytorch/free_benchmark/common/counter.py | 0 .../free_benchmark/compare/grad_saver.py | 0 .../compare/single_benchmark.py | 0 .../pytorch/free_benchmark/main.py | 0 .../perturbed_layers/npu/bit_noise.py | 0 .../perturbed_layers/run_cpu.py | 0 .../result_handlers/fix_handler.py | 0 .../pytorch/functional/data_processor.py | 0 .../pytorch/hook_module/support_wrap_ops.yaml | 0 .../pytorch/online_dispatch/dispatch.py | 0 .../pytorch/online_dispatch/dump_compare.py | 0 .../{atat => msprobe}/pytorch/service.py | 0 .../core_ut/data_dump/test_data_collector.py | 0 .../test/core_ut/test_file_check.py | 0 .../test/mindspore_ut/test_api_kbk_dump.py | 0 .../mindspore_ut/test_dump_tool_factory.py | 0 .../test_kernel_graph_overflow_check.py | 0 .../test/mindspore_ut/test_ms_config.py | 0 .../test_overflow_check_tool_factory.py | 0 .../mindspore_ut/test_precision_debugger.py | 0 .../test/pytorch_ut/advisor/test_advisor.py | 0 .../common/test_common_utils.py | 0 .../common/test_config.py | 0 .../compare/test_api_precision_compare.py | 0 .../compare/test_compare.py | 0 .../api_accuracy_checker/run_ut/dump.json | 0 .../run_ut/test_data_generate.py | 0 .../pytorch_ut/compare/test_acc_compare.py | 0 .../perturbed_layers/test_perturbed_layser.py | 0 .../pytorch_ut/free_benchmark/test_main.py | 0 .../pytorch_ut/functional/test_dump_module.py | 0 .../hook_module/test_api_registry.py | 0 .../hook_module/test_hook_module.py | 0 .../pytorch_ut/hook_module/test_wrap_aten.py | 0 .../pytorch_ut/hook_module/test_wrap_vf.py | 0 .../test/resources/advisor.txt | 0 .../compare_result_20230703104808.csv | 0 .../compare_result_without_accuracy.csv | 0 .../test/resources/config.yaml | 0 .../test/test_module_processer.py | 0 92 files changed, 95 insertions(+), 32 deletions(-) delete mode 100644 debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py rename debug/accuracy_tools/{atat => 
msprobe}/README.md (100%) rename debug/accuracy_tools/{atat => msprobe}/config/README.md (100%) rename debug/accuracy_tools/{atat => msprobe}/config/config.json (100%) rename debug/accuracy_tools/{atat => msprobe}/core/common/const.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/common/exceptions.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/common/file_check.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/common/log.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/common_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/data_dump/data_collector.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/data_dump/data_processor/base.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/data_dump/data_processor/factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/data_dump/data_processor/pytorch_processor.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/data_dump/scope.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/debugger/debugger_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/dump/kernel_graph_dump.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/overflow_check/kernel_graph_overflow_check.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/task_handler_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/advisor/advisor.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/advisor/advisor_const.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/common/config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/compare_column.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/compare_utils.py (100%) rename 
debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/config.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/data_generate.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/common/compare_script.template (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/compare/acc_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/compare/distributed_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/compare/highlight.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/debugger/debugger_config.py (100%) create mode 100644 debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/FAQ.md (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/api_accuracy_checker.md (100%) rename "debug/accuracy_tools/atat/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" => "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/dump.md (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_1.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_3.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_5.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_6.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/GPT-3_7.png (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/doc/img/accuracy_checking_details.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/accuracy_checking_result.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/api_precision_compare_details.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/api_precision_compare_result.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/auto_analyze_log.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/compare_result_pkl.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/compare_result_pkl_md5.png.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/cpu_info.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/ptdbg_ascend_compare.md (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/common/constant.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/common/counter.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/compare/grad_saver.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/compare/single_benchmark.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/main.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/run_cpu.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/result_handlers/fix_handler.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/functional/data_processor.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/support_wrap_ops.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/dispatch.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/dump_compare.py (100%) rename 
debug/accuracy_tools/{atat => msprobe}/pytorch/service.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/core_ut/data_dump/test_data_collector.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/core_ut/test_file_check.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_api_kbk_dump.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_dump_tool_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_kernel_graph_overflow_check.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_ms_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_overflow_check_tool_factory.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/mindspore_ut/test_precision_debugger.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/advisor/test_advisor.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/common/test_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/compare/test_acc_compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/free_benchmark/test_main.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/test/pytorch_ut/functional/test_dump_module.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_api_registry.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_hook_module.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_wrap_aten.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/pytorch_ut/hook_module/test_wrap_vf.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/resources/advisor.txt (100%) rename debug/accuracy_tools/{atat => msprobe}/test/resources/compare_result_20230703104808.csv (100%) rename debug/accuracy_tools/{atat => msprobe}/test/resources/compare_result_without_accuracy.csv (100%) rename debug/accuracy_tools/{atat => msprobe}/test/resources/config.yaml (100%) rename debug/accuracy_tools/{atat => msprobe}/test/test_module_processer.py (100%) diff --git a/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py deleted file mode 100644 index 0099074762..0000000000 --- a/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -from atat.mindspore.ms_config import parse_json_config -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.task_handler_factory import TaskHandlerFactory - - -class PrecisionDebugger: - _instance = None - - def __new__(cls, config_path=None): - if not cls._instance: - cls._instance = super().__new__(cls) - cls._instance.initialized = False - cls._instance.config = None - return cls._instance - - def __init__(self, config_path=None): - if self.initialized: - return - if not config_path: - config_path = os.path.join(os.path.dirname(__file__), "../../config/config.json") - common_config, task_config = parse_json_config(config_path) - self.config = DebuggerConfig(common_config, task_config) - self.initialized = True - - 
@classmethod - def start(cls, target=None): - instance = cls._instance - if not instance: - raise Exception("No instance of PrecisionDebugger found.") - handler = TaskHandlerFactory.create(instance.config) - handler.handle() diff --git a/debug/accuracy_tools/atat/README.md b/debug/accuracy_tools/msprobe/README.md similarity index 100% rename from debug/accuracy_tools/atat/README.md rename to debug/accuracy_tools/msprobe/README.md diff --git a/debug/accuracy_tools/atat/config/README.md b/debug/accuracy_tools/msprobe/config/README.md similarity index 100% rename from debug/accuracy_tools/atat/config/README.md rename to debug/accuracy_tools/msprobe/config/README.md diff --git a/debug/accuracy_tools/atat/config/config.json b/debug/accuracy_tools/msprobe/config/config.json similarity index 100% rename from debug/accuracy_tools/atat/config/config.json rename to debug/accuracy_tools/msprobe/config/config.json diff --git a/debug/accuracy_tools/atat/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py similarity index 100% rename from debug/accuracy_tools/atat/core/common/const.py rename to debug/accuracy_tools/msprobe/core/common/const.py diff --git a/debug/accuracy_tools/atat/core/common/exceptions.py b/debug/accuracy_tools/msprobe/core/common/exceptions.py similarity index 100% rename from debug/accuracy_tools/atat/core/common/exceptions.py rename to debug/accuracy_tools/msprobe/core/common/exceptions.py diff --git a/debug/accuracy_tools/atat/core/common/file_check.py b/debug/accuracy_tools/msprobe/core/common/file_check.py similarity index 100% rename from debug/accuracy_tools/atat/core/common/file_check.py rename to debug/accuracy_tools/msprobe/core/common/file_check.py diff --git a/debug/accuracy_tools/atat/core/common/log.py b/debug/accuracy_tools/msprobe/core/common/log.py similarity index 100% rename from debug/accuracy_tools/atat/core/common/log.py rename to debug/accuracy_tools/msprobe/core/common/log.py diff --git 
a/debug/accuracy_tools/atat/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py similarity index 100% rename from debug/accuracy_tools/atat/core/common_config.py rename to debug/accuracy_tools/msprobe/core/common_config.py diff --git a/debug/accuracy_tools/atat/core/data_dump/data_collector.py b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py similarity index 100% rename from debug/accuracy_tools/atat/core/data_dump/data_collector.py rename to debug/accuracy_tools/msprobe/core/data_dump/data_collector.py diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py similarity index 100% rename from debug/accuracy_tools/atat/core/data_dump/data_processor/base.py rename to debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/factory.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py similarity index 100% rename from debug/accuracy_tools/atat/core/data_dump/data_processor/factory.py rename to debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py similarity index 100% rename from debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py rename to debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py diff --git a/debug/accuracy_tools/atat/core/data_dump/scope.py b/debug/accuracy_tools/msprobe/core/data_dump/scope.py similarity index 100% rename from debug/accuracy_tools/atat/core/data_dump/scope.py rename to debug/accuracy_tools/msprobe/core/data_dump/scope.py diff --git a/debug/accuracy_tools/atat/mindspore/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py similarity 
index 100% rename from debug/accuracy_tools/atat/mindspore/debugger/debugger_config.py rename to debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py diff --git a/debug/accuracy_tools/atat/mindspore/dump/kernel_graph_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/dump/kernel_graph_dump.py rename to debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py diff --git a/debug/accuracy_tools/atat/mindspore/overflow_check/kernel_graph_overflow_check.py b/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/overflow_check/kernel_graph_overflow_check.py rename to debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py diff --git a/debug/accuracy_tools/atat/mindspore/task_handler_factory.py b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/task_handler_factory.py rename to debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py diff --git a/debug/accuracy_tools/atat/pytorch/advisor/advisor.py b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/advisor/advisor.py rename to debug/accuracy_tools/msprobe/pytorch/advisor/advisor.py diff --git a/debug/accuracy_tools/atat/pytorch/advisor/advisor_const.py b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor_const.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/advisor/advisor_const.py rename to debug/accuracy_tools/msprobe/pytorch/advisor/advisor_const.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/config.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/compare_column.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_column.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/compare_column.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_column.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/compare_utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/compare_utils.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/config.yaml b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/config.yaml rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/data_generate.py rename to 
debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py diff --git a/debug/accuracy_tools/atat/pytorch/common/compare_script.template b/debug/accuracy_tools/msprobe/pytorch/common/compare_script.template similarity index 100% rename from debug/accuracy_tools/atat/pytorch/common/compare_script.template rename to debug/accuracy_tools/msprobe/pytorch/common/compare_script.template diff --git a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/compare/acc_compare.py rename to debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py diff --git a/debug/accuracy_tools/atat/pytorch/compare/distributed_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/compare/distributed_compare.py rename to debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py diff --git a/debug/accuracy_tools/atat/pytorch/compare/highlight.py b/debug/accuracy_tools/msprobe/pytorch/compare/highlight.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/compare/highlight.py rename to debug/accuracy_tools/msprobe/pytorch/compare/highlight.py diff --git a/debug/accuracy_tools/atat/pytorch/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/debugger/debugger_config.py rename to debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py 
diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py new file mode 100644 index 0000000000..d84d535b29 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -0,0 +1,95 @@ +import torch +from torch.utils.data import dataloader +from atat.pytorch.debugger.debugger_config import DebuggerConfig +from atat.pytorch.service import Service +from atat.pytorch.common.log import logger +from atat.pytorch.pt_config import parse_json_config +from atat.core.common.exceptions import MsaccException + + +class PrecisionDebugger: + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super(PrecisionDebugger, cls).__new__(cls) + cls._instance.config = None + cls._instance.enable_dataloader = False + return cls._instance + + def __init__( + self, + config_path=None, + task=None, + dump_path=None, + level=None, + model=None, + step=None, + ): + if not hasattr(self, "initialized"): + self.initialized = True + self.model = self.check_model_valid(model) + common_config, task_config = parse_json_config(config_path, task) + if step: + common_config.step = step + self.config = DebuggerConfig( + common_config, task_config, task, dump_path, level + ) + self.config.check_model(self.model) + self.service = Service(self.config) + self.enable_dataloader = self.config.enable_dataloader + if self.enable_dataloader: + logger.warning_on_rank_0("The enable_dataloader feature will be deprecated in the future.") + dataloader._BaseDataLoaderIter.__next__ = iter_tracer(dataloader._BaseDataLoaderIter.__next__) + + @property + def instance(self): + return self._instance + + @staticmethod + def check_model_valid(model): + if not model or isinstance(model, torch.nn.Module): + return model + raise MsaccException( + MsaccException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" + ) + + @classmethod + def start(cls): + instance 
= cls._instance + if not instance: + raise Exception("No instance of PrecisionDebugger found.") + if instance.enable_dataloader: + logger.warning_on_rank_0("DataLoader is enabled, start() skipped.") + else: + instance.service.start(instance.model) + + @classmethod + def stop(cls): + instance = cls._instance + if not instance: + raise Exception("PrecisionDebugger instance is not created.") + if instance.enable_dataloader: + logger.warning_on_rank_0("DataLoader is enabled, stop() skipped.") + else: + instance.service.stop() + + @classmethod + def step(cls): + if not cls._instance: + raise Exception("PrecisionDebugger instance is not created.") + cls._instance.service.step() + + +def iter_tracer(func): + def func_wrapper(*args, **kwargs): + debugger_instance = PrecisionDebugger.instance + debugger_instance.enable_dataloader = False + if not debugger_instance.service.first_start: + debugger_instance.stop() + debugger_instance.step() + result = func(*args, **kwargs) + debugger_instance.start() + debugger_instance.enable_dataloader = True + return result + return func_wrapper diff --git a/debug/accuracy_tools/atat/pytorch/doc/FAQ.md b/debug/accuracy_tools/msprobe/pytorch/doc/FAQ.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/FAQ.md rename to debug/accuracy_tools/msprobe/pytorch/doc/FAQ.md diff --git a/debug/accuracy_tools/atat/pytorch/doc/api_accuracy_checker.md b/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/api_accuracy_checker.md rename to debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md diff --git "a/debug/accuracy_tools/atat/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" 
"b/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" similarity index 100% rename from "debug/accuracy_tools/atat/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" rename to "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" diff --git a/debug/accuracy_tools/atat/pytorch/doc/dump.md b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/dump.md rename to debug/accuracy_tools/msprobe/pytorch/doc/dump.md diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_1.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_1.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_1.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_1.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_3.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_3.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_3.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_3.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_5.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_5.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_5.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_5.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_6.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_6.png similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_6.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_6.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_7.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_7.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/GPT-3_7.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/GPT-3_7.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/accuracy_checking_details.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/accuracy_checking_details.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/accuracy_checking_details.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/accuracy_checking_details.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/accuracy_checking_result.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/accuracy_checking_result.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/accuracy_checking_result.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/accuracy_checking_result.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/api_precision_compare_details.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/api_precision_compare_details.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/api_precision_compare_details.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/api_precision_compare_details.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/api_precision_compare_result.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/api_precision_compare_result.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/api_precision_compare_result.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/api_precision_compare_result.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/auto_analyze_log.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/auto_analyze_log.png similarity index 
100% rename from debug/accuracy_tools/atat/pytorch/doc/img/auto_analyze_log.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/auto_analyze_log.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/compare_result_pkl.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/compare_result_pkl.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/compare_result_pkl.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/compare_result_pkl.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/compare_result_pkl_md5.png.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/compare_result_pkl_md5.png.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/compare_result_pkl_md5.png.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/compare_result_pkl_md5.png.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/cpu_info.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/cpu_info.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/cpu_info.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/cpu_info.png diff --git a/debug/accuracy_tools/atat/pytorch/doc/ptdbg_ascend_compare.md b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_compare.md similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/ptdbg_ascend_compare.md rename to debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_compare.md diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/constant.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/common/constant.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/counter.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/counter.py similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/free_benchmark/common/counter.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/counter.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/compare/grad_saver.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/grad_saver.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/compare/grad_saver.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/grad_saver.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/compare/single_benchmark.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/single_benchmark.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/compare/single_benchmark.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/single_benchmark.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/main.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/main.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/main.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/main.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/run_cpu.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/run_cpu.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py diff --git 
a/debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/fix_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/fix_handler.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py diff --git a/debug/accuracy_tools/atat/pytorch/functional/data_processor.py b/debug/accuracy_tools/msprobe/pytorch/functional/data_processor.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/functional/data_processor.py rename to debug/accuracy_tools/msprobe/pytorch/functional/data_processor.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml rename to debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/dispatch.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dispatch.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/dispatch.py rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/dispatch.py diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/dump_compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/dump_compare.py rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py diff --git a/debug/accuracy_tools/atat/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/service.py rename to debug/accuracy_tools/msprobe/pytorch/service.py diff --git a/debug/accuracy_tools/atat/test/core_ut/data_dump/test_data_collector.py 
b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_data_collector.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/data_dump/test_data_collector.py rename to debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_data_collector.py diff --git a/debug/accuracy_tools/atat/test/core_ut/test_file_check.py b/debug/accuracy_tools/msprobe/test/core_ut/test_file_check.py similarity index 100% rename from debug/accuracy_tools/atat/test/core_ut/test_file_check.py rename to debug/accuracy_tools/msprobe/test/core_ut/test_file_check.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_api_kbk_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_api_kbk_dump.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_api_kbk_dump.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_api_kbk_dump.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_dump_tool_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_dump_tool_factory.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_kernel_graph_overflow_check.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_kernel_graph_overflow_check.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_ms_config.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_ms_config.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py diff --git 
a/debug/accuracy_tools/atat/test/mindspore_ut/test_overflow_check_tool_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_overflow_check_tool_factory.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py diff --git a/debug/accuracy_tools/atat/test/mindspore_ut/test_precision_debugger.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py similarity index 100% rename from debug/accuracy_tools/atat/test/mindspore_ut/test_precision_debugger.py rename to debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/advisor/test_advisor.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/advisor/test_advisor.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/advisor/test_advisor.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/advisor/test_advisor.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/common/test_config.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py 
b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py diff --git 
a/debug/accuracy_tools/atat/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/free_benchmark/test_main.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/test_main.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/free_benchmark/test_main.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/test_main.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/functional/test_dump_module.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/functional/test_dump_module.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/functional/test_dump_module.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/functional/test_dump_module.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_api_registry.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_api_registry.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_hook_module.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_hook_module.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_hook_module.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_hook_module.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_aten.py 
b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_aten.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_vf.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py similarity index 100% rename from debug/accuracy_tools/atat/test/pytorch_ut/hook_module/test_wrap_vf.py rename to debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py diff --git a/debug/accuracy_tools/atat/test/resources/advisor.txt b/debug/accuracy_tools/msprobe/test/resources/advisor.txt similarity index 100% rename from debug/accuracy_tools/atat/test/resources/advisor.txt rename to debug/accuracy_tools/msprobe/test/resources/advisor.txt diff --git a/debug/accuracy_tools/atat/test/resources/compare_result_20230703104808.csv b/debug/accuracy_tools/msprobe/test/resources/compare_result_20230703104808.csv similarity index 100% rename from debug/accuracy_tools/atat/test/resources/compare_result_20230703104808.csv rename to debug/accuracy_tools/msprobe/test/resources/compare_result_20230703104808.csv diff --git a/debug/accuracy_tools/atat/test/resources/compare_result_without_accuracy.csv b/debug/accuracy_tools/msprobe/test/resources/compare_result_without_accuracy.csv similarity index 100% rename from debug/accuracy_tools/atat/test/resources/compare_result_without_accuracy.csv rename to debug/accuracy_tools/msprobe/test/resources/compare_result_without_accuracy.csv diff --git a/debug/accuracy_tools/atat/test/resources/config.yaml b/debug/accuracy_tools/msprobe/test/resources/config.yaml similarity index 100% rename from debug/accuracy_tools/atat/test/resources/config.yaml rename to debug/accuracy_tools/msprobe/test/resources/config.yaml diff --git a/debug/accuracy_tools/atat/test/test_module_processer.py 
b/debug/accuracy_tools/msprobe/test/test_module_processer.py similarity index 100% rename from debug/accuracy_tools/atat/test/test_module_processer.py rename to debug/accuracy_tools/msprobe/test/test_module_processer.py -- Gitee From 165f16b7ed2402d06fcb55731b6adb4ba8515852 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 18:36:21 +0800 Subject: [PATCH 017/106] atat to msprobe3 --- debug/accuracy_tools/{atat => msprobe}/__init__.py | 0 .../{atat => msprobe}/core/common/utils.py | 0 .../{atat => msprobe}/mindspore/__init__.py | 0 .../mindspore/debugger/__init__.py | 0 .../{atat => msprobe}/mindspore/dump/__init__.py | 0 .../{atat => msprobe}/mindspore/ms_config.py | 0 .../mindspore/overflow_check/__init__.py | 0 .../{atat => msprobe}/pytorch/__init__.py | 0 .../pytorch/api_accuracy_checker/__init__.py | 0 .../pytorch/api_accuracy_checker/common/__init__.py | 0 .../pytorch/api_accuracy_checker/common/utils.py | 0 .../api_accuracy_checker/compare/__init__.py | 0 .../pytorch/api_accuracy_checker/run_ut/__init__.py | 0 .../api_accuracy_checker/run_ut/multi_run_ut.py | 0 .../{atat => msprobe}/pytorch/common/__init__.py | 0 .../{atat => msprobe}/pytorch/common/utils.py | 0 .../{atat => msprobe}/pytorch/debugger/__init__.py | 0 .../pytorch/doc/img/module_compare.png | Bin .../pytorch/free_benchmark/__init__.py | 0 .../pytorch/free_benchmark/common/__init__.py | 0 .../pytorch/free_benchmark/common/utils.py | 0 .../free_benchmark/perturbed_layers/__init__.py | 0 .../free_benchmark/perturbed_layers/npu/__init__.py | 0 .../free_benchmark/result_handlers/__init__.py | 0 .../pytorch/functional/__init__.py | 0 .../pytorch/hook_module/__init__.py | 0 .../{atat => msprobe}/pytorch/hook_module/utils.py | 0 .../pytorch/hook_module/wrap_functional.py | 0 .../pytorch/hook_module/wrap_npu_custom.py | 0 .../pytorch/hook_module/wrap_vf.py | 0 .../{atat => msprobe}/pytorch/module_processer.py | 0 .../pytorch/online_dispatch/__init__.py | 0 
.../pytorch/online_dispatch/utils.py | 0 .../{atat => msprobe}/pytorch/parse.py | 0 .../pytorch/parse_tool/__init__.py | 0 .../{atat => msprobe}/test/resources/npu_test.pkl | 0 36 files changed, 0 insertions(+), 0 deletions(-) rename debug/accuracy_tools/{atat => msprobe}/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/core/common/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/debugger/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/dump/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/ms_config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/mindspore/overflow_check/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/common/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/common/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/compare/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/common/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/common/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/debugger/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/doc/img/module_compare.png (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/common/__init__.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/free_benchmark/common/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/perturbed_layers/npu/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/free_benchmark/result_handlers/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/functional/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_functional.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_npu_custom.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/hook_module/wrap_vf.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/module_processer.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/online_dispatch/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/test/resources/npu_test.pkl (100%) diff --git a/debug/accuracy_tools/atat/__init__.py b/debug/accuracy_tools/msprobe/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/__init__.py rename to debug/accuracy_tools/msprobe/__init__.py diff --git a/debug/accuracy_tools/atat/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py similarity index 100% rename from debug/accuracy_tools/atat/core/common/utils.py rename to debug/accuracy_tools/msprobe/core/common/utils.py diff --git a/debug/accuracy_tools/atat/mindspore/__init__.py b/debug/accuracy_tools/msprobe/mindspore/__init__.py similarity index 100% rename 
from debug/accuracy_tools/atat/mindspore/__init__.py rename to debug/accuracy_tools/msprobe/mindspore/__init__.py diff --git a/debug/accuracy_tools/atat/mindspore/debugger/__init__.py b/debug/accuracy_tools/msprobe/mindspore/debugger/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/debugger/__init__.py rename to debug/accuracy_tools/msprobe/mindspore/debugger/__init__.py diff --git a/debug/accuracy_tools/atat/mindspore/dump/__init__.py b/debug/accuracy_tools/msprobe/mindspore/dump/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/dump/__init__.py rename to debug/accuracy_tools/msprobe/mindspore/dump/__init__.py diff --git a/debug/accuracy_tools/atat/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/ms_config.py rename to debug/accuracy_tools/msprobe/mindspore/ms_config.py diff --git a/debug/accuracy_tools/atat/mindspore/overflow_check/__init__.py b/debug/accuracy_tools/msprobe/mindspore/overflow_check/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/mindspore/overflow_check/__init__.py rename to debug/accuracy_tools/msprobe/mindspore/overflow_check/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/__init__.py b/debug/accuracy_tools/msprobe/pytorch/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/__init__.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/__init__.py 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/common/utils.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/__init__.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/compare/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/__init__.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py rename to debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py diff --git a/debug/accuracy_tools/atat/pytorch/common/__init__.py b/debug/accuracy_tools/msprobe/pytorch/common/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/common/__init__.py rename to 
debug/accuracy_tools/msprobe/pytorch/common/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/common/utils.py rename to debug/accuracy_tools/msprobe/pytorch/common/utils.py diff --git a/debug/accuracy_tools/atat/pytorch/debugger/__init__.py b/debug/accuracy_tools/msprobe/pytorch/debugger/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/debugger/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/debugger/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/doc/img/module_compare.png b/debug/accuracy_tools/msprobe/pytorch/doc/img/module_compare.png similarity index 100% rename from debug/accuracy_tools/atat/pytorch/doc/img/module_compare.png rename to debug/accuracy_tools/msprobe/pytorch/doc/img/module_compare.png diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/__init__.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/__init__.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/common/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/common/utils.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/__init__.py 
b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/__init__.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/perturbed_layers/npu/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/__init__.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/free_benchmark/result_handlers/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/functional/__init__.py b/debug/accuracy_tools/msprobe/pytorch/functional/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/functional/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/functional/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/__init__.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/utils.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/utils.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py diff --git 
a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py rename to debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py diff --git a/debug/accuracy_tools/atat/pytorch/module_processer.py b/debug/accuracy_tools/msprobe/pytorch/module_processer.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/module_processer.py rename to debug/accuracy_tools/msprobe/pytorch/module_processer.py diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/__init__.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/online_dispatch/utils.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/online_dispatch/utils.py rename to debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py diff --git a/debug/accuracy_tools/atat/pytorch/parse.py b/debug/accuracy_tools/msprobe/pytorch/parse.py similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/parse.py rename to debug/accuracy_tools/msprobe/pytorch/parse.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/__init__.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/__init__.py diff --git a/debug/accuracy_tools/atat/test/resources/npu_test.pkl b/debug/accuracy_tools/msprobe/test/resources/npu_test.pkl similarity index 100% rename from debug/accuracy_tools/atat/test/resources/npu_test.pkl rename to debug/accuracy_tools/msprobe/test/resources/npu_test.pkl -- Gitee From 842ab5330360459b4ee4c33c35e8e33f65f879f7 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 18:38:02 +0800 Subject: [PATCH 018/106] atat to msprobe4 --- .../{atat => msprobe}/pytorch/parse_tool/lib/__init__.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/compare.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/config.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/file_desc.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/interactive_cli.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/parse_exception.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/parse_tool.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/utils.py | 0 .../{atat => msprobe}/pytorch/parse_tool/lib/visualization.py | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/compare.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/config.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/file_desc.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/interactive_cli.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/parse_tool/lib/parse_exception.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/parse_tool.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/utils.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/parse_tool/lib/visualization.py (100%) diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/__init__.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/compare.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/compare.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/compare.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/compare.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/config.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/config.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/config.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/config.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/file_desc.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/file_desc.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/file_desc.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/file_desc.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/interactive_cli.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/interactive_cli.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/interactive_cli.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/interactive_cli.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/parse_exception.py 
b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_exception.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/parse_exception.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_exception.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/parse_tool.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_tool.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/parse_tool.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_tool.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/utils.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/utils.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py diff --git a/debug/accuracy_tools/atat/pytorch/parse_tool/lib/visualization.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/visualization.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/parse_tool/lib/visualization.py rename to debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/visualization.py -- Gitee From 90db3c77b1c983ae568f37b5d040a8af8ad42f81 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 18:57:17 +0800 Subject: [PATCH 019/106] atat to msprobe5 --- debug/accuracy_tools/msprobe/atat.py | 12 +++--- .../msprobe/core/common/file_check.py | 6 +-- .../msprobe/core/common/utils.py | 6 +-- .../msprobe/core/common_config.py | 6 +-- .../msprobe/core/data_dump/data_collector.py | 10 ++--- .../core/data_dump/data_processor/base.py | 6 +-- .../core/data_dump/data_processor/factory.py | 2 +- .../data_processor/pytorch_processor.py | 12 +++--- .../msprobe/core/data_dump/json_writer.py | 6 +-- .../msprobe/core/data_dump/scope.py | 4 +- .../msprobe/mindspore/__init__.py | 2 +- .../mindspore/debugger/precision_debugger.py | 6 +-- .../msprobe/mindspore/dump/api_kbk_dump.py 
| 8 ++-- .../mindspore/dump/dump_tool_factory.py | 6 +-- .../mindspore/dump/kernel_graph_dump.py | 8 ++-- .../msprobe/mindspore/ms_config.py | 4 +- .../kernel_graph_overflow_check.py | 8 ++-- .../overflow_check_tool_factory.py | 4 +- .../msprobe/mindspore/task_handler_factory.py | 6 +-- .../msprobe/pytorch/advisor/advisor.py | 12 +++--- .../msprobe/pytorch/advisor/advisor_result.py | 8 ++-- .../api_accuracy_checker/common/config.py | 6 +-- .../api_accuracy_checker/common/utils.py | 8 ++-- .../api_accuracy_checker/compare/algorithm.py | 4 +- .../compare/api_precision_compare.py | 18 ++++---- .../api_accuracy_checker/compare/compare.py | 14 +++---- .../compare/compare_column.py | 2 +- .../compare/compare_utils.py | 8 ++-- .../run_ut/data_generate.py | 9 ++-- .../run_ut/multi_run_ut.py | 12 +++--- .../run_ut/run_overflow_check.py | 8 ++-- .../api_accuracy_checker/run_ut/run_ut.py | 26 ++++++------ .../msprobe/pytorch/common/log.py | 6 +-- .../msprobe/pytorch/common/parse_json.py | 2 +- .../msprobe/pytorch/common/utils.py | 2 +- .../msprobe/pytorch/compare/acc_compare.py | 16 +++---- .../pytorch/compare/distributed_compare.py | 8 ++-- .../msprobe/pytorch/compare/highlight.py | 4 +- .../msprobe/pytorch/compare/match.py | 4 +- .../msprobe/pytorch/compare/npy_compare.py | 6 +-- .../pytorch/debugger/debugger_config.py | 6 +-- .../pytorch/debugger/precision_debugger.py | 10 ++--- .../pytorch/free_benchmark/__init__.py | 6 +-- .../pytorch/free_benchmark/common/constant.py | 4 +- .../pytorch/free_benchmark/common/counter.py | 2 +- .../pytorch/free_benchmark/common/params.py | 8 ++-- .../pytorch/free_benchmark/common/utils.py | 2 +- .../free_benchmark/compare/grad_saver.py | 20 ++++----- .../compare/single_benchmark.py | 8 ++-- .../msprobe/pytorch/free_benchmark/main.py | 20 ++++----- .../perturbed_layers/base_layer.py | 2 +- .../perturbed_layers/layer_factory.py | 16 +++---- .../perturbed_layers/npu/add_noise.py | 22 +++++----- .../perturbed_layers/npu/bit_noise.py | 22 
+++++----- .../perturbed_layers/npu/change_value.py | 14 +++---- .../perturbed_layers/npu/improve_precision.py | 14 +++---- .../perturbed_layers/npu/no_change.py | 10 ++--- .../perturbed_layers/npu/npu_base_layser.py | 4 +- .../perturbed_layers/run_cpu.py | 12 +++--- .../result_handlers/base_handler.py | 18 ++++---- .../result_handlers/check_handler.py | 14 +++---- .../result_handlers/fix_handler.py | 10 ++--- .../result_handlers/handler_factory.py | 14 +++---- .../result_handlers/preheat_handler.py | 22 +++++----- .../msprobe/pytorch/functional/dump_module.py | 12 +++--- .../pytorch/hook_module/api_registry.py | 18 ++++---- .../pytorch/hook_module/hook_module.py | 2 +- .../msprobe/pytorch/hook_module/utils.py | 2 +- .../msprobe/pytorch/hook_module/wrap_aten.py | 8 ++-- .../pytorch/hook_module/wrap_distributed.py | 8 ++-- .../pytorch/hook_module/wrap_functional.py | 10 ++--- .../pytorch/hook_module/wrap_npu_custom.py | 8 ++-- .../pytorch/hook_module/wrap_tensor.py | 8 ++-- .../msprobe/pytorch/hook_module/wrap_torch.py | 8 ++-- .../msprobe/pytorch/hook_module/wrap_vf.py | 8 ++-- .../msprobe/pytorch/module_processer.py | 4 +- .../pytorch/online_dispatch/compare.py | 8 ++-- .../pytorch/online_dispatch/dispatch.py | 10 ++--- .../pytorch/online_dispatch/dump_compare.py | 6 +-- .../msprobe/pytorch/online_dispatch/utils.py | 4 +- debug/accuracy_tools/msprobe/pytorch/parse.py | 2 +- .../msprobe/pytorch/parse_tool/cli.py | 4 +- .../msprobe/pytorch/parse_tool/lib/compare.py | 6 +-- .../pytorch/parse_tool/lib/interactive_cli.py | 8 ++-- .../pytorch/parse_tool/lib/parse_exception.py | 2 +- .../pytorch/parse_tool/lib/parse_tool.py | 10 ++--- .../msprobe/pytorch/parse_tool/lib/utils.py | 16 +++---- .../pytorch/parse_tool/lib/visualization.py | 8 ++-- .../msprobe/pytorch/pt_config.py | 6 +-- .../accuracy_tools/msprobe/pytorch/service.py | 24 +++++------ .../msprobe/test/core_ut/common/test_utils.py | 40 +++++++++--------- .../core_ut/data_dump/test_data_collector.py | 16 +++---- 
.../core_ut/data_dump/test_json_writer.py | 6 +-- .../test/core_ut/data_dump/test_scope.py | 4 +- .../test/core_ut/test_common_config.py | 8 ++-- .../msprobe/test/core_ut/test_file_check.py | 42 +++++++++---------- .../msprobe/test/core_ut/test_log.py | 10 ++--- .../test/mindspore_ut/test_api_kbk_dump.py | 14 +++---- .../test/mindspore_ut/test_debugger_config.py | 6 +-- .../mindspore_ut/test_dump_tool_factory.py | 6 +-- .../mindspore_ut/test_kernel_graph_dump.py | 14 +++---- .../test_kernel_graph_overflow_check.py | 14 +++---- .../test/mindspore_ut/test_ms_config.py | 8 ++-- .../test_overflow_check_tool_factory.py | 6 +-- .../mindspore_ut/test_precision_debugger.py | 10 ++--- .../mindspore_ut/test_task_handler_factory.py | 10 ++--- .../test/pytorch_ut/advisor/test_advisor.py | 4 +- .../common/test_common_utils.py | 4 +- .../common/test_config.py | 2 +- .../compare/test_algorithm.py | 2 +- .../compare/test_api_precision_compare.py | 4 +- .../compare/test_compare.py | 6 +-- .../compare/test_compare_column.py | 2 +- .../compare/test_compare_utils.py | 4 +- .../run_ut/test_data_generate.py | 4 +- .../run_ut/test_multi_run_ut.py | 16 +++---- .../run_ut/test_run_ut.py | 4 +- .../pytorch_ut/compare/test_acc_compare.py | 2 +- .../perturbed_layers/test_perturbed_layser.py | 8 ++-- .../result_handlers/test_result_handler.py | 12 +++--- .../pytorch_ut/free_benchmark/test_main.py | 8 ++-- .../pytorch_ut/functional/test_dump_module.py | 4 +- .../hook_module/test_api_registry.py | 6 +-- .../hook_module/test_hook_module.py | 2 +- .../pytorch_ut/hook_module/test_wrap_aten.py | 2 +- .../hook_module/test_wrap_distributed.py | 2 +- .../hook_module/test_wrap_functional.py | 2 +- .../hook_module/test_wrap_tensor.py | 2 +- .../pytorch_ut/hook_module/test_wrap_torch.py | 2 +- .../pytorch_ut/hook_module/test_wrap_vf.py | 2 +- .../msprobe/test/pytorch_ut/test_pt_config.py | 16 +++---- .../msprobe/test/pytorch_ut/test_service.py | 28 ++++++------- debug/accuracy_tools/msprobe/test/run_ut.py 
| 2 +- .../msprobe/test/test_module_processer.py | 4 +- 134 files changed, 583 insertions(+), 582 deletions(-) diff --git a/debug/accuracy_tools/msprobe/atat.py b/debug/accuracy_tools/msprobe/atat.py index 90f8215b10..89fa4cf965 100644 --- a/debug/accuracy_tools/msprobe/atat.py +++ b/debug/accuracy_tools/msprobe/atat.py @@ -15,19 +15,19 @@ import argparse import sys -from atat.pytorch.api_accuracy_checker.run_ut.run_ut import _run_ut_parser, run_ut_command -from atat.pytorch.parse_tool.cli import parse as cli_parse -from atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut import prepare_config, run_parallel_ut -from atat.pytorch.api_accuracy_checker.compare.api_precision_compare import _api_precision_compare_parser, \ +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import _run_ut_parser, run_ut_command +from msprobe.pytorch.parse_tool.cli import parse as cli_parse +from msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut import prepare_config, run_parallel_ut +from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import _api_precision_compare_parser, \ _api_precision_compare_command -from atat.pytorch.api_accuracy_checker.run_ut.run_overflow_check import _run_overflow_check_parser, \ +from msprobe.pytorch.api_accuracy_checker.run_ut.run_overflow_check import _run_overflow_check_parser, \ _run_overflow_check_command def main(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, - description="atat(ascend training accuracy tools), [Powered by MindStudio].\n" + description="msprobe(ascend training accuracy tools), [Powered by MindStudio].\n" "Providing one-site accuracy difference debugging toolkit for training on Ascend Devices.\n" f"For any issue, refer README.md first", ) diff --git a/debug/accuracy_tools/msprobe/core/common/file_check.py b/debug/accuracy_tools/msprobe/core/common/file_check.py index 2df825aa35..36896cfbc1 100644 --- a/debug/accuracy_tools/msprobe/core/common/file_check.py +++ 
b/debug/accuracy_tools/msprobe/core/common/file_check.py @@ -17,9 +17,9 @@ import os import re -from atat.core.common.log import logger -from atat.core.common.exceptions import FileCheckException -from atat.core.common.const import FileCheckConst +from msprobe.core.common.log import logger +from msprobe.core.common.exceptions import FileCheckException +from msprobe.core.common.const import FileCheckConst class FileChecker: diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index 088530f3c5..32aba8d8af 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -26,9 +26,9 @@ from datetime import datetime, timezone from pathlib import Path import numpy as np -from atat.core.common.file_check import FileOpen, FileChecker -from atat.core.common.const import Const, FileCheckConst, CompareConst, OverflowConst -from atat.core.common.log import logger +from msprobe.core.common.file_check import FileOpen, FileChecker +from msprobe.core.common.const import Const, FileCheckConst, CompareConst, OverflowConst +from msprobe.core.common.log import logger device = collections.namedtuple('device', ['type', 'index']) diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py index e256372ca8..b7d446ce8e 100644 --- a/debug/accuracy_tools/msprobe/core/common_config.py +++ b/debug/accuracy_tools/msprobe/core/common_config.py @@ -1,6 +1,6 @@ -from atat.core.common.const import Const -from atat.core.common.log import logger -from atat.core.common.exceptions import MsaccException +from msprobe.core.common.const import Const +from msprobe.core.common.log import logger +from msprobe.core.common.exceptions import MsaccException class CommonConfig: diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py index 
f6a9a70b13..800a2b81c2 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py @@ -1,11 +1,11 @@ import os -from atat.core.data_dump.scope import build_scope, ListScope -from atat.core.data_dump.json_writer import DataWriter -from atat.core.common.log import logger -from atat.core.common.const import Const -from atat.core.data_dump.data_processor.factory import DataProcessorFactory +from msprobe.core.data_dump.scope import build_scope, ListScope +from msprobe.core.data_dump.json_writer import DataWriter +from msprobe.core.common.log import logger +from msprobe.core.common.const import Const +from msprobe.core.data_dump.data_processor.factory import DataProcessorFactory def build_data_collector(config): diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index ba9bfaab3d..430d13634c 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -3,9 +3,9 @@ import inspect from dataclasses import dataclass from typing import Tuple, Dict, Optional, Any import numpy as np -from atat.core.common.log import logger -from atat.core.common.utils import convert_tuple -from atat.core.common.const import Const +from msprobe.core.common.log import logger +from msprobe.core.common.utils import convert_tuple +from msprobe.core.common.const import Const @dataclass diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py index bcc771f368..2c536ba577 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py @@ -1,4 +1,4 @@ -from atat.core.common.const import Const +from msprobe.core.common.const import Const class 
DataProcessorFactory: diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 6d84d9e1f4..95be091b21 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -5,13 +5,13 @@ from typing import List import numpy as np import torch -from atat.core.common.exceptions import MsaccException -from atat.core.common.file_check import path_len_exceeds_limit, change_mode -from atat.core.common.log import logger -from atat.core.common.const import Const, OverflowConst, FileCheckConst -from atat.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \ +from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.file_check import path_len_exceeds_limit, change_mode +from msprobe.core.common.log import logger +from msprobe.core.common.const import Const, OverflowConst, FileCheckConst +from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \ ModuleForwardInputsOutputs, TensorStatInfo -from atat.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow +from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow try: import torch_npu diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 23f37b2342..c4b7fc11ec 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -4,9 +4,9 @@ import fcntl import json from pathlib import Path -from atat.core.common.file_check import change_mode -from atat.core.common.log import logger -from atat.core.common.const import Const, FileCheckConst +from msprobe.core.common.file_check import change_mode +from 
msprobe.core.common.log import logger +from msprobe.core.common.const import Const, FileCheckConst class DataWriter: diff --git a/debug/accuracy_tools/msprobe/core/data_dump/scope.py b/debug/accuracy_tools/msprobe/core/data_dump/scope.py index e7114f343f..1d74c3e461 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/scope.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/scope.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from atat.core.common.exceptions import ScopeException -from atat.core.common.const import Const +from msprobe.core.common.exceptions import ScopeException +from msprobe.core.common.const import Const def build_scope(scope_class, scope=None, api_list=None): diff --git a/debug/accuracy_tools/msprobe/mindspore/__init__.py b/debug/accuracy_tools/msprobe/mindspore/__init__.py index bb3f935675..3bf42d1e39 100644 --- a/debug/accuracy_tools/msprobe/mindspore/__init__.py +++ b/debug/accuracy_tools/msprobe/mindspore/__init__.py @@ -1 +1 @@ -from atat.mindspore.debugger.precision_debugger import PrecisionDebugger +from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 0099074762..358d0d6f7d 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -1,7 +1,7 @@ import os -from atat.mindspore.ms_config import parse_json_config -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.task_handler_factory import TaskHandlerFactory +from msprobe.mindspore.ms_config import parse_json_config +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.task_handler_factory import TaskHandlerFactory class PrecisionDebugger: diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/api_kbk_dump.py 
b/debug/accuracy_tools/msprobe/mindspore/dump/api_kbk_dump.py index a53841189f..5c7af45d79 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/api_kbk_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/api_kbk_dump.py @@ -1,9 +1,9 @@ import os import json -from atat.core.common.utils import make_dump_path_if_not_exists -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.core.common.log import logger -from atat.core.common.file_check import FileOpen +from msprobe.core.common.utils import make_dump_path_if_not_exists +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.core.common.log import logger +from msprobe.core.common.file_check import FileOpen class ApiKbkDump: diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py index ab534edc24..2c4579b0e7 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py @@ -1,6 +1,6 @@ -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.dump.api_kbk_dump import ApiKbkDump -from atat.mindspore.dump.kernel_graph_dump import KernelGraphDump +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.dump.api_kbk_dump import ApiKbkDump +from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump class DumpToolFactory: diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py index 190e6bc4d5..8320ee0906 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py @@ -1,9 +1,9 @@ import os import json -from atat.core.common.utils import make_dump_path_if_not_exists -from atat.mindspore.debugger.debugger_config import DebuggerConfig 
-from atat.core.common.log import logger -from atat.core.common.file_check import FileOpen +from msprobe.core.common.utils import make_dump_path_if_not_exists +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.core.common.log import logger +from msprobe.core.common.file_check import FileOpen class KernelGraphDump: diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index 02cead32f1..2b390ae9e4 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -1,6 +1,6 @@ import json -from atat.core.common_config import CommonConfig, BaseConfig -from atat.core.common.file_check import FileOpen +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.core.common.file_check import FileOpen class TensorConfig(BaseConfig): diff --git a/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py b/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py index 7a677eb3c7..6640608735 100644 --- a/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +++ b/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py @@ -1,9 +1,9 @@ import os import json -from atat.core.common.utils import make_dump_path_if_not_exists -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.core.common.log import logger -from atat.core.common.file_check import FileOpen +from msprobe.core.common.utils import make_dump_path_if_not_exists +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.core.common.log import logger +from msprobe.core.common.file_check import FileOpen class KernelGraphOverflowCheck: diff --git a/debug/accuracy_tools/msprobe/mindspore/overflow_check/overflow_check_tool_factory.py 
b/debug/accuracy_tools/msprobe/mindspore/overflow_check/overflow_check_tool_factory.py index fe53359be1..d809c71421 100644 --- a/debug/accuracy_tools/msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/overflow_check/overflow_check_tool_factory.py @@ -1,5 +1,5 @@ -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck class OverflowCheckToolFactory: diff --git a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py index 4f80e4e89c..7b7e6fd889 100644 --- a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py @@ -1,6 +1,6 @@ -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.dump.dump_tool_factory import DumpToolFactory -from atat.mindspore.overflow_check.overflow_check_tool_factory import OverflowCheckToolFactory +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.dump.dump_tool_factory import DumpToolFactory +from msprobe.mindspore.overflow_check.overflow_check_tool_factory import OverflowCheckToolFactory class TaskHandlerFactory: diff --git a/debug/accuracy_tools/msprobe/pytorch/advisor/advisor.py b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor.py index 43b3f40f97..b178664d9e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/advisor/advisor.py +++ b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor.py @@ -17,12 +17,12 @@ import os -from atat.pytorch.advisor.advisor_result import AdvisorResult -from atat.pytorch.advisor.advisor_const import AdvisorConst -from atat.pytorch.common.log import logger 
-from atat.core.common.utils import CompareException -from atat.core.common.file_check import FileChecker -from atat.core.common.const import Const, CompareConst, FileCheckConst +from msprobe.pytorch.advisor.advisor_result import AdvisorResult +from msprobe.pytorch.advisor.advisor_const import AdvisorConst +from msprobe.pytorch.common.log import logger +from msprobe.core.common.utils import CompareException +from msprobe.core.common.file_check import FileChecker +from msprobe.core.common.const import Const, CompareConst, FileCheckConst class Advisor: """ diff --git a/debug/accuracy_tools/msprobe/pytorch/advisor/advisor_result.py b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor_result.py index a24fa2a115..456f542e1f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/advisor/advisor_result.py +++ b/debug/accuracy_tools/msprobe/pytorch/advisor/advisor_result.py @@ -17,10 +17,10 @@ import os import time -from atat.pytorch.advisor.advisor_const import AdvisorConst -from atat.pytorch.common.log import logger -from atat.core.common.const import Const, FileCheckConst -from atat.core.common.file_check import change_mode +from msprobe.pytorch.advisor.advisor_const import AdvisorConst +from msprobe.pytorch.common.log import logger +from msprobe.core.common.const import Const, FileCheckConst +from msprobe.core.common.file_check import change_mode class AdvisorResult: diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py index f92d6fd16a..8e8ceda947 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py @@ -1,8 +1,8 @@ import os import yaml -from atat.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path -from atat.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps -from atat.core.common.file_check 
import FileOpen +from msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path +from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps +from msprobe.core.common.file_check import FileOpen WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py index 9e1b02c015..d5d08818a9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py @@ -28,10 +28,10 @@ except ImportError: else: IS_GPU = False -from atat.pytorch.common.log import logger -from atat.core.common.file_check import FileChecker, FileOpen, change_mode, create_directory -from atat.core.common.const import Const, FileCheckConst -from atat.core.common.utils import CompareException +from msprobe.pytorch.common.log import logger +from msprobe.core.common.file_check import FileChecker, FileOpen, change_mode, create_directory +from msprobe.core.common.const import Const, FileCheckConst +from msprobe.core.common.utils import CompareException class DumpException(CompareException): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py index 3982c167cc..1bb19cc048 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py @@ -2,8 +2,8 @@ import torch import numpy as np -from atat.pytorch.api_accuracy_checker.compare.compare_utils import ULP_PARAMETERS -from atat.core.common.const import CompareConst +from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import ULP_PARAMETERS +from msprobe.core.common.const import CompareConst 
DEFAULT_THRESHOLD = 1 diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py index f73c83c488..73bf7c2b8e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py @@ -7,19 +7,19 @@ from collections import namedtuple import torch import pandas as pd -from atat.pytorch.api_accuracy_checker.common.utils import write_csv -from atat.pytorch.api_accuracy_checker.common.config import msCheckerConfig -from atat.pytorch.api_accuracy_checker.compare.compare_utils import API_PRECISION_COMPARE_RESULT_FILE_NAME, \ +from msprobe.pytorch.api_accuracy_checker.common.utils import write_csv +from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig +from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import API_PRECISION_COMPARE_RESULT_FILE_NAME, \ API_PRECISION_COMPARE_DETAILS_FILE_NAME, BENCHMARK_COMPARE_SUPPORT_LIST, API_PRECISION_COMPARE_UNSUPPORT_LIST, \ ApiPrecisionCompareColumn, AbsoluteStandardApi, BinaryStandardApi, ULPStandardApi, ThousandthStandardApi, \ BINARY_COMPARE_UNSUPPORT_LIST, ULP_COMPARE_SUPPORT_LIST, convert_str_to_float, CompareMessage, is_inf_or_nan, \ check_inf_or_nan -from atat.pytorch.api_accuracy_checker.compare.compare_column import ApiPrecisionOutputColumn -from atat.pytorch.api_accuracy_checker.run_ut.run_ut import get_validated_result_csv_path -from atat.core.common.file_check import FileChecker, change_mode, check_path_before_create, create_directory -from atat.pytorch.common.log import logger -from atat.core.common.utils import CompareException -from atat.core.common.const import CompareConst, FileCheckConst +from msprobe.pytorch.api_accuracy_checker.compare.compare_column import ApiPrecisionOutputColumn +from 
msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import get_validated_result_csv_path +from msprobe.core.common.file_check import FileChecker, change_mode, check_path_before_create, create_directory +from msprobe.pytorch.common.log import logger +from msprobe.core.common.utils import CompareException +from msprobe.core.common.const import CompareConst, FileCheckConst CompareConfig = namedtuple('CompareConfig', ['npu_csv_path', 'gpu_csv_path', 'result_csv_path', 'details_csv_path']) BenchmarkInf_Nan_Consistency = namedtuple('BenchmarkInf_Nan_Consistency', ['small_value_inf_nan_consistency', diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare.py index ca35c8ed5d..ee49588288 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare.py @@ -3,18 +3,18 @@ import os from collections import namedtuple import torch import numpy as np -from atat.pytorch.common.log import logger -from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents, write_csv -from atat.pytorch.api_accuracy_checker.compare.compare_utils import check_dtype_comparable, \ +from msprobe.pytorch.common.log import logger +from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents, write_csv +from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import check_dtype_comparable, \ DETAIL_TEST_ROWS, precision_configs, BENCHMARK_COMPARE_SUPPORT_LIST, AbsoluteStandardApi, BinaryStandardApi, \ ULPStandardApi, ThousandthStandardApi, apis_threshold -from atat.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn -from atat.pytorch.api_accuracy_checker.compare.algorithm import get_rmse, get_error_balance, get_max_rel_err, \ +from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn +from 
msprobe.pytorch.api_accuracy_checker.compare.algorithm import get_rmse, get_error_balance, get_max_rel_err, \ get_mean_rel_err, get_rel_err, get_abs_err, get_max_abs_err, get_rel_err_ratio, cosine_sim, get_rel_err_origin, \ get_small_value_err_ratio, get_finite_and_infinite_mask, get_small_value_mask, check_inf_nan_value, \ check_small_value, check_norm_value, get_abs_bench_with_eps, get_ulp_err -from atat.pytorch.api_accuracy_checker.common.config import msCheckerConfig -from atat.core.common.const import Const, CompareConst +from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig +from msprobe.core.common.const import Const, CompareConst ResultInfo = namedtuple('ResultInfo', ['full_api_name', 'fwd_success_status', 'bwd_success_status', diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_column.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_column.py index 9867a76fad..fb6d5dcc0f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_column.py @@ -1,4 +1,4 @@ -from atat.core.common.const import CompareConst +from msprobe.core.common.const import CompareConst class CompareColumn: diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py index b7b32e41e4..5c7e86ff36 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py @@ -5,10 +5,10 @@ import math import numpy as np import torch import yaml -from atat.core.common.utils import CompareException -from atat.core.common.const import Const -from atat.pytorch.common.log import logger -from atat.core.common.file_check import FileOpen +from msprobe.core.common.utils 
import CompareException +from msprobe.core.common.const import Const +from msprobe.pytorch.common.log import logger +from msprobe.core.common.file_check import FileOpen current_time = time.strftime("%Y%m%d%H%M%S") diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py index 97b2dcc7e4..f495cd673d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py @@ -20,10 +20,11 @@ import math import torch import numpy -from atat.pytorch.api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api -from atat.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path, check_object_type, get_full_data_path, CompareException -from atat.pytorch.common.log import logger -from atat.core.common.const import Const +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api +from msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path, check_object_type, \ + get_full_data_path, CompareException +from msprobe.pytorch.common.log import logger +from msprobe.core.common.const import Const TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index d2ab9c1e95..9c96a52d8b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -9,14 +9,14 @@ import threading from collections import namedtuple from itertools import cycle from tqdm import tqdm -from atat.pytorch.api_accuracy_checker.run_ut.run_ut import 
_run_ut_parser, get_validated_result_csv_path, \ +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import _run_ut_parser, get_validated_result_csv_path, \ get_validated_details_csv_path, preprocess_forward_content -from atat.pytorch.api_accuracy_checker.compare.compare import Comparator -from atat.pytorch.common import parse_json_info_forward_backward -from atat.core.common.file_check import FileChecker, check_file_suffix, check_link, FileOpen, \ +from msprobe.pytorch.api_accuracy_checker.compare.compare import Comparator +from msprobe.pytorch.common import parse_json_info_forward_backward +from msprobe.core.common.file_check import FileChecker, check_file_suffix, check_link, FileOpen, \ check_path_before_create, create_directory -from atat.pytorch.common.log import logger -from atat.core.common.const import FileCheckConst +from msprobe.pytorch.common.log import logger +from msprobe.core.common.const import FileCheckConst def split_json_file(input_file, num_splits, filter_api): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py index c5834e9a8c..e38b4e6b24 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py @@ -10,10 +10,10 @@ else: is_gpu = False import torch from tqdm import tqdm -from atat.pytorch.api_accuracy_checker.run_ut.run_ut import exec_api, generate_device_params, get_api_info -from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents -from atat.core.common.file_check import check_link -from atat.pytorch.common.log import logger +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import exec_api, generate_device_params, get_api_info +from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents +from msprobe.core.common.file_check 
import check_link +from msprobe.pytorch.common.log import logger def check_tensor_overflow(x): if isinstance(x, torch.Tensor) and x.numel() != 0 and x.dtype != torch.bool: diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index cd83a95801..6295245a26 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -18,21 +18,21 @@ else: import torch from tqdm import tqdm -from atat.pytorch.api_accuracy_checker.run_ut.run_ut_utils import Backward_Message, hf_32_standard_api -from atat.pytorch.api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args -from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents, api_info_preprocess, \ +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import Backward_Message, hf_32_standard_api +from msprobe.pytorch.api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args +from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents, api_info_preprocess, \ initialize_save_path, UtDataProcessor -from atat.pytorch.api_accuracy_checker.compare.compare import Comparator -from atat.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn -from atat.pytorch.hook_module.wrap_tensor import TensorOPTemplate -from atat.pytorch.hook_module.wrap_functional import FunctionalOPTemplate -from atat.pytorch.hook_module.wrap_torch import TorchOPTemplate -from atat.pytorch.api_accuracy_checker.common.config import msCheckerConfig -from atat.pytorch.common.parse_json import parse_json_info_forward_backward -from atat.core.common.file_check import FileOpen, FileChecker, \ +from msprobe.pytorch.api_accuracy_checker.compare.compare import Comparator +from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn +from 
msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate +from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate +from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate +from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig +from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward +from msprobe.core.common.file_check import FileOpen, FileChecker, \ change_mode, check_file_suffix, check_link, check_path_before_create, create_directory -from atat.pytorch.common.log import logger -from atat.core.common.const import Const, FileCheckConst, CompareConst +from msprobe.pytorch.common.log import logger +from msprobe.core.common.const import Const, FileCheckConst, CompareConst current_time = time.strftime("%Y%m%d%H%M%S") UT_ERROR_DATA_DIR = 'ut_error_data' + current_time diff --git a/debug/accuracy_tools/msprobe/pytorch/common/log.py b/debug/accuracy_tools/msprobe/pytorch/common/log.py index e496e9b72a..cea518fa47 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/log.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/log.py @@ -1,9 +1,9 @@ import os import time import sys -from atat.pytorch.common.utils import get_rank_if_initialized -from atat.core.common.log import BaseLogger -from atat.core.common.exceptions import DistributedNotInitializedError +from msprobe.pytorch.common.utils import get_rank_if_initialized +from msprobe.core.common.log import BaseLogger +from msprobe.core.common.exceptions import DistributedNotInitializedError class PyTorchLogger(BaseLogger): diff --git a/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py b/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py index a938f5f0da..22f7987986 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py @@ -1,5 +1,5 @@ import json -from atat.core.common.exceptions import ParseJsonException +from msprobe.core.common.exceptions 
import ParseJsonException def parse_json_info_forward_backward(json_path): diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index 4b413ac575..a3118e21c2 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -20,7 +20,7 @@ import stat import torch import numpy as np from functools import wraps -from atat.core.common.exceptions import DistributedNotInitializedError +from msprobe.core.common.exceptions import DistributedNotInitializedError try: import torch_npu diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index c7a0111798..d417672317 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -27,16 +27,16 @@ from openpyxl.styles import PatternFill from collections import namedtuple from dataclasses import dataclass -from atat.pytorch.compare.match import graph_mapping -from atat.pytorch.compare.highlight import HighlightRules, get_header_index -from atat.pytorch.compare.npy_compare import compare_ops_apply, get_error_type, reshape_value, get_relative_err, \ +from msprobe.pytorch.compare.match import graph_mapping +from msprobe.pytorch.compare.highlight import HighlightRules, get_header_index +from msprobe.pytorch.compare.npy_compare import compare_ops_apply, get_error_type, reshape_value, get_relative_err, \ get_error_message -from atat.pytorch.advisor.advisor import Advisor -from atat.pytorch.common.log import logger -from atat.core.common.utils import check_compare_param, add_time_with_xlsx, CompareException, \ +from msprobe.pytorch.advisor.advisor import Advisor +from msprobe.pytorch.common.log import logger +from msprobe.core.common.utils import check_compare_param, add_time_with_xlsx, CompareException, \ format_value, check_file_not_exists, 
check_configuration_param, task_dumppath_get -from atat.core.common.file_check import FileChecker, change_mode, FileOpen, create_directory -from atat.core.common.const import Const, CompareConst, FileCheckConst +from msprobe.core.common.file_check import FileChecker, change_mode, FileOpen, create_directory +from msprobe.core.common.const import Const, CompareConst, FileCheckConst def check_graph_mode(a_op_name, b_op_name): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py index b89adc1581..0298eca9e7 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py @@ -17,11 +17,11 @@ import os import sys import re -from atat.core.common.utils import CompareException, check_compare_param, \ +from msprobe.core.common.utils import CompareException, check_compare_param, \ check_configuration_param, task_dumppath_get, check_file_or_directory_path, check_regex_prefix_format_valid -from atat.pytorch.compare.acc_compare import compare_core -from atat.core.common.file_check import create_directory -from atat.pytorch.common.log import logger +from msprobe.pytorch.compare.acc_compare import compare_core +from msprobe.core.common.file_check import create_directory +from msprobe.pytorch.common.log import logger def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/highlight.py b/debug/accuracy_tools/msprobe/pytorch/compare/highlight.py index 3a6898dedb..82f0022f8b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/highlight.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/highlight.py @@ -1,8 +1,8 @@ import math import abc import numpy as np -from atat.core.common.utils import get_header_index -from atat.core.common.const import CompareConst +from msprobe.core.common.utils import get_header_index 
+from msprobe.core.common.const import CompareConst class HighlightCheck(abc.ABC): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/match.py b/debug/accuracy_tools/msprobe/pytorch/compare/match.py index 148fbb7d64..6347d8887c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/match.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/match.py @@ -1,7 +1,7 @@ import os import yaml -from atat.core.common.file_check import FileOpen -from atat.core.common.utils import CompareException +from msprobe.core.common.file_check import FileOpen +from msprobe.core.common.utils import CompareException class AtenIrMapping(): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/npy_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/npy_compare.py index 0cf4c6c00a..5a0feb4cd4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/npy_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/npy_compare.py @@ -1,8 +1,8 @@ import abc import numpy as np -from atat.core.common.utils import format_value -from atat.core.common.const import Const, CompareConst -from atat.pytorch.common.log import logger +from msprobe.core.common.utils import format_value +from msprobe.core.common.const import Const, CompareConst +from msprobe.pytorch.common.log import logger def handle_inf_nan(n_value, b_value): diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py index bc544368b4..cfc588e1e9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py @@ -1,6 +1,6 @@ -from atat.pytorch.common import seed_all -from atat.pytorch.common.log import logger -from atat.core.common.const import Const +from msprobe.pytorch.common import seed_all +from msprobe.pytorch.common.log import logger +from msprobe.core.common.const import Const class DebuggerConfig: diff --git 
a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py index d84d535b29..1fce5a3035 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -1,10 +1,10 @@ import torch from torch.utils.data import dataloader -from atat.pytorch.debugger.debugger_config import DebuggerConfig -from atat.pytorch.service import Service -from atat.pytorch.common.log import logger -from atat.pytorch.pt_config import parse_json_config -from atat.core.common.exceptions import MsaccException +from msprobe.pytorch.debugger.debugger_config import DebuggerConfig +from msprobe.pytorch.service import Service +from msprobe.pytorch.common.log import logger +from msprobe.pytorch.pt_config import parse_json_config +from msprobe.core.common.exceptions import MsaccException class PrecisionDebugger: diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/__init__.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/__init__.py index b9d41330a8..d234898c0d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/__init__.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/__init__.py @@ -1,6 +1,6 @@ -from atat.core.common.log import logger -from atat.core.common.exceptions import FreeBenchmarkException -from atat.core.common.const import Const +from msprobe.core.common.log import logger +from msprobe.core.common.exceptions import FreeBenchmarkException +from msprobe.core.common.const import Const from .main import FreeBenchmarkCheck from .common.params import UnequalRow diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py index 36b7a64915..e737e7b217 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py @@ -2,8 +2,8 @@ from typing import Dict import numpy as np import torch -from atat.pytorch.free_benchmark.common.enums import FuzzThreshold -from atat.pytorch.free_benchmark.common.params import BenchmarkThd +from msprobe.pytorch.free_benchmark.common.enums import FuzzThreshold +from msprobe.pytorch.free_benchmark.common.params import BenchmarkThd class CommonField: diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/counter.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/counter.py index 186b75c71a..b2f8c81f3a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/counter.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/counter.py @@ -1,5 +1,5 @@ from collections import defaultdict -from atat.pytorch.free_benchmark.common.constant import ThresholdConfig +from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig class PreheatCounter: diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/params.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/params.py index 440348d78c..bbfc245a63 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/params.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/params.py @@ -2,13 +2,13 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Tuple import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.enums import ( +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.enums import ( DeviceType, FuzzLevel, PerturbationMode, ) -from atat.pytorch.free_benchmark.common.utils import Tools +from msprobe.pytorch.free_benchmark.common.utils import Tools @dataclass @@ -78,7 +78,7 @@ def data_pre_deal(name, func, args, kwargs): data_params.valid_input_index = index if index == -1: 
logger.warning_on_rank_0( - f"[atat] Free benchmark: 无标杆工具不支持当前算子的输入类型 {name}." + f"[msprobe] Free benchmark: 无标杆工具不支持当前算子的输入类型 {name}." ) return data_params diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py index 24d2596763..ddcbd9d0f5 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py @@ -1,5 +1,5 @@ import torch -from atat.pytorch.free_benchmark.common.enums import DeviceType +from msprobe.pytorch.free_benchmark.common.enums import DeviceType class Tools: diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/grad_saver.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/grad_saver.py index 89ef9e4c9b..6781a1c2fc 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/grad_saver.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/grad_saver.py @@ -1,10 +1,10 @@ import torch -from atat.core.common.exceptions import FreeBenchmarkException -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import CommonField -from atat.pytorch.free_benchmark.common.params import DataParams, HandlerParams -from atat.pytorch.free_benchmark.perturbed_layers.layer_factory import LayerFactory -from atat.pytorch.free_benchmark.result_handlers.handler_factory import ( +from msprobe.core.common.exceptions import FreeBenchmarkException +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import CommonField +from msprobe.pytorch.free_benchmark.common.params import DataParams, HandlerParams +from msprobe.pytorch.free_benchmark.perturbed_layers.layer_factory import LayerFactory +from msprobe.pytorch.free_benchmark.result_handlers.handler_factory import ( FuzzHandlerFactory, ) @@ -41,18 +41,18 @@ class GradSaver: 
data_processor.update_unequal_rows(handler.get_unequal_rows()) except IndexError: logger.warning_on_rank_0( - f"[atat] Free benchmark: grad index out of range. api:{self.handler_params.api_name}." + f"[msprobe] Free benchmark: grad index out of range. api:{self.handler_params.api_name}." f"index:{new_grad_index}, perturbation grad len {len(self.perturbed_grad_input)}" ) return grad except FreeBenchmarkException as e: logger.warning_on_rank_0( - f"[atat] Free benchmark: grad input check error: {e}" + f"[msprobe] Free benchmark: grad input check error: {e}" ) return grad except Exception as e: logger.warning_on_rank_0( - f"[atat] Free benchmark: grad compare error: {e}" + f"[msprobe] Free benchmark: grad compare error: {e}" ) return grad return grad @@ -77,7 +77,7 @@ class GradSaver: handler.handle(self.data_params) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free benchmark: compare two vjp failed: api:{self.handler_params.api_name}." + f"[msprobe] Free benchmark: compare two vjp failed: api:{self.handler_params.api_name}." 
f"{e}" ) # 在扰动前后输出对比后释放输出的引用 diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/single_benchmark.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/single_benchmark.py index e4fddba987..59239fcd00 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/single_benchmark.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/compare/single_benchmark.py @@ -1,9 +1,9 @@ import math import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import ThresholdConfig -from atat.pytorch.free_benchmark.common.utils import TorchC +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig +from msprobe.pytorch.free_benchmark.common.utils import TorchC class SingleCompare: @@ -69,7 +69,7 @@ class SingleCompare: actual.dtype, ThresholdConfig.BENCHMARK_THD_DICT.get(torch.float32) ) if self.filter_overflow(golden) > 0: - logger.warning_on_rank_0("[atat] Free Benchmark: inf and nan" + logger.warning_on_rank_0("[msprobe] Free Benchmark: inf and nan" "in golden tensor is not supported.") return True actual = self.replace_inf_or_nan(actual) diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/main.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/main.py index 2ebc0a6db9..971776d132 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/main.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/main.py @@ -1,19 +1,19 @@ from abc import ABC import torch -from atat.core.common.const import Const -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import CommonField -from atat.pytorch.free_benchmark.common.enums import ( +from msprobe.core.common.const import Const +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import CommonField +from 
msprobe.pytorch.free_benchmark.common.enums import ( DeviceType, FuzzLevel, HandlerType, PerturbationMode, ) -from atat.pytorch.free_benchmark.common.params import data_pre_deal, make_handler_params -from atat.pytorch.free_benchmark.compare.grad_saver import GradSaver -from atat.pytorch.free_benchmark.perturbed_layers.layer_factory import LayerFactory -from atat.pytorch.free_benchmark.result_handlers.handler_factory import ( +from msprobe.pytorch.free_benchmark.common.params import data_pre_deal, make_handler_params +from msprobe.pytorch.free_benchmark.compare.grad_saver import GradSaver +from msprobe.pytorch.free_benchmark.perturbed_layers.layer_factory import LayerFactory +from msprobe.pytorch.free_benchmark.result_handlers.handler_factory import ( FuzzHandlerFactory, ) @@ -81,7 +81,7 @@ class FreeBenchmarkCheck(ABC): grad_saver = getattr(module, CommonField.GRADSAVER) except AttributeError: logger.warning_on_rank_0( - f"[atat] Free benchmark: get grad saver failed. api_name:{name}" + f"[msprobe] Free benchmark: get grad saver failed. api_name:{name}" ) return @@ -97,6 +97,6 @@ class FreeBenchmarkCheck(ABC): ) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free benchmark: grad vjp calculate failed. api_name:{name} error: {e}" + f"[msprobe] Free benchmark: grad vjp calculate failed. 
api_name:{name} error: {e}" ) return diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py index aa572fd8e8..f64a201d5e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Any -from atat.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.common.params import DataParams class BaseLayer(ABC): diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py index 0d09438ce0..0ea9107aa8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py @@ -1,15 +1,15 @@ -from atat.pytorch.free_benchmark import FreeBenchmarkException -from atat.pytorch.free_benchmark.common.enums import DeviceType, PerturbationMode -from atat.pytorch.free_benchmark.perturbed_layers.npu.improve_precision import ( +from msprobe.pytorch.free_benchmark import FreeBenchmarkException +from msprobe.pytorch.free_benchmark.common.enums import DeviceType, PerturbationMode +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.improve_precision import ( ImprovePrecisionLayer, ) -from atat.pytorch.free_benchmark.perturbed_layers.npu.add_noise import AddNoiseLayer -from atat.pytorch.free_benchmark.perturbed_layers.npu.bit_noise import BitNoiseLayer -from atat.pytorch.free_benchmark.perturbed_layers.npu.no_change import NoChangeLayer -from atat.pytorch.free_benchmark.perturbed_layers.npu.change_value import ( +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.add_noise import 
AddNoiseLayer +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.bit_noise import BitNoiseLayer +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.no_change import NoChangeLayer +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.change_value import ( ChangeValueLayer, ) -from atat.pytorch.free_benchmark.perturbed_layers.run_cpu import CpuLayer +from msprobe.pytorch.free_benchmark.perturbed_layers.run_cpu import CpuLayer class LayerFactory: diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py index af8a93f7d4..a18ef1c51b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py @@ -1,10 +1,10 @@ import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import ThresholdConfig -from atat.pytorch.free_benchmark.common.enums import PerturbationMode -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.common.utils import TorchC -from atat.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig +from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.common.utils import TorchC +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( NpuBaseLayer, ) @@ -37,7 +37,7 @@ class AddNoiseLayer(NpuBaseLayer): 对输入添加扰动并返回 """ logger.info_on_rank_0( - f"[atat] Free benchmark: Perturbation is " + f"[msprobe] Free benchmark: Perturbation is " f"{PerturbationMode.ADD_NOISE} of {self.api_name}." 
) params.perturbed_value = self.add_noise(params.args[params.valid_input_index]) @@ -60,13 +60,13 @@ class AddNoiseLayer(NpuBaseLayer): """ if not self.perturbed_value: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"dtype unsupported. Cancel perturbation." ) return False if tensor_obj.numel() == 0: logger.warning_on_rank_0( - f"[atat] Free benchmark: For {self.api_name}, tensor shape must > 0." + f"[msprobe] Free benchmark: For {self.api_name}, tensor shape must > 0." f" Cancel adding noise." ) return False @@ -77,13 +77,13 @@ class AddNoiseLayer(NpuBaseLayer): max_val = TorchC.max(TorchC.abs(tensor_obj)).item() except Exception: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"when calculate maximun value, tensor is changed to float32." ) max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item() if max_val < abs_tol: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"Maximun value is less than the minimun threshold. Cancel add noise." 
) return False diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py index 40b99acf41..45dea7b93a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py @@ -1,10 +1,10 @@ import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import ThresholdConfig -from atat.pytorch.free_benchmark.common.enums import PerturbationMode -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.common.utils import TorchC -from atat.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig +from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.common.utils import TorchC +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( NpuBaseLayer, ) @@ -53,7 +53,7 @@ class BitNoiseLayer(NpuBaseLayer): 对输入添加扰动并返回 """ logger.info_on_rank_0( - f"[atat] Free benchmark: Perturbation is " + f"[msprobe] Free benchmark: Perturbation is " f"{PerturbationMode.BIT_NOISE} of {self.api_name}." ) params.perturbed_value = self.add_bit_noise(params.args[params.valid_input_index]) @@ -65,13 +65,13 @@ class BitNoiseLayer(NpuBaseLayer): """ if not self.bit_type: logger.info_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"dtype unsupported. Cancel perturbation." 
) return False if tensor_obj.numel() == 0: logger.warning_on_rank_0( - f"[atat] Free benchmark: For {self.api_name}, tensor shape must > 0" + f"[msprobe] Free benchmark: For {self.api_name}, tensor shape must > 0" f" Cancel adding noise." ) return False @@ -82,13 +82,13 @@ class BitNoiseLayer(NpuBaseLayer): max_val = TorchC.max(TorchC.abs(tensor_obj)).item() except Exception: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"when calculate maximun value, tensor is changed to float32." ) max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item() if max_val < abs_tol: logger.info_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"Maximun value is less than the minimun threshold. Cancel add noise." ) return False diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py index b7a967e18b..91085d57a6 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py @@ -1,9 +1,9 @@ import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.enums import PerturbationMode -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.common.utils import TorchC -from atat.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.common.utils import TorchC +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( 
NpuBaseLayer, ) @@ -44,7 +44,7 @@ class ChangeValueLayer(NpuBaseLayer): 对输入添加扰动并返回 """ logger.info_on_rank_0( - f"[atat] Free benchmark: Perturbation is " + f"[msprobe] Free benchmark: Perturbation is " f"{PerturbationMode.CHANGE_VALUE} of {self.api_name}." ) params.perturbed_value = self.change_value(params.args[params.valid_input_index]) @@ -56,7 +56,7 @@ class ChangeValueLayer(NpuBaseLayer): """ if tensor_obj.size(0) < 2: logger.info_on_rank_0( - f"[atat] Free Benchmark: For {self.api_name}, " + f"[msprobe] Free Benchmark: For {self.api_name}, " f"size 0 must greater than 1. Cancel change value." ) return False diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py index 03718e3c4d..ad6d8b8989 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py @@ -1,10 +1,10 @@ import torch -from atat.core.common.const import Const -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import CommonField -from atat.pytorch.free_benchmark.common.enums import PerturbationMode -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( +from msprobe.core.common.const import Const +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import CommonField +from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( NpuBaseLayer, ) @@ -34,7 +34,7 @@ class ImprovePrecisionLayer(NpuBaseLayer): def handle(self, params: DataParams) -> torch.Any: 
logger.info_on_rank_0( - f"[atat] Free benchmark: Perturbation is " + f"[msprobe] Free benchmark: Perturbation is " f"{PerturbationMode.IMPROVE_PRECISION} of {self.api_name}." ) new_args = self.improve_tensor_precision(params.args) diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py index bb065385c6..a69c56002a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py @@ -1,8 +1,8 @@ import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.enums import PerturbationMode -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import ( NpuBaseLayer, ) @@ -21,7 +21,7 @@ class NoChangeLayer(NpuBaseLayer): 对输入添加扰动并返回 """ logger.info_on_rank_0( - f"[atat] Free benchmark: Perturbation is " + f"[msprobe] Free benchmark: Perturbation is " f"{PerturbationMode.NO_CHANGE} of {self.api_name}." 
) params.perturbed_value = self.no_change(params.args[params.valid_input_index]) diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py index 3784af0953..1a85948147 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py @@ -2,8 +2,8 @@ from abc import abstractmethod from typing import Any import torch -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.perturbed_layers.base_layer import BaseLayer +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.perturbed_layers.base_layer import BaseLayer class NpuBaseLayer(BaseLayer): diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py index 024958ffbe..d34ac97653 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py @@ -1,9 +1,9 @@ import torch -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.common.utils import Tools -from atat.pytorch.free_benchmark.common.enums import DeviceType -from atat.pytorch.free_benchmark.perturbed_layers.base_layer import BaseLayer +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.common.utils import Tools +from msprobe.pytorch.free_benchmark.common.enums import DeviceType +from msprobe.pytorch.free_benchmark.perturbed_layers.base_layer import BaseLayer class 
CpuLayer(BaseLayer): @@ -11,7 +11,7 @@ class CpuLayer(BaseLayer): def handle(self, params: DataParams) -> torch.Any: logger.info_on_rank_0( - f"[atat] Free benchmark: Perturbation is to_cpu of {self.api_name}." + f"[msprobe] Free benchmark: Perturbation is to_cpu of {self.api_name}." ) new_args = Tools.convert_device_and_dtype(params.args, DeviceType.CPU, change_dtype=True) new_kwargs = Tools.convert_device_and_dtype(params.kwargs, DeviceType.CPU, change_dtype=True) diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py index c57d7e390a..1728b096f5 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py @@ -3,20 +3,20 @@ from abc import ABC, abstractmethod from typing import Any, Optional, Tuple import torch -from atat.core.common.const import Const -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import ThresholdConfig -from atat.pytorch.free_benchmark.common.enums import ( +from msprobe.core.common.const import Const +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig +from msprobe.pytorch.free_benchmark.common.enums import ( FuzzThreshold, NormType, PerturbationMode, ) -from atat.pytorch.free_benchmark.common.params import ( +from msprobe.pytorch.free_benchmark.common.params import ( DataParams, HandlerParams, make_unequal_row, ) -from atat.pytorch.free_benchmark.common.utils import Tools, TorchC +from msprobe.pytorch.free_benchmark.common.utils import Tools, TorchC class FuzzHandler(ABC): @@ -104,7 +104,7 @@ class FuzzHandler(ABC): ) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name}, " + f"[msprobe] Free Benchmark: For 
{self.params.api_name}, " f"when computing ratio," f" y1 or y2 dtype is not supported {e}" ) @@ -133,7 +133,7 @@ class FuzzHandler(ABC): ) elif not isinstance(perturbed_output, torch.Tensor): logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name} " + f"[msprobe] Free Benchmark: For {self.params.api_name} " f"The compare for output type {type(perturbed_output)} is not supported" ) @@ -185,7 +185,7 @@ class FuzzHandler(ABC): ) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name}, " + f"[msprobe] Free Benchmark: For {self.params.api_name}, " f"when campare the result exception raise {e}" ) return npu_consistent, max_fuzz_ratio diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/check_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/check_handler.py index ed846803a1..c16284eb07 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/check_handler.py @@ -1,11 +1,11 @@ from typing import Any -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.enums import DeviceType -from atat.pytorch.free_benchmark.common.params import DataParams, make_unequal_row -from atat.pytorch.free_benchmark.common.utils import Tools -from atat.pytorch.free_benchmark.compare.single_benchmark import SingleCompare -from atat.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.enums import DeviceType +from msprobe.pytorch.free_benchmark.common.params import DataParams, make_unequal_row +from msprobe.pytorch.free_benchmark.common.utils import Tools +from msprobe.pytorch.free_benchmark.compare.single_benchmark import SingleCompare +from msprobe.pytorch.free_benchmark.result_handlers.base_handler import 
FuzzHandler class CheckerHandler(FuzzHandler): @@ -33,7 +33,7 @@ class CheckerHandler(FuzzHandler): self.other_compare(data_params) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name}, " + f"[msprobe] Free Benchmark: For {self.params.api_name}, " f"when campare the result exception raise {e}" ) return data_params.original_result diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py index fa5c6f3749..a1d90035e8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py @@ -1,9 +1,9 @@ from typing import Any -from atat.pytorch.free_benchmark.common.params import DataParams -from atat.pytorch.free_benchmark.common.utils import Tools -from atat.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler -from atat.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.params import DataParams +from msprobe.pytorch.free_benchmark.common.utils import Tools +from msprobe.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler +from msprobe.pytorch.free_benchmark import logger class FixHandler(FuzzHandler): @@ -18,7 +18,7 @@ class FixHandler(FuzzHandler): ) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name} " + f"[msprobe] Free Benchmark: For {self.params.api_name} " f"Fix output failed. 
" ) return data_params.original_result \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py index cff629854d..5ee968c6a8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py @@ -1,10 +1,10 @@ -from atat.pytorch.free_benchmark import FreeBenchmarkException -from atat.pytorch.free_benchmark.common.constant import PreheatConfig -from atat.pytorch.free_benchmark.common.enums import HandlerType -from atat.pytorch.free_benchmark.common.params import HandlerParams -from atat.pytorch.free_benchmark.result_handlers.check_handler import CheckerHandler -from atat.pytorch.free_benchmark.result_handlers.preheat_handler import PreheatHandler -from atat.pytorch.free_benchmark.result_handlers.fix_handler import FixHandler +from msprobe.pytorch.free_benchmark import FreeBenchmarkException +from msprobe.pytorch.free_benchmark.common.constant import PreheatConfig +from msprobe.pytorch.free_benchmark.common.enums import HandlerType +from msprobe.pytorch.free_benchmark.common.params import HandlerParams +from msprobe.pytorch.free_benchmark.result_handlers.check_handler import CheckerHandler +from msprobe.pytorch.free_benchmark.result_handlers.preheat_handler import PreheatHandler +from msprobe.pytorch.free_benchmark.result_handlers.fix_handler import FixHandler class FuzzHandlerFactory: diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py index 033a6d4931..d78e430362 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py @@ -1,14 +1,14 @@ import math from typing import Any -from atat.pytorch.free_benchmark import logger -from atat.pytorch.free_benchmark.common.constant import ThresholdConfig -from atat.pytorch.free_benchmark.common.counter import preheat_counter -from atat.pytorch.free_benchmark.common.enums import DeviceType -from atat.pytorch.free_benchmark.common.params import DataParams, HandlerParams -from atat.pytorch.free_benchmark.common.utils import Tools -from atat.pytorch.free_benchmark.compare.single_benchmark import SingleCompare -from atat.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler +from msprobe.pytorch.free_benchmark import logger +from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig +from msprobe.pytorch.free_benchmark.common.counter import preheat_counter +from msprobe.pytorch.free_benchmark.common.enums import DeviceType +from msprobe.pytorch.free_benchmark.common.params import DataParams, HandlerParams +from msprobe.pytorch.free_benchmark.common.utils import Tools +from msprobe.pytorch.free_benchmark.compare.single_benchmark import SingleCompare +from msprobe.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler class PreheatHandler(FuzzHandler): @@ -74,14 +74,14 @@ class PreheatHandler(FuzzHandler): cpu_consistent = self.compare_npu_and_cpu(data_params) except Exception as e: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name}, " + f"[msprobe] Free Benchmark: For {self.params.api_name}, " f"when campare to cpu exception raise {e}" ) try: first_dtype = Tools.get_first_tensor_dtype(data_params.original_result) except RuntimeError: logger.warning_on_rank_0( - f"[atat] Free Benchmark: For {self.params.api_name}, " + f"[msprobe] Free Benchmark: For {self.params.api_name}, " f"the output sequence does not contain tensors." 
) if preheat_counter.get_api_preheat(self.pure_name, str(first_dtype)): @@ -96,7 +96,7 @@ class PreheatHandler(FuzzHandler): if res: total_count = preheat_counter.get_one_step_used_api(self.pure_name) logger.info_on_rank_0( - f"[atat] Free benchmark: preheat sample in step{self.params.step}" + f"[msprobe] Free benchmark: preheat sample in step{self.params.step}" f"api_name {self.params.api_name}, " f"curr_called_seq: {curr_called_seq}/{total_count}" ) diff --git a/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py index 675fa2a1bf..7e72aab8ae 100644 --- a/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py @@ -1,10 +1,10 @@ import torch.nn as nn -from atat.pytorch.common.log import logger -from atat.core.common.const import Const -from atat.pytorch.hook_module.api_registry import api_register -from atat.pytorch.debugger.precision_debugger import PrecisionDebugger -from atat.core.common.exceptions import MsaccException -from atat.core.data_dump.scope import BaseScope +from msprobe.pytorch.common.log import logger +from msprobe.core.common.const import Const +from msprobe.pytorch.hook_module.api_registry import api_register +from msprobe.pytorch.debugger.precision_debugger import PrecisionDebugger +from msprobe.core.common.exceptions import MsaccException +from msprobe.core.data_dump.scope import BaseScope module_count = {} diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py index 3b971cc71e..f75201eafc 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py @@ -18,15 +18,15 @@ import torch import torch.distributed as dist -from atat.pytorch.hook_module import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, 
wrap_aten -from atat.pytorch.hook_module.wrap_aten import get_aten_ops -from atat.pytorch.hook_module.wrap_distributed import get_distributed_ops -from atat.pytorch.hook_module.wrap_functional import get_functional_ops -from atat.pytorch.hook_module.wrap_tensor import get_tensor_ops -from atat.pytorch.hook_module.wrap_torch import get_torch_ops -from atat.pytorch.hook_module.wrap_vf import get_vf_ops -from atat.pytorch.common.utils import torch_without_guard_version, npu_distributed_api, is_gpu -from atat.core.common.const import Const +from msprobe.pytorch.hook_module import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten +from msprobe.pytorch.hook_module.wrap_aten import get_aten_ops +from msprobe.pytorch.hook_module.wrap_distributed import get_distributed_ops +from msprobe.pytorch.hook_module.wrap_functional import get_functional_ops +from msprobe.pytorch.hook_module.wrap_tensor import get_tensor_ops +from msprobe.pytorch.hook_module.wrap_torch import get_torch_ops +from msprobe.pytorch.hook_module.wrap_vf import get_vf_ops +from msprobe.pytorch.common.utils import torch_without_guard_version, npu_distributed_api, is_gpu +from msprobe.core.common.const import Const torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py index 57212b6e45..6693a09d02 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py @@ -20,7 +20,7 @@ import threading import torch import torch.nn as nn import torch.utils.hooks as full_hooks -from atat.core.common.const import Const +from msprobe.core.common.const import Const class HOOKModule(nn.Module): diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py index e4ed157af6..c1e581675f 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py @@ -18,7 +18,7 @@ import os import yaml -from atat.core.common.file_check import FileOpen +from msprobe.core.common.file_check import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py index c5a3c6365d..f3a6946ad9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py @@ -20,10 +20,10 @@ import torch import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.pytorch.common.utils import torch_device_guard -from atat.core.common.const import Const -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.common.utils import torch_device_guard +from msprobe.core.common.const import Const +from msprobe.core.common.file_check import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py index e02189ac1b..6cf425441c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py @@ -20,10 +20,10 @@ from functools import wraps import torch.distributed as dist import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.pytorch.common.utils import torch_device_guard -from atat.core.common.const import Const -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.common.utils import torch_device_guard 
+from msprobe.core.common.const import Const +from msprobe.core.common.file_check import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py index fa97f5ee31..fd7610ca8f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py @@ -20,11 +20,11 @@ import os import torch import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.pytorch.common.utils import torch_device_guard -from atat.core.common.const import Const -from atat.pytorch.common.log import logger -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.common.utils import torch_device_guard +from msprobe.core.common.const import Const +from msprobe.pytorch.common.log import logger +from msprobe.core.common.file_check import FileOpen def remove_dropout(): diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py index 7d0882804f..992713bce5 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py @@ -20,10 +20,10 @@ import torch import torch_npu import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.pytorch.common.utils import torch_device_guard, torch_without_guard_version -from atat.core.common.const import Const -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.common.utils import torch_device_guard, torch_without_guard_version +from msprobe.core.common.const import Const +from msprobe.core.common.file_check import FileOpen cur_path = 
os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py index 6fac181402..3e26ae3bed 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py @@ -20,10 +20,10 @@ import os import torch import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.pytorch.common.utils import torch_device_guard, parameter_adapter -from atat.core.common.const import Const -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.common.utils import torch_device_guard, parameter_adapter +from msprobe.core.common.const import Const +from msprobe.core.common.file_check import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py index f0bd01fe46..486ddda491 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py @@ -20,10 +20,10 @@ import os import torch import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.pytorch.common.utils import torch_device_guard -from atat.core.common.const import Const -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.common.utils import torch_device_guard +from msprobe.core.common.const import Const +from msprobe.core.common.file_check import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git 
a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py index d4c570221d..d78beb2a6a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py @@ -20,10 +20,10 @@ import os import torch import yaml -from atat.pytorch.hook_module.hook_module import HOOKModule -from atat.core.common.file_check import FileOpen -from atat.pytorch.common.utils import torch_device_guard -from atat.core.common.const import Const +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.core.common.file_check import FileOpen +from msprobe.pytorch.common.utils import torch_device_guard +from msprobe.core.common.const import Const cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/msprobe/pytorch/module_processer.py b/debug/accuracy_tools/msprobe/pytorch/module_processer.py index 8ce9140e32..422d36d6ac 100644 --- a/debug/accuracy_tools/msprobe/pytorch/module_processer.py +++ b/debug/accuracy_tools/msprobe/pytorch/module_processer.py @@ -1,8 +1,8 @@ from functools import wraps import torch from torch.utils.hooks import BackwardHook -from atat.core.common.const import Const -from atat.core.data_dump.scope import ModuleRangeScope +from msprobe.core.common.const import Const +from msprobe.core.data_dump.scope import ModuleRangeScope class ModuleProcesser: diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py index e6d55ca061..19c18c124b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py @@ -8,10 +8,10 @@ from rich.table import Table from rich.console import Console from .single_compare import single_benchmark_compare_wrap from .utils import 
DispatchException -from atat.core.common.const import CompareConst -from atat.core.common.file_check import FileOpen -from atat.pytorch.common.log import logger -from atat.core.common.utils import CompareException +from msprobe.core.common.const import CompareConst +from msprobe.core.common.file_check import FileOpen +from msprobe.pytorch.common.log import logger +from msprobe.core.common.utils import CompareException ELEMENT_NUM_THRESHOLD = 100 ZERO_NUM_THRESHOLD = 0.1 diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dispatch.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dispatch.py index 7502d746ac..898df30b99 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dispatch.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dispatch.py @@ -21,9 +21,9 @@ from .dump_compare import dispatch_workflow, dispatch_multiprocess, error_call, from .utils import get_callstack, data_to_cpu, logger_debug, logger_error, logger_warn, logger_logo, get_sys_info, \ DispatchException from .compare import Comparator -from atat.core.common.file_check import FileOpen -from atat.core.common.utils import check_file_or_directory_path, check_path_before_create -from atat.core.common.const import Const, CompareConst +from msprobe.core.common.file_check import FileOpen +from msprobe.core.common.utils import check_file_or_directory_path, check_path_before_create +from msprobe.core.common.const import Const, CompareConst current_time = time.strftime("%Y%m%d%H%M%S") RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" @@ -209,9 +209,9 @@ class PtdbgDispatch(TorchDispatchMode): time_now = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) if tag is None or not isinstance(tag, str): logger_warn('There is not tag or the type of tag is not string.') - dir_name = f'atat_rank{self.device_id}_{time_now}' + dir_name = f'msprobe_rank{self.device_id}_{time_now}' else: - dir_name = f'atat_{tag}_rank{self.device_id}_{time_now}' + 
dir_name = f'msprobe_{tag}_rank{self.device_id}_{time_now}' return dir_name def load_yaml_file(self, file_path): diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py index cd7c5a3f28..f83b6fc9f0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py @@ -7,9 +7,9 @@ import pandas as pd import torch from .utils import np_save_data, logger_debug, logger_error, logger_warn, logger_user, COLOR_RED, COLOR_GREEN, \ COLOR_RESET, CSV_COLUMN_NAME -from atat.core.common.file_check import FileOpen, change_mode -from atat.core.common.const import CompareConst, FileCheckConst, Const -from atat.pytorch.common.log import logger +from msprobe.core.common.file_check import FileOpen, change_mode +from msprobe.core.common.const import CompareConst, FileCheckConst, Const +from msprobe.pytorch.common.log import logger class DispatchRunParam: def __init__(self, debug_flag, device_id, root_npu_path, root_cpu_path, process_num, comparator): diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py index f3fcffb6f2..fec3e0b007 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py @@ -12,8 +12,8 @@ except ImportError: else: pta_cpu_device = torch.device("cpu") -from atat.core.common.const import CompareConst, FileCheckConst -from atat.core.common.file_check import change_mode +from msprobe.core.common.const import CompareConst, FileCheckConst +from msprobe.core.common.file_check import change_mode cpu_device = torch._C.device("cpu") COLOR_RED = '\033[31m' diff --git a/debug/accuracy_tools/msprobe/pytorch/parse.py b/debug/accuracy_tools/msprobe/pytorch/parse.py index 40792d0e02..efd3d4a2dd 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/parse.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse.py @@ -1,4 +1,4 @@ -from atat.pytorch.parse_tool import cli +from msprobe.pytorch.parse_tool import cli if __name__ == '__main__': cli.parse() diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/cli.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/cli.py index f59fbf13a8..500e8eef68 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/cli.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/cli.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -from atat.pytorch.parse_tool.lib.interactive_cli import InteractiveCli -from atat.pytorch.common.log import logger +from msprobe.pytorch.parse_tool.lib.interactive_cli import InteractiveCli +from msprobe.pytorch.common.log import logger def _run_interactive_cli(cli=None): diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/compare.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/compare.py index dfc4529414..85c4cde4d1 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/compare.py @@ -19,9 +19,9 @@ import os import time import numpy as np from collections import namedtuple -from atat.pytorch.parse_tool.lib.utils import Util -from atat.pytorch.parse_tool.lib.config import Const -from atat.pytorch.parse_tool.lib.parse_exception import ParseException +from msprobe.pytorch.parse_tool.lib.utils import Util +from msprobe.pytorch.parse_tool.lib.config import Const +from msprobe.pytorch.parse_tool.lib.parse_exception import ParseException class Compare: diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/interactive_cli.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/interactive_cli.py index 12b07183fb..1ea7dd3015 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/interactive_cli.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/interactive_cli.py @@ -16,10 +16,10 @@ """ import cmd import argparse -from atat.pytorch.parse_tool.lib.parse_tool import ParseTool -from atat.pytorch.parse_tool.lib.utils import Util -from atat.pytorch.parse_tool.lib.config import Const -from atat.pytorch.parse_tool.lib.parse_exception import catch_exception +from msprobe.pytorch.parse_tool.lib.parse_tool import ParseTool +from msprobe.pytorch.parse_tool.lib.utils import Util +from msprobe.pytorch.parse_tool.lib.config import Const +from msprobe.pytorch.parse_tool.lib.parse_exception import catch_exception class InteractiveCli(cmd.Cmd): diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_exception.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_exception.py index 1177c51985..7525230ced 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_exception.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_exception.py @@ -15,7 +15,7 @@ # limitations under the License. 
""" import logging -from atat.core.common.exceptions import FileCheckException +from msprobe.core.common.exceptions import FileCheckException class ParseException(Exception): diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_tool.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_tool.py index 3e02baa127..9a47dc54cf 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_tool.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/parse_tool.py @@ -18,11 +18,11 @@ import argparse import os from collections import namedtuple -from atat.pytorch.parse_tool.lib.config import Const -from atat.pytorch.parse_tool.lib.utils import Util -from atat.pytorch.parse_tool.lib.compare import Compare -from atat.pytorch.parse_tool.lib.visualization import Visualization -from atat.pytorch.parse_tool.lib.parse_exception import catch_exception, ParseException +from msprobe.pytorch.parse_tool.lib.config import Const +from msprobe.pytorch.parse_tool.lib.utils import Util +from msprobe.pytorch.parse_tool.lib.compare import Compare +from msprobe.pytorch.parse_tool.lib.visualization import Visualization +from msprobe.pytorch.parse_tool.lib.parse_exception import catch_exception, ParseException class ParseTool: diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py index ce42d242ba..1e4c1882d0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py @@ -25,15 +25,15 @@ import csv import time import numpy as np from collections import namedtuple -from atat.pytorch.parse_tool.lib.config import Const -from atat.pytorch.parse_tool.lib.file_desc import DumpDecodeFileDesc, FileDesc -from atat.pytorch.parse_tool.lib.parse_exception import ParseException -from atat.core.common.file_check import change_mode, check_other_user_writable,\ +from msprobe.pytorch.parse_tool.lib.config import 
Const +from msprobe.pytorch.parse_tool.lib.file_desc import DumpDecodeFileDesc, FileDesc +from msprobe.pytorch.parse_tool.lib.parse_exception import ParseException +from msprobe.core.common.file_check import change_mode, check_other_user_writable,\ check_path_executable, check_path_owner_consistent -from atat.core.common.const import FileCheckConst -from atat.core.common.file_check import FileOpen -from atat.core.common.utils import check_file_or_directory_path -from atat.pytorch.common.log import logger +from msprobe.core.common.const import FileCheckConst +from msprobe.core.common.file_check import FileOpen +from msprobe.core.common.utils import check_file_or_directory_path +from msprobe.pytorch.common.log import logger try: diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/visualization.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/visualization.py index 3ef9878ae8..5e37b58d0b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/visualization.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/visualization.py @@ -17,10 +17,10 @@ import json import numpy as np -from atat.pytorch.parse_tool.lib.config import Const -from atat.pytorch.parse_tool.lib.utils import Util -from atat.pytorch.parse_tool.lib.parse_exception import ParseException -from atat.core.common.file_check import FileOpen +from msprobe.pytorch.parse_tool.lib.config import Const +from msprobe.pytorch.parse_tool.lib.utils import Util +from msprobe.pytorch.parse_tool.lib.parse_exception import ParseException +from msprobe.core.common.file_check import FileOpen class Visualization: diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index 0674b91b34..ff09bfd8e9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -1,9 +1,9 @@ import json import os -from atat.core.common_config import CommonConfig, BaseConfig -from 
atat.core.common.file_check import FileOpen -from atat.core.common.const import Const +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.core.common.file_check import FileOpen +from msprobe.core.common.const import Const class TensorConfig(BaseConfig): diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index d0b9c4d4b2..e5da444840 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -2,17 +2,17 @@ import functools import os from pathlib import Path -from atat.pytorch.common.log import logger -from atat.core.common.file_check import FileChecker, check_path_before_create -from atat.core.common.const import Const, FileCheckConst -from atat.core.common.exceptions import DistributedNotInitializedError, MsaccException -from atat.core.data_dump.data_collector import build_data_collector -from atat.core.data_dump.scope import BaseScope -from atat.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs -from atat.pytorch.common.utils import get_rank_if_initialized -from atat.pytorch.module_processer import ModuleProcesser -from atat.pytorch.hook_module import remove_dropout -from atat.pytorch.hook_module.api_registry import api_register +from msprobe.pytorch.common.log import logger +from msprobe.core.common.file_check import FileChecker, check_path_before_create +from msprobe.core.common.const import Const, FileCheckConst +from msprobe.core.common.exceptions import DistributedNotInitializedError, MsaccException +from msprobe.core.data_dump.data_collector import build_data_collector +from msprobe.core.data_dump.scope import BaseScope +from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs +from msprobe.pytorch.common.utils import get_rank_if_initialized +from msprobe.pytorch.module_processer import ModuleProcesser +from 
msprobe.pytorch.hook_module import remove_dropout +from msprobe.pytorch.hook_module.api_registry import api_register class Service: @@ -81,7 +81,7 @@ class Service: self.model = model if self.config.step and self.current_iter > max(self.config.step): self.stop() - raise Exception("atat: exit after iteration {}".format(max(self.config.step))) + raise Exception("msprobe: exit after iteration {}".format(max(self.config.step))) if self.config.step and self.current_iter not in self.config.step: return if self.first_start: diff --git a/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py index b3273358e4..edd3eb53dc 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py @@ -20,9 +20,9 @@ import uuid from unittest import TestCase from unittest.mock import patch, MagicMock, mock_open -from atat.core.common.log import logger -from atat.core.common.const import Const -from atat.core.common.utils import (CompareException, +from msprobe.core.common.log import logger +from msprobe.core.common.const import Const +from msprobe.core.common.utils import (CompareException, check_seed_all, check_inplace_op, make_dump_path_if_not_exists, @@ -41,7 +41,7 @@ from atat.core.common.utils import (CompareException, check_regex_prefix_format_valid, get_dump_data_path, task_dumppath_get) -from atat.core.common.file_check import FileCheckConst +from msprobe.core.common.file_check import FileCheckConst class TestUtils(TestCase): @@ -88,7 +88,7 @@ class TestUtils(TestCase): raise OSError if not os.path.exists(dirname): - with patch("atat.core.common.utils.Path.mkdir", new=test_mkdir): + with patch("msprobe.core.common.utils.Path.mkdir", new=test_mkdir): with self.assertRaises(CompareException) as context: make_dump_path_if_not_exists(dirname) self.assertEqual(context.exception.code, CompareException.INVALID_PATH_ERROR) @@ -171,7 
+171,7 @@ class TestUtils(TestCase): file_path = os.path.realpath(__file__) dirname = os.path.dirname(file_path) - with patch("atat.core.common.utils.FileChecker", new=TestFileChecker): + with patch("msprobe.core.common.utils.FileChecker", new=TestFileChecker): check_file_or_directory_path(file_path, isdir=False) self.assertTrue(TestFileChecker.checked) self.assertEqual(TestFileChecker.file_path, file_path) @@ -179,7 +179,7 @@ class TestUtils(TestCase): self.assertEqual(TestFileChecker.ability, FileCheckConst.READ_ABLE) TestFileChecker.checked = False - with patch("atat.core.common.utils.FileChecker", new=TestFileChecker): + with patch("msprobe.core.common.utils.FileChecker", new=TestFileChecker): check_file_or_directory_path(dirname, isdir=True) self.assertTrue(TestFileChecker.checked) self.assertEqual(TestFileChecker.file_path, dirname) @@ -216,9 +216,9 @@ class TestUtils(TestCase): mock_check_file_or_directory_path = MagicMock() mock_check_json_file = MagicMock() - with patch("atat.core.common.utils.FileOpen", mock_open(read_data="")), \ - patch("atat.core.common.utils.check_json_file", new=mock_check_json_file), \ - patch("atat.core.common.utils.check_file_or_directory_path", new=mock_check_file_or_directory_path): + with patch("msprobe.core.common.utils.FileOpen", mock_open(read_data="")), \ + patch("msprobe.core.common.utils.check_json_file", new=mock_check_json_file), \ + patch("msprobe.core.common.utils.check_file_or_directory_path", new=mock_check_file_or_directory_path): check_compare_param(params, "output_path") check_compare_param(params, "output_path", summary_compare=False, md5_compare=True) for i in range(len(call_args)): @@ -261,7 +261,7 @@ class TestUtils(TestCase): _check_json(handler, "test.json") self.assertEqual(handler.string, "0_0") - @patch("atat.core.common.utils._check_json") + @patch("msprobe.core.common.utils._check_json") def test_check_json_file(self, _mock_check_json): input_param = { "npu_json_path": "npu_json_path", @@ -275,7 +275,7 
@@ class TestUtils(TestCase): @patch.object(logger, "error") def test_check_file_size(self, mock_error): - with patch("atat.core.common.utils.os.path.getsize", return_value=120): + with patch("msprobe.core.common.utils.os.path.getsize", return_value=120): with self.assertRaises(CompareException) as context: check_file_size("input_file", 100) self.assertEqual(context.exception.code, CompareException.INVALID_FILE_ERROR) @@ -294,7 +294,7 @@ class TestUtils(TestCase): self.assertEqual(str(context.exception), f"prefix contains invalid characters, " f"prefix pattern {Const.REGEX_PREFIX_PATTERN}") - @patch("atat.core.common.utils.check_file_or_directory_path") + @patch("msprobe.core.common.utils.check_file_or_directory_path") def test_get_dump_data_path(self, mock_check_file_or_directory_path): file_path = os.path.realpath(__file__) dirname = os.path.dirname(file_path) @@ -322,23 +322,23 @@ class TestUtils(TestCase): mock_error.assert_called_with("Please check the json path is valid.") input_param["npu_json_path"] = "npu_json_path" - with patch("atat.core.common.utils.FileOpen", mock_open(read_data="")), \ - patch("atat.core.common.utils.json.load", return_value=npu_json): + with patch("msprobe.core.common.utils.FileOpen", mock_open(read_data="")), \ + patch("msprobe.core.common.utils.json.load", return_value=npu_json): summary_compare, md5_compare = task_dumppath_get(input_param) self.assertFalse(summary_compare) self.assertFalse(md5_compare) npu_json["task"] = Const.STATISTICS - with patch("atat.core.common.utils.FileOpen", mock_open(read_data="")), \ - patch("atat.core.common.utils.json.load", return_value=npu_json), \ - patch("atat.core.common.utils.md5_find", return_value=True): + with patch("msprobe.core.common.utils.FileOpen", mock_open(read_data="")), \ + patch("msprobe.core.common.utils.json.load", return_value=npu_json), \ + patch("msprobe.core.common.utils.md5_find", return_value=True): summary_compare, md5_compare = task_dumppath_get(input_param) 
self.assertFalse(summary_compare) self.assertTrue(md5_compare) npu_json["task"] = Const.OVERFLOW_CHECK - with patch("atat.core.common.utils.FileOpen", mock_open(read_data="")), \ - patch("atat.core.common.utils.json.load", return_value=npu_json): + with patch("msprobe.core.common.utils.FileOpen", mock_open(read_data="")), \ + patch("msprobe.core.common.utils.json.load", return_value=npu_json): with self.assertRaises(CompareException) as context: task_dumppath_get(input_param) self.assertEqual(context.exception.code, CompareException.INVALID_TASK_ERROR) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_data_collector.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_data_collector.py index 625ab1337e..eedbe5be7e 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_data_collector.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_data_collector.py @@ -1,10 +1,10 @@ import unittest from unittest.mock import patch, mock_open, MagicMock -from atat.core.common.utils import Const -from atat.core.data_dump.data_collector import DataCollector -from atat.pytorch.debugger.debugger_config import DebuggerConfig -from atat.pytorch.pt_config import parse_json_config +from msprobe.core.common.utils import Const +from msprobe.core.data_dump.data_collector import DataCollector +from msprobe.pytorch.debugger.debugger_config import DebuggerConfig +from msprobe.pytorch.pt_config import parse_json_config class TestDataCollector(unittest.TestCase): @@ -12,8 +12,8 @@ class TestDataCollector(unittest.TestCase): mock_json_data = { "dump_path": "./ut_dump", } - with patch("atat.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ - patch("atat.pytorch.pt_config.json.load", return_value=mock_json_data): + with patch("msprobe.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ + patch("msprobe.pytorch.pt_config.json.load", return_value=mock_json_data): common_config, task_config = parse_json_config("./config.json", 
Const.STATISTICS) config = DebuggerConfig(common_config, task_config, Const.STATISTICS, "./ut_dump", "L1") self.data_collector = DataCollector(config) @@ -21,7 +21,7 @@ class TestDataCollector(unittest.TestCase): def test_update_data(self): self.data_collector.config.task = Const.OVERFLOW_CHECK self.data_collector.data_processor.has_overflow = True - with patch("atat.core.data_dump.json_writer.DataWriter.update_data", return_value=None): + with patch("msprobe.core.data_dump.json_writer.DataWriter.update_data", return_value=None): result1 = self.data_collector.update_data("test message", "test1:") self.assertEqual(result1, "test1:Overflow detected.") @@ -31,7 +31,7 @@ class TestDataCollector(unittest.TestCase): self.data_collector.config.task = Const.STATISTICS self.data_collector.data_processor.has_overflow = True - with patch("atat.core.data_dump.json_writer.DataWriter.update_data", return_value=None): + with patch("msprobe.core.data_dump.json_writer.DataWriter.update_data", return_value=None): result3 = self.data_collector.update_data("test message", "test3") self.assertEqual(result3, "test3") diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_json_writer.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_json_writer.py index 867da001e6..cfb1b3d551 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_json_writer.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_json_writer.py @@ -1,10 +1,10 @@ import unittest -from atat.core.data_dump.json_writer import DataWriter +from msprobe.core.data_dump.json_writer import DataWriter import os import csv -from atat.core.common.file_check import FileOpen -from atat.core.common import utils +from msprobe.core.common.file_check import FileOpen +from msprobe.core.common import utils from pathlib import Path import json diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_scope.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_scope.py 
index 3ef7bd8c7a..1989fd0a95 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_scope.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_scope.py @@ -1,8 +1,8 @@ import unittest from unittest.mock import MagicMock -from atat.core.common.exceptions import ScopeException -from atat.core.data_dump.scope import ( +from msprobe.core.common.exceptions import ScopeException +from msprobe.core.data_dump.scope import ( build_scope, build_range_scope_according_to_scope_name, BaseScope, diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py index 00b17e1f1c..15957af217 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py @@ -17,10 +17,10 @@ from unittest import TestCase from unittest.mock import patch -from atat.core.common.log import logger -from atat.core.common.const import Const -from atat.core.common.exceptions import MsaccException -from atat.core.common_config import CommonConfig, BaseConfig +from msprobe.core.common.log import logger +from msprobe.core.common.const import Const +from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common_config import CommonConfig, BaseConfig class TestCommonConfig(TestCase): diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_file_check.py b/debug/accuracy_tools/msprobe/test/core_ut/test_file_check.py index aa7882aa59..ecdf3da9fe 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_file_check.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_file_check.py @@ -19,10 +19,10 @@ import os from unittest import TestCase from unittest.mock import patch, MagicMock -from atat.core.common.log import logger -from atat.core.common.const import FileCheckConst -from atat.core.common.exceptions import FileCheckException -from atat.core.common.file_check import (check_link, +from 
msprobe.core.common.log import logger +from msprobe.core.common.const import FileCheckConst +from msprobe.core.common.exceptions import FileCheckException +from msprobe.core.common.file_check import (check_link, check_path_length, check_path_exists, check_path_readability, @@ -40,7 +40,7 @@ from atat.core.common.file_check import (check_link, class TestFileCheckUtil(TestCase): @patch.object(logger, "error") def test_check_link(self, mock_logger_error): - with patch("atat.core.common.file_check.os.path.islink", return_value=True): + with patch("msprobe.core.common.file_check.os.path.islink", return_value=True): with self.assertRaises(FileCheckException) as context: check_link("link_path") self.assertEqual(str(context.exception), @@ -72,7 +72,7 @@ class TestFileCheckUtil(TestCase): @patch.object(logger, "error") def test_check_path_exists(self, mock_logger_error): - with patch("atat.core.common.file_check.os.path.exists", return_value=False): + with patch("msprobe.core.common.file_check.os.path.exists", return_value=False): with self.assertRaises(FileCheckException) as context: check_path_exists("file_path") self.assertEqual(str(context.exception), @@ -82,7 +82,7 @@ class TestFileCheckUtil(TestCase): @patch.object(logger, "error") def test_check_path_readability(self, mock_logger_error): path = "file_path" - with patch("atat.core.common.file_check.os.access", return_value=False): + with patch("msprobe.core.common.file_check.os.access", return_value=False): with self.assertRaises(FileCheckException) as context: check_path_readability(path) self.assertEqual(str(context.exception), @@ -91,14 +91,14 @@ class TestFileCheckUtil(TestCase): mock_access = MagicMock() mock_access.return_value = True - with patch("atat.core.common.file_check.os.access", new=mock_access): + with patch("msprobe.core.common.file_check.os.access", new=mock_access): check_path_readability(path) self.assertEqual(mock_access.call_args[0], (path, os.R_OK)) @patch.object(logger, "error") def 
test_check_path_writability(self, mock_logger_error): path = "file_path" - with patch("atat.core.common.file_check.os.access", return_value=False): + with patch("msprobe.core.common.file_check.os.access", return_value=False): with self.assertRaises(FileCheckException) as context: check_path_writability(path) self.assertEqual(str(context.exception), @@ -107,14 +107,14 @@ class TestFileCheckUtil(TestCase): mock_access = MagicMock() mock_access.return_value = True - with patch("atat.core.common.file_check.os.access", new=mock_access): + with patch("msprobe.core.common.file_check.os.access", new=mock_access): check_path_writability(path) self.assertEqual(mock_access.call_args[0], (path, os.W_OK)) @patch.object(logger, "error") def test_check_path_executable(self, mock_logger_error): path = "file_path" - with patch("atat.core.common.file_check.os.access", return_value=False): + with patch("msprobe.core.common.file_check.os.access", return_value=False): with self.assertRaises(FileCheckException) as context: check_path_executable(path) self.assertEqual(str(context.exception), @@ -123,7 +123,7 @@ class TestFileCheckUtil(TestCase): mock_access = MagicMock() mock_access.return_value = True - with patch("atat.core.common.file_check.os.access", new=mock_access): + with patch("msprobe.core.common.file_check.os.access", new=mock_access): check_path_executable(path) self.assertEqual(mock_access.call_args[0], (path, os.X_OK)) @@ -135,7 +135,7 @@ class TestFileCheckUtil(TestCase): path = "file_path" mock_stat = TestStat(0o002) - with patch("atat.core.common.file_check.os.stat", return_value=mock_stat): + with patch("msprobe.core.common.file_check.os.stat", return_value=mock_stat): with self.assertRaises(FileCheckException) as context: check_other_user_writable(path) self.assertEqual(str(context.exception), @@ -147,7 +147,7 @@ class TestFileCheckUtil(TestCase): def test_check_path_owner_consistent(self, mock_logger_error): file_path = os.path.realpath(__file__) file_owner = 
os.stat(file_path).st_uid - with patch("atat.core.common.file_check.os.getuid", return_value=file_owner+1): + with patch("msprobe.core.common.file_check.os.getuid", return_value=file_owner+1): with self.assertRaises(FileCheckException) as context: check_path_owner_consistent(file_path) self.assertEqual(str(context.exception), @@ -160,7 +160,7 @@ class TestFileCheckUtil(TestCase): path = "path" mock_re_match = MagicMock() mock_re_match.return_value = False - with patch("atat.core.common.file_check.re.match", new=mock_re_match): + with patch("msprobe.core.common.file_check.re.match", new=mock_re_match): with self.assertRaises(FileCheckException) as context: check_path_pattern_vaild(path) self.assertEqual(str(context.exception), @@ -181,8 +181,8 @@ class TestFileCheckUtil(TestCase): def test_check_common_file_size(self): mock_check_file_size = MagicMock() - with patch("atat.core.common.file_check.os.path.isfile", return_value=True), \ - patch("atat.core.common.file_check.check_file_size", new=mock_check_file_size): + with patch("msprobe.core.common.file_check.os.path.isfile", return_value=True), \ + patch("msprobe.core.common.file_check.check_file_size", new=mock_check_file_size): for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): check_common_file_size(suffix) mock_check_file_size.assert_called_with(suffix, max_size) @@ -201,16 +201,16 @@ class TestFileCheckUtil(TestCase): def test_check_path_type(self, mock_logger_error): file_path = "file_path" - with patch("atat.core.common.file_check.os.path.isfile", return_value=False), \ - patch("atat.core.common.file_check.os.path.isdir", return_value=True): + with patch("msprobe.core.common.file_check.os.path.isfile", return_value=False), \ + patch("msprobe.core.common.file_check.os.path.isdir", return_value=True): with self.assertRaises(FileCheckException) as context: check_path_type(file_path, FileCheckConst.FILE) self.assertEqual(str(context.exception), 
FileCheckException.err_strs.get(FileCheckException.INVALID_FILE_ERROR)) mock_logger_error.assert_called_with(f"The {file_path} should be a file!") - with patch("atat.core.common.file_check.os.path.isfile", return_value=True), \ - patch("atat.core.common.file_check.os.path.isdir", return_value=False): + with patch("msprobe.core.common.file_check.os.path.isfile", return_value=True), \ + patch("msprobe.core.common.file_check.os.path.isdir", return_value=False): with self.assertRaises(FileCheckException) as context: check_path_type(file_path, FileCheckConst.DIR) self.assertEqual(str(context.exception), diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_log.py b/debug/accuracy_tools/msprobe/test/core_ut/test_log.py index 6d7998d5ae..1687c48d02 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_log.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_log.py @@ -17,11 +17,11 @@ from unittest import TestCase from unittest.mock import patch, MagicMock -from atat.core.common.log import BaseLogger, logger +from msprobe.core.common.log import BaseLogger, logger class TestLog(TestCase): - @patch("atat.core.common.log.print") + @patch("msprobe.core.common.log.print") def test__print_log(self, mock_print): logger._print_log("level", "msg") self.assertIn("[level] msg", mock_print.call_args[0][0]) @@ -75,7 +75,7 @@ class TestLog(TestCase): @patch.object(BaseLogger, "get_rank") def test_info_on_rank_0(self, mock_get_rank): mock_print = MagicMock() - with patch("atat.core.common.log.print", new=mock_print): + with patch("msprobe.core.common.log.print", new=mock_print): mock_get_rank.return_value = 0 logger.info_on_rank_0("msg") self.assertIn("[INFO] msg", mock_print.call_args[0][0]) @@ -87,7 +87,7 @@ class TestLog(TestCase): @patch.object(BaseLogger, "get_rank") def test_error_on_rank_0(self, mock_get_rank): mock_print = MagicMock() - with patch("atat.core.common.log.print", new=mock_print): + with patch("msprobe.core.common.log.print", new=mock_print): 
mock_get_rank.return_value = 0 logger.error_on_rank_0("msg") self.assertIn("[ERROR] msg", mock_print.call_args[0][0]) @@ -99,7 +99,7 @@ class TestLog(TestCase): @patch.object(BaseLogger, "get_rank") def test_warning_on_rank_0(self, mock_get_rank): mock_print = MagicMock() - with patch("atat.core.common.log.print", new=mock_print): + with patch("msprobe.core.common.log.print", new=mock_print): mock_get_rank.return_value = 0 logger.warning_on_rank_0("msg") self.assertIn("[WARNING] msg", mock_print.call_args[0][0]) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_api_kbk_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_api_kbk_dump.py index 47d60999b1..7411018ff0 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_api_kbk_dump.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_api_kbk_dump.py @@ -19,9 +19,9 @@ import os from unittest import TestCase from unittest.mock import patch -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.dump.api_kbk_dump import ApiKbkDump +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.dump.api_kbk_dump import ApiKbkDump class TestApiKbkDump(TestCase): @@ -42,10 +42,10 @@ class TestApiKbkDump(TestCase): self.assertEqual(dumper.dump_json["common_dump_settings"]["iteration"], "0|2") os.environ["MS_ACL_DUMP_CFG_PATH"] = "path" - with patch("atat.mindspore.dump.api_kbk_dump.make_dump_path_if_not_exists"), \ - patch("atat.mindspore.dump.api_kbk_dump.FileOpen"), \ - patch("atat.mindspore.dump.api_kbk_dump.json.dump"), \ - patch("atat.mindspore.dump.api_kbk_dump.logger.info"): + with patch("msprobe.mindspore.dump.api_kbk_dump.make_dump_path_if_not_exists"), \ + patch("msprobe.mindspore.dump.api_kbk_dump.FileOpen"), \ + patch("msprobe.mindspore.dump.api_kbk_dump.json.dump"), \ + 
patch("msprobe.mindspore.dump.api_kbk_dump.logger.info"): dumper.handle() self.assertEqual(os.environ.get("GRAPH_OP_RUN"), "1") self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py index 3bdf341c39..54bc1393aa 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py @@ -16,9 +16,9 @@ """ from unittest import TestCase -from atat.core.common.const import Const -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.core.common.const import Const +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig class TestDebuggerConfig(TestCase): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py index f6626f551f..fb88d7bbbf 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py @@ -16,9 +16,9 @@ """ from unittest import TestCase -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.dump.dump_tool_factory import DumpToolFactory +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.dump.dump_tool_factory import DumpToolFactory class TestDumpToolFactory(TestCase): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_dump.py index 
6c59521a17..e691a2c7ed 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_dump.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_dump.py @@ -19,9 +19,9 @@ import os from unittest import TestCase from unittest.mock import patch -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.dump.kernel_graph_dump import KernelGraphDump +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump class TestKernelGraphDump(TestCase): @@ -45,10 +45,10 @@ class TestKernelGraphDump(TestCase): self.assertEqual(dumper.dump_json["common_dump_settings"]["file_format"], "bin") self.assertEqual(dumper.dump_json["common_dump_settings"]["input_output"], 2) - with patch("atat.mindspore.dump.kernel_graph_dump.make_dump_path_if_not_exists"), \ - patch("atat.mindspore.dump.kernel_graph_dump.FileOpen"), \ - patch("atat.mindspore.dump.kernel_graph_dump.json.dump"), \ - patch("atat.mindspore.dump.kernel_graph_dump.logger.info"): + with patch("msprobe.mindspore.dump.kernel_graph_dump.make_dump_path_if_not_exists"), \ + patch("msprobe.mindspore.dump.kernel_graph_dump.FileOpen"), \ + patch("msprobe.mindspore.dump.kernel_graph_dump.json.dump"), \ + patch("msprobe.mindspore.dump.kernel_graph_dump.logger.info"): os.environ["GRAPH_OP_RUN"] = "1" with self.assertRaises(Exception) as context: diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py index 101482458d..a93fab021a 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py @@ -19,9 +19,9 @@ import os from 
unittest import TestCase from unittest.mock import patch -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck class TestKernelGraphOverflowCheck(TestCase): @@ -43,10 +43,10 @@ class TestKernelGraphOverflowCheck(TestCase): self.assertEqual(checker.dump_json["common_dump_settings"]["op_debug_mode"], 2) os.environ["MS_ACL_DUMP_CFG_PATH"] = "path" - with patch("atat.mindspore.overflow_check.kernel_graph_overflow_check.make_dump_path_if_not_exists"), \ - patch("atat.mindspore.overflow_check.kernel_graph_overflow_check.FileOpen"), \ - patch("atat.mindspore.overflow_check.kernel_graph_overflow_check.json.dump"), \ - patch("atat.mindspore.overflow_check.kernel_graph_overflow_check.logger.info"): + with patch("msprobe.mindspore.overflow_check.kernel_graph_overflow_check.make_dump_path_if_not_exists"), \ + patch("msprobe.mindspore.overflow_check.kernel_graph_overflow_check.FileOpen"), \ + patch("msprobe.mindspore.overflow_check.kernel_graph_overflow_check.json.dump"), \ + patch("msprobe.mindspore.overflow_check.kernel_graph_overflow_check.logger.info"): os.environ["GRAPH_OP_RUN"] = "1" with self.assertRaises(Exception) as context: diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py index 3dc3670128..673386afb5 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py @@ -17,8 +17,8 @@ from unittest import TestCase from unittest.mock import patch, mock_open -from atat.core.common.const import 
Const -from atat.mindspore.ms_config import (parse_json_config, parse_task_config, +from msprobe.core.common.const import Const +from msprobe.mindspore.ms_config import (parse_json_config, parse_task_config, TensorConfig, StatisticsConfig, OverflowCheck) @@ -37,8 +37,8 @@ class TestMsConfig(TestCase): "summary_mode": "statistics" } } - with patch("atat.mindspore.ms_config.FileOpen", mock_open(read_data='')), \ - patch("atat.mindspore.ms_config.json.load", return_value=mock_json_data): + with patch("msprobe.mindspore.ms_config.FileOpen", mock_open(read_data='')), \ + patch("msprobe.mindspore.ms_config.json.load", return_value=mock_json_data): common_config, task_config = parse_json_config("./config.json") self.assertEqual(common_config.task, Const.STATISTICS) self.assertEqual(task_config.data_mode, ["all"]) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py index 497fe1376a..47da051d4f 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py @@ -16,9 +16,9 @@ """ from unittest import TestCase -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.overflow_check.overflow_check_tool_factory import OverflowCheckToolFactory +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.overflow_check.overflow_check_tool_factory import OverflowCheckToolFactory class TestOverflowCheckToolFactory(TestCase): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py index 834a58e41a..b33167dc7b 100644 --- 
a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py @@ -17,9 +17,9 @@ from unittest import TestCase from unittest.mock import patch -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.debugger.precision_debugger import PrecisionDebugger +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger class TestPrecisionDebugger(TestCase): @@ -42,9 +42,9 @@ class TestPrecisionDebugger(TestCase): task_config = BaseConfig(json_config) handler = Handler() - with patch("atat.mindspore.debugger.precision_debugger.parse_json_config", + with patch("msprobe.mindspore.debugger.precision_debugger.parse_json_config", return_value=[common_config, task_config]), \ - patch("atat.mindspore.debugger.precision_debugger.TaskHandlerFactory.create", return_value=handler): + patch("msprobe.mindspore.debugger.precision_debugger.TaskHandlerFactory.create", return_value=handler): debugger = PrecisionDebugger() debugger.start() self.assertTrue(isinstance(debugger.config, DebuggerConfig)) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_task_handler_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_task_handler_factory.py index 02cd9934cb..41be7b1db6 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_task_handler_factory.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_task_handler_factory.py @@ -17,10 +17,10 @@ from unittest import TestCase from unittest.mock import patch -from atat.core.common_config import CommonConfig, BaseConfig -from atat.mindspore.debugger.debugger_config import DebuggerConfig -from atat.mindspore.dump.kernel_graph_dump import KernelGraphDump -from 
atat.mindspore.task_handler_factory import TaskHandlerFactory +from msprobe.core.common_config import CommonConfig, BaseConfig +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump +from msprobe.mindspore.task_handler_factory import TaskHandlerFactory class TestTaskHandlerFactory(TestCase): @@ -47,7 +47,7 @@ class TestTaskHandlerFactory(TestCase): handler = TaskHandlerFactory.create(config) self.assertTrue(isinstance(handler, KernelGraphDump)) - with patch("atat.mindspore.task_handler_factory.TaskHandlerFactory.tasks", new=tasks): + with patch("msprobe.mindspore.task_handler_factory.TaskHandlerFactory.tasks", new=tasks): with self.assertRaises(Exception) as context: TaskHandlerFactory.create(config) self.assertEqual(str(context.exception), "Can not find task handler") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/advisor/test_advisor.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/advisor/test_advisor.py index b3e9658a0d..176b80068f 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/advisor/test_advisor.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/advisor/test_advisor.py @@ -7,8 +7,8 @@ from unittest.mock import patch import pandas -from atat.pytorch.advisor.advisor import Advisor -from atat.pytorch.advisor.advisor_const import AdvisorConst +from msprobe.pytorch.advisor.advisor import Advisor +from msprobe.pytorch.advisor.advisor_const import AdvisorConst class TestAdvisor(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py index 16d0c0bc12..56d100f0a1 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py @@ -1,12 +1,12 @@ import 
unittest from unittest.mock import patch -from atat.pytorch.api_accuracy_checker.common.utils import * +from msprobe.pytorch.api_accuracy_checker.common.utils import * class TestUtils(unittest.TestCase): - @patch('atat.pytorch.api_accuracy_checker.common.utils.get_file_content_bytes') + @patch('msprobe.pytorch.api_accuracy_checker.common.utils.get_file_content_bytes') def test_get_json_contents_should_raise_exception(self, mock_get_file_content_bytes): mock_get_file_content_bytes.return_value = 'not a dict' with self.assertRaises(CompareException) as ce: diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py index 066e74aa51..ec606d9aa1 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py @@ -2,7 +2,7 @@ import unittest import os from unittest.mock import patch -from atat.pytorch.api_accuracy_checker.common.config import Config +from msprobe.pytorch.api_accuracy_checker.common.config import Config class TestConfig(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py index 9604e7a681..35a8b9f1fa 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py @@ -2,7 +2,7 @@ import unittest import numpy as np -from atat.pytorch.api_accuracy_checker.compare import algorithm as alg +from msprobe.pytorch.api_accuracy_checker.compare import algorithm as alg class TestAlgorithmMethods(unittest.TestCase): diff --git 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py index 7717d82657..540460d089 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py @@ -2,14 +2,14 @@ import unittest import pandas as pd -from atat.pytorch.api_accuracy_checker.compare.api_precision_compare import ( +from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import ( CompareConfig, BenchmarkStandard, check_csv_columns, check_error_rate, get_api_checker_result, ) -from atat.core.common.const import CompareConst +from msprobe.core.common.const import CompareConst class TestApiPrecisionCompare(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py index 2c97471c7a..e1e6d51de2 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py @@ -7,9 +7,9 @@ import unittest import numpy as np import torch.nn.functional -from atat.pytorch.api_accuracy_checker.compare.compare import Comparator -from atat.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn -from atat.pytorch.api_accuracy_checker.run_ut.run_ut import UtDataInfo +from msprobe.pytorch.api_accuracy_checker.compare.compare import Comparator +from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import UtDataInfo current_time = time.strftime("%Y%m%d%H%M%S") RESULT_FILE_NAME = "accuracy_checking_result_" + 
current_time + ".csv" diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py index ee25a25e74..782321868a 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py @@ -1,6 +1,6 @@ import unittest -from atat.pytorch.api_accuracy_checker.compare.compare_column import ApiPrecisionOutputColumn +from msprobe.pytorch.api_accuracy_checker.compare.compare_column import ApiPrecisionOutputColumn class TestCompareColumns(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py index 93f3c2c73e..ac9c974ea3 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py @@ -2,8 +2,8 @@ import unittest import numpy as np -from atat.pytorch.api_accuracy_checker.common.utils import CompareException -from atat.pytorch.api_accuracy_checker.compare.compare_utils import check_dtype_comparable, convert_str_to_float +from msprobe.pytorch.api_accuracy_checker.common.utils import CompareException +from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import check_dtype_comparable, convert_str_to_float class TestCompareUtils(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py index f47c71c984..f664dad197 100644 --- 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py @@ -3,8 +3,8 @@ import os import unittest import copy -from atat.pytorch.api_accuracy_checker.run_ut.data_generate import * -from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents +from msprobe.pytorch.api_accuracy_checker.run_ut.data_generate import * +from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents base_dir = os.path.dirname(os.path.realpath(__file__)) forward_file = os.path.join(base_dir, "forward.json") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py index 6a9071f15e..771e042380 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py @@ -5,7 +5,7 @@ import logging from unittest.mock import patch, mock_open, MagicMock import json import signal -from atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut import split_json_file, signal_handler, run_parallel_ut, \ +from msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut import split_json_file, signal_handler, run_parallel_ut, \ prepare_config, main, ParallelUTConfig @@ -20,7 +20,7 @@ class TestMultiRunUT(unittest.TestCase): {'key3': 'TRUE', 'key4': 'TRUE'} ] - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.FileOpen') + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.FileOpen') def test_split_json_file(self, mock_FileOpen): mock_FileOpen.return_value.__enter__.return_value = mock_open(read_data=self.test_json_content).return_value num_splits = 2 @@ -63,10 +63,10 @@ class TestMultiRunUT(unittest.TestCase): @patch('os.remove') 
@patch('os.path.realpath', side_effect=lambda x: x) - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.check_link') - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.check_file_suffix') - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.FileChecker') - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.split_json_file', + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.check_link') + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.check_file_suffix') + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.FileChecker') + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.split_json_file', return_value=(['forward_split1.json', 'forward_split2.json'], 2)) def test_prepare_config(self, mock_split_json_file, mock_FileChecker, mock_check_file_suffix, mock_check_link, mock_realpath, mock_remove): @@ -93,8 +93,8 @@ class TestMultiRunUT(unittest.TestCase): @patch('argparse.ArgumentParser.parse_args') - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.prepare_config') - @patch('atat.pytorch.api_accuracy_checker.run_ut.multi_run_ut.run_parallel_ut') + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.prepare_config') + @patch('msprobe.pytorch.api_accuracy_checker.run_ut.multi_run_ut.run_parallel_ut') def test_main(self, mock_run_parallel_ut, mock_prepare_config, mock_parse_args): main() mock_parse_args.assert_called() diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py index 97dccd2b58..bc643794ab 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py @@ -4,8 +4,8 @@ import copy import unittest import torch from unittest.mock import patch, DEFAULT -from 
atat.pytorch.api_accuracy_checker.run_ut.run_ut import * -from atat.pytorch.api_accuracy_checker.common.utils import get_json_contents +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import * +from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents base_dir = os.path.dirname(os.path.realpath(__file__)) forward_file = os.path.join(base_dir, "forward.json") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py index 5a82289a00..fb442941b0 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py @@ -1,6 +1,6 @@ # coding=utf-8 import unittest -from atat.pytorch.compare.acc_compare import rename_api +from msprobe.pytorch.compare.acc_compare import rename_api class TestUtilsMethods(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py index 828d646c52..ad9eb5cd0e 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py @@ -1,10 +1,10 @@ from unittest import TestCase import torch -from atat.core.common.const import Const -from atat.pytorch.free_benchmark.common.enums import DeviceType, PerturbationMode -from atat.pytorch.free_benchmark.common.params import data_pre_deal -from atat.pytorch.free_benchmark.perturbed_layers.layer_factory import LayerFactory +from msprobe.core.common.const import Const +from msprobe.pytorch.free_benchmark.common.enums import DeviceType, PerturbationMode +from msprobe.pytorch.free_benchmark.common.params import data_pre_deal +from 
msprobe.pytorch.free_benchmark.perturbed_layers.layer_factory import LayerFactory class TestPerturbedLayer(TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py index d46e26e094..399efeb42d 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py @@ -2,17 +2,17 @@ from abc import ABC from unittest import TestCase import torch -from atat.core.common.const import Const -from atat.pytorch.free_benchmark.common.constant import PreheatConfig, ThresholdConfig -from atat.pytorch.free_benchmark.common.counter import preheat_counter -from atat.pytorch.free_benchmark.common.enums import ( +from msprobe.core.common.const import Const +from msprobe.pytorch.free_benchmark.common.constant import PreheatConfig, ThresholdConfig +from msprobe.pytorch.free_benchmark.common.counter import preheat_counter +from msprobe.pytorch.free_benchmark.common.enums import ( DeviceType, FuzzLevel, HandlerType, PerturbationMode, ) -from atat.pytorch.free_benchmark.common.params import DataParams, make_handler_params -from atat.pytorch.free_benchmark.result_handlers.handler_factory import ( +from msprobe.pytorch.free_benchmark.common.params import DataParams, make_handler_params +from msprobe.pytorch.free_benchmark.result_handlers.handler_factory import ( FuzzHandlerFactory, ) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/test_main.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/test_main.py index d326e993c0..4498a2af70 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/test_main.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/free_benchmark/test_main.py @@ -4,10 +4,10 @@ from unittest import TestCase 
import torch import torch.nn as nn -from atat.core.common.const import Const -from atat.pytorch.free_benchmark import FreeBenchmarkCheck -from atat.pytorch.free_benchmark.common.constant import CommonField, PreheatConfig -from atat.pytorch.free_benchmark.common.enums import ( +from msprobe.core.common.const import Const +from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck +from msprobe.pytorch.free_benchmark.common.constant import CommonField, PreheatConfig +from msprobe.pytorch.free_benchmark.common.enums import ( DeviceType, FuzzLevel, HandlerType, diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/functional/test_dump_module.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/functional/test_dump_module.py index 78b8b6bcc5..d67adf2f91 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/functional/test_dump_module.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/functional/test_dump_module.py @@ -1,8 +1,8 @@ import unittest import torch.nn as nn -from atat.pytorch import PrecisionDebugger -from atat.pytorch.functional.dump_module import module_dump, module_count +from msprobe.pytorch import PrecisionDebugger +from msprobe.pytorch.functional.dump_module import module_dump, module_count class TestDumpModule(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py index c80e5dbed4..837ad23df7 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py @@ -1,5 +1,5 @@ import unittest -from atat.pytorch.hook_module.api_registry import ApiRegistry, torch_version_above_2, is_gpu +from msprobe.pytorch.hook_module.api_registry import ApiRegistry, torch_version_above_2, is_gpu class TestApiRegistry(unittest.TestCase): @@ -43,7 +43,7 @@ class TestApiRegistry(unittest.TestCase): import torch import 
torch.distributed as dist #import torch_npu #门禁没有安装torch_npu - from atat.pytorch.hook_module.api_registry import torch_without_guard_version, npu_distributed_api, is_gpu, torch_version_above_2 + from msprobe.pytorch.hook_module.api_registry import torch_without_guard_version, npu_distributed_api, is_gpu, torch_version_above_2 @@ -79,7 +79,7 @@ class TestApiRegistry(unittest.TestCase): import torch import torch.distributed as dist #import torch_npu #门禁没有安装torch_npu - from atat.pytorch.hook_module.api_registry import torch_without_guard_version, npu_distributed_api, is_gpu, torch_version_above_2 + from msprobe.pytorch.hook_module.api_registry import torch_without_guard_version, npu_distributed_api, is_gpu, torch_version_above_2 diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_hook_module.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_hook_module.py index 646f641522..50783e5d73 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_hook_module.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_hook_module.py @@ -1,7 +1,7 @@ import unittest from unittest.mock import patch, Mock -from atat.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.hook_module.hook_module import HOOKModule class TestHookModule(unittest.TestCase): def test_call_1(self): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py index 92aee790dd..4940b07cb0 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py @@ -1,6 +1,6 @@ import unittest import torch -from atat.pytorch.hook_module.wrap_aten import AtenOPTemplate, AtenOPPacketTemplate +from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate, AtenOPPacketTemplate def hook(name): diff --git 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py index bd0501ef2f..9a375e45bf 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py @@ -1,6 +1,6 @@ import unittest import torch.distributed as dist -from atat.pytorch.hook_module.wrap_distributed import * +from msprobe.pytorch.hook_module.wrap_distributed import * class TestWrapDistributed(unittest.TestCase): def hook(name, prefix): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py index 232117498b..f43b8ea6cb 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py @@ -1,6 +1,6 @@ import unittest import torch -from atat.pytorch.hook_module import wrap_functional as wf +from msprobe.pytorch.hook_module import wrap_functional as wf class TestWrapFunctional(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py index e027270540..61f76b0ca0 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py @@ -1,7 +1,7 @@ import unittest import torch import yaml -from atat.pytorch.hook_module.wrap_tensor import get_tensor_ops, HOOKTensor, TensorOPTemplate, wrap_tensor_op, wrap_tensor_ops_and_bind +from msprobe.pytorch.hook_module.wrap_tensor import get_tensor_ops, HOOKTensor, TensorOPTemplate, wrap_tensor_op, wrap_tensor_ops_and_bind class TestWrapTensor(unittest.TestCase): diff --git 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py index 8817bc758a..e1a3e77983 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py @@ -1,7 +1,7 @@ import unittest import torch import yaml -from atat.pytorch.hook_module.wrap_torch import * +from msprobe.pytorch.hook_module.wrap_torch import * class TestWrapTorch(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py index 8d57fad6eb..98efb4bc5b 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py @@ -1,6 +1,6 @@ import unittest import torch -from atat.pytorch.hook_module import wrap_vf +from msprobe.pytorch.hook_module import wrap_vf class TestWrapVF(unittest.TestCase): def setUp(self): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py index f36e015bfe..53b4e66c1b 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py @@ -1,8 +1,8 @@ from unittest import TestCase from unittest.mock import patch, mock_open -from atat.core.common.const import Const -from atat.pytorch.pt_config import parse_json_config, parse_task_config +from msprobe.core.common.const import Const +from msprobe.pytorch.pt_config import parse_json_config, parse_task_config class TestPtConfig(TestCase): @@ -23,16 +23,16 @@ class TestPtConfig(TestCase): "file_format": "npy" } } - with patch("atat.pytorch.pt_config.os.path.join", return_value="/path/config.json"), \ - patch("atat.pytorch.pt_config.FileOpen", 
mock_open(read_data='')), \ - patch("atat.pytorch.pt_config.json.load", return_value=mock_json_data): + with patch("msprobe.pytorch.pt_config.os.path.join", return_value="/path/config.json"), \ + patch("msprobe.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ + patch("msprobe.pytorch.pt_config.json.load", return_value=mock_json_data): common_config, task_config = parse_json_config(None, None) self.assertEqual(common_config.task, Const.STATISTICS) self.assertEqual(task_config.data_mode, ["all"]) - with patch("atat.pytorch.pt_config.os.path.join", return_value="/path/config.json"), \ - patch("atat.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ - patch("atat.pytorch.pt_config.json.load", return_value=mock_json_data): + with patch("msprobe.pytorch.pt_config.os.path.join", return_value="/path/config.json"), \ + patch("msprobe.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ + patch("msprobe.pytorch.pt_config.json.load", return_value=mock_json_data): common_config, task_config = parse_json_config(None, Const.TENSOR) self.assertEqual(common_config.task, Const.STATISTICS) self.assertEqual(task_config.file_format, "npy") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py index b36be15b85..c09b6abcb6 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py @@ -2,10 +2,10 @@ import unittest from unittest.mock import patch, mock_open import torch.nn as nn -from atat.core.common.utils import Const -from atat.pytorch.debugger.debugger_config import DebuggerConfig -from atat.pytorch.pt_config import parse_json_config -from atat.pytorch.service import Service +from msprobe.core.common.utils import Const +from msprobe.pytorch.debugger.debugger_config import DebuggerConfig +from msprobe.pytorch.pt_config import parse_json_config +from msprobe.pytorch.service import Service class 
TestService(unittest.TestCase): @@ -13,20 +13,20 @@ class TestService(unittest.TestCase): mock_json_data = { "dump_path": "./dump/", } - with patch("atat.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ - patch("atat.pytorch.pt_config.json.load", return_value=mock_json_data): + with patch("msprobe.pytorch.pt_config.FileOpen", mock_open(read_data='')), \ + patch("msprobe.pytorch.pt_config.json.load", return_value=mock_json_data): common_config, task_config = parse_json_config("./config.json", Const.STATISTICS) self.config = DebuggerConfig(common_config, task_config, Const.STATISTICS, "./ut_dump", "L1") self.service = Service(self.config) def test_start(self): - with patch("atat.pytorch.service.get_rank_if_initialized", return_value=0), \ - patch("atat.pytorch.service.Service.create_dirs", return_value=None): + with patch("msprobe.pytorch.service.get_rank_if_initialized", return_value=0), \ + patch("msprobe.pytorch.service.Service.create_dirs", return_value=None): self.service.start(None) self.assertEqual(self.service.current_rank, 0) def test_stop_and_step(self): - with patch("atat.core.data_dump.data_collector.DataCollector.write_json", return_value=None): + with patch("msprobe.core.data_dump.data_collector.DataCollector.write_json", return_value=None): self.service.stop() self.assertFalse(self.service.switch) @@ -45,15 +45,15 @@ class TestService(unittest.TestCase): self.service.model = TestModule() self.config.level = "L0" - with patch("atat.pytorch.service.logger.info_on_rank_0") as mock_logger, \ - patch("atat.pytorch.service.remove_dropout", return_value=None): + with patch("msprobe.pytorch.service.logger.info_on_rank_0") as mock_logger, \ + patch("msprobe.pytorch.service.remove_dropout", return_value=None): self.service.register_hook_new() self.assertEqual(mock_logger.call_count, 2) def test_create_dirs(self): - with patch("atat.pytorch.service.Path.mkdir", return_value=None), \ - patch("atat.core.common.file_check.FileChecker.common_check", 
return_value=None), \ - patch("atat.core.data_dump.data_collector.DataCollector.update_dump_paths", + with patch("msprobe.pytorch.service.Path.mkdir", return_value=None), \ + patch("msprobe.core.common.file_check.FileChecker.common_check", return_value=None), \ + patch("msprobe.core.data_dump.data_collector.DataCollector.update_dump_paths", return_value=None): self.service.create_dirs() self.assertEqual(self.service.dump_iter_dir, "./ut_dump/step0") diff --git a/debug/accuracy_tools/msprobe/test/run_ut.py b/debug/accuracy_tools/msprobe/test/run_ut.py index 7c593c14ab..8ea81ccca7 100644 --- a/debug/accuracy_tools/msprobe/test/run_ut.py +++ b/debug/accuracy_tools/msprobe/test/run_ut.py @@ -3,7 +3,7 @@ import shutil import subprocess import sys -from atat.core.common.log import logger +from msprobe.core.common.log import logger def run_ut(): diff --git a/debug/accuracy_tools/msprobe/test/test_module_processer.py b/debug/accuracy_tools/msprobe/test/test_module_processer.py index 89ee299f66..448c35f055 100644 --- a/debug/accuracy_tools/msprobe/test/test_module_processer.py +++ b/debug/accuracy_tools/msprobe/test/test_module_processer.py @@ -1,6 +1,6 @@ import unittest -from atat.pytorch.module_processer import ModuleProcesser -from atat.pytorch.common.utils import Const +from msprobe.pytorch.module_processer import ModuleProcesser +from msprobe.pytorch.common.utils import Const import torch -- Gitee From 8315c355683ef19167842ae6e6510e585c3a7ac0 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 19:04:36 +0800 Subject: [PATCH 020/106] atat to msprobe6 --- debug/accuracy_tools/MANIFEST.in | 4 +-- .../api_accuracy_checker/README.md | 2 +- .../api_accuracy_checker/common/utils.py | 2 +- debug/accuracy_tools/msprobe/README.md | 16 +++++----- debug/accuracy_tools/msprobe/config/README.md | 2 +- .../msprobe/mindspore/doc/dump.md | 8 ++--- .../accuracy_tools/msprobe/pytorch/doc/FAQ.md | 30 +++++++++---------- .../pytorch/doc/api_accuracy_checker.md | 16 
+++++----- ...72\347\272\277\346\212\245\345\221\212.md" | 4 +-- .../msprobe/pytorch/doc/dump.md | 12 ++++---- .../msprobe/pytorch/doc/parse_tool.md | 6 ++-- .../pytorch/doc/ptdbg_ascend_compare.md | 6 ++-- .../pytorch/doc/ptdbg_ascend_overview.md | 10 +++---- .../pytorch/doc/ptdbg_ascend_quickstart.md | 24 +++++++-------- .../msprobe/pytorch/doc/run_overflow_check.md | 2 +- ...76\345\272\246\346\257\224\345\257\271.md" | 8 ++--- debug/accuracy_tools/ptdbg_ascend/README.md | 2 +- .../src/python/ptdbg_ascend/common/utils.py | 2 +- debug/accuracy_tools/setup.py | 2 +- 19 files changed, 79 insertions(+), 79 deletions(-) diff --git a/debug/accuracy_tools/MANIFEST.in b/debug/accuracy_tools/MANIFEST.in index 7242c0c956..547864a6c8 100644 --- a/debug/accuracy_tools/MANIFEST.in +++ b/debug/accuracy_tools/MANIFEST.in @@ -1,2 +1,2 @@ -recursive-include atat/ * -recursive-exclude atat/test * \ No newline at end of file +recursive-include msprobe/ * +recursive-exclude msprobe/test * \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/README.md b/debug/accuracy_tools/api_accuracy_checker/README.md index dbab78fc53..2d3c01831b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/README.md +++ b/debug/accuracy_tools/api_accuracy_checker/README.md @@ -2,7 +2,7 @@ ## 版本过渡提示 -当前版本预检维护到2024/09/30,准备于2024/09/30下线,相关目录mstt/debug/accuracy_tools/api_accuracy_checker将于2024/09/30删除。新版本的预检已经合到mstt/debug/accuracy_tools/atat目录下。 +当前版本预检维护到2024/09/30,准备于2024/09/30下线,相关目录mstt/debug/accuracy_tools/api_accuracy_checker将于2024/09/30删除。新版本的预检已经合到mstt/debug/accuracy_tools/msprobe目录下。 Ascend模型精度预检工具能在昇腾NPU上扫描用户训练模型中所有API,输出精度情况的诊断和分析。工具通过dump模型中所有的API前反向信息;构造相应的API单元测试,将NPU输出与标杆(CPU高精度)比对,从而计算对应的精度指标,该过程称为run_ut;将NPU环境下dump的预检数据拷贝至GPU环境,同样执行run_ut;最后通过新精度标准比对法将NPU和GPU的预检结果进行比对,从而找出NPU中存在精度问题的API。 diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 5af80a1fff..76d117afb4 100644 --- 
a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -108,7 +108,7 @@ class Const: VERSION_MESSAGE = """The current version of api_precision_checker will be deprecated on September 30, 2024. The att/debug/accuracy_tools/api_accuracy_checker directory will be deleted on September 30, 2024. - Please use the api_precision_checker in the att/debug/accuracy_tools/atat directory.""" + Please use the api_precision_checker in the att/debug/accuracy_tools/msprobe directory.""" class CompareConst: diff --git a/debug/accuracy_tools/msprobe/README.md b/debug/accuracy_tools/msprobe/README.md index de7d74f8f2..f22718248a 100644 --- a/debug/accuracy_tools/msprobe/README.md +++ b/debug/accuracy_tools/msprobe/README.md @@ -1,6 +1,6 @@ # MindStudio精度调试工具 -MindStudio精度调试工具(ascend_training_accuracy_tools),简称atat,是MindStudio Training Tools工具链下精度调试部分的工具包。主要包括精度预检和精度比对等子工具,当前适配场景包括PyTorch和MindSpore。 +MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStudio Training Tools工具链下精度调试部分的工具包。主要包括精度预检和精度比对等子工具,当前适配场景包括PyTorch和MindSpore。 ## 工具安装 @@ -61,7 +61,7 @@ MindStudio精度调试工具(ascend_training_accuracy_tools),简称atat, ## 工具使用 -安装atat工具后,可以按照如下思路选择合适的子工具进行精度调试: +安装msprobe工具后,可以按照如下思路选择合适的子工具进行精度调试: 1. 
判断框架场景。 @@ -107,32 +107,32 @@ MindStudio精度调试工具(ascend_training_accuracy_tools),简称atat, MindSpore场景:暂不支持。 -上述流程中的工具均为atat工具的子工具,使用相同的命令行,格式如下: +上述流程中的工具均为msprobe工具的子工具,使用相同的命令行,格式如下: 精度预检工具 ```bash -atat -f run_ut [-h] +msprobe -f run_ut [-h] ``` ```bash -atat -f multi_run_ut [-h] +msprobe -f multi_run_ut [-h] ``` ```bash -atat -f api_precision_compare [-h] +msprobe -f api_precision_compare [-h] ``` 溢出解析工具 ```bash -atat -f run_overflow_check [-h] +msprobe -f run_overflow_check [-h] ``` 数据解析工具 ```bash -atat -f parse [-h] +msprobe -f parse [-h] ``` | 参数 | 说明 | diff --git a/debug/accuracy_tools/msprobe/config/README.md b/debug/accuracy_tools/msprobe/config/README.md index a998704993..7b91bd26f1 100644 --- a/debug/accuracy_tools/msprobe/config/README.md +++ b/debug/accuracy_tools/msprobe/config/README.md @@ -394,4 +394,4 @@ train_loader = torch.utils.data.DataLoader( 关闭dropout: -在使用from atat.pytorch import PrecisionDebugger后,工具会自动将torch.nn.functional.dropout、torch.nn.functional.dropout2d、torch.nn.functional.dropout3d、torch.nn.Dropout、torch.nn.Dropout2d、torch.nn.Dropout3d的接口参数p置为0。 +在使用from msprobe.pytorch import PrecisionDebugger后,工具会自动将torch.nn.functional.dropout、torch.nn.functional.dropout2d、torch.nn.functional.dropout3d、torch.nn.Dropout、torch.nn.Dropout2d、torch.nn.Dropout3d的接口参数p置为0。 diff --git a/debug/accuracy_tools/msprobe/mindspore/doc/dump.md b/debug/accuracy_tools/msprobe/mindspore/doc/dump.md index 3321a4da12..425d0683a2 100644 --- a/debug/accuracy_tools/msprobe/mindspore/doc/dump.md +++ b/debug/accuracy_tools/msprobe/mindspore/doc/dump.md @@ -1,8 +1,8 @@ # **精度数据采集** -atat工具主要通过在训练脚本内添加dump接口并启动训练的方式来采集精度数据。 +msprobe工具主要通过在训练脚本内添加dump接口并启动训练的方式来采集精度数据。 -执行dump操作需要安装atat工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 +执行dump操作需要安装msprobe工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 ## dump接口介绍 @@ -12,7 +12,7 @@ atat工具主要通过在训练脚本内添加dump接口并启动训练的方式 通过加载dump配置文件的方式来确定dump操作的详细配置。 -可以在from atat.mindspore import PrecisionDebugger和模型初始化之间的任意位置添加该接口。 +可以在from 
msprobe.mindspore import PrecisionDebugger和模型初始化之间的任意位置添加该接口。 **原型** @@ -43,7 +43,7 @@ debugger.start() ## 示例代码 ```Python -from atat.mindspore import PrecisionDebugger +from msprobe.mindspore import PrecisionDebugger debugger = PrecisionDebugger(config_path="./config.json") # 请勿将以上初始化流程插入到循环代码中 # 下面代码也可以用PrecisionDebugger.start() diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/FAQ.md b/debug/accuracy_tools/msprobe/pytorch/doc/FAQ.md index 19a434a194..8d12a72928 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/FAQ.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/FAQ.md @@ -22,15 +22,15 @@ 6. 添加预检工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 - 答:注释工具目录mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- __getitem__`,工具会跳过dump该API。如果是需要dump的关键位置API也可以考虑根据报错堆栈信息注释引发报错的类型检查。 + 答:注释工具目录mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- __getitem__`,工具会跳过dump该API。如果是需要dump的关键位置API也可以考虑根据报错堆栈信息注释引发报错的类型检查。 7. 添加预检工具后F.gelu触发ValueError报错:`activation_func must be F.gelu`等。 - 答:注释工具目录mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml文件中functional:下的的`- gelu`,工具会跳过dump该API。如果是需要dump的关键位置API也可以考虑根据报错堆栈信息注释引发报错的类型检查。 + 答:注释工具目录mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml文件中functional:下的的`- gelu`,工具会跳过dump该API。如果是需要dump的关键位置API也可以考虑根据报错堆栈信息注释引发报错的类型检查。 8. 添加预检工具后触发AsStrided算子相关的报错,或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 - 答:注释工具目录mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- t`和`- transpose`。 + 答:注释工具目录mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- t`和`- transpose`。 9. Tensor 魔法函数具体对应什么操作? 
@@ -75,7 +75,7 @@ ### dump指定融合算子 -dump指定操作当前支持dump指定融合算子的输入输出,需要在mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml中添加,比如以下代码段调用的softmax融合算子 +dump指定操作当前支持dump指定融合算子的输入输出,需要在mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml中添加,比如以下代码段调用的softmax融合算子 ``` def npu_forward_fused_softmax(self, input_, mask): @@ -111,7 +111,7 @@ torch版本和硬件差异属于正常情况。 **故障现象** -使用atat工具时,报错: error code: EI0006。 +使用msprobe工具时,报错: error code: EI0006。 **故障原因** @@ -136,7 +136,7 @@ torch.npu.set_device('npu:0') torch.npu.set_device(f'npu:{rank}') ``` -如果运行精度比对功能遇到这个报错,尝试安装最新版本的atat。 +如果运行精度比对功能遇到这个报错,尝试安装最新版本的msprobe。 ### 4. dump得到的VF_lstm_99_forward_input.1.0.npy、VF_lstm_99_forward_input.1.1.npy类似的数据是否正常? @@ -147,7 +147,7 @@ torch.npu.set_device(f'npu:{rank}') 在比对脚本中,设置stack_mode=True,例如: ``` -from atat.pytorch import compare +from msprobe.pytorch import compare dump_result_param={ "npu_json_path": "./npu_dump/dump.json", "bench_json_path": "./gpu_dump/dump.json", @@ -174,20 +174,20 @@ compare(dump_result_param, output_path="./output", stack_mode=True) ### 9. dump.json文件中的某些api的dtype类型为float16,但是读取此api的npy文件显示的dtype类型为float32 -- atat工具在dump数据时需要将原始数据从npu to cpu上再转换为numpy类型,npu to cpu的逻辑和gpu to cpu是保持一致的,都存在dtype可能从float16变为float32类型的情况,如果出现dtype不一致的问题,最终dump数据的dtype以pkl文件为准。 +- msprobe工具在dump数据时需要将原始数据从npu to cpu上再转换为numpy类型,npu to cpu的逻辑和gpu to cpu是保持一致的,都存在dtype可能从float16变为float32类型的情况,如果出现dtype不一致的问题,最终dump数据的dtype以pkl文件为准。 -### 10. 使用dataloader后raise异常Exception("atat: exit after iteration {}". format(max(self.config.step)) +### 10. 使用dataloader后raise异常Exception("msprobe: exit after iteration {}". format(max(self.config.step)) - 正常现象,dataloader通过raise结束程序,堆栈信息可忽略。 -### 11. 添加atat工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 +### 11. 
添加msprobe工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 -- 注释工具目录mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- __getitem__`,工具会跳过dump该API。如果是需要dump的关键位置API也可以考虑根据报错堆栈信息注释引发报错的类型检查。 +- 注释工具目录mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- __getitem__`,工具会跳过dump该API。如果是需要dump的关键位置API也可以考虑根据报错堆栈信息注释引发报错的类型检查。 -### 12. 添加atat工具后F.gelu触发ValueError报错:`activation_func must be F.gelu`等。 +### 12. 添加msprobe工具后F.gelu触发ValueError报错:`activation_func must be F.gelu`等。 -- 注释工具目录mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml文件中functional:下的的`- gelu`,工具会跳过dump该API。如果是需要dump的关键位置api也可以考虑根据报错堆栈信息注释引发报错的类型检查。 +- 注释工具目录mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml文件中functional:下的的`- gelu`,工具会跳过dump该API。如果是需要dump的关键位置api也可以考虑根据报错堆栈信息注释引发报错的类型检查。 -### 13. 添加atat工具后触发AsStrided算子相关的报错,或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 +### 13. 添加msprobe工具后触发AsStrided算子相关的报错,或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 -- 注释工具目录mstt/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- t`和`- transpose`。 +- 注释工具目录mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml文件中Tensor:下的`- t`和`- transpose`。 diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md b/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md index 0e45a4e83f..b3ed4a9e24 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md @@ -20,8 +20,8 @@ 精度预检操作流程如下: -1. 在NPU和GPU环境下分别安装atat工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 -2. 在NPU训练脚本内添加atat工具dump接口PrecisionDebugger采集待预检数据。详见《[精度数据采集](./dump.md)》。 +1. 在NPU和GPU环境下分别安装msprobe工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 +2. 
在NPU训练脚本内添加msprobe工具dump接口PrecisionDebugger采集待预检数据。详见《[精度数据采集](./dump.md)》。 3. 将NPU环境下dump的预检数据拷贝至GPU环境。 4. 在NPU和GPU环境下分别执行run_ut,生成结果用于最终api_precision_compare操作的输入。详见“**run_ut预检操作**”。 5. 将NPU和GPU执行run_ut生成的`accuracy_checking_details_{timestamp}.csv`结果文件拷贝至同一环境下。 @@ -43,7 +43,7 @@ run_ut预检操作包括如下场景: 1. 将API信息输入给run_ut模块运行精度检测并比对,运行如下命令: ```bash - atat -f pytorch run_ut -api_info ./dump.json + msprobe -f pytorch run_ut -api_info ./dump.json ``` | 参数名称 | 说明 | 是否必选 | @@ -61,7 +61,7 @@ run_ut预检操作包括如下场景: 2. (可选)如果需要保存比对不达标的输入和输出数据,可以在run_ut执行命令结尾添加-save_error_data,例如: ```bash - atat -f pytorch run_ut -api_info ./dump.json -save_error_data + msprobe -f pytorch run_ut -api_info ./dump.json -save_error_data ``` 数据默认会存盘到'./ut_error_data{timestamp}'路径下(相对于启动run_ut的路径),有需要的话,用户可以通过修改mstt/debug/accuracy_tools/api_accuracy_checker目录下,config.yaml文件的error_data_path参数来配置保存路径,详见“config.yaml文件说明”。 @@ -73,7 +73,7 @@ multi_run_ut.py脚本,可以并行执行多个run_ut操作,从而降低预 命令示例如下: ```bash -atat -f pytorch multi_run_ut -api_info ./dump.json -n 32 -d 0 1 2 3 +msprobe -f pytorch multi_run_ut -api_info ./dump.json -n 32 -d 0 1 2 3 ``` | 参数名称 | 说明 | 是否必选 | @@ -96,7 +96,7 @@ atat -f pytorch multi_run_ut -api_info ./dump.json -n 32 -d 0 1 2 3 断点续检操作通过如下命令执行: ```bash -atat -f pytorch run_ut -api_info ./dump.json -csv_path /home/xxx/ut/accuracy_checking_result_{timestamp}.csv +msprobe -f pytorch run_ut -api_info ./dump.json -csv_path /home/xxx/ut/accuracy_checking_result_{timestamp}.csv ``` #### API预检白名单 @@ -109,7 +109,7 @@ run_ut过程支持API预检白名单,操作方式如下: config.yaml文件可以通过配置参数来控制dump和run_ut操作的白名单等功能。 -文件路径为:mstt/debug/accuracy_tools/atat/pytorch/api_accuracy_checker/config.yaml +文件路径为:mstt/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml | 参数名称 | 说明 | 是否必选 | | --------------- | ------------------------------------------------------------ | -------- | @@ -203,7 +203,7 @@ API预检通过测试,则在`accuracy_checking_details_{timestamp}.csv`文件 
需要同时获取NPU和GPU环境下run_ut操作的预检结果`accuracy_checking_details_{timestamp}.csv`文件。执行如下命令进行NPU和GPU预检结果的比对: ```bash -atat -f pytorch api_precision_compare -npu /home/xxx/npu/accuracy_checking_details_{timestamp}.csv -gpu /home/xxx/gpu/accuracy_checking_details_{timestamp}.csv -o /home/xxx/ +msprobe -f pytorch api_precision_compare -npu /home/xxx/npu/accuracy_checking_details_{timestamp}.csv -gpu /home/xxx/gpu/accuracy_checking_details_{timestamp}.csv -o /home/xxx/ ``` | 参数名称 | 说明 | 是否必选 | diff --git "a/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" "b/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" index ed175ff301..c9db3ae78d 100644 --- "a/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" +++ "b/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" @@ -1,4 +1,4 @@ -# atat精度工具标准性能基线报告 +# msprobe精度工具标准性能基线报告 ## 环境信息 @@ -16,7 +16,7 @@ CANN:8.0.T2 ## 模型信息和性能基线 -大模型在使用atat工具dump数据时,建议先简化模型层数,减少dump数据量。 +大模型在使用msprobe工具dump数据时,建议先简化模型层数,减少dump数据量。 以下场景的性能基线测试数据均为多次测试后取平均值,因此实际运行时性能数据可能会根据环境状态稍有浮动。 diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/dump.md b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md index 1e401b4f5a..7d0763b684 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/dump.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md @@ -1,8 +1,8 @@ # **精度数据采集** 
-atat工具主要通过在训练脚本内添加dump接口并启动训练的方式来采集精度数据。 +msprobe工具主要通过在训练脚本内添加dump接口并启动训练的方式来采集精度数据。 -执行dump操作需要安装atat工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 +执行dump操作需要安装msprobe工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 ## dump接口介绍 @@ -12,7 +12,7 @@ atat工具主要通过在训练脚本内添加dump接口并启动训练的方式 通过加载dump配置文件的方式来确定dump操作的详细配置。 -可以在from atat.pytorch import PrecisionDebugger和模型初始化之间的任意位置添加该接口。 +可以在from msprobe.pytorch import PrecisionDebugger和模型初始化之间的任意位置添加该接口。 **原型** @@ -44,7 +44,7 @@ import torch import torch.nn as nn import torch_npu import torch.nn.functional as F -from atat.pytorch import PrecisionDebugger +from msprobe.pytorch import PrecisionDebugger torch.npu.set_device("npu:0") #定义一个简单的网络 @@ -124,7 +124,7 @@ debugger.step() ## 示例代码 ```Python -from atat.pytorch import PrecisionDebugger +from msprobe.pytorch import PrecisionDebugger debugger = PrecisionDebugger(config_path="./config.json", dump_path="./dump_path") # 请勿将以上初始化流程插入到循环代码中 @@ -193,7 +193,7 @@ pt文件保存的前缀和PyTorch对应关系如下: ## 工具支持的API列表 -atat工具维护固定的API支持列表,若需要删除或增加dump的API,可以在atat/pytorch/hook_module/support_wrap_ops.yaml文件内手动修改,如下示例: +msprobe工具维护固定的API支持列表,若需要删除或增加dump的API,可以在msprobe/pytorch/hook_module/support_wrap_ops.yaml文件内手动修改,如下示例: ```Python functional: # functional为算子类别,找到对应的类别,在该类别下按照下列格式删除或添加API diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/parse_tool.md b/debug/accuracy_tools/msprobe/pytorch/doc/parse_tool.md index 2300091291..81efa10fa3 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/parse_tool.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/parse_tool.md @@ -6,10 +6,10 @@ ## 进入parse交互式界面 -安装atat工具后(详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节),可以通过使用命令 **atat -f pytorch parse** 进入交互式界面,如下所示: +安装msprobe工具后(详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节),可以通过使用命令 **msprobe -f pytorch parse** 进入交互式界面,如下所示: ```bash -atat -f pytorch parse +msprobe -f pytorch parse Parse >>> ``` @@ -23,7 +23,7 @@ Parse >>> 
Ctrl+C可以退出parse交互式界面。不退出parse交互式界面若需要执行非该界面下的内置Shell命令,且命令与parse交互式界面命令冲突时,非该界面命令需要使用run命令,在相关命令前加上run前缀,如下示例: ```bash -atat -f pytorch parse +msprobe -f pytorch parse Parse >>> run vim cli.py Parse >>> vim cli.py ``` diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_compare.md b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_compare.md index e3537594c4..4bd05c73e2 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_compare.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_compare.md @@ -44,7 +44,7 @@ compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs) 创建比对脚本,例如compare_distributed.py,拷贝如下代码,具体参数请根据实际环境修改。 ```Python -from atat.pytorch import * +from msprobe.pytorch import * compare_distributed('./npu_dump/step0', './gpu_dump/step0', './output') ``` @@ -77,7 +77,7 @@ compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_mat 单机单卡场景下创建比对脚本,例如compare.py,拷贝如下代码,具体参数请根据实际环境修改。 ```Python -from atat.pytorch import compare +from msprobe.pytorch import compare dump_result_param={ "npu_json_path": "./npu_dump/dump.json", "bench_json_path": "./gpu_dump/dump.json", @@ -96,7 +96,7 @@ compare(dump_result_param, output_path="./output", stack_mode=True) 以compare.py为例。 ```Python -from atat.pytorch import compare +from msprobe.pytorch import compare dump_result_param={ "npu_json_path": "./npu_dump/dump.json", "bench_json_path": "./gpu_dump/dump.json", diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_overview.md b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_overview.md index 708d90b348..0194514548 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_overview.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_overview.md @@ -4,7 +4,7 @@ 在PyTorch训练网络,对同一模型或API调试过程中,遇到API相关的计算精度问题,定位时费时费力。 -atat的精度比对工具,用来进行PyTorch整网API粒度的数据dump、精度比对和溢出检测,从而定位PyTorch训练场景下的精度问题。 +msprobe的精度比对工具,用来进行PyTorch整网API粒度的数据dump、精度比对和溢出检测,从而定位PyTorch训练场景下的精度问题。 
**使用场景** @@ -42,17 +42,17 @@ atat的精度比对工具,用来进行PyTorch整网API粒度的数据dump、 1. 准备CPU或GPU训练工程。 -2. 在环境下安装atat工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 +2. 在环境下安装msprobe工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 -3. 在训练脚本内添加atat工具dump接口PrecisionDebugger采集标杆数据。详见《[精度数据采集](./dump.md)》。 +3. 在训练脚本内添加msprobe工具dump接口PrecisionDebugger采集标杆数据。详见《[精度数据采集](./dump.md)》。 4. 执行训练dump数据。 5. 将CPU或GPU训练工程迁移为NPU训练工程。详见《[PyTorch模型迁移调优指南](https://www.hiascend.com/document/detail/zh/Pytorch/60RC1/ptmoddevg/trainingmigrguide/PT_LMTMOG_0003.html)》。 -6. 在NPU环境下安装atat工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 +6. 在NPU环境下安装msprobe工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 -7. 在NPU训练脚本内添加atat工具dump接口PrecisionDebugger采集标杆数据。详见《[精度数据采集](./dump.md)》。 +7. 在NPU训练脚本内添加msprobe工具dump接口PrecisionDebugger采集标杆数据。详见《[精度数据采集](./dump.md)》。 8. NPU环境下执行训练dump数据。 diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_quickstart.md b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_quickstart.md index c053020556..4b6ac9de2f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/ptdbg_ascend_quickstart.md @@ -1,8 +1,8 @@ # **精度比对工具** -本文主要介绍atat的精度比对工具的快速入门和场景化示例。 +本文主要介绍msprobe的精度比对工具的快速入门和场景化示例。 -本文介绍的操作需要安装atat工具,详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 +本文介绍的操作需要安装msprobe工具,详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 本文介绍的操作主要是精度数据dump和精度比对,详细操作指导可参考《[精度数据采集](./dump.md)》和《[CPU或GPU与NPU精度数据比对](./ptdbg_ascend.md)》。 @@ -51,12 +51,12 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 } ``` -2. 在训练脚本内添加atat工具,dump整网数据。 +2. 
在训练脚本内添加msprobe工具,dump整网数据。 分别dump CPU或GPU以及NPU数据,在PyTorch训练脚本插入dump接口,示例代码如下(下面以NPU为例,CPU或GPU dump基本相同): ```python - from atat.pytorch import PrecisionDebugger + from msprobe.pytorch import PrecisionDebugger debugger = PrecisionDebugger(config_path="./config.json", dump_path="./npu_dump") # 请勿将以上初始化流程插入到循环代码中 @@ -82,7 +82,7 @@ PyTorch训练场景的精度问题分析建议参考以下思路进行精度比 创建并配置精度比对脚本,以创建compare.py为例,示例代码如下: ```python - from atat.pytorch import compare + from msprobe.pytorch import compare dump_result_param={ "npu_json_path": "./npu_dump/dump.json", "bench_json_path": "./gpu_dump/dump.json", @@ -140,10 +140,10 @@ python3 compare.py } ``` -2. 在NPU训练脚本内添加atat工具,执行溢出检测dump。 +2. 在NPU训练脚本内添加msprobe工具,执行溢出检测dump。 ```python - from atat.pytorch import PrecisionDebugger + from msprobe.pytorch import PrecisionDebugger debugger = PrecisionDebugger(config_path="./config.json", dump_path="./npu_dump") # 请勿将以上初始化流程插入到循环代码中 @@ -171,7 +171,7 @@ python3 compare.py 溢出解析工具执行命令如下: ```bash - atat -f pytorch run_overflow_check -api_info ./dump.json + msprobe -f pytorch run_overflow_check -api_info ./dump.json ``` 反向过程溢出的API暂不支持精度预检功能。 @@ -200,7 +200,7 @@ python3 compare.py 1. 创建比对脚本,例如compare_distributed.py,拷贝如下代码。 ```python - from atat.pytorch import * + from msprobe.pytorch import * compare_distributed('./npu_dump/step0', './gpu_dump/step0', './output') ``` @@ -219,7 +219,7 @@ python3 compare.py 多卡一般为多进程,须保证每个进程都正确调用PrecisionDebugger,或把PrecisionDebugger插入到import语句后,如: ```python -from atat.pytorch import PrecisionDebugger +from msprobe.pytorch import PrecisionDebugger debugger = PrecisionDebugger(config_path="./config.json", dump_path="./npu_dump") ``` @@ -339,10 +339,10 @@ debugger = PrecisionDebugger(config_path="./config.json", dump_path="./npu_dump" } ``` -2. 在训练脚本内添加atat工具,dump整网数据。 +2. 
在训练脚本内添加msprobe工具,dump整网数据。 ```python - from atat.pytorch import PrecisionDebugger + from msprobe.pytorch import PrecisionDebugger debugger = PrecisionDebugger(config_path="./config.json", dump_path="./npu_dump") # 请勿将以上初始化流程插入到循环代码中 diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/run_overflow_check.md b/debug/accuracy_tools/msprobe/pytorch/doc/run_overflow_check.md index 1bdc4f354c..b8c9c3b4c2 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/run_overflow_check.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/run_overflow_check.md @@ -13,7 +13,7 @@ 2. 执行溢出API解析操作。 ```bash - atat -f pytorch run_overflow_check -api_info ./dump.json + msprobe -f pytorch run_overflow_check -api_info ./dump.json ``` | 参数名称 | 说明 | 是否必选 | diff --git "a/debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" "b/debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" index b2e373feb6..05bebaf0a2 100644 --- "a/debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" +++ "b/debug/accuracy_tools/msprobe/pytorch/doc/\345\234\250\347\272\277\347\262\276\345\272\246\346\257\224\345\257\271.md" @@ -32,8 +32,8 @@ PyTorch NPU在线精度比对是ptdbg_ascend工具实现在PyTorch训练过程 1. 
在NPU训练脚本中添加在线精度比对接口,示例如下: ```python - from atat.pytorch.common.utils import seed_all - from atat.pytorch.online_dispatch import PtdbgDispatch + from msprobe.pytorch.common.utils import seed_all + from msprobe.pytorch.online_dispatch import PtdbgDispatch # 在main函数开始前固定随机数 seed_all() @@ -74,12 +74,12 @@ PyTorch NPU在线精度比对是ptdbg_ascend工具实现在PyTorch训练过程 | process_num | 多进程并发数,默认为0。 | 否 | | debug | debug信息打印,默认为False。 | 否 | ### dump数据存盘说明 -dump数据存盘目录名格式:`atat_tag_rankid_{timestamp}`。 +dump数据存盘目录名格式:`msprobe_tag_rankid_{timestamp}`。 子目录下包含1个比对结果csv文件、cpu和npudump数据目录,npu目录下包含Aten IR在NPU上的输入输出的dump数据,由于CPU的输入是直接使用NPU的输入执行,因此cpu目录下只包含执行输出的dump数据。 ```bash -atat_rank4_20230911170521 +msprobe_rank4_20230911170521 ├── compare_result_rank4_20230911170521.csv ├── cpu │   ├── native_batch_norm_backward_10_output.0.npy diff --git a/debug/accuracy_tools/ptdbg_ascend/README.md b/debug/accuracy_tools/ptdbg_ascend/README.md index dce5406bfa..d6238ef72b 100644 --- a/debug/accuracy_tools/ptdbg_ascend/README.md +++ b/debug/accuracy_tools/ptdbg_ascend/README.md @@ -2,7 +2,7 @@ ## 版本过渡提示 -当前版本ptdbg维护到2024/09/30,准备于2024/09/30下线,相关目录mstt/debug/accuracy_tools/ptdbg_ascend将于2024/09/30删除。新版本ptdbg已经合到mstt/debug/accuracy_tools/atat目录下。 +当前版本ptdbg维护到2024/09/30,准备于2024/09/30下线,相关目录mstt/debug/accuracy_tools/ptdbg_ascend将于2024/09/30删除。新版本ptdbg已经合到mstt/debug/accuracy_tools/msprobe目录下。 ## 快速安装 diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py index f42ad09bb6..aaa57968e9 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py @@ -126,7 +126,7 @@ class Const: # version message tips VERSION_MESSAGE = """The current version of ptdbg will be deprecated on September 30, 2024. The att/debug/accuracy_tools/ptdbg_ascend directory will be deleted on September 30, 2024. 
- Please use the ptdbg in the att/debug/accuracy_tools/atat directory.""" + Please use the ptdbg in the att/debug/accuracy_tools/msprobe directory.""" class CompareConst: """ diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 3568e3a47c..93456ab13e 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -36,5 +36,5 @@ setup( ext_modules=[], zip_safe=False, entry_points={ - 'console_scripts' : ['atat=atat.atat:main'], + 'console_scripts': ['msprobe=msprobe.msprobe:main'], },) \ No newline at end of file -- Gitee From e7f2bc4dfad4eb106db35bc404c16786516ced93 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 19:06:15 +0800 Subject: [PATCH 021/106] atat to msprobe7 --- debug/accuracy_tools/msprobe/{atat.py => msprobe.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename debug/accuracy_tools/msprobe/{atat.py => msprobe.py} (100%) diff --git a/debug/accuracy_tools/msprobe/atat.py b/debug/accuracy_tools/msprobe/msprobe.py similarity index 100% rename from debug/accuracy_tools/msprobe/atat.py rename to debug/accuracy_tools/msprobe/msprobe.py -- Gitee From a7ad358828c39b9e7da0ff4023bc92ce806757f1 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 19 Jul 2024 19:13:46 +0800 Subject: [PATCH 022/106] atat to msprobe8 --- debug/accuracy_tools/msprobe/README.md | 8 ++++---- debug/accuracy_tools/setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/README.md b/debug/accuracy_tools/msprobe/README.md index f22718248a..84c531995a 100644 --- a/debug/accuracy_tools/msprobe/README.md +++ b/debug/accuracy_tools/msprobe/README.md @@ -4,7 +4,7 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud ## 工具安装 -精度工具合一软件包名称:`ascend_training_accuracy_tools-{version}-py3-none-any.whl` +精度工具合一软件包名称:`mindstudio_probe-{version}-py3-none-any.whl` 1. 
使用pip命令安装numpy、openpyxl、pandas、PyYAML、rich、torch、tqdm依赖。 @@ -43,19 +43,19 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud 4. 执行如下命令进行安装。 ```bash - pip3 install ./ascend_training_accuracy_tools-{version}-py3-none-any.whl + pip3 install ./mindstudio_probe-{version}-py3-none-any.whl ``` 若为覆盖安装,请在命令行末尾增加“--force-reinstall”参数强制安装,例如: ```bash - pip3 install ./ascend_training_accuracy_tools-{version}-py3-none-any.whl --force-reinstall + pip3 install ./mindstudio_probe-{version}-py3-none-any.whl --force-reinstall ``` 提示如下信息则表示安装成功。 ```bash - Successfully installed ascend_training_accuracy_tools-{version} + Successfully installed mindstudio_probe-{version} ``` diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 93456ab13e..3d2c3bb870 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -18,7 +18,7 @@ from setuptools import setup, find_packages setup( - name='ascend_training_accuracy_tools', + name='mindstudio_probe', version='1.0', description='This is a pytorch precision comparison tools', long_description='This is a pytorch precision comparison tools, include ptdbg and api accuracy checker', -- Gitee From 644e3513ecdc28155878511cf002dabee197bcab Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Sat, 20 Jul 2024 09:19:00 +0800 Subject: [PATCH 023/106] codecheck fix --- .../msprobe/pytorch/hook_module/wrap_aten.py | 6 +++--- .../msprobe/pytorch/parse_tool/lib/utils.py | 20 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py index f3a6946ad9..4617e4854f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py @@ -80,13 +80,13 @@ class AtenOPPacketTemplate(): else: return attr - def overloads(self): - return self.opPacket.overloads() - @torch_device_guard def 
__call__(self, *args, **kwargs): return AtenOPTemplate(self.opPacket, self.hook)(*args, **kwargs) + def overloads(self): + return self.opPacket.overloads() + def wrap_aten_op(op, hook): return AtenOPPacketTemplate(op, hook) diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py index 1e4c1882d0..266e93fb3e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py @@ -73,16 +73,6 @@ class Util: def path_strip(path): return path.strip("'").strip('"') - @staticmethod - def _gen_npu_dump_convert_file_info(name, match, dir_path): - return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-4]), op_name=match.group(2), - op_type=match.group(1), task_id=int(match.group(3)), anchor_type=match.groups()[-3], - anchor_idx=int(match.groups()[-2])) - - @staticmethod - def _gen_numpy_file_info(name, math, dir_path): - return FileDesc(name, dir_path) - @staticmethod def check_executable_file(path): check_path_owner_consistent(path) @@ -184,6 +174,16 @@ class Util: def change_filemode_safe(self, path): change_mode(path, FileCheckConst.DATA_FILE_AUTHORITY) + @staticmethod + def _gen_npu_dump_convert_file_info(name, match, dir_path): + return DumpDecodeFileDesc(name, dir_path, int(match.groups()[-4]), op_name=match.group(2), + op_type=match.group(1), task_id=int(match.group(3)), anchor_type=match.groups()[-3], + anchor_idx=int(match.groups()[-2])) + + @staticmethod + def _gen_numpy_file_info(name, math, dir_path): + return FileDesc(name, dir_path) + def execute_command(self, cmd): if not cmd: self.log.error("Commond is None") -- Gitee From 66f7f5d574d759f60b87d4cf4ba71f0e912bc4e5 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Sat, 20 Jul 2024 11:29:05 +0800 Subject: [PATCH 024/106] codecheck fix --- debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index d417672317..cb5f4ab875 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -739,7 +739,7 @@ def parse(pkl_file, module_name_prefix): logger.info(summary_info) -def op_item_parse(item, op_name, index, item_list=[], top_bool=True): +def op_item_parse(item, op_name, index, item_list=None, top_bool=True): if item_list is None: item_list = [] if item is None or (isinstance(item, dict) and not item): -- Gitee From af10f45376737ca69851b50e021949c4158b4eb5 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Sat, 20 Jul 2024 14:09:25 +0800 Subject: [PATCH 025/106] atat to msprobe --- debug/accuracy_tools/msprobe/msprobe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/msprobe.py b/debug/accuracy_tools/msprobe/msprobe.py index 89fa4cf965..698165b615 100644 --- a/debug/accuracy_tools/msprobe/msprobe.py +++ b/debug/accuracy_tools/msprobe/msprobe.py @@ -27,7 +27,7 @@ from msprobe.pytorch.api_accuracy_checker.run_ut.run_overflow_check import _run_ def main(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, - description="msprobe(ascend training accuracy tools), [Powered by MindStudio].\n" + description="msprobe(mindstudio probe), [Powered by MindStudio].\n" "Providing one-site accuracy difference debugging toolkit for training on Ascend Devices.\n" f"For any issue, refer README.md first", ) -- Gitee From 3d2a327a20c814f9061b7a4eec07366e5bd40deb Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Sat, 20 Jul 2024 16:44:53 +0800 Subject: [PATCH 026/106] md5 compare bugfix --- .../msprobe/pytorch/compare/acc_compare.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py 
b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index cb5f4ab875..a4b6884343 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -526,8 +526,10 @@ def handle_inf_nan(n_value, b_value): return n_value, b_value -def find_error_rows(result, last_len, n_num_input, highlight_dict, summary_compare=False): +def find_error_rows(result, last_len, n_num_input, highlight_dict, summary_compare=False, md5_compare=False): """找到单个API中需要高亮的行""" + if md5_compare: + return npu_max_index = get_header_index('NPU max', summary_compare) bench_max_index = get_header_index('Bench max', summary_compare) max_diff_index = get_header_index('Max diff' if summary_compare else 'MaxAbsErr', summary_compare) @@ -583,7 +585,7 @@ def get_name_and_state(name): return api_name, state -def find_compare_result_error_rows(result_df, highlight_dict, summary_compare): +def find_compare_result_error_rows(result_df, highlight_dict, summary_compare, md5_compare): """将dataframe根据API分组,并找到有误差的算子用于高亮""" result = result_df.values start, input_num, output_num, end = 0, 0, 0, len(result_df) @@ -601,7 +603,7 @@ def find_compare_result_error_rows(result_df, highlight_dict, summary_compare): else: output_num = num find_error_rows(result[start:start + input_num + output_num], start, input_num, highlight_dict, - summary_compare) + summary_compare, md5_compare) num, last_api_name, last_state = 1, api_name, state start += input_num + output_num input_num, output_num = 1, 0 @@ -612,7 +614,7 @@ def find_compare_result_error_rows(result_df, highlight_dict, summary_compare): input_num = num else: output_num = num - find_error_rows(result[start:start + input_num + output_num], start, input_num, highlight_dict, summary_compare) + find_error_rows(result[start:start + input_num + output_num], start, input_num, highlight_dict, summary_compare, md5_compare) def highlight_rows_xlsx(result_df, highlight_dict, file_path): @@ -697,7 
+699,7 @@ def compare_core(input_parma, output_path, **kwargs): if not md5_compare and not summary_compare: result_df = _do_multi_process(input_parma, result_df) - find_compare_result_error_rows(result_df, highlight_dict, summary_compare) + find_compare_result_error_rows(result_df, highlight_dict, summary_compare, md5_compare) highlight_rows_xlsx(result_df, highlight_dict, file_path) if auto_analyze: advisor = Advisor(result_df, output_path) -- Gitee From ce99a02c4e9bac7323dc739d76df8fb46bef79c3 Mon Sep 17 00:00:00 2001 From: shawn_zhu1 Date: Sat, 20 Jul 2024 16:20:17 +0800 Subject: [PATCH 027/106] =?UTF-8?q?=E3=80=90feature=E3=80=91msprobe?= =?UTF-8?q?=E5=8F=91=E5=B8=83pypi=E7=A4=BE=E5=8C=BA=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E4=B8=80=E9=94=AE=E5=AE=89=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/__init__.py | 0 debug/accuracy_tools/LICENSE | 201 ++++++++++++++++++++++++++++ debug/accuracy_tools/MANIFEST.in | 5 +- debug/accuracy_tools/atat/README.md | 29 +++- debug/accuracy_tools/setup.py | 72 +++++++--- 5 files changed, 282 insertions(+), 25 deletions(-) create mode 100644 debug/__init__.py create mode 100644 debug/accuracy_tools/LICENSE diff --git a/debug/__init__.py b/debug/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/debug/accuracy_tools/LICENSE b/debug/accuracy_tools/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/debug/accuracy_tools/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/debug/accuracy_tools/MANIFEST.in b/debug/accuracy_tools/MANIFEST.in index 7242c0c956..4f65e81386 100644 --- a/debug/accuracy_tools/MANIFEST.in +++ b/debug/accuracy_tools/MANIFEST.in @@ -1,2 +1,3 @@ -recursive-include atat/ * -recursive-exclude atat/test * \ No newline at end of file +include README.md +include LICENSE +recursive-include atat * diff --git a/debug/accuracy_tools/atat/README.md b/debug/accuracy_tools/atat/README.md index de7d74f8f2..e77eb00d08 100644 --- a/debug/accuracy_tools/atat/README.md +++ b/debug/accuracy_tools/atat/README.md @@ -4,8 +4,35 @@ MindStudio精度调试工具(ascend_training_accuracy_tools),简称atat, ## 工具安装 -精度工具合一软件包名称:`ascend_training_accuracy_tools-{version}-py3-none-any.whl` +# TODO +精度工具合一软件包名称:`msprobe-{version}-py3-none-any.whl` +### pip安装 +```shell +pip install msprobe +``` + +### 从源码安装 +1. 克隆或者下载项目源代码 + +```shell +git clone xxx +cd xxx +``` + +2. 安装setuptools和wheel + +```shell +pip install setuptools wheel +``` + +3. 安装msprobe + +```shell +python setup.py install +``` + +### 下载whl包安装 1. 使用pip命令安装numpy、openpyxl、pandas、PyYAML、rich、torch、tqdm依赖。 若环境中已安装部分依赖,不需要重复安装。 diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 3568e3a47c..e0d84df103 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" # Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,28 +10,59 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" -from setuptools import setup, find_packages +import setuptools -setup( - name='ascend_training_accuracy_tools', - version='1.0', - description='This is a pytorch precision comparison tools', - long_description='This is a pytorch precision comparison tools, include ptdbg and api accuracy checker', - packages=find_packages(), - install_requires=[ - "wheel", - "numpy", - "pandas >= 1.3.5", - "pyyaml", - "rich", - "tqdm", - "openpyxl" - ], + +__version__ = '1.0.4' + +INSTALL_REQUIRED = [ + "wheel", + "numpy", + "pandas >= 1.3.5", + "pyyaml", + "rich", + "tqdm", + "openpyxl" +] + +EXCLUDE_PKGS = [ + "api_accuracy_checker*", + "grad_tool*", + "kj600*", + "ptdbg_ascend*" +] + +setuptools.setup( + name="msprobe-test", + version=__version__, + description="Pytorch Ascend Probe Utils", + long_description="Msprobe is a set of tools for diagnosing and improve model accuracy on Ascend NPU, including API " + "acc checker, ptdbg, grad tool.", + url="https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe", + author="Ascend Team", + author_email="pmail_mindstudio@huawei.com", + packages=setuptools.find_namespace_packages(exclude=EXCLUDE_PKGS, include=["atat", "atat*"]), include_package_data=True, + python_requires=">=3.6.2", + install_requires=INSTALL_REQUIRED, + classifiers=[ + 'Intended Audience :: Developers', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'Programming Language :: Python :: 3', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Mathematics', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + license='Apache License 2.0', + keywords='pytorch msprobe ascend', ext_modules=[], zip_safe=False, entry_points={ - 'console_scripts' : ['atat=atat.atat:main'], - },) \ No newline at end of file + 'console_scripts': 
['atat=atat.atat:main'], + } +) -- Gitee From 1219a9aa9217888a79bc23434f4c877239827304 Mon Sep 17 00:00:00 2001 From: shawn_zhu1 Date: Sat, 20 Jul 2024 17:50:05 +0800 Subject: [PATCH 028/106] =?UTF-8?q?=E3=80=90feature=E3=80=91msprobe?= =?UTF-8?q?=E5=8F=91=E5=B8=83pypi=E7=A4=BE=E5=8C=BA=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E4=B8=80=E9=94=AE=E5=AE=89=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index e0d84df103..607fdb677d 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -14,7 +14,7 @@ import setuptools -__version__ = '1.0.4' +__version__ = '1.0.0' INSTALL_REQUIRED = [ "wheel", @@ -34,11 +34,11 @@ EXCLUDE_PKGS = [ ] setuptools.setup( - name="msprobe-test", + name="mindstudio-probe", version=__version__, description="Pytorch Ascend Probe Utils", - long_description="Msprobe is a set of tools for diagnosing and improve model accuracy on Ascend NPU, including API " - "acc checker, ptdbg, grad tool.", + long_description="MindStudio-Probe is a set of tools for diagnosing and improve model accuracy on Ascend NPU, " + "including API acc checker, ptdbg, grad tool.", url="https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe", author="Ascend Team", author_email="pmail_mindstudio@huawei.com", -- Gitee From 9f3738f3e0c1113e692ad99432f70322a504910a Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 09:59:40 +0800 Subject: [PATCH 029/106] add blacklist --- .../accuracy_tools/msprobe/config/config.json | 5 ++ .../msprobe/core/common/const.py | 5 +- .../api_accuracy_checker/common/config.py | 10 ++++ .../api_accuracy_checker/common/utils.py | 1 + .../pytorch/api_accuracy_checker/config.yaml | 1 + .../api_accuracy_checker/run_ut/run_ut.py | 53 +++++++++++-------- .../msprobe/pytorch/pt_config.py | 41 
++++++++++++++ 7 files changed, 94 insertions(+), 22 deletions(-) diff --git a/debug/accuracy_tools/msprobe/config/config.json b/debug/accuracy_tools/msprobe/config/config.json index 70a630a40a..c6077b75ae 100644 --- a/debug/accuracy_tools/msprobe/config/config.json +++ b/debug/accuracy_tools/msprobe/config/config.json @@ -24,5 +24,10 @@ "overflow_check": { "overflow_nums": 1, "check_mode":"all" + }, + "run_ut": { + "white_list": [], + "black_list": [], + "error_data_path": "./" } } \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index dea829c3ff..7938f03f51 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -15,6 +15,8 @@ class Const: OFF = 'OFF' BACKWARD = 'backward' FORWARD = 'forward' + DEFAULT_LIST = [] + DEFAULT_PATH = './' # dump mode ALL = "all" @@ -52,12 +54,13 @@ class Const: ENV_ENABLE = "1" ENV_DISABLE = "0" MAX_SEED_VALUE = 4294967295 # 2**32 - 1 - TASK_LIST = ["tensor", "statistics", "overflow_check", "free_benchmark"] + TASK_LIST = ["tensor", "statistics", "overflow_check", "free_benchmark", "run_ut"] LEVEL_LIST = ["L0", "L1", "L2", "mix"] STATISTICS = "statistics" TENSOR = "tensor" OVERFLOW_CHECK = "overflow_check" FREE_BENCHMARK = "free_benchmark" + RUN_UT = "run_ut" ATTR_NAME_PREFIX = "wrap_" KERNEL_DUMP = "kernel_dump" DATA = "data" diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py index 8e8ceda947..5488f89748 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py @@ -24,6 +24,7 @@ class Config: def validate(key, value): validators = { 'white_list': list, + 'black_list': list, 'error_data_path': str, 'precision': int } @@ -42,6 +43,15 @@ class Config: if 
invalid_api: raise ValueError( f"{', '.join(invalid_api)} is not in support_wrap_ops.yaml, please check the white_list") + if key == 'black_list': + if not isinstance(value, list): + raise ValueError("black_list must be a list type") + if not all(isinstance(i, str) for i in value): + raise ValueError("All elements in black_list must be of str type") + invalid_api = [i for i in value if i not in WrapApi] + if invalid_api: + raise ValueError( + f"{', '.join(invalid_api)} is not in support_wrap_ops.yaml, please check the black_list") return value diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py index d5d08818a9..b6e8932960 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py @@ -166,6 +166,7 @@ def initialize_save_path(save_path, dir_name): os.mkdir(data_path, mode=FileCheckConst.DATA_DIR_AUTHORITY) data_path_checker = FileChecker(data_path, FileCheckConst.DIR) data_path_checker.common_check() + return data_path def write_pt(file_path, tensor): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml index 7f26c72aa3..2dac535dc0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml @@ -1,4 +1,5 @@ white_list: [] +black_list: [] error_data_path: './' precision: 14 \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 6295245a26..ae795f2dc4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -32,6 +32,7 @@ from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward from msprobe.core.common.file_check import FileOpen, FileChecker, \ change_mode, check_file_suffix, check_link, check_path_before_create, create_directory from msprobe.pytorch.common.log import logger +from msprobe.pytorch.pt_config import parse_json_config from msprobe.core.common.const import Const, FileCheckConst, CompareConst current_time = time.strftime("%Y%m%d%H%M%S") @@ -39,7 +40,8 @@ UT_ERROR_DATA_DIR = 'ut_error_data' + current_time RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', - 'save_error_data', 'is_continue_run_ut', 'real_data_path']) + 'save_error_data', 'is_continue_run_ut', 'real_data_path', 'white_list', + 'black_list', 'error_data_path']) not_backward_list = ['repeat_interleave'] not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'} not_raise_dtype_set = {'type_as'} @@ -176,8 +178,7 @@ def run_ut(config): logger.info(f"UT task result will be saved in {config.result_csv_path}") logger.info(f"UT task details will be saved in {config.details_csv_path}") if config.save_error_data: - error_data_path = os.path.abspath(os.path.join(msCheckerConfig.error_data_path, UT_ERROR_DATA_DIR)) - logger.info(f"UT task error_datas will be saved in {error_data_path}") + logger.info(f"UT task error_datas will be saved in {config.error_data_path}") compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut) with FileOpen(config.result_csv_path, 'r') as file: csv_reader = csv.reader(file) @@ -188,17 +189,17 @@ def run_ut(config): continue if is_unsupported_api(api_full_name): # TODO run_ut does not support 
to the npu fusion api and distributed api continue + [_, api_name, _] = api_full_name.split(Const.SEP) try: - if msCheckerConfig.white_list: - [_, api_name, _] = api_full_name.split(Const.SEP) - if api_name not in set(msCheckerConfig.white_list): - continue + if msCheckerConfig.black_list and api_name in config.black_list: + continue + if msCheckerConfig.white_list and api_name not in config.white_list: + continue data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict) is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info) if config.save_error_data: - do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) + do_save_error_data(api_full_name, data_info, config.error_data_path, is_fwd_success, is_bwd_success) except Exception as err: - [_, api_name, _] = api_full_name.split(Const.SEP) if "expected scalar type Long" in str(err): logger.warning(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API " f"'int32_to_int64' list in accuracy_tools/api_accuracy_check/common/utils.py file.") @@ -227,16 +228,16 @@ def is_unsupported_api(api_name): return flag -def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success): +def do_save_error_data(api_full_name, data_info, error_data_path, is_fwd_success, is_bwd_success): if not is_fwd_success or not is_bwd_success: - processor = UtDataProcessor(os.path.join(msCheckerConfig.error_data_path, UT_ERROR_DATA_DIR)) + processor = UtDataProcessor(error_data_path) for element in data_info.in_fwd_data_list: processor.save_tensors_in_element(api_full_name + '.forward.input', element) - processor.save_tensors_in_element(api_full_name + '.forward.output.bench', data_info.bench_out) - processor.save_tensors_in_element(api_full_name + '.forward.output.device', data_info.device_out) + processor.save_tensors_in_element(api_full_name + '.forward.output.bench', data_info.bench_output) + 
processor.save_tensors_in_element(api_full_name + '.forward.output.device', data_info.device_output) processor.save_tensors_in_element(api_full_name + '.backward.input', data_info.grad_in) - processor.save_tensors_in_element(api_full_name + '.backward.output.bench', data_info.bench_grad_out) - processor.save_tensors_in_element(api_full_name + '.backward.output.device', data_info.device_grad_out) + processor.save_tensors_in_element(api_full_name + '.backward.output.bench', data_info.bench_grad) + processor.save_tensors_in_element(api_full_name + '.backward.output.device', data_info.device_grad) def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict): @@ -314,14 +315,14 @@ def run_backward(args, grad, grad_index, out): return grad_out -def initialize_save_error_data(): - error_data_path = msCheckerConfig.error_data_path +def initialize_save_error_data(error_data_path): check_path_before_create(error_data_path) create_directory(error_data_path) - error_data_path_checker = FileChecker(msCheckerConfig.error_data_path, FileCheckConst.DIR, + error_data_path_checker = FileChecker(error_data_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE) error_data_path = error_data_path_checker.common_check() - initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) + error_data_path =initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) + return error_data_path def get_validated_result_csv_path(result_csv_path, mode): @@ -384,6 +385,8 @@ def _run_ut_parser(parser): required=False) parser.add_argument("-f", "--filter_api", dest="filter_api", action="store_true", help=" Whether to filter the api in the api_info_file.", required=False) + parser.add_argument("-config", "--config_path", dest="config_path", default="", type=str, + help=" The path of config.json", required=False) def preprocess_forward_content(forward_content): @@ -464,14 +467,22 @@ def run_ut_command(args): if args.result_csv_path: result_csv_path = 
get_validated_result_csv_path(args.result_csv_path, 'result') details_csv_path = get_validated_details_csv_path(result_csv_path) + white_list = msCheckerConfig.white_list + black_list = msCheckerConfig.black_list + error_data_path = msCheckerConfig.error_data_path + if args.config_path: + _, task_config = parse_json_config(args.config_path, Const.RUN_UT) + white_list = task_config.white_list + black_list = task_config.black_list + error_data_path = task_config.error_data_path if save_error_data: if args.result_csv_path: time_info = result_csv_path.split('.')[0].split('_')[-1] global UT_ERROR_DATA_DIR UT_ERROR_DATA_DIR = 'ut_error_data' + time_info - initialize_save_error_data() + error_data_path = initialize_save_error_data(error_data_path) run_ut_config = RunUTConfig(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, - args.result_csv_path, real_data_path) + args.result_csv_path, real_data_path, set(white_list), set(black_list), error_data_path) run_ut(run_ut_config) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index ff09bfd8e9..f373becdea 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -4,6 +4,10 @@ import os from msprobe.core.common_config import CommonConfig, BaseConfig from msprobe.core.common.file_check import FileOpen from msprobe.core.common.const import Const +from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps + + +WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) class TensorConfig(BaseConfig): @@ -61,6 +65,43 @@ class FreeBenchmarkCheckConfig(BaseConfig): if self.preheat_step and self.preheat_step == 0: raise Exception("preheat_step cannot be 0") + +class RunUTConfig(BaseConfig): + def __init__(self, json_config): + super().__init__(json_config) + self.white_list = json_config.get("white_list", 
Const.DEFAULT_LIST) + self.black_list = json_config.get("black_list", Const.DEFAULT_LIST) + self.error_data_path = json_config.get("error_data_path", Const.DEFAULT_PATH) + self.check_run_ut_config() + + def check_run_ut_config(self): + self.check_white_list_config() + self.check_black_list_config() + self.check_error_data_path_config() + + def check_white_list_config(self): + if not isinstance(self.white_list, list): + raise Exception("white_list must be a list type") + if not all(isinstance(item, str) for item in self.white_list): + raise Exception("All elements in white_list must be string type") + invalid_api = [item for item in self.white_list if item not in WrapApi] + if invalid_api: + raise Exception("Invalid api in white_list: {}".format(invalid_api)) + + def check_black_list_config(self): + if not isinstance(self.black_list, list): + raise Exception("black_list must be a list type") + if not all(isinstance(item, str) for item in self.black_list): + raise Exception("All elements in black_list must be string type") + invalid_api = [item for item in self.black_list if item not in WrapApi] + if invalid_api: + raise Exception("Invalid api in black_list: {}".format(invalid_api)) + + def check_error_data_path_config(self): + if not os.path.exists(self.error_data_path): + raise Exception("error_data_path: %s is not exist", self.error_data_path) + + def parse_task_config(task, json_config): default_dic = {} if task == Const.TENSOR: -- Gitee From 2215e12b5ff581e0bc5c6cbb89a55ce37c313da5 Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Mon, 22 Jul 2024 10:32:56 +0800 Subject: [PATCH 030/106] =?UTF-8?q?[accuracy=5Ftools]atat=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E6=94=B9=E5=90=8D=E4=B8=BAmaprobe=E8=B5=84=E6=96=99?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 14 ++++++++++++-- debug/accuracy_tools/README.md | 3 ++- 
...237\272\347\272\277\346\212\245\345\221\212.md" | 0 3 files changed, 14 insertions(+), 3 deletions(-) rename "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" => "debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" (100%) diff --git a/README.md b/README.md index 014a4d59f0..1b097fdfd5 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # 变更通知 + + 原Ascend Training Tools工具更名为MindStudio Training Tools,MindStudio训练工具链。变更计划如下: 1. 2024.06.25本代码仓名称变更为mstt。 @@ -34,11 +36,19 @@ MindStudio Training Tools,MindStudio训练工具链。针对训练&大模型 ### [精度工具](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools) -1. [api_accuracy_checker(Ascend模型精度预检工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/api_accuracy_checker) +1. [MindStudio Probe( MindStudio精度调试工具)](https://gitee.com/cai-weiwei1989/att_all/tree/master/debug/accuracy_tools/msprobe)。 + + MindStudio Training Tools工具链下精度调试部分的工具包,主要包括精度预检和精度比对等子工具。 + +2. [api_accuracy_checker(Ascend模型精度预检工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/api_accuracy_checker) + + 2024.09.30下线 在昇腾NPU上扫描用户训练模型中所有API,进行API复现,给出精度情况的诊断和分析。 -2. [ptdbg_ascend(PyTorch精度工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/ptdbg_ascend) +3. 
[ptdbg_ascend(PyTorch精度工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/ptdbg_ascend) + + 2024.09.30下线 进行PyTorch整网API粒度的数据dump、精度比对和溢出检测,从而定位PyTorch训练场景下的精度问题。 diff --git a/debug/accuracy_tools/README.md b/debug/accuracy_tools/README.md index 962736908f..0445a00066 100644 --- a/debug/accuracy_tools/README.md +++ b/debug/accuracy_tools/README.md @@ -4,10 +4,11 @@ MindStudio Training Tools工具针对模型训练精度问题设计推出了一 ### 子功能介绍 -NPU上训练的网络存在精度问题,精度指标(loss或者具体的评价指标)与标杆相差较多。对于该场景的问题,可以使用**Ascend模型精度预检工具**或者**PyTorch精度工具**进行定位。 +NPU上训练的网络存在精度问题,精度指标(loss或者具体的评价指标)与标杆相差较多。对于该场景的问题,可以使用**MindStudio Probe工具**的精度预检工具和精度比对工具或**Ascend模型精度预检工具**、**PyTorch精度工具**进行定位。 | 工具名称 | 说明 | | ------------------------------------------------------------ | ------------------------------------------------------------ | +| [MindStudio Probe( MindStudio精度调试工具)](https://gitee.com/cai-weiwei1989/att_all/tree/master/debug/accuracy_tools/msprobe) | MindStudio Training Tools工具链下精度调试部分的工具包,主要包括精度预检和精度比对等子工具。 | | [api_accuracy_checker(Ascend模型精度预检工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/api_accuracy_checker) | 在昇腾NPU上扫描用户训练模型中所有API,进行API复现,给出精度情况的诊断和分析。 | | [ptdbg_ascend(PyTorch精度工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/ptdbg_ascend) | 进行PyTorch整网API粒度的数据dump、精度比对和溢出检测,从而定位PyTorch训练场景下的精度问题。 | diff --git "a/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" "b/debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" similarity index 100% rename from 
"debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" rename to "debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" -- Gitee From 0c7fa5f0cb6a04cff95e2564882abb7c86cc335e Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Mon, 22 Jul 2024 10:33:47 +0800 Subject: [PATCH 031/106] =?UTF-8?q?[accuracy=5Ftools]atat=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E6=94=B9=E5=90=8D=E4=B8=BAmaprobe=E8=B5=84=E6=96=99?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...50\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename "debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" => "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" (100%) diff --git "a/debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" "b/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" similarity index 100% rename from 
"debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" rename to "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" -- Gitee From d84c56ebd843b1fb5f97fa60b744a29c3fb9f514 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 11:11:05 +0800 Subject: [PATCH 032/106] fix bug --- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index f373becdea..b206564420 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -116,6 +116,9 @@ def parse_task_config(task, json_config): elif task == Const.FREE_BENCHMARK: config_dic = json_config.get(Const.FREE_BENCHMARK) if json_config.get(Const.FREE_BENCHMARK) else default_dic return FreeBenchmarkCheckConfig(config_dic) + elif task == Const.RUN_UT: + config_dic = json_config.get(Const.RUN_UT) if json_config.get(Const.RUN_UT) else default_dic + return RunUTConfig(config_dic) else: return StatisticsConfig(default_dic) -- Gitee From 1e8cc5778bdabaca1298d321542bbf2e5315a619 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 11:21:56 +0800 Subject: [PATCH 033/106] fix bug --- .../msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index ae795f2dc4..30994f7094 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -191,9 +191,9 @@ def run_ut(config): continue [_, api_name, _] = api_full_name.split(Const.SEP) try: - if msCheckerConfig.black_list and api_name in config.black_list: + if config.black_list and api_name in config.black_list: continue - if msCheckerConfig.white_list and api_name not in config.white_list: + if config.white_list and api_name not in config.white_list: continue data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict) is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info) -- Gitee From 32a892fba6709cd9bd98a2a520bfab649fc8e12f Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Mon, 22 Jul 2024 10:40:58 +0800 Subject: [PATCH 034/106] =?UTF-8?q?[accuracy=5Ftools]atat=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E6=94=B9=E5=90=8D=E4=B8=BAmaprobe=E8=B5=84=E6=96=99?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- debug/accuracy_tools/README.md | 10 +++++----- ...345\237\272\347\272\277\346\212\245\345\221\212.md" | 0 3 files changed, 6 insertions(+), 6 deletions(-) rename "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" => "debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" (100%) diff --git a/README.md b/README.md index 1b097fdfd5..964ac19ee5 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ MindStudio Training Tools,MindStudio训练工具链。针对训练&大模型 ### [精度工具](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools) -1. 
[MindStudio Probe( MindStudio精度调试工具)](https://gitee.com/cai-weiwei1989/att_all/tree/master/debug/accuracy_tools/msprobe)。 +1. [MindStudio Probe( MindStudio精度调试工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe)。 MindStudio Training Tools工具链下精度调试部分的工具包,主要包括精度预检和精度比对等子工具。 diff --git a/debug/accuracy_tools/README.md b/debug/accuracy_tools/README.md index 0445a00066..e28c1d34ad 100644 --- a/debug/accuracy_tools/README.md +++ b/debug/accuracy_tools/README.md @@ -4,18 +4,18 @@ MindStudio Training Tools工具针对模型训练精度问题设计推出了一 ### 子功能介绍 -NPU上训练的网络存在精度问题,精度指标(loss或者具体的评价指标)与标杆相差较多。对于该场景的问题,可以使用**MindStudio Probe工具**的精度预检工具和精度比对工具或**Ascend模型精度预检工具**、**PyTorch精度工具**进行定位。 +NPU上训练的网络存在精度问题,精度指标(loss或者具体的评价指标)与标杆相差较多。对于该场景的问题,可以使用**MindStudio Probe工具**的**精度预检工具**和**精度比对工具**进行定位。 | 工具名称 | 说明 | | ------------------------------------------------------------ | ------------------------------------------------------------ | -| [MindStudio Probe( MindStudio精度调试工具)](https://gitee.com/cai-weiwei1989/att_all/tree/master/debug/accuracy_tools/msprobe) | MindStudio Training Tools工具链下精度调试部分的工具包,主要包括精度预检和精度比对等子工具。 | +| [MindStudio Probe( MindStudio精度调试工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe) | MindStudio Training Tools工具链下精度调试部分的工具包,主要包括精度预检和精度比对等子工具。 | | [api_accuracy_checker(Ascend模型精度预检工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/api_accuracy_checker) | 在昇腾NPU上扫描用户训练模型中所有API,进行API复现,给出精度情况的诊断和分析。 | | [ptdbg_ascend(PyTorch精度工具)](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/ptdbg_ascend) | 进行PyTorch整网API粒度的数据dump、精度比对和溢出检测,从而定位PyTorch训练场景下的精度问题。 | ### 场景介绍 -**Ascend模型精度预检工具**会对全网每一个API根据其实际训练中的shape、dtype和数值范围生成随机的输入,对比它与标杆的输出差异,并指出输出差异过大不符合精度标准的API。该工具检查单API精度问题准确率超过80%,对比一般dump比对方法减少落盘数据量99%以上。具体使用请参见《[Ascend模型精度预检工具](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/api_accuracy_checker/README.md)》 
+**精度预检工具**会对全网每一个API根据其实际训练中的shape、dtype和数值范围生成随机的输入,对比它与标杆的输出差异,并指出输出差异过大不符合精度标准的API。该工具检查单API精度问题准确率超过80%,对比一般dump比对方法减少落盘数据量99%以上。 -**PyTorch精度工具精度比对功能**可以对NPU整网API数据进行与CPU或GPU标杆数据的精度比对,从而检测精度问题。具体来说,dump统计量、分段dump、模块化dump,通讯算子dump等功能可以用较轻的数据量实现不同侧重的精度比对,从而定位精度问题。具体使用请参见《[ptdbg_ascend精度工具功能说明](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/ptdbg_ascend/doc)》。 +**精度比对功能**可以对NPU整网API数据进行与CPU或GPU标杆数据的精度比对,从而检测精度问题。具体来说,dump统计量、分段dump、模块化dump,通讯算子dump等功能可以用较轻的数据量实现不同侧重的精度比对,从而定位精度问题。 -**PyTorch精度工具溢出检测功能**是在判断训练网络可能存在溢出现象时,例如某个step的loss突然变成inf nan,或者混精场景下loss_scale不断减小,可以通过ptdbg_ascend的精度检测工具检测API的溢出情况。具体使用请参见《[ptdbg_ascend精度工具功能说明](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/ptdbg_ascend/doc)》。 \ No newline at end of file +**溢出检测功能**是在判断训练网络可能存在溢出现象时,例如某个step的loss突然变成inf nan,或者混精场景下loss_scale不断减小,可以通过ptdbg_ascend的精度检测工具检测API的溢出情况。 \ No newline at end of file diff --git "a/debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" "b/debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" similarity index 100% rename from "debug/accuracy_tools/msprobe/pytorch/doc/atat\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" rename to "debug/accuracy_tools/msprobe/pytorch/doc/msprobe\347\262\276\345\272\246\345\267\245\345\205\267\346\225\260\346\215\256dump\346\240\207\345\207\206\346\200\247\350\203\275\345\237\272\347\272\277\346\212\245\345\221\212.md" -- Gitee From 32aa28126de7a2e0e69a8b0309ab259be8abe65f Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Mon, 22 
Jul 2024 14:50:38 +0800 Subject: [PATCH 035/106] =?UTF-8?q?[profiler]ascend=20insight=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E6=94=B9=E5=90=8D=E4=B8=BAMindStudio=20Insight?= =?UTF-8?q?=E8=B5=84=E6=96=99=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- profiler/cluster_analyse/README.md | 12 ++++++------ profiler/compare_tools/README.md | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 014a4d59f0..dd25d20158 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ MindStudio Training Tools,MindStudio训练工具链。针对训练&大模型 2. [cluster_analyse(集群分析工具)](https://gitee.com/ascend/mstt/tree/master/profiler/cluster_analyse) - 提供多机多卡的集群分析能力(基于通信域的通信分析和迭代耗时分析), 当前需要配合Ascend Insight的集群分析功能使用。 + 提供多机多卡的集群分析能力(基于通信域的通信分析和迭代耗时分析), 当前需要配合MindStudio Insight的集群分析功能使用。 3. [affinity_cpu_bind (亲和性cpu绑核工具) ](https://gitee.com/ascend/mstt/tree/master/profiler/affinity_cpu_bind) diff --git a/profiler/cluster_analyse/README.md b/profiler/cluster_analyse/README.md index deaebb6cde..fdd43ca965 100644 --- a/profiler/cluster_analyse/README.md +++ b/profiler/cluster_analyse/README.md @@ -86,7 +86,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( ### 交付件 -集群分析工具的交付件通过Ascend Insight工具展示,详见《[MindStudio Ascend Insight用户指南](https://www.hiascend.com/document/detail/zh/mindstudio/70RC1/GUI-baseddevelopmenttool/msascendinsightug/AscendInsight_0002.html)》。 +集群分析工具的交付件通过MindStudio Insight工具展示,详见《[MindStudio Insight用户指南](https://www.hiascend.com/document/detail/zh/mindstudio/70RC2/GUI-baseddevelopmenttool/msascendinsightug/AscendInsight_0002.html)》。 #### cluster_step_trace_time.csv @@ -156,25 +156,25 @@ L列:Preparing,指迭代开始到首个计算或通信算子运行的时间 #### cluster_analysis.db -解析analysis.db或ascend_pytorch_profiler_{rank_id}.db生成的交付件,根据数据解析模式不同而解析不同的数据,可以使用Ascend Insight工具展示。 +解析analysis.db或ascend_pytorch_profiler_{rank_id}.db生成的交付件,根据数据解析模式不同而解析不同的数据,可以使用MindStudio Insight工具展示。 #### 
stats.ipynb - 数据解析模式为cann_api_sum时生成,保存在cluster_analysis_output/CannApiSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群API耗时信息。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群API耗时信息。 - 数据解析模式为compute_op_sum时生成,保存在cluster_analysis_output/ComputeOpSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群计算算子耗时分析(将集群所有计算算子进行汇总并以图表展示),集群Rank计算算子耗时分析(将每个Rank的计算算子进行各自汇总)。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群计算算子耗时分析(将集群所有计算算子进行汇总并以图表展示),集群Rank计算算子耗时分析(将每个Rank的计算算子进行各自汇总)。 - 数据解析模式为hccl_sum时生成,保存在cluster_analysis_output/HcclSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群通信算子耗时分析(将集群所有通信算子进行汇总并以图表展示),集群Rank通信算子耗时分析(将每个Rank的通信算子进行各自汇总)、Top通信算子信息展示。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群通信算子耗时分析(将集群所有通信算子进行汇总并以图表展示),集群Rank通信算子耗时分析(将每个Rank的通信算子进行各自汇总)、Top通信算子信息展示。 - 数据解析模式为mstx_sum时生成,保存在cluster_analysis_output/MstxSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群场景mstx打点信息,分为框架侧、CANN侧和Device侧三部分的打点信息。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群场景mstx打点信息,分为框架侧、CANN侧和Device侧三部分的打点信息。 diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index d81ce05f44..78ea5d8971 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ -213,7 +213,7 @@ activities配置仅采集NPU数据,不配置experimental_config参数以及其 - 当Computing Time耗时增大,分析**算子性能**。 - 当Uncovered Communication Time耗时增大,分析**通信性能**,若通信性能分析没有劣化的通信算子,代表通信与计算的并行度较差,继续进行NPU的集群性能分析。 -- 当Mem Usage增大,分析**算子内存**,若没有明显占用较大的算子,则代表算子内存申请量没有差异,问题在于内存的释放(持有时间过久),可以使用tensorboard或ascend insight继续进行NPU内存的分析。 +- 当Mem Usage增大,分析**算子内存**,若没有明显占用较大的算子,则代表算子内存申请量没有差异,问题在于内存的释放(持有时间过久),可以使用TensorBoard或MindStudio insight继续进行NPU内存的分析。 ### 算子性能 -- Gitee From ac5da75a583cf23db22d068e211e1daeb515172b Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Mon, 22 Jul 2024 15:28:50 +0800 Subject: [PATCH 036/106] review fix --- .../grad_tool/grad_ms/grad_stat_csv.py | 6 +++--- debug/accuracy_tools/grad_tool/grad_ms/hook.py | 15 ++++++++++----- 2 
files changed, 13 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py index 1ebaa65827..316033fdc6 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py @@ -16,14 +16,14 @@ class GradStatCsv: csv = {} @staticmethod - def generate_csv_header(level, csv_input): + def get_csv_header(level, csv_input): header = ["param_name"] for key in level["header"]: header.extend(GradStatCsv.csv[key].generate_csv_header(csv_input)) return header @staticmethod - def generate_csv_line(level, csv_input): + def get_csv_line(level, csv_input): line = [csv_input.param_name] for key in level["header"]: line.extend(GradStatCsv.csv[key].generate_csv_content(csv_input)) @@ -67,7 +67,7 @@ class CsvDistribution(CsvItem): def generate_csv_header(csv_input): bounds = csv_input.bounds intervals = [] - for i, _ in enumerate(bounds): + for i in range(len(bounds)): if i == 0: intervals.append(f"(-inf, {bounds[i]}]") else: diff --git a/debug/accuracy_tools/grad_tool/grad_ms/hook.py b/debug/accuracy_tools/grad_tool/grad_ms/hook.py index 2d6034c589..f0d4798182 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/hook.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/hook.py @@ -18,15 +18,20 @@ from grad_tool.grad_ms.grad_stat_csv import GradStatCsv, CsvInput from grad_tool.grad_ms.utils import save_grad_direction, get_adapted_level class HookInput: + + ''' + HookInput is a class wrapping all the variables used for hooking optimizer + ''' + def __init__(self, opt) -> None: self.func = opt.construct self.g_names = [param.name for param in opt._parameters] self.param_list = grad_context.get_context(GradConst.PARAM_LIST) self.rank_id = get_rank_id() output_path = grad_context.get_context(GradConst.OUTPUT_PATH) - self.dump_dir = f"{output_path}/rank_{self.rank_id}/Dump/" - self.save_dir = f"{output_path}/rank_{self.rank_id}/" - 
self.step_finish_flag = f"{output_path}/rank_{self.rank_id}/Dump/{GradConst.STEP_FINISH}" + self.dump_dir = os.path.join(output_path, f"rank_{self.rank_id}", "Dump") + self.save_dir = os.path.join(output_path, f"rank_{self.rank_id}") + self.step_finish_flag = os.path.join(self.dump_dir, GradConst.STEP_FINISH) if os.path.exists(self.save_dir): print_warn_log(f"Delete existing path {self.save_dir}.") shutil.rmtree(self.save_dir) @@ -64,14 +69,14 @@ def hook_pynative_optimizer(opt, hook_input): if hook_input.param_list and param_name not in hook_input.param_list: continue csv_input = CsvInput(param_name, grad_value, hook_input.bounds) - grad_info = GradStatCsv.generate_csv_line(level_adapted, csv_input) + grad_info = GradStatCsv.get_csv_line(level_adapted, csv_input) output_lines.append(grad_info) if level_adapted["have_grad_direction"]: save_grad_direction(param_name, grad_value, os.path.join(hook_input.save_dir, f'step_{cur_step}')) output_csv_path = os.path.join(hook_input.save_dir, f"grad_summary_{cur_step}.csv") dummy_csv_input = CsvInput(None, None, hook_input.bounds) write_csv(output_csv_path, output_lines, - GradStatCsv.generate_csv_header(level_adapted, dummy_csv_input)) + GradStatCsv.get_csv_header(level_adapted, dummy_csv_input)) grad_context.update_step() opt.register_forward_pre_hook(hook_fn) -- Gitee From ba53cb734ab5ddbf772b890e211453d3225365a3 Mon Sep 17 00:00:00 2001 From: zhangjian <1032674385@qq.com> Date: Mon, 22 Jul 2024 15:39:42 +0800 Subject: [PATCH 037/106] add const for online_dispatch --- debug/accuracy_tools/msprobe/core/common/const.py | 2 ++ debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index dea829c3ff..34b1e774fa 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -25,6 +25,8 @@ class 
Const: API_LIST = "api_list" API_STACK = "api_stack" DUMP_MODE = [ALL, LIST, RANGE, STACK, ACL, API_LIST, API_STACK] + AUTO = "auto" + ONLINE_DUMP_MODE = [ALL, LIST, AUTO, OFF] SUMMARY = "summary" MD5 = "md5" SUMMARY_MODE = [ALL, SUMMARY, MD5] diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py index 19c18c124b..048ab3f901 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py @@ -228,7 +228,7 @@ class Comparator: else: is_bwd_success, bwd_compare_alg_results = True, None if is_bwd_success and bwd_compare_alg_results is None: - self.saver.record_results(ResultInfo(api_name, is_fwd_success, CompareConst.NA, fwd_compare_alg_results, + self.saver.record_results(ResultInfo(api_name, is_fwd_success, CompareConst.NAN, fwd_compare_alg_results, bwd_compare_alg_results)) else: self.saver.record_results(ResultInfo(api_name, is_fwd_success, is_bwd_success, fwd_compare_alg_results, -- Gitee From c32e65ab11b66e4c01f2e52561648ec2a3b50fc9 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Mon, 22 Jul 2024 15:52:40 +0800 Subject: [PATCH 038/106] =?UTF-8?q?=E8=A1=A5=E5=85=85=E6=95=B4=E7=BD=91?= =?UTF-8?q?=E6=AF=94=E5=AF=B9compare=E9=83=A8=E5=88=86=E5=86=92=E7=83=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch_ut/compare/test_acc_compare.py | 260 +++++++++++++++++- .../test/pytorch_ut/compare/test_match.py | 20 ++ 2 files changed, 275 insertions(+), 5 deletions(-) create mode 100644 debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py index 5a82289a00..625bafd8e9 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py +++ 
b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py @@ -1,17 +1,267 @@ # coding=utf-8 import unittest -from atat.pytorch.compare.acc_compare import rename_api +from msprobe.pytorch.compare import acc_compare as compare +import pandas as pd + +npu_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', + 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], + 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), + ('torch.float32', [16])], + 'output_struct': [('torch.float32', [1, 16, 28, 28])], + 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], + [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], + [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} + +bench_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', + 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], + 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), + ('torch.float32', [16])], + 'output_struct': [('torch.float32', [1, 16, 28, 28])], + 'summery': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], + [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], + [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} + +tensor_list = [ + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], 'Max': 0.33033010363578796, + 'Min': -0.331031858921051,'Mean': -0.030964046716690063, 'Norm': 2.2533628940582275, 'requires_grad': True, + 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': 
-0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'} +] + +result_op_dict = {'op_name': ['Tensor.add_.0.forward_input.0', 'Tensor.add_.0.forward_input.1', + 'Tensor.add_.0.forward_input.alpha.0', 'Tensor.add_.0.forward_output.0'], + 'input_struct': [('torch.float32', [16, 1, 3, 3]), ('torch.float32', [16, 1, 3, 3]), + ("", '[]')], + 'output_struct': [('torch.float32', [16, 1, 3, 3])], + 'summary': [[0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275], + [0.003992878366261721, -0.008102823048830032, -0.0002002553956117481, 0.02844562754034996], + [-0.1, -0.1, -0.1, -0.1], + [0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275]], + 'stack_info': []} + +o_result = [ + ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.0', 'torch.float32', 'torch.float32', + [1, 1, 28, 28], [1, 1, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 3.029174327850342, -2.926689624786377, + -0.06619918346405029, 3.029174327850342, -2.926689624786377, -0.06619918346405029, '', '', 'None'], + ['Functional_conv2d_0_forward_input.1', 'Functional_conv2d_0_forward_input.1', 'torch.float32', 'torch.float32', + [16, 1, 5, 5], [16, 1, 5, 5], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19919930398464203, -0.19974489510059357, + 0.006269412115216255, 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, '', '', 'None'], + 
['Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_input.2', 'torch.float32', 'torch.float32', + [16], [16], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, + 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, '', '', 'None'], + ['Functional_conv2d_0_forward_output', 'Functional_conv2d_0_forward_output', 'torch.float32', 'torch.float32', + [1, 16, 28, 28], [1, 16, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 2.1166646480560303, -2.190781354904175, + -0.003579073818400502, 2.1166646480560303, -2.190781354904175, -0.003579073818400502, '', '', 'None']] + +npu_dict_aten = {'op_name': ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.1', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.2', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.3', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.4', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.0', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.1', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.2', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.3', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.4'], + 'input_struct': [('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], + 'output_struct': [('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], + 'summary': [[139.625, -127.5625, -0.0103607177734375], + [2.5276029109954834, -2.1788690090179443, -0.0008259844034910202], + [2.472219944000244, -2.845968723297119, -0.008756577968597412], + [2.763145923614502, -3.398397922515869, 
-0.052132632583379745], + [2.673110008239746, -3.149275064468384, 0.01613386906683445], + [13.5546875, -10.640625, -0.008758544921875], + [0.30550330877304077, -0.24485322833061218, -0.010361209511756897], + [623.9192504882812, 432.96826171875, 520.2276611328125], + [2.4797861576080322, -3.055997371673584, -0.04795549064874649], + [61.7945556640625, 42.59713363647461, 52.03831481933594]]} + +bench_dict_functional = { + 'op_name': ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.1', + 'Functional_batch_norm_0_forward_input.2', 'Functional_batch_norm_0_forward_input.3', + 'Functional_batch_norm_0_forward_input.4', 'Functional_batch_norm_0_forward_output'], + 'input_struct': [('torch.float32', [256, 256, 14, 14]), ('torch.float32', [256]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256])], + 'output_struct': [('torch.float32', [256, 256, 14, 14])], + 'summary': [[3.061628818511963, -3.22507381439209, 3.634914173744619e-05], + [0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06], + [0.9338104128837585, 0.9277191162109375, 0.930335283279419], + [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], + [5.397906303405762, -5.796811580657959, 2.5283952709287405e-10]] +} + +aten_result = [ + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', + 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 136.56337118148804, -124.33742618560791, + -0.010397066915174946, ' ', '4460.480981749501%', '3855.335826136584%', '28603.33536971545%', ' ', 139.625, + -127.5625, -0.0103607177734375, 3.061628818511963, -3.22507381439209, 3.634914173744619e-05, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.1', 'Functional_batch_norm_0_forward_input.1', + 'torch.float32', 'torch.float32', [256], [256], 2.527024927258026, -2.1782388387364335, -0.0008296193100250093, + ' ', 
'437213.84590749856%', '345658.76916858414%', '22823.676544842117%', ' ', 2.5276029109954834, + -2.1788690090179443, -0.0008259844034910202, 0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06, + 'Warning', 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.2', 'Functional_batch_norm_0_forward_input.2', + 'torch.float32', 'torch.float32', [256], [256], 1.5384095311164856, -3.7736878395080566, -0.9390918612480164, ' ', + '164.74538192025793%', '406.7705163736246%', '100.94122819224167%', ' ', 2.472219944000244, -2.845968723297119, + -0.008756577968597412, 0.9338104128837585, 0.9277191162109375, 0.930335283279419, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.3', 'Functional_batch_norm_0_forward_input.3', + 'torch.float32', 'torch.float32', [256], [256], 1.763145923614502, -4.398397922515869, -1.0521326325833797, ' ', + '176.3145923614502%', '439.8297922515869%', '105.21326325933797%', ' ', 2.763145923614502, -3.398397922515869, + -0.052132632583379745, 1.0, 1.0, 1.0, 'Warning', 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.4', 'Functional_batch_norm_0_forward_input.4', + 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906693445, ' ', + 'N/A', 'N/A', 'N/A', ' ', 2.673110008239746, -3.149275064468384, 0.01613386906683445, 0.0, 0.0, 0.0, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.0', 'Functional_batch_norm_0_forward_output', + 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 8.156781196594238, -4.843813419342041, + -0.008758545174714527, ' ', '151.11009228611078%', '83.55995967687207%', '3464072756.115108%', ' ', 13.5546875, + -10.640625, -0.008758544921875, 5.397906303405762, 
-5.796811580657959, 2.5283952709287405e-10, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', + 'Yes', '', None], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', + 'Yes', '', None], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.3', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', + 'Yes', '', None], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', + 'Yes', '', None]] + +highlight_dict = {'red_rows': [], 'yellow_rows': []} + +num_0, num_1, num_2, num_3 = 0, 1, 2, 3 +summary_line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.01, 0, 0, 0, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'Yes', ''] +summary_line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 10, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, + 'Warning', ''] +summary_line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.02, 0, 0, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, + 'Warning', ''] +summary_line_3 = ['Functional_batch_norm_0_forward_output.2', 
'Functional_batch_norm_0_forward_output.2', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, + 'Warning', ''] +line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 1, 1, 0.95, 1, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'Yes', ''] +line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1, 1, 0.59, 1, 'nan', 0, 1, 1, 19, 1, 1, 1, + 'Warning', ''] +line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 1, 1, 0.8, 1, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, + 'Warning', ''] +line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1.1e+10, 1, 0.85, 1, 9, 0.12, 0, 1, 1, 0.1, 1, + 1, 1, 'Warning', ''] + +op_data = { + 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False}], + 'input_kwargs': {'alpha': {'type': 'float', 'value': -0.1}}, + 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True}]} + +op_name = "Tensor.add_0.0.forward" + +op_result = [ + {'type': 'torch.Tensor', 
'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032,'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'}] + class TestUtilsMethods(unittest.TestCase): + def test_check_graph_mode(self): + op1 = "Aten" + op2 = "torch" + self.assertTrue(compare.check_graph_mode(op1, op2)) + self.assertTrue(compare.check_graph_mode(op2, op1)) + self.assertFalse(compare.check_graph_mode(op1, op1)) + self.assertFalse(compare.check_graph_mode(op2, op2)) + + def test_check_op(self): + fuzzy_match = False + result = compare.check_op(npu_dict, bench_dict, fuzzy_match) + self.assertEqual(result, True) + + def test_merge_tensor(self): + op_dict = compare.merge_tensor(tensor_list, True, False) + self.assertEqual(op_dict, result_op_dict) + + def test_read_op(self): + result = compare.read_op(op_data, op_name) + self.assertEqual(result, op_result) + + def test_match_op(self): + fuzzy_match = False + a, b = compare.match_op([npu_dict], [bench_dict], fuzzy_match) + self.assertEqual(a, 0) + self.assertEqual(b, 0) + + def test_get_accuracy(self): + result = [] + compare.get_accuracy(result, npu_dict, bench_dict, highlight_dict) + self.assertEqual(result, o_result) 
+ + def test_get_accuracy_graph_mode(self): + result = [] + compare.get_accuracy(result, npu_dict_aten, bench_dict_functional, highlight_dict) + self.assertEqual(result, aten_result) + + def test_find_error_rows(self): + summary_result = [summary_line_input, summary_line_1, summary_line_2, summary_line_3] + highlight_dict = {'red_rows': [], 'yellow_rows': []} + compare.find_error_rows(summary_result, 0, 1, highlight_dict, summary_compare=True) + self.assertEqual(highlight_dict, {'red_rows': [], 'yellow_rows': []}) + + def test_find_compare_result_error_rows(self): + result = [line_input, line_1, line_2, line_3] + result_df = pd.DataFrame(result) + highlight_dict = {'red_rows': [], 'yellow_rows': []} + compare.find_compare_result_error_rows(result_df, highlight_dict, False) + self.assertEqual(highlight_dict, {'red_rows': [num_1, num_3], 'yellow_rows': [num_2]}) + def test_rename_api(self): test_name_1 = "Distributed.broadcast.0.forward.input.0" expect_name_1 = "Distributed.broadcast.input.0" - actual_name_1 = rename_api(test_name_1, "forward") + actual_name_1 = compare.rename_api(test_name_1, "forward") self.assertEqual(actual_name_1, expect_name_1) - + test_name_2 = "Torch.sum.0.backward.output.0" expect_name_2 = "Torch.sum.output.0" - actual_name_2 = rename_api(test_name_2, "backward") + actual_name_2 = compare.rename_api(test_name_2, "backward") self.assertEqual(actual_name_2, expect_name_2) - \ No newline at end of file diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py new file mode 100644 index 0000000000..ac28e994e9 --- /dev/null +++ b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py @@ -0,0 +1,20 @@ +# coding=utf-8 +import unittest +from msprobe.pytorch.compare import match + + +class TestMatch(unittest.TestCase): + def test_graph_mapping(self): + op1 = "Aten_convolution_1_forward_0.input.0" + op2 = "Torch_conv2d_0_forward_0.input.0" + op3 = 
"Torch_batch_norm_0_forward_0.input.0" + op4 = "Aten_convolution.default_1_forward_0.input.0" + op5 = "Aten_foo_1_forward_0.input.0" + self.assertTrue(match.graph_mapping.match(op1, op2)) + self.assertTrue(match.graph_mapping.match(op2, op1)) + self.assertTrue(match.graph_mapping.match(op4, op2)) + self.assertTrue(match.graph_mapping.match(op2, op4)) + self.assertFalse(match.graph_mapping.match(op1, op3)) + self.assertFalse(match.graph_mapping.match(op3, op1)) + self.assertFalse(match.graph_mapping.match(op5, op2)) + self.assertFalse(match.graph_mapping.match(op2, op5)) -- Gitee From 645fe96e1a31b3ea77a4fd88c6bf50a70403802d Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Thu, 18 Jul 2024 18:55:18 +0800 Subject: [PATCH 039/106] add_overall_metrics_sheet --- .../comparator/base_comparator.py | 2 +- .../comparator/overall_metrics_comparator.py | 50 ++++ .../origin_data_bean/kernel_details_bean.py | 33 ++- .../origin_data_bean/trace_event_bean.py | 50 +++- .../compare_bean/overall_metrics_bean.py | 255 ++++++++++++++++++ .../compare_bean/profiling_info.py | 184 ++++++++++++- .../generator/detail_performance_generator.py | 15 +- .../profiling_parser/base_profiling_parser.py | 97 +++++++ .../profiling_parser/gpu_profiling_parser.py | 27 +- .../profiling_parser/npu_profiling_parser.py | 36 ++- .../compare_backend/utils/constant.py | 10 +- .../compare_backend/utils/excel_config.py | 79 +++++- .../view/work_sheet_creator.py | 29 +- .../test_kernel_details_bean.py | 4 +- .../test_gpu_profiling_parser.py | 1 + 15 files changed, 832 insertions(+), 40 deletions(-) create mode 100644 profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py diff --git a/profiler/compare_tools/compare_backend/comparator/base_comparator.py b/profiler/compare_tools/compare_backend/comparator/base_comparator.py index 330fb871ee..8012dfae94 100644 --- 
a/profiler/compare_tools/compare_backend/comparator/base_comparator.py +++ b/profiler/compare_tools/compare_backend/comparator/base_comparator.py @@ -21,4 +21,4 @@ class BaseComparator(ABC): @abstractmethod def _compare(self): - raise NotImplementedError("Function _compare need to be implemented.") + raise NotImplementedError("Function _compare need to be implemented.") \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py b/profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py new file mode 100644 index 0000000000..d438dc41d5 --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py @@ -0,0 +1,50 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from math import isclose + +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class OverallMetricsComparator(BaseComparator): + + def __init__(self, origin_data: dict, bean: any): + super().__init__(origin_data, bean) + self._row_style = [] + + @property + def base_info(self): + return self._origin_data.get(Constant.BASE_DATA) + + @property + def comp_info(self): + return self._origin_data.get(Constant.COMPARISON_DATA) + + def generate_data(self) -> dict: + self._compare() + return {self._sheet_name: { + "headers": self._headers, + "rows": self._rows, + "overhead": self._overhead, + "row_style": self._row_style + }} + + def _compare(self): + if isclose(self.base_info.e2e_time_ms, 0) or isclose(self.comp_info.e2e_time_ms, 0): + return + self._rows.extend(self._bean(self.base_info, self.comp_info).rows) + for row in self._rows: + self._row_style.append(ExcelConfig.ROW_STYLE_MAP.get(row[0], {})) # index 0 for metric index name diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index 122009b904..9c4825c0e8 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -1,8 +1,9 @@ import math +from decimal import Decimal import pandas as pd -from compare_backend.utils.common_func import convert_to_float +from compare_backend.utils.common_func import convert_to_float, convert_to_decimal from compare_backend.utils.constant import Constant @@ -12,8 +13,10 @@ class KernelDetailsBean: self._op_type = "" self._name = "" self._aiv_vec_time = 0.0 + self._aicore_time = 0.0 self._mac_time = 0.0 self._duration = 0.0 + self._start_time = Decimal("0") 
self.init() @property @@ -30,6 +33,12 @@ class KernelDetailsBean: return float("nan") return convert_to_float(self._aiv_vec_time) + @property + def aicore_time(self) -> float: + if self._aicore_time == "" or self._aicore_time == "N/A": + return float("nan") + return convert_to_float(self._aicore_time) + @property def mac_time(self) -> float: if self._mac_time == "" or self._mac_time == "N/A": @@ -40,6 +49,18 @@ class KernelDetailsBean: def duration(self) -> float: return convert_to_float(self._duration) + @property + def dur(self) -> float: + return convert_to_float(self._duration) + + @property + def start_time(self) -> Decimal: + return convert_to_decimal(self._start_time) + + @property + def end_time(self) -> Decimal: + return self.start_time + convert_to_decimal(self._duration) + def is_hide_op_pmu(self): if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys(): return False @@ -66,7 +87,7 @@ class KernelDetailsBean: def is_flash_attention(self): return "flashattention" in self.op_type.lower() - def is_cube(self): + def is_matmul(self): return "matmul" in self.op_type.lower() def is_conv(self): @@ -79,9 +100,17 @@ class KernelDetailsBean: def is_page_attention(self): return "pagedattention" in self.op_type.lower() + def is_trans(self): + return any(trans_mask in self.name.lower() for trans_mask in Constant.KERNEL_TRANS_MASK) + + def is_cube_kernel_cat(self): + return self.mac_time > 0 or self.aicore_time > 0 + def init(self): self._op_type = self._data.get('Type', "") self._name = self._data.get('Name', "") self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") + self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") self._duration = self._data.get('Duration(us)', 0) + self._start_time = Decimal(self._data.get("Start Time(us)", "0")) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py 
b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py index cef6bb0712..245b51d105 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py @@ -114,6 +114,21 @@ class TraceEventBean: def is_torch_op(self, value: bool): self._is_torch_op = value + @classmethod + def is_sdma(cls): + return False + + @classmethod + def is_page_attention(cls): + return False + + @classmethod + def is_trans(cls) -> bool: + """ + 暂时没找到GPU判断trans的方法,暂时都是notrans + """ + return False + def is_m_mode(self) -> bool: return self._ph == "M" @@ -199,11 +214,44 @@ class TraceEventBean: self._name = name def is_conv(self): - return self.name.lower().startswith("aten::conv") + return self.lower_name.startswith("aten::conv") def is_lccl(self): return self.lower_name == "kernel_aivec" + def is_fa_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return any(cube_mask in self.lower_name for cube_mask in Constant.CPU_OP_FA_MASK) + + def is_conv_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return self.lower_name.startswith(Constant.CPU_OP_CONV) + + def is_matmul_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return any(bwd_mask in self.lower_name for bwd_mask in Constant.CPU_OP_MATMUL_MASK) + + def is_bwd_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return any(bwd_mask in self.lower_name for bwd_mask in Constant.BWD_LIST) + + def is_cpu_cube_op(self) -> bool: + return self.is_matmul_for_cpu_op() or self.is_fa_for_cpu_op() or self.is_conv_for_cpu_op() + + def is_vector(self): + return not any(cube_mask in self.lower_name for cube_mask in Constant.KERNEL_CUBE_MASK) + + def is_cube_kernel_cat(self): + return any(cube_mask in self.lower_name for cube_mask in Constant.KERNEL_CUBE_MASK) + def init(self): if 
isinstance(self._event, dict): self._pid = self._event.get("pid", 0) diff --git a/profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py b/profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py new file mode 100644 index 0000000000..544f8f5234 --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py @@ -0,0 +1,255 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from math import isclose + +from compare_backend.compare_bean.profiling_info import ProfilingInfo +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class OverallMetricsBean: + TABLE_NAME = Constant.OVERALL_METRICS_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, base_info: ProfilingInfo, comparison_info: ProfilingInfo): + self._base_data = OverallMetricsInfo(base_info).overall_metrics + self._comparison_data = OverallMetricsInfo(comparison_info).overall_metrics + + @property + def rows(self): + rows_data = [] + for index, base_data in self._base_data.items(): + comparison_data = self._comparison_data.get(index) + row = self.get_row_data(index, base_data, comparison_data) + if row: + rows_data.append(row) + return rows_data + + @staticmethod + def get_row_data(index, base_data, comparison_data): + if isclose(base_data[0], 0) and isclose(comparison_data[0], 0): + return [] + row_data = [index] + row_data.extend(base_data) + row_data.extend(comparison_data) + row_data.extend(calculate_diff_ratio(base_data[0], comparison_data[0])) + return row_data + + +class OverallMetricsInfo: + def __init__(self, profiling_info: ProfilingInfo): + self._profiling_info = profiling_info + self._overall_metrics_data_map = { + ExcelConfig.COMPUTING: self.computing_data, + ExcelConfig.FA: self.fa_data, + ExcelConfig.FA_FWD_CUBE: self.fa_fwd_cube_data, + ExcelConfig.FA_FWD_VECTOR: self.fa_fwd_vector_data, + ExcelConfig.FA_BWD_CUBE: self.fa_bwd_cube_data, + ExcelConfig.FA_BWD_VECTOR: self.fa_bwd_vector_data, + ExcelConfig.CONV: self.conv_data, + ExcelConfig.CONV_FWD_CUBE: self.conv_fwd_cube_data, + ExcelConfig.CONV_FWD_VECTOR: self.conv_fwd_vector_data, + ExcelConfig.CONV_BWD_CUBE: self.conv_bwd_cube_data, + ExcelConfig.CONV_BWD_VECTOR: self.conv_bwd_vector_data, + 
ExcelConfig.MM: self.mm_data, + ExcelConfig.MM_CUBE: self.mm_cube_data, + ExcelConfig.MM_VECTOR: self.mm_vector_data, + ExcelConfig.PA: self.pa_data, + ExcelConfig.VECTOR: self.vector_data, + ExcelConfig.VECTOR_TRANS: self.vector_trans_data, + ExcelConfig.VECTOR_NO_TRANS: self.vector_no_trans_data, + ExcelConfig.CUBE: self.cube_data, + ExcelConfig.SDMA_TM: self.sdma_tm_data, + ExcelConfig.OTHER: self.other_data, + ExcelConfig.COMMUNICATION_TIME: self.communication_data, + ExcelConfig.WAIT: self.wait_data, + ExcelConfig.TRANSMIT: self.transmit_data, + ExcelConfig.FREE_TIME: self.free_time_data, + ExcelConfig.SDMA: self.sdma_data, + ExcelConfig.FREE: self.free_data, + ExcelConfig.E2E_TIME: self.e2e_time_data + } + + @property + def overall_metrics(self): + return self._overall_metrics_data_map + + @property + def computing_data(self): + return [self._profiling_info.compute_time_ms, + self._profiling_info.compute_time_ms / self._profiling_info.e2e_time_ms, + sum((self._profiling_info.fa_total_num, self._profiling_info.conv_total_num, + self._profiling_info.mm_total_num, self._profiling_info.vector_total_num, + self._profiling_info.sdma_num_tensor_move, self._profiling_info.other_cube_num, + self._profiling_info.page_attention_num))] + + @property + def fa_data(self): + return [self._profiling_info.fa_total_time, + self._profiling_info.fa_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_total_num] + + @property + def fa_fwd_cube_data(self): + return [self._profiling_info.fa_time_fwd_cube, + self._profiling_info.fa_time_fwd_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_fwd_cube] + + @property + def fa_fwd_vector_data(self): + return [self._profiling_info.fa_time_fwd_vector, + self._profiling_info.fa_time_fwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_fwd_vector] + + @property + def fa_bwd_cube_data(self): + return [self._profiling_info.fa_time_bwd_cube, + self._profiling_info.fa_time_bwd_cube 
/ self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_bwd_cube] + + @property + def fa_bwd_vector_data(self): + return [self._profiling_info.fa_time_bwd_vector, + self._profiling_info.fa_time_bwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_bwd_vector] + + @property + def conv_data(self): + return [self._profiling_info.conv_total_time, + self._profiling_info.conv_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_total_num] + + @property + def conv_fwd_cube_data(self): + return [self._profiling_info.conv_time_fwd_cube, + self._profiling_info.conv_time_fwd_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_fwd_cube] + + @property + def conv_fwd_vector_data(self): + return [self._profiling_info.conv_time_fwd_vector, + self._profiling_info.conv_time_fwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_fwd_vector] + + @property + def conv_bwd_cube_data(self): + return [self._profiling_info.conv_time_bwd_cube, + self._profiling_info.conv_time_bwd_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_bwd_cube] + + @property + def conv_bwd_vector_data(self): + return [self._profiling_info.conv_time_bwd_vector, + self._profiling_info.conv_time_bwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_bwd_vector] + + @property + def mm_data(self): + return [self._profiling_info.mm_total_time, + self._profiling_info.mm_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.mm_total_num] + + @property + def mm_cube_data(self): + return [self._profiling_info.matmul_time_cube, + self._profiling_info.matmul_time_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.matmul_num_cube] + + @property + def mm_vector_data(self): + return [self._profiling_info.matmul_time_vector, + self._profiling_info.matmul_time_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.matmul_num_vector] + + @property 
+ def pa_data(self): + return [self._profiling_info.page_attention_time, + self._profiling_info.page_attention_time / self._profiling_info.e2e_time_ms, + self._profiling_info.page_attention_num] + + @property + def vector_data(self): + return [self._profiling_info.vector_total_time, + self._profiling_info.vector_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.vector_total_num] + + @property + def vector_trans_data(self): + return [self._profiling_info.vector_time_trans, + self._profiling_info.vector_time_trans / self._profiling_info.e2e_time_ms, + self._profiling_info.vector_num_trans] + + @property + def vector_no_trans_data(self): + return [self._profiling_info.vector_time_notrans, + self._profiling_info.vector_time_notrans / self._profiling_info.e2e_time_ms, + self._profiling_info.vector_num_notrans] + + @property + def cube_data(self): + return [self._profiling_info.other_cube_time, + self._profiling_info.other_cube_time / self._profiling_info.e2e_time_ms, + self._profiling_info.other_cube_num] + + @property + def sdma_tm_data(self): + return [self._profiling_info.sdma_time_tensor_move, + self._profiling_info.sdma_time_tensor_move / self._profiling_info.e2e_time_ms, + self._profiling_info.sdma_num_tensor_move] + + @property + def other_data(self): + other_time = max((0, + self._profiling_info.compute_time_ms - self._profiling_info.fa_total_time - + self._profiling_info.conv_total_time - self._profiling_info.mm_total_time - + self._profiling_info.vector_total_time - self._profiling_info.sdma_time_tensor_move - + self._profiling_info.other_cube_time - self._profiling_info.page_attention_time)) + return [other_time, other_time / self._profiling_info.e2e_time_ms, "/"] + + @property + def communication_data(self): + return [self._profiling_info.communication_not_overlapped_ms, + self._profiling_info.communication_not_overlapped_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def wait_data(self): + return 
[self._profiling_info.wait_time_ms, + self._profiling_info.wait_time_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def transmit_data(self): + return [self._profiling_info.transmit_time_ms, + self._profiling_info.transmit_time_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def free_time_data(self): + return [self._profiling_info.free_time_ms, + self._profiling_info.free_time_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def sdma_data(self): + return [self._profiling_info.sdma_time_stream, + self._profiling_info.sdma_time_stream / self._profiling_info.e2e_time_ms, "/"] + + @property + def free_data(self): + free = self._profiling_info.free_time_ms - self._profiling_info.sdma_time_stream + return [free, free / self._profiling_info.e2e_time_ms, "/"] + + @property + def e2e_time_data(self): + return [self._profiling_info.e2e_time_ms, 1, "/"] diff --git a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py index e5d9bf26e9..e0a80a4d30 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py +++ b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py @@ -37,6 +37,105 @@ class ProfilingInfo: self.hide_op_details = False self.is_level0 = False + # 性能拆解新指标 + self.fa_time_fwd_cube = 0.0 + self.fa_num_fwd_cube = 0 + self.fa_time_bwd_cube = 0.0 + self.fa_num_bwd_cube = 0 + self.fa_time_fwd_vector = 0.0 + self.fa_num_fwd_vector = 0 + self.fa_time_bwd_vector = 0.0 + self.fa_num_bwd_vector = 0 + + self.conv_time_fwd_cube = 0.0 + self.conv_num_fwd_cube = 0 + self.conv_time_bwd_cube = 0.0 + self.conv_num_bwd_cube = 0 + self.conv_time_fwd_vector = 0.0 + self.conv_num_fwd_vector = 0 + self.conv_time_bwd_vector = 0.0 + self.conv_num_bwd_vector = 0 + + self.matmul_time_cube = 0.0 + self.matmul_num_cube = 0 + self.matmul_time_vector = 0.0 + self.matmul_num_vector = 0 + + self.page_attention_time = 0.0 + 
self.page_attention_num = 0 + + self.vector_time_trans = 0.0 + self.vector_num_trans = 0 + self.vector_time_notrans = 0.0 + self.vector_num_notrans = 0 + + self.sdma_time_tensor_move = 0.0 + self.sdma_num_tensor_move = 0 + self.sdma_time_stream = 0.0 + self.sdma_num_stream = 0 + + self.other_cube_time = 0.0 + self.other_cube_num = 0 + + @property + def e2e_time_ms(self): + return self.e2e_time * 10 ** 3 + + @property + def compute_time_ms(self): + return self.compute_time * 10 ** 3 + + @property + def free_time_ms(self): + return self.scheduling_time * 10 ** 3 + + @property + def communication_not_overlapped_ms(self): + return self.communication_not_overlapped * 10 ** 3 + + @property + def wait_time_ms(self): + return self.wait_time * 10 ** 3 + + @property + def transmit_time_ms(self): + return (self.communication_not_overlapped - self.wait_time) * 10 ** 3 + + @property + def fa_total_time(self): + return sum((self.fa_time_fwd_cube, self.fa_time_fwd_vector, self.fa_time_bwd_cube, self.fa_time_bwd_vector)) + + @property + def fa_total_num(self): + return sum((self.fa_num_fwd_cube, self.fa_num_fwd_vector, self.fa_num_bwd_cube, self.fa_num_bwd_vector)) + + @property + def conv_total_time(self): + return sum( + (self.conv_time_fwd_cube, self.conv_time_fwd_vector, self.conv_time_bwd_cube, + self.conv_time_bwd_vector)) + + @property + def conv_total_num(self): + return sum((self.conv_num_fwd_cube, self.conv_num_fwd_vector, self.conv_num_bwd_cube, + self.conv_num_bwd_vector)) + + @property + def mm_total_time(self): + return sum((self.matmul_time_cube, self.matmul_time_vector)) + + @property + def mm_total_num(self): + return sum((self.matmul_num_cube, self.matmul_num_vector)) + + @property + def vector_total_time(self): + return sum((self.vector_time_trans, self.vector_time_notrans)) + + @property + def vector_total_num(self): + return sum((self.vector_num_trans, self.vector_num_notrans)) + def trans_time_to_s(self): self.cube_time = self.cube_time / 10 ** 6 
self.other_time = self.other_time / 10 ** 6 @@ -54,6 +153,24 @@ class ProfilingInfo: self.conv_time_fwd = self.conv_time_fwd / 10 ** 6 self.conv_time_bwd = self.conv_time_bwd / 10 ** 6 + # 新指标单位为ms + self.fa_time_fwd_cube /= 10 ** 3 + self.fa_time_bwd_cube /= 10 ** 3 + self.fa_time_fwd_vector /= 10 ** 3 + self.fa_time_bwd_vector /= 10 ** 3 + self.conv_time_fwd_cube /= 10 ** 3 + self.conv_time_bwd_cube /= 10 ** 3 + self.conv_time_fwd_vector /= 10 ** 3 + self.conv_time_bwd_vector /= 10 ** 3 + self.matmul_time_cube /= 10 ** 3 + self.matmul_time_vector /= 10 ** 3 + self.vector_time_trans /= 10 ** 3 + self.vector_time_notrans /= 10 ** 3 + self.sdma_time_tensor_move /= 10 ** 3 + self.sdma_time_stream /= 10 ** 3 + self.page_attention_time /= 10 ** 3 + self.other_cube_time /= 10 ** 3 + def calculate_other_time(self): self.other_time = max( [0, self.compute_time - self.cube_time - self.fa_time_fwd - self.fa_time_bwd - @@ -64,8 +181,7 @@ class ProfilingInfo: - self.conv_time_fwd - self.conv_time_bwd def calculate_schedule_time(self): - self.scheduling_time = (self.e2e_time - self.compute_time - self.lccl_time \ - - self.communication_not_overlapped) + self.scheduling_time = (self.e2e_time - self.compute_time - self.lccl_time - self.communication_not_overlapped) def update_fa_fwd_info(self, time: float): self.fa_time_fwd += time @@ -75,6 +191,30 @@ class ProfilingInfo: self.fa_time_bwd += time self.fa_num_bwd += 1 + def update_fa_fwd_cube_info(self, time: float): + self.fa_time_fwd_cube += time + self.fa_num_fwd_cube += 1 + + def update_fa_bwd_cube_info(self, time: float): + self.fa_time_bwd_cube += time + self.fa_num_bwd_cube += 1 + + def update_fa_fwd_vector_info(self, time: float): + self.fa_time_fwd_vector += time + self.fa_num_fwd_vector += 1 + + def update_fa_bwd_vector_info(self, time: float): + self.fa_time_bwd_vector += time + self.fa_num_bwd_vector += 1 + + def update_sdma_tensor_move_info(self, time: float): + self.sdma_time_tensor_move += time + 
self.sdma_num_tensor_move += 1 + + def update_sdma_stream_info(self, time: float, num: int = 1): + self.sdma_time_stream += time + self.sdma_num_stream += num + def update_pa_info(self, time: float): self.pa_time += time self.pa_num += 1 @@ -91,6 +231,42 @@ class ProfilingInfo: self.conv_time_bwd += time self.conv_num_bwd += 1 + def update_conv_bwd_cube_info(self, time: float): + self.conv_time_bwd_cube += time + self.conv_num_bwd_cube += 1 + + def update_conv_fwd_cube_info(self, time: float): + self.conv_time_fwd_cube += time + self.conv_num_fwd_cube += 1 + + def update_conv_bwd_vector_info(self, time: float): + self.conv_time_bwd_vector += time + self.conv_num_bwd_vector += 1 + + def update_conv_fwd_vector_info(self, time: float): + self.conv_time_fwd_vector += time + self.conv_num_fwd_vector += 1 + + def update_matmul_cube_info(self, time: float): + self.matmul_time_cube += time + self.matmul_num_cube += 1 + + def update_matmul_vector_info(self, time: float): + self.matmul_time_vector += time + self.matmul_num_vector += 1 + + def update_page_attention_info(self, time: float): + self.page_attention_time += time + self.page_attention_num += 1 + + def update_vector_trans_info(self, time: float): + self.vector_time_trans += time + self.vector_num_trans += 1 + + def update_vector_notrans_info(self, time: float): + self.vector_time_notrans += time + self.vector_num_notrans += 1 + def update_sdma_info(self, time: float, num: int = 1): self.sdma_time += time self.sdma_num += num @@ -103,6 +279,10 @@ class ProfilingInfo: self.vec_time += time self.vec_num += 1 + def update_other_cube_info(self, time: float): + self.other_cube_time += time + self.other_cube_num += 1 + def set_compute_time(self, time: float): self.compute_time = time diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 5b93d888a4..292e312815 100644 --- 
a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -8,6 +8,7 @@ from compare_backend.comparator.module_comparetor import ModuleComparator from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator from compare_backend.comparator.operator_comparator import OperatorComparator from compare_backend.comparator.operator_statistic_comparator import OperatorStatisticComparator +from compare_backend.comparator.overall_metrics_comparator import OverallMetricsComparator from compare_backend.compare_bean.communication_bean import CommunicationBean from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean from compare_backend.compare_bean.memory_statistic_bean import MemoryStatisticBean @@ -15,6 +16,7 @@ from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean +from compare_backend.compare_bean.overall_metrics_bean import OverallMetricsBean from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare from compare_backend.generator.base_generator import BaseGenerator @@ -41,8 +43,16 @@ class DetailPerformanceGenerator(BaseGenerator): self._args.enable_communication_compare: print("[INFO] Start to compare performance detail data, please wait.") comparator_list = self._create_comparator() - for comparator in comparator_list: - self._result_data.update(comparator.generate_data()) + else: + comparator_list = [] + if self._args.enable_profiling_compare: + overall_data = {Constant.BASE_DATA: 
self._profiling_data_dict.get(Constant.BASE_DATA).overall_metrics, + Constant.COMPARISON_DATA: self._profiling_data_dict.get( + Constant.COMPARISON_DATA).overall_metrics} + # overall 数据在最前面 + comparator_list.insert(0, OverallMetricsComparator(overall_data, OverallMetricsBean)) + for comparator in comparator_list: + self._result_data.update(comparator.generate_data()) def generate_view(self): if not self._result_data: @@ -57,6 +67,7 @@ class DetailPerformanceGenerator(BaseGenerator): comparator_list = [] op_compare_result = [] + if self._args.enable_operator_compare: module_compare_result = self.match_nn_module() if self._profiling_data_dict.get( Constant.BASE_DATA).python_function_data and self._profiling_data_dict.get( diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 2127ff5e75..6ee07a6569 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABC from decimal import Decimal from compare_backend.compare_bean.origin_data_bean.compare_event import KernelEvent, MemoryEvent +from compare_backend.compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean from compare_backend.compare_bean.profiling_info import ProfilingInfo from compare_backend.utils.constant import Constant @@ -66,6 +67,18 @@ class BaseProfilingParser(ABC): self._comm_list = [] self._read_trace_event() self._cur_func_index = 0 + self._categorize_performance_index = 0 + self._cpu_cube_op = None + self._bwd_tid = None + + @property + def cpu_cube_op(self): + if self._cpu_cube_op is not None: + return self._cpu_cube_op + cpu_cube_op = [op for op in self._result_data.torch_op_data if 
op.is_cpu_cube_op()] + cpu_cube_op.sort(key=lambda x: x.start_time) + self._cpu_cube_op = cpu_cube_op + return self._cpu_cube_op @abstractmethod def _update_memory_list(self): @@ -102,6 +115,90 @@ class BaseProfilingParser(ABC): self._check_result_data() return self._result_data + def categorize_computing_performance_data(self, tk: (TraceEventBean, KernelDetailsBean), flow_dict_new: dict): + if tk.is_page_attention(): + self._result_data.overall_metrics.update_page_attention_info(tk.dur) + return + if tk.is_sdma(): + self._result_data.overall_metrics.update_sdma_tensor_move_info(tk.dur) + return + flow_start_time = flow_dict_new.get(tk.start_time) + if flow_start_time: + while self._categorize_performance_index < len(self.cpu_cube_op): + cur_op = self.cpu_cube_op[self._categorize_performance_index] + if cur_op.end_time < flow_start_time: + self._categorize_performance_index += 1 + continue + if cur_op.start_time <= flow_start_time: + self._categorize_cube_performance_data(cur_op, tk) + return + break + if self._profiling_type == Constant.NPU: + # 缺失torch至npu连线的算子,判断fa/conv/matmul使用kernel_details.csv的op_type字段 + if tk.is_flash_attention(): + if tk.is_fa_bwd(): + self._result_data.overall_metrics.update_fa_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_fa_fwd_cube_info(tk.dur) + return + elif tk.is_conv(): + if tk.is_conv_bwd(): + self._result_data.overall_metrics.update_conv_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_conv_fwd_cube_info(tk.dur) + return + elif tk.is_matmul(): + self._result_data.overall_metrics.update_matmul_cube_info(tk.dur) + return + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_other_cube_info(tk.dur) + elif tk.is_trans(): + self._result_data.overall_metrics.update_vector_trans_info(tk.dur) + else: + self._result_data.overall_metrics.update_vector_notrans_info(tk.dur) + + def _categorize_cube_performance_data(self, cpu_op: TraceEventBean, tk: (TraceEventBean, 
KernelDetailsBean)): + """ + 判断fa/conv/matmul/vector使用cpu_op + """ + if cpu_op.is_fa_for_cpu_op(): + if self._is_backward(cpu_op): + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_fa_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_fa_bwd_vector_info(tk.dur) + else: + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_fa_fwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_fa_fwd_vector_info(tk.dur) + elif cpu_op.is_conv_for_cpu_op(): + if self._is_backward(cpu_op): + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_conv_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_conv_bwd_vector_info(tk.dur) + else: + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_conv_fwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_conv_fwd_vector_info(tk.dur) + elif cpu_op.is_matmul_for_cpu_op(): # matmul + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_matmul_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_matmul_vector_info(tk.dur) + + def _is_backward(self, event: TraceEventBean): + return event.tid == self._bwd_tid or event.is_bwd_for_cpu_op() + + def _get_flow_time_dict(self): + return { + flow_event["end"].start_time: flow_event["start"].start_time + for flow_event in self._flow_dict.values() + if flow_event.get("end") and flow_event.get("start") + } + def _dispatch_events(self): if not self._dispatch_func: return diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index c4089aec9b..7b1ae1a5a1 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -20,6 +20,7 @@ class GPUProfilingParser(BaseProfilingParser): 
self._compute_stream_id = self._infer_compute_stream_id() self._marks = defaultdict(int) self._aten_index = 0 + self._find_bwd_tid() @classmethod def __is_flash_attention(cls, name: str): @@ -30,10 +31,7 @@ class GPUProfilingParser(BaseProfilingParser): @classmethod def __is_sdma_time(cls, name: str): - for mark in cls.SDMA_MARK_LIST: - if mark in name.lower(): - return True - return False + return any(mask in name.lower() for mask in cls.SDMA_MARK_LIST) def _update_memory_list(self): if not self._enable_memory_compare: @@ -68,19 +66,15 @@ class GPUProfilingParser(BaseProfilingParser): min_ts = sys.float_info.max max_ts = sys.float_info.min self._trace_events.sort(key=lambda x: x.start_time) - aten_events = list(filter(lambda x: x.name.startswith("aten::"), self._trace_events)) - flow_dict_new = {} - for flow_event in self._flow_dict.values(): - start_event = flow_event.get("start") - end_event = flow_event.get("end") - if start_event and end_event: - flow_dict_new[end_event.start_time] = start_event.start_time + aten_events = [event for event in self._trace_events if event.name.startswith("aten::")] + flow_dict_new = self._get_flow_time_dict() for event in self._trace_events: if event.stream: min_ts = min(event.start_time, min_ts) max_ts = max(event.end_time, max_ts) if event.stream == self._compute_stream_id and self.__is_sdma_time(event.name): self._result_data.overall_metrics.update_sdma_info(event.dur) + self._result_data.overall_metrics.update_sdma_stream_info(event.dur) continue if not event.is_kernel_cat(): continue @@ -88,6 +82,7 @@ class GPUProfilingParser(BaseProfilingParser): if event.is_nccl_name(): continue self.__add_compute_time(event, aten_events, flow_dict_new) + self.categorize_computing_performance_data(event, flow_dict_new) self._aten_events = None self._result_data.overall_metrics.set_e2e_time(float(max_ts - min_ts)) self.__add_compute_and_overlap_time() @@ -162,7 +157,7 @@ class GPUProfilingParser(BaseProfilingParser): def 
_get_dispatch_func(self): func_set = set() - if self._enable_memory_compare or self._enable_operator_compare: + if self._enable_memory_compare or self._enable_operator_compare or self._enable_profiling_compare: func_set.add(self._picking_torch_op_event) if self._enable_communication_compare: func_set.add(self._picking_kernel_event) @@ -174,6 +169,8 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_flow_event) if self._enable_memory_compare or self._enable_profiling_compare: func_set.add(self._picking_memory_event) + if self._enable_profiling_compare: + func_set.add(self._picking_flow_event) return list(func_set) def _infer_compute_stream_id(self): @@ -187,3 +184,9 @@ class GPUProfilingParser(BaseProfilingParser): raise RuntimeError('[ERROR] The profiling data does not contain kernel running data.') counter = Counter(kernel_stream_ids) return counter.most_common(1)[0][0] + + def _find_bwd_tid(self): + for event in self._trace_events: + if event.is_fwdbwd() and event.is_flow_end(): + self._bwd_tid = event.tid + break diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 70ce44b44e..457a3b6be5 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -36,7 +36,7 @@ class NPUProfilingParser(BaseProfilingParser): def _get_dispatch_func(self): func_list = set() - if self._enable_memory_compare or self._enable_operator_compare: + if self._enable_memory_compare or self._enable_operator_compare or self._enable_profiling_compare: func_list.add(self._picking_torch_op_event) if self._enable_operator_compare or self._args.max_kernel_num: func_list.add(self._picking_kernel_event) @@ -52,6 +52,7 @@ class NPUProfilingParser(BaseProfilingParser): func_list.add(self._picking_overlap_analysis_data) 
func_list.add(self._picking_kernel_event) func_list.add(self._picking_hccl_event) + func_list.add(self._picking_flow_event) return list(func_list) def _update_memory_list(self): @@ -205,6 +206,8 @@ class NPUProfilingParser(BaseProfilingParser): def _filter_meta_id(self): for event in self._trace_events: + if event.is_fwdbwd() and event.is_flow_end(): + self._bwd_tid = event.tid if not event.is_process_meta(): continue if event.is_hccl_process_name(): @@ -244,17 +247,7 @@ class NPUProfilingParser(BaseProfilingParser): self._result_data.overall_metrics.update_lccl_info(event.dur) def __parse_kernel_csv(self): - try: - kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) - except Exception: - print('[WARNING] Npu kernel details csv file is not available.') - return - if not kernel_details or kernel_details[0].is_hide_op_pmu(): - self._result_data.overall_metrics.hide_op_details = True - return - for kernel in kernel_details: - if kernel.is_invalid(): - continue + def __screen_data(kernel: KernelDetailsBean): if kernel.is_flash_attention(): if kernel.is_fa_bwd(): self._result_data.overall_metrics.update_fa_bwd_info(kernel.duration) @@ -265,7 +258,7 @@ class NPUProfilingParser(BaseProfilingParser): self._result_data.overall_metrics.update_conv_bwd_info(kernel.duration) else: self._result_data.overall_metrics.update_conv_fwd_info(kernel.duration) - elif kernel.is_cube(): + elif kernel.is_matmul(): self._result_data.overall_metrics.update_cube_info(kernel.duration) elif kernel.is_sdma(): self._result_data.overall_metrics.update_sdma_info(kernel.duration) @@ -276,6 +269,22 @@ class NPUProfilingParser(BaseProfilingParser): else: self._result_data.overall_metrics.update_cube_info(kernel.duration) + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except Exception: + print('[WARNING] Npu kernel details csv file is not available.') + return + if not kernel_details or 
kernel_details[0].is_hide_op_pmu(): + self._result_data.overall_metrics.hide_op_details = True + return + flow_dict_new = self._get_flow_time_dict() + kernel_details.sort(key=lambda x: x.start_time) + for kernel in kernel_details: + if kernel.is_invalid(): + continue + __screen_data(kernel) + self.categorize_computing_performance_data(kernel, flow_dict_new) + def __parse_mem_csv(self): try: memory_record = FileReader.read_csv_file(self._memory_record_path, MemoryRecordBean) @@ -321,3 +330,4 @@ class NPUProfilingParser(BaseProfilingParser): for stream in compute_stream: dur_list = sdma_dict.get(stream, []) self._result_data.overall_metrics.update_sdma_info(sum(dur_list), len(dur_list)) + self._result_data.overall_metrics.update_sdma_stream_info(sum(dur_list), len(dur_list)) diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py index 1b77b214c8..e200258802 100644 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ b/profiler/compare_tools/compare_backend/utils/constant.py @@ -11,6 +11,7 @@ class Constant(object): GREEN_COLOR = "00FF00" RED_COLOR = "FF0000" BLUE_COLOR = "00BFFF" + LIGHT_BLUE_COLOR = "87CEFA" US_TO_MS = 1000 KB_TO_MB = 1024 INVALID_VALUE = -1 @@ -55,6 +56,7 @@ class Constant(object): PERFORMANCE_TABLE = "Model Profiling Time Distribution" MODULE_TABLE = "ModuleCompare" MODULE_TOP_TABLE = "ModuleCompareStatistic" + OVERALL_METRICS_TABLE = "OverallMetrics" # memory SIZE = "Size(KB)" @@ -74,7 +76,13 @@ class Constant(object): MEMORY_LIST = "memory_list" COMMUNICATION_DICT = "comm_dict" - #compare type + # compare type OVERALL_COMPARE = "overall" BWD_LIST = ["bwd", "backward", "back"] + + CPU_OP_FA_MASK = ("flash_attention", "fusion_attention", "flashattn", "xformers_flash", "efficient_attention") + CPU_OP_CONV = "aten::conv" + CPU_OP_MATMUL_MASK = ("aten::addmm", "aten::bmm", "aten::mm", "aten::matmul") + KERNEL_CUBE_MASK = ("gemm", "conv", "cutlass", "wgrad") + 
KERNEL_TRANS_MASK = ("cast", "transdata", "transpose") diff --git a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py index 306abcdfec..ae808863e7 100644 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ b/profiler/compare_tools/compare_backend/utils/excel_config.py @@ -18,6 +18,8 @@ class CellFormatType: 'valign': 'vcenter', 'bold': True, 'border': True} # 绿色背景,加粗 YELLOW_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.YELLOW_COLOR, 'align': 'left', 'valign': 'vcenter', 'bold': True, 'border': True} # 黄色背景,加粗 + BLUE_NORMAL = {'fg_color': Constant.BLUE_COLOR} # 蓝色背景,主要用于行样式 + LIGHT_BLUE_NORMAL = {'fg_color': Constant.LIGHT_BLUE_COLOR} # 淡蓝色背景,主要用于行样式 class ExcelConfig(object): @@ -65,6 +67,10 @@ class ExcelConfig(object): MODULE_LEVEL = "Module Level" BASE_CALL_STACK = "Base Call Stack" COMPARISON_CALL_STACK = "Comparison Call Stack" + INDEX = "Index" + DURATION = "Duration(ms)" + DURATION_RATIO = "Duration Ratio" + DIFF_DUR_MS = "Diff Duration(ms)" HEADERS = { Constant.OPERATOR_TABLE: [ @@ -176,10 +182,81 @@ class ExcelConfig(object): {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 15}, {"name": BASE_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30}, {"name": COMPARISON_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30} + ], + Constant.OVERALL_METRICS_TABLE: [ + {"name": INDEX, "type": CellFormatType.DEFAULT, "width": 40}, + {"name": DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DURATION_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20}, + {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DURATION_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20}, + {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": DIFF_DUR_MS, "type": CellFormatType.DEFAULT_FLOAT, 
"width": 20}, + {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 10}, + ] } OVERHEAD = {Constant.OPERATOR_TABLE: ["B1:F1", "G1:K1"], Constant.MEMORY_TABLE: ["B1:F1", "G1:K1"], Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], - Constant.MODULE_TABLE: ["E1:H1", "I1:L1"]} + Constant.MODULE_TABLE: ["E1:H1", "I1:L1"], + Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"]} + + # overall metrics index + # computing time + COMPUTING = "Computing Time" + + FA = "\tFlash Attention" + FA_FWD_CUBE = "\t\tFlash Attention (Forward) (Cube)" + FA_FWD_VECTOR = "\t\tFlash Attention (Forward) (Vector)" + FA_BWD_CUBE = "\t\tFlash Attention (Backward) (Cube)" + FA_BWD_VECTOR = "\t\tFlash Attention (Backward) (Vector)" + + CONV = "\tConv" + CONV_FWD_CUBE = "\t\tConv (Forward) (Cube)" + CONV_FWD_VECTOR = "\t\tConv (Forward) (Vector)" + CONV_BWD_CUBE = "\t\tConv (Backward) (Cube)" + CONV_BWD_VECTOR = "\t\tConv (Backward) (Vector)" + + MM = "\tMatmul" + MM_CUBE = "\t\tMatmul (Cube)" + MM_VECTOR = "\t\tMatmul (Vector)" + + PA = "\tPage Attention" + + VECTOR = "\tVector" + VECTOR_TRANS = "\t\tVector (Trans)" + VECTOR_NO_TRANS = "\t\tVector (No Trans)" + + CUBE = "\tCube" + SDMA_TM = "\tSDMA (Tensor Move)" + OTHER = "\tOther" + + # communication time + COMMUNICATION_TIME = "Uncovered Communication Time" + WAIT = "\tWait" + TRANSMIT = "\tTransmit" + + # free time + FREE_TIME = "Free Time" + SDMA = "\tSDMA" + FREE = "\tFree" + + # e2e time + E2E_TIME = "E2E Time" + + ROW_STYLE_MAP = { + COMPUTING: CellFormatType.BLUE_NORMAL, + COMMUNICATION_TIME: CellFormatType.BLUE_NORMAL, + FREE_TIME: CellFormatType.BLUE_NORMAL, + E2E_TIME: CellFormatType.BLUE_NORMAL, + FA: CellFormatType.LIGHT_BLUE_NORMAL, + CONV: CellFormatType.LIGHT_BLUE_NORMAL, + MM: CellFormatType.LIGHT_BLUE_NORMAL, + PA: CellFormatType.LIGHT_BLUE_NORMAL, + VECTOR: 
CellFormatType.LIGHT_BLUE_NORMAL, + CUBE: CellFormatType.LIGHT_BLUE_NORMAL, + SDMA_TM: CellFormatType.LIGHT_BLUE_NORMAL, + OTHER: CellFormatType.LIGHT_BLUE_NORMAL + } diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py index 7a33168da3..dffb7549fc 100644 --- a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py @@ -20,7 +20,10 @@ class WorkSheetCreator: return self._work_sheet = self._work_book.add_worksheet(self._sheet_name) self._write_headers() - self._write_data() + if "row_style" in self._data: + self._write_data_with_row_style() + else: + self._write_data() def _write_headers(self): base_header_format = self._work_book.add_format(CellFormatType.GREEN_BOLD) @@ -43,7 +46,7 @@ class WorkSheetCreator: col_id = self._col_ids[index] self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) - self._field_format[index] = self._work_book.add_format(header.get("type")) + self._field_format[index] = header.get("type") if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): self._diff_ratio_index = index self._row_id += 1 @@ -52,7 +55,27 @@ class WorkSheetCreator: red_ratio_format = self._work_book.add_format(CellFormatType.RED_RATIO) for data in self._data.get("rows"): for index, cell_data in enumerate(data): - cell_format = self._field_format.get(index) + cell_format = self._work_book.add_format(self._field_format.get(index)) + if index == self._diff_ratio_index and cell_data and cell_data > 1: + cell_format = red_ratio_format + cell_data = "INF" if cell_data == float('inf') else cell_data + self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) + self._row_id += 1 + + def _write_data_with_row_style(self): + """ + 带行样式及缩进的sheet + """ + 
red_ratio_format = self._work_book.add_format(CellFormatType.RED_RATIO) + rows = self._data.get("rows") + row_style = self._data.get("row_style") # 行样式 + + for data, row_style in zip(rows, row_style): + for index, cell_data in enumerate(data): + cell_style = {**self._field_format.get(index), **row_style} + if index == 0: # 0 for Index field + cell_style["indent"] = cell_data.count("\t") + cell_format = self._work_book.add_format(cell_style) if index == self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py index 7abf8da647..869ee85570 100644 --- a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py @@ -47,5 +47,5 @@ class TestKernelDetailsBean(unittest.TestCase): self.assertFalse(self.kernel_bean2.is_flash_attention()) def test_is_cube(self): - self.assertTrue(self.kernel_bean2.is_cube()) - self.assertFalse(self.kernel_bean3.is_cube()) + self.assertTrue(self.kernel_bean2.is_matmul()) + self.assertFalse(self.kernel_bean3.is_matmul()) diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py index 0446872150..d7cb3d0588 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py @@ -68,6 +68,7 @@ class TestGpuProfilingParser(unittest.TestCase): patch("compare_backend.profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): res = GPUProfilingParser({}, {}) + res._profiling_type = "GPU" res._trace_events = 
[TraceEventBean(event) for event in self.trace_events] res._result_data = ProfilingResult("GPU") res._compute_stream_id = 3 -- Gitee From 5c41cb651da97f980054c145773b6dfc84ce2137 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Mon, 22 Jul 2024 16:29:50 +0800 Subject: [PATCH 040/106] path check & readme --- debug/accuracy_tools/grad_tool/README.md | 4 ++-- debug/accuracy_tools/grad_tool/grad_ms/utils.py | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/README.md b/debug/accuracy_tools/grad_tool/README.md index a3f683b411..a7929ca818 100644 --- a/debug/accuracy_tools/grad_tool/README.md +++ b/debug/accuracy_tools/grad_tool/README.md @@ -54,7 +54,7 @@ **不同级别的level的导出数据** -- PyTorch不同level数据 +- PyTorch/MindSpore动态图不同level数据 | 级别 | 特征数据表头 | 是否有方向数据 | | ---- | ------------------------------------------------------------ | -------------- | @@ -62,7 +62,7 @@ | L1 | ("param_name", "max", "min", "norm", "shape") | 是 | | L2 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | -- MindSpore不同level数据 +- MindSpore静态图不同level数据 | 级别 | 特征数据表头 | 是否有方向数据 | | ---- | ------------------------------------------------------------ | -------------- | diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py index 41bd54b505..23703f2820 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/utils.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -1,9 +1,9 @@ import os import numpy as np - +import mindspore from grad_tool.common.constant import GradConst -from grad_tool.common.utils import print_warn_log, create_directory, change_mode +from grad_tool.common.utils import print_warn_log, create_directory, change_mode, check_file_or_directory_path level_adp = { "L0": { @@ -23,10 +23,14 @@ level_adp = { def save_grad_direction(param_name, grad, save_path): if not os.path.exists(save_path): create_directory(save_path) + save_filepath = os.path.join(save_path, 
f"{param_name}.npy") + check_file_or_directory_path(save_filepath) + + if grad.dtype == mindspore.bfloat16: + grad = grad.to(mindspore.float32) grad_direction_tensor = grad > 0 grad_direction_ndarray = grad_direction_tensor.numpy() - save_filepath = os.path.join(save_path, f"{param_name}.npy") np.save(save_filepath, grad_direction_ndarray) change_mode(save_filepath, 0o640) -- Gitee From 2b219a956319bedea882d99e04d4ecaeace43d47 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 22 Jul 2024 16:32:30 +0800 Subject: [PATCH 041/106] msacc to msprobe --- .../msprobe/core/common/exceptions.py | 36 +++++++++---------- .../msprobe/core/common_config.py | 22 ++++++------ .../data_processor/pytorch_processor.py | 4 +-- .../pytorch/debugger/precision_debugger.py | 6 ++-- .../msprobe/pytorch/functional/dump_module.py | 6 ++-- .../accuracy_tools/msprobe/pytorch/service.py | 4 +-- .../test/core_ut/test_common_config.py | 22 ++++++------ 7 files changed, 50 insertions(+), 50 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/exceptions.py b/debug/accuracy_tools/msprobe/core/common/exceptions.py index df89699ce8..ea61f8cd58 100644 --- a/debug/accuracy_tools/msprobe/core/common/exceptions.py +++ b/debug/accuracy_tools/msprobe/core/common/exceptions.py @@ -8,13 +8,13 @@ class CodedException(Exception): return self.error_info -class MsaccException(CodedException): +class MsprobeException(CodedException): INVALID_PARAM_ERROR = 0 OVERFLOW_NUMS_ERROR = 1 err_strs = { - INVALID_PARAM_ERROR: "[msacc] 无效参数: ", - OVERFLOW_NUMS_ERROR: "[msacc] 超过预设溢出次数 当前溢出次数:" + INVALID_PARAM_ERROR: "[msprobe] 无效参数: ", + OVERFLOW_NUMS_ERROR: "[msprobe] 超过预设溢出次数 当前溢出次数:" } @@ -27,12 +27,12 @@ class FileCheckException(CodedException): FILE_TOO_LARGE_ERROR = 5 err_strs = { - SOFT_LINK_ERROR: "[msacc] 检测到软链接: ", - FILE_PERMISSION_ERROR: "[msacc] 文件权限错误: ", - INVALID_FILE_ERROR: "[msacc] 无效文件: ", - ILLEGAL_PATH_ERROR: "[msacc] 非法文件路径: ", - ILLEGAL_PARAM_ERROR: "[msacc] 非法打开方式: ", - 
FILE_TOO_LARGE_ERROR: "[msacc] 文件过大: " + SOFT_LINK_ERROR: "[msprobe] 检测到软链接: ", + FILE_PERMISSION_ERROR: "[msprobe] 文件权限错误: ", + INVALID_FILE_ERROR: "[msprobe] 无效文件: ", + ILLEGAL_PATH_ERROR: "[msprobe] 非法文件路径: ", + ILLEGAL_PARAM_ERROR: "[msprobe] 非法打开方式: ", + FILE_TOO_LARGE_ERROR: "[msprobe] 文件过大: " } @@ -40,8 +40,8 @@ class ParseJsonException(CodedException): UnexpectedNameStruct = 0 InvalidDumpJson = 1 err_strs = { - UnexpectedNameStruct: "[msacc] Unexpected name in json: ", - InvalidDumpJson: "[msacc] json格式不正确: ", + UnexpectedNameStruct: "[msprobe] Unexpected name in json: ", + InvalidDumpJson: "[msprobe] json格式不正确: ", } @@ -50,23 +50,23 @@ class ScopeException(CodedException): InvalidScope = 1 ArgConflict = 2 err_strs = { - InvalidApiStr: "[msacc] Invalid api_list: ", - InvalidScope: "[msacc] Invalid scope: ", - ArgConflict: "[msacc] Scope and api_list conflict: ", + InvalidApiStr: "[msprobe] Invalid api_list: ", + InvalidScope: "[msprobe] Invalid scope: ", + ArgConflict: "[msprobe] Scope and api_list conflict: ", } class RepairException(CodedException): InvalidRepairType = 0 err_strs = { - InvalidRepairType: "[msacc] Invalid repair_type: " + InvalidRepairType: "[msprobe] Invalid repair_type: " } class StepException(CodedException): InvalidPostProcess = 0 err_strs = { - InvalidPostProcess: "[msacc] 错误的step后处理配置: ", + InvalidPostProcess: "[msprobe] 错误的step后处理配置: ", } @@ -74,8 +74,8 @@ class FreeBenchmarkException(CodedException): UnsupportedType = 0 InvalidGrad = 1 err_strs = { - UnsupportedType: "[msacc] Free benchmark get unsupported type: ", - InvalidGrad: "[msacc] Free benchmark gradient invalid: ", + UnsupportedType: "[msprobe] Free benchmark get unsupported type: ", + InvalidGrad: "[msprobe] Free benchmark gradient invalid: ", } diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py index b7d446ce8e..ed38eba008 100644 --- a/debug/accuracy_tools/msprobe/core/common_config.py +++ 
b/debug/accuracy_tools/msprobe/core/common_config.py @@ -1,6 +1,6 @@ from msprobe.core.common.const import Const from msprobe.core.common.log import logger -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException class CommonConfig: @@ -19,22 +19,22 @@ class CommonConfig: def _check_config(self): if self.task and self.task not in Const.TASK_LIST: logger.error_log_with_exp( - "task is invalid, it should be one of {}".format(Const.TASK_LIST), MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "task is invalid, it should be one of {}".format(Const.TASK_LIST), MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.rank is not None and not isinstance(self.rank, list): - logger.error_log_with_exp("rank is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("rank is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.step is not None and not isinstance(self.step, list): - logger.error_log_with_exp("step is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("step is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.level and self.level not in Const.LEVEL_LIST: logger.error_log_with_exp( - "level is invalid, it should be one of {}".format(Const.LEVEL_LIST), MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "level is invalid, it should be one of {}".format(Const.LEVEL_LIST), MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.seed is not None and not isinstance(self.seed, int): - logger.error_log_with_exp("seed is invalid, it should be an integer", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("seed is invalid, it should be an integer", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if not isinstance(self.is_deterministic, bool): 
logger.error_log_with_exp( - "is_deterministic is invalid, it should be a boolean", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "is_deterministic is invalid, it should be a boolean", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if not isinstance(self.enable_dataloader, bool): logger.error_log_with_exp( - "enable_dataloader is invalid, it should be a boolean", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "enable_dataloader is invalid, it should be a boolean", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) class BaseConfig: @@ -50,9 +50,9 @@ class BaseConfig: def check_config(self): if self.scope is not None and not isinstance(self.scope, list): - logger.error_log_with_exp("scope is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("scope is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.list is not None and not isinstance(self.list, list): - logger.error_log_with_exp("list is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("list is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.data_mode is not None and not isinstance(self.data_mode, list): - logger.error_log_with_exp("data_mode is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("data_mode is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 95be091b21..f307909a41 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -5,7 +5,7 @@ from typing import List import 
numpy as np import torch -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common.file_check import path_len_exceeds_limit, change_mode from msprobe.core.common.log import logger from msprobe.core.common.const import Const, OverflowConst, FileCheckConst @@ -191,7 +191,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): if self.overflow_nums == -1: return if self.real_overflow_dump_times >= self.overflow_nums: - raise MsaccException(MsaccException.OVERFLOW_NUMS_ERROR, str(self.real_overflow_dump_times)) + raise MsprobeException(MsprobeException.OVERFLOW_NUMS_ERROR, str(self.real_overflow_dump_times)) def check_overflow_npu(self): if self.overflow_debug_mode_enalbe(): diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py index 1fce5a3035..e28e588c5f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -4,7 +4,7 @@ from msprobe.pytorch.debugger.debugger_config import DebuggerConfig from msprobe.pytorch.service import Service from msprobe.pytorch.common.log import logger from msprobe.pytorch.pt_config import parse_json_config -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException class PrecisionDebugger: @@ -50,8 +50,8 @@ class PrecisionDebugger: def check_model_valid(model): if not model or isinstance(model, torch.nn.Module): return model - raise MsaccException( - MsaccException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" + raise MsprobeException( + MsprobeException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" ) @classmethod diff --git a/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py index 7e72aab8ae..efb95c3369 
100644 --- a/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py @@ -3,7 +3,7 @@ from msprobe.pytorch.common.log import logger from msprobe.core.common.const import Const from msprobe.pytorch.hook_module.api_registry import api_register from msprobe.pytorch.debugger.precision_debugger import PrecisionDebugger -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException from msprobe.core.data_dump.scope import BaseScope module_count = {} @@ -12,10 +12,10 @@ module_count = {} def module_dump(module, dump_name): if not isinstance(module, nn.Module): logger.error("The parameter:module in module_dump is not a Module subclass.") - raise MsaccException(MsaccException.INVALID_PARAM_ERROR) + raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) if not isinstance(dump_name, str): logger.error("The parameter:dump_name in module_dump is not a str type.") - raise MsaccException(MsaccException.INVALID_PARAM_ERROR) + raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) api_register.api_originality() if dump_name not in module_count: module_count[dump_name] = 0 diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index e5da444840..3a7b636230 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -5,7 +5,7 @@ from pathlib import Path from msprobe.pytorch.common.log import logger from msprobe.core.common.file_check import FileChecker, check_path_before_create from msprobe.core.common.const import Const, FileCheckConst -from msprobe.core.common.exceptions import DistributedNotInitializedError, MsaccException +from msprobe.core.common.exceptions import DistributedNotInitializedError, MsprobeException from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import 
BaseScope from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs @@ -138,7 +138,7 @@ class Service: logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level in ["L0", "mix"]: if self.model is None: - logger.error_log_with_exp("The model is None.", MsaccException.INVALID_PARAM_ERROR) + logger.error_log_with_exp("The model is None.", MsprobeException.INVALID_PARAM_ERROR) logger.info_on_rank_0("The init dump mode is enabled, and the module dump function will not be available") for name, module in self.model.named_modules(): if module == self.model: diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py index 15957af217..06c7378ed3 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py @@ -19,7 +19,7 @@ from unittest.mock import patch from msprobe.core.common.log import logger from msprobe.core.common.const import Const -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common_config import CommonConfig, BaseConfig @@ -44,7 +44,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "task is invalid, it should be one of {}".format(Const.TASK_LIST)) self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": 0}) @@ -52,7 +52,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "rank is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - 
MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -61,7 +61,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "step is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -71,7 +71,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "level is invalid, it should be one of {}".format(Const.LEVEL_LIST)) self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -82,7 +82,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "seed is invalid, it should be an integer") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -94,7 +94,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "is_deterministic is invalid, it should be a boolean") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -107,7 +107,7 @@ class 
TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "enable_dataloader is invalid, it should be a boolean") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) @patch.object(logger, "error_log_with_exp") def test_base_config(self, mock_error_log_with_exp): @@ -130,7 +130,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "scope is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"scope": ["Tensor_Add"]}) json_config.update({"list": "Tensor_Add"}) @@ -139,7 +139,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "list is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"scope": ["Tensor_Add"]}) json_config.update({"list": ["Tensor_Add"]}) @@ -149,4 +149,4 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "data_mode is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) -- Gitee From f283c890e042fdc694bc83efa3c4b2bf2b3714d1 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Mon, 22 Jul 2024 16:43:10 +0800 Subject: [PATCH 042/106] cleancode --- debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py index 316033fdc6..11c2fc8205 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py @@ -67,7 +67,7 @@ class CsvDistribution(CsvItem): def generate_csv_header(csv_input): bounds = csv_input.bounds intervals = [] - for i in range(len(bounds)): + for i, _ in enumerate(bounds): if i == 0: intervals.append(f"(-inf, {bounds[i]}]") else: -- Gitee From 43c3df4bb77e376dcd4e4ea2ad2ea101b99b139a Mon Sep 17 00:00:00 2001 From: l30036321 Date: Thu, 18 Jul 2024 17:16:07 +0800 Subject: [PATCH 043/106] mindspore pynative dump --- .../core/data_dump/data_processor/base.py | 4 +- .../core/data_dump/data_processor/factory.py | 6 +- .../data_processor/mindspore_processor.py | 108 ++ .../data_processor/pytorch_processor.py | 7 +- .../atat/mindspore/common/log.py | 34 + .../atat/mindspore/common/utils.py | 9 + .../mindspore/debugger/debugger_config.py | 8 +- .../mindspore/debugger/precision_debugger.py | 23 +- .../mindspore/dump/hook_cell/api_registry.py | 93 ++ .../mindspore/dump/hook_cell/hook_cell.py | 55 ++ .../dump/hook_cell/support_wrap_ops.yaml | 925 ++++++++++++++++++ .../dump/hook_cell/wrap_functional.py | 90 ++ .../mindspore/dump/hook_cell/wrap_tensor.py | 65 ++ .../accuracy_tools/atat/mindspore/service.py | 132 +++ debug/accuracy_tools/atat/pytorch/service.py | 2 +- 15 files changed, 1547 insertions(+), 14 deletions(-) create mode 100644 debug/accuracy_tools/atat/core/data_dump/data_processor/mindspore_processor.py create mode 100644 debug/accuracy_tools/atat/mindspore/common/log.py create mode 100644 debug/accuracy_tools/atat/mindspore/common/utils.py create mode 100644 debug/accuracy_tools/atat/mindspore/dump/hook_cell/api_registry.py create mode 100644 debug/accuracy_tools/atat/mindspore/dump/hook_cell/hook_cell.py create mode 100644 
debug/accuracy_tools/atat/mindspore/dump/hook_cell/support_wrap_ops.yaml create mode 100644 debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_functional.py create mode 100644 debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_tensor.py create mode 100644 debug/accuracy_tools/atat/mindspore/service.py diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py b/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py index 1ee3314b36..a85afb60f9 100644 --- a/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/atat/core/data_dump/data_processor/base.py @@ -141,9 +141,11 @@ class BaseDataProcessor: resutl_dict[k] = cls.recursive_apply_transform(arg, transform) cls._recursive_key_stack.pop() return resutl_dict - else: + elif args is not None: logger.warning(f"Data type {type(args)} is not supported.") return None + else: + return None def if_return_forward_new_output(self): return self._return_forward_new_output diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/factory.py b/debug/accuracy_tools/atat/core/data_dump/data_processor/factory.py index 00f2f72e7a..22529f5606 100644 --- a/debug/accuracy_tools/atat/core/data_dump/data_processor/factory.py +++ b/debug/accuracy_tools/atat/core/data_dump/data_processor/factory.py @@ -51,11 +51,7 @@ class DataProcessorFactory: elif framework == Const.MS_FRAMEWORK: from .mindspore_processor import ( StatisticsDataProcessor as MindsporeStatisticsDataProcessor, - TensorDataProcessor as MindsporeTensorDataProcessor, - OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor, - FreeBenchmarkDataProcessor as MindsporeFreeBenchmarkDataProcessor + TensorDataProcessor as MindsporeTensorDataProcessor ) cls.register_processor(Const.MS_FRAMEWORK, Const.STATISTICS, MindsporeStatisticsDataProcessor) cls.register_processor(Const.MS_FRAMEWORK, Const.TENSOR, MindsporeTensorDataProcessor) - cls.register_processor(Const.MS_FRAMEWORK, 
Const.OVERFLOW_CHECK, MindsporeOverflowCheckDataProcessor) - cls.register_processor(Const.MS_FRAMEWORK, Const.FREE_BENCHMARK, MindsporeFreeBenchmarkDataProcessor) \ No newline at end of file diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/atat/core/data_dump/data_processor/mindspore_processor.py new file mode 100644 index 0000000000..02d95d1d4b --- /dev/null +++ b/debug/accuracy_tools/atat/core/data_dump/data_processor/mindspore_processor.py @@ -0,0 +1,108 @@ +import os +import zlib +import mindspore as ms +import numpy as np + +from atat.core.common.utils import Const +from atat.core.data_dump.data_processor.base import BaseDataProcessor, TensorStatInfo +from atat.core.common.log import logger +from atat.core.common.file_check import path_len_exceeds_limit, change_mode, FileCheckConst +from atat.mindspore.dump.hook_cell.wrap_functional import ops_func, mint_ops_func + + +class MindsporeDataProcessor(BaseDataProcessor): + mindspore_special_type = tuple([ms.Tensor]) + + def __init__(self, config, data_writer): + super().__init__(config, data_writer) + self.mindspore_object_key = { + "dtype": self.analyze_dtype_in_kwargs + } + + @staticmethod + def get_md5_for_tensor(x): + if x.dtype == ms.bfloat16: + x = x.to(ms.float32) + tensor_bytes = x.asnumpy().tobytes() + crc32_hash = zlib.crc32(tensor_bytes) + return f"{crc32_hash:08x}" + + @staticmethod + def analyze_dtype_in_kwargs(element): + single_arg = {} + single_arg.update({"type": "mindspore.dtype"}) + single_arg.update({"value": str(element)}) + return single_arg + + @staticmethod + def get_stat_info(data): + tensor_stat = TensorStatInfo() + if data.numel() == 0: + return tensor_stat + elif data.dtype == ms.bool_: + tensor_stat.max = mint_ops_func["max"](data).item() + tensor_stat.min = mint_ops_func["min"](data).item() + elif not data.shape: + tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() + else: + 
tensor_stat.max = mint_ops_func["max"](data).item() + tensor_stat.min = mint_ops_func["min"](data).item() + tensor_stat.mean = mint_ops_func["mean"](data).item() + tensor_stat.norm = ops_func["norm"](data).item() + return tensor_stat + + @classmethod + def get_special_types(cls): + return super().get_special_types() + cls.mindspore_special_type + + def _analyze_tensor(self, tensor, suffix): + tensor_stat = self.get_stat_info(tensor) + tensor_json = {} + tensor_json.update({'type': 'mindspore.Tensor'}) + tensor_json.update({'dtype': str(tensor.dtype)}) + tensor_json.update({"shape": tensor.shape}) + tensor_json.update({"Max": tensor_stat.max}) + tensor_json.update({"Min": tensor_stat.min}) + tensor_json.update({"Mean": tensor_stat.mean}) + tensor_json.update({"Norm": tensor_stat.norm}) + if self.config.summary_mode == "md5": + tensor_md5 = self.get_md5_for_tensor(tensor) + tensor_json.update({"md5": tensor_md5}) + return tensor_json + + def analyze_single_element(self, element, suffix_stack): + if suffix_stack and suffix_stack[-1] in self.mindspore_object_key: + return self.mindspore_object_key[suffix_stack[-1]](element) + + converted_numpy, numpy_type = self._convert_numpy_to_builtin(element) + if converted_numpy is not element: + return self._analyze_numpy(converted_numpy, numpy_type) + if isinstance(element, ms.Tensor): + return self._analyze_tensor(element, Const.SEP.join(suffix_stack)) + + if isinstance(element, (bool, int, float, str, slice)): + return self._analyze_builtin(element) + return None + + def analyze_element(self, element): + return self.recursive_apply_transform(element, self.analyze_single_element) + + +class StatisticsDataProcessor(MindsporeDataProcessor): + pass + + +class TensorDataProcessor(MindsporeDataProcessor): + def _analyze_tensor(self, tensor, suffix): + dump_data_name, file_path = self.get_save_file_path(suffix) + single_arg = super()._analyze_tensor(tensor, suffix) + single_arg.update({"data_name": dump_data_name}) + if not 
path_len_exceeds_limit(file_path): + if tensor.dtype == ms.bfloat16: + tensor = tensor.to(ms.float32) + saved_tensor = tensor.asnumpy() + np.save(file_path, saved_tensor) + change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) + else: + logger.warning(f'The file path {file_path} length exceeds limit.') + return single_arg diff --git a/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py index 9f96635e9a..66e16b580f 100644 --- a/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/atat/core/data_dump/data_processor/pytorch_processor.py @@ -77,6 +77,10 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item() return tensor_stat + @staticmethod + def _analyze_torch_size(arg): + return {"type": "torch.Size", "value": list(arg)} + @classmethod def get_special_types(cls): return super().get_special_types() + cls.pytorch_special_type @@ -98,9 +102,6 @@ class PytorchDataProcessor(BaseDataProcessor): def analyze_element(self, element): return self.recursive_apply_transform(element, self.analyze_single_element) - def _analyze_torch_size(arg): - return {"type": "torch.Size", "value": list(arg)} - def _analyze_tensor(self, tensor, suffix): tensor_stat = self.get_stat_info(tensor) tensor_json = {} diff --git a/debug/accuracy_tools/atat/mindspore/common/log.py b/debug/accuracy_tools/atat/mindspore/common/log.py new file mode 100644 index 0000000000..0bcd1e5b84 --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/common/log.py @@ -0,0 +1,34 @@ +import os +import time +import sys + +from atat.mindspore.common.utils import get_rank_if_initialized +from atat.core.common.log import BaseLogger +from atat.core.common.exceptions import DistributedNotInitializedError + + +class MindsporeLogger(BaseLogger): + def __init__(self): + super().__init__() + + def 
get_rank(self): + try: + current_rank = get_rank_if_initialized() + except DistributedNotInitializedError: + current_rank = None + + return current_rank + + def _print_log(self, level, msg, end='\n'): + current_rank = self.get_rank() + current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + pid = os.getpid() + if current_rank is not None: + full_msg = f"{current_time} ({pid}) [rank {current_rank}] [{level}] {msg}" + else: + full_msg = f"{current_time} ({pid}) [{level}] {msg}" + print(full_msg, end=end) + sys.stdout.flush() + + +logger = MindsporeLogger() \ No newline at end of file diff --git a/debug/accuracy_tools/atat/mindspore/common/utils.py b/debug/accuracy_tools/atat/mindspore/common/utils.py new file mode 100644 index 0000000000..c326a2abf8 --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/common/utils.py @@ -0,0 +1,9 @@ +import mindspore +from atat.core.common.exceptions import DistributedNotInitializedError + + +def get_rank_if_initialized(): + if mindspore.communication.GlobalComm.INITED: + return mindspore.communication.get_rank() + else: + raise DistributedNotInitializedError("mindspore distributed environment is not initialized") diff --git a/debug/accuracy_tools/atat/mindspore/debugger/debugger_config.py b/debug/accuracy_tools/atat/mindspore/debugger/debugger_config.py index 56a4b9bf75..fed9d0a1cf 100644 --- a/debug/accuracy_tools/atat/mindspore/debugger/debugger_config.py +++ b/debug/accuracy_tools/atat/mindspore/debugger/debugger_config.py @@ -1,4 +1,5 @@ import os +from atat.core.common.utils import Const class DebuggerConfig: @@ -16,17 +17,20 @@ class DebuggerConfig: if not common_config.level: common_config.level = "L1" self.level = DebuggerConfig.convert_map[common_config.level] + self.level_ori = common_config.level self.list = [] if not task_config.list else task_config.list + self.scope =[] if not task_config.scope else task_config.scope self.data_mode = [] if not task_config.data_mode else task_config.data_mode 
self.file_format = task_config.file_format self.check_mode = task_config.check_mode - + self.framework = Const.MS_FRAMEWORK + self.summary_mode = task_config.summary_mode self.check() def check(self): if not self.dump_path: raise Exception("Dump path is empty.") - if not os.path.isabs(self.dump_path): + if self.level_ori != "L1" and not os.path.isabs(self.dump_path): raise Exception("Dump path must be absolute path.") if not self.task: self.task = "statistics" diff --git a/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py index 0099074762..31b6f4f945 100644 --- a/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/atat/mindspore/debugger/precision_debugger.py @@ -1,7 +1,9 @@ import os +import mindspore as ms from atat.mindspore.ms_config import parse_json_config from atat.mindspore.debugger.debugger_config import DebuggerConfig from atat.mindspore.task_handler_factory import TaskHandlerFactory +from atat.mindspore.service import Service class PrecisionDebugger: @@ -22,11 +24,28 @@ class PrecisionDebugger: common_config, task_config = parse_json_config(config_path) self.config = DebuggerConfig(common_config, task_config) self.initialized = True + self.service = Service(self.config) @classmethod def start(cls, target=None): instance = cls._instance if not instance: raise Exception("No instance of PrecisionDebugger found.") - handler = TaskHandlerFactory.create(instance.config) - handler.handle() + if ms.get_context("mode") == 1 and instance.config.level_ori == "L1": + instance.service.start(target) + else: + handler = TaskHandlerFactory.create(instance.config) + handler.handle() + + @classmethod + def stop(cls): + instance = cls._instance + if not instance: + raise Exception("PrecisionDebugger instance is not created.") + instance.service.stop() + + @classmethod + def step(cls): + if not cls._instance: + raise Exception("PrecisionDebugger 
instance is not created.") + cls._instance.service.step() \ No newline at end of file diff --git a/debug/accuracy_tools/atat/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/api_registry.py new file mode 100644 index 0000000000..71c51d60ff --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/api_registry.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +import mindspore as ms +from atat.mindspore.dump.hook_cell import wrap_tensor, wrap_functional +from atat.mindspore.dump.hook_cell.wrap_functional import get_functional_ops +from atat.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops +from atat.core.common.utils import Const + + +class ApiRegistry: + def __init__(self): + self.tensor_ori_attr = {} + self.functional_ori_attr = {} + self.mint_ops_ori_attr = {} + self.mint_func_ops_ori_attr = {} + + self.tensor_hook_attr = {} + self.functional_hook_attr = {} + self.mint_ops_hook_attr = {} + self.mint_func_ops_hook_attr = {} + + @staticmethod + def store_ori_attr(ori_api_group, api_list, api_ori_attr): + for api in api_list: + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(ori_api_group, sub_module_name) + api_ori_attr[api] = getattr(sub_module, sub_op) + else: + api_ori_attr[api] = getattr(ori_api_group, api) + + @staticmethod + def set_api_attr(api_group, attr_dict): + for api, api_attr in attr_dict.items(): + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(api_group, sub_module_name, None) + if sub_module is not None: + setattr(sub_module, sub_op, api_attr) + else: + setattr(api_group, api, api_attr) + + def api_modularity(self): + self.set_api_attr(ms.Tensor, self.tensor_hook_attr) + self.set_api_attr(ms.ops, self.functional_hook_attr) + self.set_api_attr(ms.mint, self.mint_ops_hook_attr) + self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_hook_attr) + + def api_originality(self): + self.set_api_attr(ms.Tensor, self.tensor_ori_attr) + self.set_api_attr(ms.ops, self.functional_ori_attr) + self.set_api_attr(ms.mint, self.mint_ops_ori_attr) + self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_ori_attr) + + def initialize_hook(self, hook): + self.store_ori_attr(ms.Tensor, get_tensor_ops(), self.tensor_ori_attr) + wrap_tensor.wrap_tensor_ops_and_bind(hook) + for attr_name in 
dir(wrap_tensor.HOOKTensor): + if attr_name.startswith("wrap_"): + self.tensor_hook_attr[attr_name[5:]] = getattr(wrap_tensor.HOOKTensor, attr_name) + + functional_ops, mint_ops, mint_func_ops = get_functional_ops() + self.store_ori_attr(ms.ops, functional_ops, self.functional_ori_attr) + self.store_ori_attr(ms.mint, mint_ops, self.mint_ops_ori_attr) + self.store_ori_attr(ms.mint.nn.functional, mint_func_ops, self.mint_func_ops_ori_attr) + wrap_functional.setup_hooks(hook) + for attr_name in dir(wrap_functional.HOOKFunctionalOP): + if attr_name.startswith("wrap_"): + self.functional_hook_attr[attr_name[5:]] = getattr(wrap_functional.HOOKFunctionalOP, attr_name) + for attr_name in dir(wrap_functional.HOOKMintOP): + if attr_name.startswith("wrap_"): + self.mint_ops_hook_attr[attr_name[5:]] = getattr(wrap_functional.HOOKMintOP, attr_name) + for attr_name in dir(wrap_functional.HOOKMintNNFunctionalOP): + if attr_name.startswith("wrap_"): + self.mint_func_ops_hook_attr[attr_name[5:]] = getattr(wrap_functional.HOOKMintNNFunctionalOP, attr_name) + + +api_register = ApiRegistry() diff --git a/debug/accuracy_tools/atat/mindspore/dump/hook_cell/hook_cell.py b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/hook_cell.py new file mode 100644 index 0000000000..3d72b352f4 --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/hook_cell.py @@ -0,0 +1,55 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from collections import defaultdict + +from mindspore import nn + +cell_count = defaultdict(int) +g_stop_hook = False + + +class HOOKCell(nn.Cell): + + def __init__(self, build_hook) -> None: + super(HOOKCell, self).__init__() + self.changed_status = False + self.input_kwargs = {} + self.prefix = "" + global g_stop_hook + if not g_stop_hook: + g_stop_hook = True + self.changed_status = True + if hasattr(self, "prefix_op_name_"): + self.prefix = self.prefix_op_name_ + + cell_count[self.prefix] += 1 + self.prefix = self.prefix + str(cell_count[self.prefix] - 1) + '.' + forward_hook, backward_hook = build_hook(self.prefix) + self.register_forward_hook(forward_hook) + self.register_backward_hook(backward_hook) + + # 重载call,加全局标志。 + def __call__(self, *args, **kwargs): + try: + self.input_kwargs = kwargs + out = super(HOOKCell, self).__call__(*args, **kwargs) + except Exception as e: + raise e + finally: + if self.changed_status: + self.changed_status = False + global g_stop_hook + g_stop_hook = False + return out diff --git a/debug/accuracy_tools/atat/mindspore/dump/hook_cell/support_wrap_ops.yaml b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/support_wrap_ops.yaml new file mode 100644 index 0000000000..e4cc5cf322 --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/support_wrap_ops.yaml @@ -0,0 +1,925 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# List of ops that register hooks + + +ops: + - adaptive_avg_pool1d + - adaptive_avg_pool2d + - adaptive_avg_pool3d + - adaptive_max_pool1d + - adaptive_max_pool2d + - avg_pool1d + - avg_pool2d + - avg_pool3d + - batch_norm + - bias_add + - ctc_greedy_decoder + - conv1d + - conv2d + - conv3d + - deformable_conv2d + - dense + - dropout + - dropout1d + - dropout2d + - dropout3d + - flatten + - fold + - fractional_max_pool3d + - lp_pool1d + - lp_pool2d + - lrn + - max_pool2d + - max_pool3d + - max_unpool1d + - max_unpool2d + - max_unpool3d + - unfold + - binary_cross_entropy + - binary_cross_entropy_with_logits + - cosine_embedding_loss + - cross_entropy + - ctc_loss + - gaussian_nll_loss + - hinge_embedding_loss + - huber_loss + - kl_div + - l1_loss + - margin_ranking_loss + - mse_loss + - multi_margin_loss + - multilabel_margin_loss + - multilabel_soft_margin_loss + - nll_loss + - smooth_l1_loss + - triplet_margin_loss + - elu + - fast_gelu + - gelu + - glu + - gumbel_softmax + - hardshrink + - hardsigmoid + - hardswish + - hardtanh + - leaky_relu + - log_softmax + - logsigmoid + - mish + - prelu + - relu + - relu6 + - celu + - rrelu + - selu + - sigmoid + - silu + - softmax + - softmin + - softshrink + - softsign + - tanh + - threshold + - cdist + - dist + - pdist + - choice_with_mask + - random_categorical + - log_uniform_candidate_sampler + - uniform_candidate_sampler + - affine_grid + - bounding_box_decode + - bounding_box_encode + - col2im + - check_valid + - crop_and_resize + - grid_sample + - interpolate + - iou + - pad + - padding + - pixel_shuffle + - pixel_unshuffle + - upsample + - abs + - absolute + - accumulate_n + - acos + - arccos + - acosh + - add + - addcdiv + - addcmul + - addmv + - addn + - angle + - arccosh + - arcsin + - arcsinh + - arctan + - arctanh + - 
arctan2 + - asin + - asinh + - atan + - atan2 + - atanh + - atleast_1d + - atleast_2d + - atleast_3d + - bessel_i0 + - bessel_i0e + - bessel_i1 + - bessel_i1e + - bessel_j0 + - bessel_j1 + - bessel_k0 + - bessel_k0e + - bessel_k1 + - bessel_k1e + - bessel_y0 + - bessel_y1 + - bitwise_and + - bitwise_left_shift + - bitwise_or + - bitwise_right_shift + - bitwise_xor + - ceil + - clamp + - clip + - combinations + - copysign + - cos + - cosh + - cosine_similarity + - cov + - diag_embed + - diff + - deg2rad + - digamma + - div + - divide + - erf + - erfc + - erfinv + - exp + - exp2 + - expm1 + - floor + - floor_div + - floor_mod + - float_power + - fmod + - frac + - gcd + - hypot + - igamma + - igammac + - imag + - i0 + - inv + - invert + - lcm + - ldexp + - lerp + - log + - log2 + - log10 + - log1p + - logaddexp + - logaddexp2 + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - mul + - multiply + - mvlgamma + - neg + - negative + - nextafter + - polar + - polygamma + - positive + - pow + - rad2deg + - ravel + - real + - reciprocal + - remainder + - rot90 + - round + - rsqrt + - sgn + - sign + - signbit + - sin + - sinc + - sinh + - sqrt + - square + - sub + - subtract + - t + - tan + - tanhshrink + - trapz + - tril_indices + - triu_indices + - true_divide + - trunc + - truncate_div + - truncate_mod + - xdivy + - xlogy + - zeta + - all + - amax + - amin + - aminmax + - any + - argmax + - argmin + - cummax + - cummin + - cumprod + - cumsum + - fmax + - histc + - logsumexp + - max + - mean + - median + - min + - norm + - prod + - std + - std_mean + - var + - var_mean + - argsort + - approximate_equal + - equal + - ge + - greater + - greater_equal + - gt + - intopk + - isclose + - isfinite + - isinf + - isnan + - isneginf + - isposinf + - isreal + - is_complex + - le + - less + - less_equal + - lt + - maximum + - minimum + - msort + - ne + - not_equal + - searchsorted + - topk + - bmm + - addbmm + - addmm + - baddbmm + - addr + - adjoint + - 
cholesky + - cholesky_solve + - batch_dot + - dot + - eig + - inner + - inverse + - geqrf + - ger + - kron + - lu_solve + - lu_unpack + - matmul + - matrix_solve + - matrix_band_part + - matrix_diag + - matrix_diag_part + - matrix_set_diag + - mm + - mv + - outer + - orgqr + - ormqr + - pinv + - svd + - tensor_dot + - logdet + - slogdet + - qr + - trace + - bartlett_window + - blackman_window + - hamming_window + - hann_window + - kaiser_window + - eye + - fill + - full + - full_like + - linspace + - logspace + - one_hot + - arange + - range + - heaviside + - bernoulli + - gamma + - laplace + - multinomial + - multinomial_with_replacement + - rand + - rand_like + - randint + - randint_like + - randn + - randn_like + - random_gamma + - random_poisson + - randperm + - standard_laplace + - standard_normal + - uniform + - argwhere + - batch_to_space_nd + - bincount + - block_diag + - broadcast_to + - cat + - channel_shuffle + - chunk + - column_stack + - concat + - conj + - count_nonzero + - deepcopy + - diag + - diagflat + - diagonal + - dyn_shape + - dsplit + - dstack + - einsum + - expand + - expand_dims + - flip + - fliplr + - flipud + - gather_d + - gather_elements + - gather_nd + - hsplit + - hstack + - index_add + - index_fill + - index_select + - inplace_add + - inplace_index_add + - inplace_sub + - inplace_update + - masked_fill + - masked_select + - meshgrid + - moveaxis + - movedim + - narrow + - nan_to_num + - nansum + - normal + - nonzero + - population_count + - rank + - repeat_elements + - repeat_interleave + - reshape + - reverse + - reverse_sequence + - roll + - scatter + - scatter_nd + - select + - sequence_mask + - shuffle + - size + - slice + - sort + - space_to_batch_nd + - sparse_segment_mean + - split + - squeeze + - stack + - strided_slice + - sum + - swapaxes + - swapdims + - tensor_scatter_add + - tensor_scatter_div + - tensor_scatter_max + - tensor_scatter_min + - tensor_scatter_mul + - tensor_scatter_sub + - tensor_scatter_elements + - 
tensor_split + - tile + - tril + - triu + - transpose + - unbind + - unique + - unique_consecutive + - unique_with_pad + - unsorted_segment_max + - unsorted_segment_min + - unsorted_segment_prod + - unsorted_segment_sum + - unsqueeze + - unstack + - view_as_real + - vsplit + - vstack + - where + - cross + - renorm + - is_tensor + - scalar_cast + - scalar_to_tensor + - tuple_to_array + - clip_by_global_norm + - clip_by_value + - assign + - assign_add + - assign_sub + - scatter_add + - scatter_div + - scatter_max + - scatter_min + - scatter_mul + - scatter_nd_add + - scatter_nd_div + - scatter_nd_max + - scatter_nd_min + - scatter_nd_mul + - scatter_nd_sub + - scatter_update + - derivative + - jet + +tensor: + - __abs__ + - __add__ + - __and__ + - __bool__ + - __eq__ + - __ge__ + - __gt__ + - __iadd__ + - __ifloordiv__ + - __imatmul__ + - __imod__ + - __imul__ + - __isub__ + - __le__ + - __lt__ + - __matmul__ + - __mod__ + - __mul__ + - __ne__ + - __neg__ + - __or__ + - __pow__ + - __radd__ + - __rmatmul__ + - __rmod__ + - __rmul__ + - __rpow__ + - __rsub__ + - __sub__ + - __truediv__ + - __xor__ + - abs + - absolute + - acos + - acosh + - add + - addbmm + - addcdiv + - addcmul + - addmm + - addmv + - addr + - all + - amax + - amin + - any + - arccos + - arccosh + - argmax + - angle + - arcsin + - arcsinh + - arctan + - arctanh + - argmin + - argsort + - asin + - asinh + - atan + - atan2 + - atanh + - baddbmm + - bernoulli + - bincount + - bitwise_and + - bitwise_or + - bitwise_xor + - bmm + - bool + - broadcast_to + - ceil + - cholesky_solve + - cholesky + - clamp + - clip + - conj + - copysign + - cos + - cosh + - cross + - cummax + - cummin + - cumprod + - cumsum + - deg2rad + - diag + - diagflat + - diff + - digamma + - div + - divide + - equal + - erf + - erfc + - erfinv + - exp + - expand_as + - expm1 + - flip + - fliplr + - flipud + - float_power + - floor + - fmod + - frac + - gather_elements + - ge + - geqrf + - ger + - greater + - greater_equal + - gt + - 
half + - hardshrink + - heaviside + - histc + - hypot + - i0 + - igamma + - igammac + - imag + - index_add + - index_fill + - index_put + - index_select + - inner + - int + - inverse + - isclose + - isfinite + - isinf + - isnan + - is_complex + - is_signed + - isneginf + - isposinf + - isreal + - lcm + - ldexp + - le + - lerp + - less + - less_equal + - log + - log10 + - log1p + - log2 + - logaddexp + - logaddexp2 + - logdet + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - logsumexp + - long + - lt + - masked_fill + - masked_scatter + - masked_select + - matmul + - max + - maximum + - mean + - median + - min + - minimum + - moveaxis + - movedim + - msort + - multinomial + - multiply + - mvlgamma + - nan_to_num + - nansum + - narrow + - ne + - neg + - negative + - nelement + - new_ones + - new_zeros + - nextafter + - norm + - nonzero + - not_equal + - ormqr + - permute + - pow + - prod + - qr + - ravel + - real + - reciprocal + - remainder + - renorm + - rad2deg + - tile + - repeat_interleave + - reshape + - reshape + - round + - rot90 + - rsqrt + - sum_to_size + - scatter + - sgn + - short + - sigmoid + - sign + - signbit + - sin + - sinc + - sinh + - slogdet + - sort + - split + - sqrt + - square + - squeeze + - std + - subtract + - subtract + - svd + - swapaxes + - swapdims + - t + - take + - tan + - tanh + - trace + - swapaxes + - tile + - to + - topk + - tril + - tensor_split + - transpose + - true_divide + - trunc + - unbind + - unique_consecutive + - unsqueeze + - var + - view + - where + - xlogy + - from_numpy + - std + - take + - var + - all + - any + - copy + - diagonal + - flatten + - resize + - sum + +mint.ops: + - abs + - absolute_import + - add + - add_ex + - all + - any + - any_ex + - arange + - argmax + - avg_pool2d + - baddbmm + - baddbmm_ex + - batch_norm + - binary_cross_entropy_with_logits + - bitwise_and + - bitwise_or + - bitwise_xor + - bmm + - broadcast_to + - cat + - cat_ex + - ceil + - chunk + - clamp + - conv2d 
+ - conv_transpose2d + - cos + - cross + - cummax + - cummin + - cumsum + - div + - divide + - dropout + - embedding + - eq + - erf + - erfinv + - exp + - flatten + - flip + - flip_ex + - fold + - full + - functional + - gather + - gelu + - greater + - grid_sample + - group_norm + - gt + - index_select + - interpolate + - isclose + - isfinite + - layer_norm + - le + - leaky_relu + - less + - less_equal + - linear + - linspace + - log + - logical_and + - logical_not + - logical_or + - lt + - masked_select + - matmul + - max + - max_pool2d + - maximum + - mean + - mean_ex + - min + - minimum + - mul + - ne + - neg + - negative + - nn + - nonzero + - normal + - one_hot + - ones + - ones_ex + - ones_like + - pad + - permute + - permute_ex + - pow + - prod + - reciprocal + - relu + - remainder + - repeat_interleave + - rsqrt + - scatter + - scatter_add + - searchsorted + - sigmoid + - silu + - sin + - softmax + - softplus + - sort + - split + - sqrt + - sqrt_ex + - square + - stack + - sub + - sub_ex + - sum + - tanh + - tile + - topk + - tril + - triu + - unfold + - unique + - where + - xlogy + - zeros + - zeros_ex + - zeros_like + +mint.nn: + - Dropout + - Embedding + - Fold + - LayerNorm + - Linear + - MaxPool2d + - Unfold + - Upsample + +mint.nn.functional: + - absolute_import + - avg_pool2d + - batch_norm + - batch_norm_ex + - bce_with_logits + - binary_cross_entropy_with_logits + - conv_transpose2d + - dense + - dropout + - embedding + - fold + - gelu + - grid_sample + - group_norm + - interpolate + - layer_norm + - leaky_relu + - linear + - max_pool2d + - max_pool2d_ex + - normal + - one_hot + - one_hot_ext + - pad + - relu + - sigmoid + - silu + - softmax + - softmax_ex + - softplus + - tanh + - unfold diff --git a/debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_functional.py b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_functional.py new file mode 100644 index 0000000000..4eef7deef5 --- /dev/null +++ 
b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_functional.py @@ -0,0 +1,90 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import yaml +import mindspore as ms +from atat.mindspore.dump.hook_cell.hook_cell import HOOKCell +from atat.core.common.utils import Const +from atat.core.common.file_check import FileOpen + + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") + +ops_func = {f: getattr(ms.ops, f) for f in dir(ms.ops)} +mint_ops_func = {f: getattr(ms.mint, f) for f in dir(ms.mint)} +mint_func_ops_func = {f: getattr(ms.mint.nn.functional, f) for f in dir(ms.mint.nn.functional)} + + +def get_functional_ops(): + global ops_func, mint_ops_func, mint_func_ops_func + with FileOpen(yaml_path, 'r') as f: + config = yaml.safe_load(f) + WrapFunctionalOps = config.get("ops") + WrapMintOps = config.get("mint.ops") + WrapMintFunctionalOps = config.get("mint.nn.functional") + return ( + set(WrapFunctionalOps) & set(ops_func.keys()), + set(WrapMintOps) & set(mint_ops_func.keys()), + set(WrapMintFunctionalOps) & set(mint_func_ops_func.keys()) + ) + + +class HOOKFunctionalOP(object): + pass + + +class HOOKMintOP(object): + pass + + +class HOOKMintNNFunctionalOP(object): + pass + + +class FunctionalOPTemplate(HOOKCell): + def __init__(self, op_name, op_dict, prefix, 
hook): + self.op_name = op_name + self.op_func = op_dict[op_name] + self.prefix_op_name_ = prefix + str(op_name.split(Const.SEP)[-1]) + Const.SEP + super().__init__(hook) + + def construct(self, *args, **kwargs): + if self.op_name.startswith('dropout'): + return args[0] if args else kwargs.get('input') + return self.op_func(*args, **kwargs) + +def wrap_functional_op(op_name, op_dict, prefix, hook): + def op_template(*args, **kwargs): + return FunctionalOPTemplate(op_name, op_dict, prefix, hook)(*args, **kwargs) + return op_template + + +def wrap_functional_ops_and_bind(ops, op_dict, prefix, hook, hook_class): + for op_name in ops: + if callable(op_dict[op_name]): + setattr(hook_class, "wrap_" + op_name, wrap_functional_op(op_name, op_dict, prefix, hook)) + + +def setup_hooks(hook): + functional_ops, mint_ops, mint_func_ops = get_functional_ops() + wrap_functional_ops_and_bind( + functional_ops, {f: getattr(ms.ops, f) for f in dir(ms.ops)}, "Functional.", hook, HOOKFunctionalOP) + wrap_functional_ops_and_bind( + mint_ops, {f: getattr(ms.mint, f) for f in dir(ms.mint)}, "Mint.", hook, HOOKMintOP) + wrap_functional_ops_and_bind( + mint_func_ops, {f: getattr(ms.mint.nn.functional, f) for f in dir(ms.mint.nn.functional)}, "MintNNFunctional.", hook, HOOKMintNNFunctionalOP) + diff --git a/debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_tensor.py b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_tensor.py new file mode 100644 index 0000000000..379d210a47 --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/dump/hook_cell/wrap_tensor.py @@ -0,0 +1,65 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import yaml +import mindspore as ms + +from atat.mindspore.dump.hook_cell.hook_cell import HOOKCell +from atat.core.common.utils import Const + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with open(yaml_path, 'r') as f: + WrapTensorOps = yaml.safe_load(f).get('tensor') + +TensorFunc = {} +for f in dir(ms.Tensor): + TensorFunc[f] = getattr(ms.Tensor, f) + + +def get_tensor_ops(): + global WrapTensorOps + _tensor_ops = dir(ms.Tensor) + return set(WrapTensorOps) & set(_tensor_ops) + + +class HOOKTensor(object): + pass + + +class TensorOPTemplate(HOOKCell): + + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Tensor." 
+ str(op_name) + Const.SEP + super().__init__(hook) + + def construct(self, *args, **kwargs): + return TensorFunc[str(self.op_name_)](*args, **kwargs) + + +def wrap_tensor_op(op_name, hook): + def tensor_op_template(*args, **kwargs): + return TensorOPTemplate(op_name, hook)(*args, **kwargs) + + return tensor_op_template + + +def wrap_tensor_ops_and_bind(hook): + _tensor_ops = get_tensor_ops() + for op_name in _tensor_ops: + if callable(TensorFunc[op_name]): + setattr(HOOKTensor, "wrap_" + str(op_name), wrap_tensor_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/mindspore/service.py b/debug/accuracy_tools/atat/mindspore/service.py new file mode 100644 index 0000000000..6e9db85266 --- /dev/null +++ b/debug/accuracy_tools/atat/mindspore/service.py @@ -0,0 +1,132 @@ +import os +from pathlib import Path +import functools +import mindspore + +from atat.core.data_dump.data_collector import build_data_collector +from atat.core.data_dump.scope import BaseScope +from atat.mindspore.common.utils import get_rank_if_initialized +from atat.core.common.file_check import FileChecker, FileCheckConst, check_path_before_create +from atat.mindspore.common.log import logger +from atat.core.common.utils import Const +from atat.core.common.exceptions import DistributedNotInitializedError +from atat.mindspore.dump.hook_cell.api_registry import api_register +from atat.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs + + +class Service: + def __init__(self, config): + self.model = None + self.config = config + self.config.level = self.config.level_ori + self.data_collector = build_data_collector(config) + self.switch = False + self.current_iter = 0 + self.first_start = True + self.current_rank = None + self.dump_iter_dir = None + + def build_hook(self, module_type, name): + def forward_hook(api_or_module_name, module, input, output): + self.data_collector.visit_and_clear_overflow_status(api_or_module_name) + if not self.switch: + 
return + if self.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=input, kwargs=module.input_kwargs, output=output) + self.data_collector.forward_data_collect(api_or_module_name, module, pid, module_input_output) + if self.data_collector.if_return_forward_new_output(): + return self.data_collector.get_forward_new_output() + return output + + def backward_hook(api_or_module_name, module, grad_input, grad_output): + self.data_collector.visit_and_clear_overflow_status(api_or_module_name) + if not self.switch: + return + if self.data_collector: + module_input_output = ModuleBackwardInputsOutputs(grad_input=grad_input, grad_output=grad_output) + self.data_collector.backward_data_collect(api_or_module_name, module, pid, module_input_output) + + pid = os.getpid() + forward_name_template = name + Const.FORWARD + backward_name_template = name + Const.BACKWARD + forward_hook = functools.partial(forward_hook, forward_name_template) + backward_hook = functools.partial(backward_hook, backward_name_template) + + def wrap_forward_hook(*args, **kwargs): + return forward_hook(*args, **kwargs) + + def wrap_backward_hook(*args, **kwargs): + return backward_hook(*args, **kwargs) + + return wrap_forward_hook, wrap_backward_hook + + def step(self): + self.current_iter += 1 + self.data_collector.update_iter(self.current_iter) + + def start(self, model): + self.model = model + if self.config.step and self.current_iter > max(self.config.step): + self.stop() + raise Exception("atat: exit after iteration {}".format(max(self.config.step))) + if self.config.step and self.current_iter not in self.config.step: + return + if self.first_start: + try: + self.current_rank = get_rank_if_initialized() + except DistributedNotInitializedError: + self.current_rank = None + + if self.config.rank and self.current_rank not in self.config.rank: + return + self.register_hook_new() + self.first_start = False + self.switch = True + logger.info_on_rank_0(f"Dump switch is turned on at step 
{self.current_iter}. ") + if self.config.level != "L2": + self.create_dirs() + logger.info_on_rank_0(f"Dump data will be saved in {self.dump_iter_dir}.") + + def stop(self): + if self.config.level == "L2": + return + if self.config.step and self.current_iter not in self.config.step: + return + if self.config.rank and self.current_rank not in self.config.rank: + return + self.switch = False + self.data_collector.write_json() + + def create_dirs(self): + check_path_before_create(self.config.dump_path) + if not os.path.exists(self.config.dump_path): + Path(self.config.dump_path).mkdir(mode=0o750, exist_ok=True) + file_check = FileChecker(self.config.dump_path, FileCheckConst.DIR) + file_check.common_check() + self.dump_iter_dir = os.path.join(self.config.dump_path, f"step{self.current_iter}") + cur_rank = self.current_rank if self.current_rank is not None else '' + dump_dir = os.path.join(self.dump_iter_dir, f"rank{cur_rank}") + if not os.path.exists(dump_dir): + Path(dump_dir).mkdir(mode=0o750, parents=True, exist_ok=True) + if self.config.task in self.data_collector.tasks_need_tensor_data: + dump_data_dir = os.path.join(dump_dir, "dump_tensor_data") + Path(dump_data_dir).mkdir(mode=0o750, exist_ok=True) + else: + dump_data_dir = None + + dump_file_path = os.path.join(dump_dir, "dump.json") + stack_file_path = os.path.join(dump_dir, "stack.json") + construct_file_path = os.path.join(dump_dir, "construct.json") + free_benchmark_file_path = os.path.join(self.config.dump_path, "free_benchmark.csv") + self.data_collector.update_dump_paths( + dump_file_path, stack_file_path, construct_file_path, dump_data_dir, free_benchmark_file_path) + + + + def register_hook_new(self): + logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) + if self.config.level == "L1": + api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + api_register.api_modularity() + + diff --git 
a/debug/accuracy_tools/atat/pytorch/service.py b/debug/accuracy_tools/atat/pytorch/service.py index cd80d0852a..9b9d7da74a 100644 --- a/debug/accuracy_tools/atat/pytorch/service.py +++ b/debug/accuracy_tools/atat/pytorch/service.py @@ -164,4 +164,4 @@ class Service: api_register.api_modularity() if Const.STATISTICS == self.config.task or Const.TENSOR == self.config.task: - remove_dropout() + remove_dropout() \ No newline at end of file -- Gitee From 49e06ffb1fd4325a30bc1c34415fe1e91c8fd2a4 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 19:44:39 +0800 Subject: [PATCH 044/106] fix overflow_check --- .../run_ut/run_overflow_check.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py index e38b4e6b24..1051315153 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py @@ -14,6 +14,9 @@ from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import exec_api, generat from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents from msprobe.core.common.file_check import check_link from msprobe.pytorch.common.log import logger +from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward +from msprobe.core.common.const import Const + def check_tensor_overflow(x): if isinstance(x, torch.Tensor) and x.numel() != 0 and x.dtype != torch.bool: @@ -52,12 +55,12 @@ def check_data_overflow(x): def run_overflow_check(forward_file): logger.info("start UT test") - forward_content = get_json_contents(forward_file) + forward_content, _, real_data_path = parse_json_info_forward_backward(forward_file) for api_full_name, api_info_dict in tqdm(forward_content.items()): try: - 
run_torch_api(api_full_name, api_info_dict) + run_torch_api(api_full_name, api_info_dict, real_data_path) except Exception as err: - api_name = api_full_name.split("_", 1)[1].rsplit("_", 2)[0] + [_, api_name, _] = api_full_name.split(Const.SEP) if "not implemented for 'Half'" in str(err): logger.warning(f"API {api_name} not support half tensor in CPU, please add {api_name} to CONVERT_API " f"'fp16_to_fp32' list in accuracy_tools/api_accuracy_check/common/utils.py file.") @@ -68,11 +71,10 @@ def run_overflow_check(forward_file): logger.error(f"Run {api_full_name} UT Error: %s" % str(err)) -def run_torch_api(api_full_name, api_info_dict): +def run_torch_api(api_full_name, api_info_dict, real_data_path): torch.npu.clear_npu_overflow_flag() - api_type = api_full_name.split(".")[0] - api_name = api_full_name.split(".", 1)[1].rsplit(".", 2)[0] - args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path='') + [api_type, api_name, _] = api_full_name.split(Const.SEP) + args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path=real_data_path) if not need_grad: logger.warning("%s function with out=... arguments don't support automatic differentiation, skip backward." 
% api_full_name) -- Gitee From 64f13de4ad52ef02b6f59ef610f7348ea939185b Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 20:09:13 +0800 Subject: [PATCH 045/106] fix bug --- .../msprobe/core/common/const.py | 2 ++ .../api_accuracy_checker/common/config.py | 24 +++----------- .../msprobe/pytorch/common/utils.py | 19 +++++++++++ .../msprobe/pytorch/pt_config.py | 33 +++---------------- 4 files changed, 30 insertions(+), 48 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 7938f03f51..97f110e6f0 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -17,6 +17,8 @@ class Const: FORWARD = 'forward' DEFAULT_LIST = [] DEFAULT_PATH = './' + WHITE_LIST = 'white_list' + BLACK_LIST = 'black_list' # dump mode ALL = "all" diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py index 5488f89748..9fafc98ce2 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py @@ -1,10 +1,8 @@ import os import yaml from msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path -from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps from msprobe.core.common.file_check import FileOpen - -WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) +from msprobe.pytorch.common.utils import check_filter_list_config, check_error_data_path_config class Config: @@ -35,23 +33,11 @@ class Config: if key == 'precision' and value < 0: raise ValueError("precision must be greater than 0") if key == 'white_list': - if not isinstance(value, list): - raise ValueError("white_list must be a list type") - if not all(isinstance(i, str) for i in value): - raise 
ValueError("All elements in white_list must be of str type") - invalid_api = [i for i in value if i not in WrapApi] - if invalid_api: - raise ValueError( - f"{', '.join(invalid_api)} is not in support_wrap_ops.yaml, please check the white_list") + check_filter_list_config(key, value) if key == 'black_list': - if not isinstance(value, list): - raise ValueError("black_list must be a list type") - if not all(isinstance(i, str) for i in value): - raise ValueError("All elements in black_list must be of str type") - invalid_api = [i for i in value if i not in WrapApi] - if invalid_api: - raise ValueError( - f"{', '.join(invalid_api)} is not in support_wrap_ops.yaml, please check the black_list") + check_filter_list_config(key, value) + if key == 'error_data_path': + check_error_data_path_config(value) return value diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index a3118e21c2..0d73a5551d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -21,6 +21,7 @@ import torch import numpy as np from functools import wraps from msprobe.core.common.exceptions import DistributedNotInitializedError +from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps try: import torch_npu @@ -28,6 +29,9 @@ except ImportError: is_gpu = True else: is_gpu = False + + +WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) torch_without_guard_version_list = ['2.1', '2.2'] @@ -222,3 +226,18 @@ class Const: CONVERT_API = { "int32_to_int64": ["cross_entropy"] } + + +def check_filter_list_config(key, filter_list): + if not isinstance(filter_list, list): + raise Exception("%s must be a list type" % key) + if not all(isinstance(item, str) for item in filter_list): + raise Exception("All elements in white_list must be string type") + invalid_api = [item for item in filter_list if item not in WrapApi] + if 
invalid_api: + raise Exception("Invalid api in white_list: {}".format(invalid_api)) + + +def check_error_data_path_config(error_data_path): + if not os.path.exists(error_data_path): + raise Exception("error_data_path: %s is not exist", error_data_path) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index b206564420..798304e1fe 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -4,10 +4,7 @@ import os from msprobe.core.common_config import CommonConfig, BaseConfig from msprobe.core.common.file_check import FileOpen from msprobe.core.common.const import Const -from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps - - -WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) +from msprobe.pytorch.common.utils import check_filter_list_config, check_error_data_path_config class TensorConfig(BaseConfig): @@ -75,31 +72,9 @@ class RunUTConfig(BaseConfig): self.check_run_ut_config() def check_run_ut_config(self): - self.check_white_list_config() - self.check_black_list_config() - self.check_error_data_path_config() - - def check_white_list_config(self): - if not isinstance(self.white_list, list): - raise Exception("white_list must be a list type") - if not all(isinstance(item, str) for item in self.white_list): - raise Exception("All elements in white_list must be string type") - invalid_api = [item for item in self.white_list if item not in WrapApi] - if invalid_api: - raise Exception("Invalid api in white_list: {}".format(invalid_api)) - - def check_black_list_config(self): - if not isinstance(self.black_list, list): - raise Exception("black_list must be a list type") - if not all(isinstance(item, str) for item in self.black_list): - raise Exception("All elements in black_list must be string type") - invalid_api = [item for item in self.black_list if 
item not in WrapApi] - if invalid_api: - raise Exception("Invalid api in black_list: {}".format(invalid_api)) - - def check_error_data_path_config(self): - if not os.path.exists(self.error_data_path): - raise Exception("error_data_path: %s is not exist", self.error_data_path) + check_filter_list_config(Const.WHITE_LIST, self.white_list) + check_filter_list_config(Const.BLACK_LIST, self.black_list) + check_error_data_path_config(self.error_data_path) def parse_task_config(task, json_config): -- Gitee From 4c255b6233e7c85c827d8d3fef66e4b380dfea8d Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 20:26:52 +0800 Subject: [PATCH 046/106] fix bug --- .../api_accuracy_checker/common/config.py | 8 +++--- .../msprobe/pytorch/common/utils.py | 20 -------------- .../msprobe/pytorch/pt_config.py | 26 ++++++++++++++++--- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py index 9fafc98ce2..760e7c862d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py @@ -2,7 +2,7 @@ import os import yaml from msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path from msprobe.core.common.file_check import FileOpen -from msprobe.pytorch.common.utils import check_filter_list_config, check_error_data_path_config +from msprobe.pytorch.pt_config import RunUTConfig class Config: @@ -33,11 +33,11 @@ class Config: if key == 'precision' and value < 0: raise ValueError("precision must be greater than 0") if key == 'white_list': - check_filter_list_config(key, value) + RunUTConfig.check_filter_list_config(key, value) if key == 'black_list': - check_filter_list_config(key, value) + RunUTConfig.check_filter_list_config(key, value) if key == 'error_data_path': - 
check_error_data_path_config(value) + RunUTConfig.check_error_data_path_config(value) return value diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index 0d73a5551d..acc1de1051 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -21,7 +21,6 @@ import torch import numpy as np from functools import wraps from msprobe.core.common.exceptions import DistributedNotInitializedError -from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps try: import torch_npu @@ -29,10 +28,6 @@ except ImportError: is_gpu = True else: is_gpu = False - - -WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) - torch_without_guard_version_list = ['2.1', '2.2'] for version in torch_without_guard_version_list: @@ -226,18 +221,3 @@ class Const: CONVERT_API = { "int32_to_int64": ["cross_entropy"] } - - -def check_filter_list_config(key, filter_list): - if not isinstance(filter_list, list): - raise Exception("%s must be a list type" % key) - if not all(isinstance(item, str) for item in filter_list): - raise Exception("All elements in white_list must be string type") - invalid_api = [item for item in filter_list if item not in WrapApi] - if invalid_api: - raise Exception("Invalid api in white_list: {}".format(invalid_api)) - - -def check_error_data_path_config(error_data_path): - if not os.path.exists(error_data_path): - raise Exception("error_data_path: %s is not exist", error_data_path) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index 798304e1fe..c2ee6c1d5d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -5,6 +5,10 @@ from msprobe.core.common_config import CommonConfig, BaseConfig from msprobe.core.common.file_check import 
FileOpen from msprobe.core.common.const import Const from msprobe.pytorch.common.utils import check_filter_list_config, check_error_data_path_config +from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps + + +WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) class TensorConfig(BaseConfig): @@ -72,9 +76,25 @@ class RunUTConfig(BaseConfig): self.check_run_ut_config() def check_run_ut_config(self): - check_filter_list_config(Const.WHITE_LIST, self.white_list) - check_filter_list_config(Const.BLACK_LIST, self.black_list) - check_error_data_path_config(self.error_data_path) + # As instance methods are called, you need to pass instance attributes + RunUTConfig.check_filter_list_config(Const.WHITE_LIST, self.white_list) + RunUTConfig.check_filter_list_config(Const.BLACK_LIST, self.black_list) + RunUTConfig.check_error_data_path_config(self.error_data_path) + + @classmethod + def check_filter_list_config(cls, key, filter_list): + if not isinstance(filter_list, list): + raise Exception("%s must be a list type" % key) + if not all(isinstance(item, str) for item in filter_list): + raise Exception("All elements in %s must be string type" % key) + invalid_api = [item for item in filter_list if item not in WrapApi] + if invalid_api: + raise Exception("Invalid api in %s: %s" % (key, invalid_api)) + + @classmethod + def check_error_data_path_config(cls, error_data_path): + if not os.path.exists(error_data_path): + raise Exception("error_data_path: %s does not exist" % error_data_path) def parse_task_config(task, json_config): -- Gitee From 5c058f0da5e75e104d12420ba5835e28e5caa4a9 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 20:29:01 +0800 Subject: [PATCH 047/106] fix bug --- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index c2ee6c1d5d..63f0b30e5e 
100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -4,7 +4,6 @@ import os from msprobe.core.common_config import CommonConfig, BaseConfig from msprobe.core.common.file_check import FileOpen from msprobe.core.common.const import Const -from msprobe.pytorch.common.utils import check_filter_list_config, check_error_data_path_config from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps -- Gitee From ed150d0e93ad7f2aa59daa1dd5dd54e5aeb9e12c Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jul 2024 20:54:37 +0800 Subject: [PATCH 048/106] add ut --- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 11 +++++------ .../api_accuracy_checker/common/test_config.py | 2 +- .../msprobe/test/pytorch_ut/test_pt_config.py | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index 63f0b30e5e..1c608e3c7b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -73,12 +73,6 @@ class RunUTConfig(BaseConfig): self.black_list = json_config.get("black_list", Const.DEFAULT_LIST) self.error_data_path = json_config.get("error_data_path", Const.DEFAULT_PATH) self.check_run_ut_config() - - def check_run_ut_config(self): - # As instance methods are called, you need to pass instance attributes - RunUTConfig.check_filter_list_config(Const.WHITE_LIST, self.white_list) - RunUTConfig.check_filter_list_config(Const.BLACK_LIST, self.black_list) - RunUTConfig.check_error_data_path_config(self.error_data_path) @classmethod def check_filter_list_config(cls, key, filter_list): @@ -94,6 +88,11 @@ class RunUTConfig(BaseConfig): def check_error_data_path_config(cls, error_data_path): if not os.path.exists(error_data_path): raise Exception("error_data_path: %s does not exist" % error_data_path) + + def 
check_run_ut_config(self): + RunUTConfig.check_filter_list_config(Const.WHITE_LIST, self.white_list) + RunUTConfig.check_filter_list_config(Const.BLACK_LIST, self.black_list) + RunUTConfig.check_error_data_path_config(self.error_data_path) def parse_task_config(task, json_config): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py index ec606d9aa1..35fc616476 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py @@ -35,5 +35,5 @@ class TestConfig(unittest.TestCase): validate_white_list = ['conv1d', 'max_pool1d', 'dropout', '__add__'] self.assertEqual(self.cfg.validate('white_list', validate_white_list), validate_white_list) - with self.assertRaises(ValueError): + with self.assertRaises(Exception): self.cfg.validate('white_list', ['invalid_api1', 'invalid_api2']) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py index 53b4e66c1b..c344f0b66b 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py @@ -67,3 +67,18 @@ class TestPtConfig(TestCase): self.assertEqual(result.handler_type, "check") self.assertEqual(result.preheat_step, 15) self.assertEqual(result.max_sample, 20) + + run_ut_config = { + "run_ut": { + "white_list": ["conv2d"], + "black_list": ["matmul"], + "error_data_path": '/home/dump_path' + + } + } + with patch('os.path.exists', return_value=True) as mocked_exists: + result = parse_task_config(Const.RUN_UT, run_ut_config) + self.assertEqual(result.white_list, ["conv2d"]) + self.assertEqual(result.black_list, ["matmul"]) + self.assertEqual(result.error_data_path, '/home/dump_path') + 
mocked_exists.assert_called_once_with('/home/dump_path') -- Gitee From 750e06567f331254cafb02fbe87e33f71078314d Mon Sep 17 00:00:00 2001 From: l30036321 Date: Tue, 23 Jul 2024 09:21:14 +0800 Subject: [PATCH 049/106] mindspore support ops yaml --- .../dump/hook_cell/support_wrap_ops.yaml | 925 ++++++++++++++++++ 1 file changed, 925 insertions(+) create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml new file mode 100644 index 0000000000..089f444b61 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml @@ -0,0 +1,925 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +# List of ops that register hooks + + +ops: + - adaptive_avg_pool1d + - adaptive_avg_pool2d + - adaptive_avg_pool3d + - adaptive_max_pool1d + - adaptive_max_pool2d + - avg_pool1d + - avg_pool2d + - avg_pool3d + - batch_norm + - bias_add + - ctc_greedy_decoder + - conv1d + - conv2d + - conv3d + - deformable_conv2d + - dense + - dropout + - dropout1d + - dropout2d + - dropout3d + - flatten + - fold + - fractional_max_pool3d + - lp_pool1d + - lp_pool2d + - lrn + - max_pool2d + - max_pool3d + - max_unpool1d + - max_unpool2d + - max_unpool3d + - unfold + - binary_cross_entropy + - binary_cross_entropy_with_logits + - cosine_embedding_loss + - cross_entropy + - ctc_loss + - gaussian_nll_loss + - hinge_embedding_loss + - huber_loss + - kl_div + - l1_loss + - margin_ranking_loss + - mse_loss + - multi_margin_loss + - multilabel_margin_loss + - multilabel_soft_margin_loss + - nll_loss + - smooth_l1_loss + - triplet_margin_loss + - elu + - fast_gelu + - gelu + - glu + - gumbel_softmax + - hardshrink + - hardsigmoid + - hardswish + - hardtanh + - leaky_relu + - log_softmax + - logsigmoid + - mish + - prelu + - relu + - relu6 + - celu + - rrelu + - selu + - sigmoid + - silu + - softmax + - softmin + - softshrink + - softsign + - tanh + - threshold + - cdist + - dist + - pdist + - choice_with_mask + - random_categorical + - log_uniform_candidate_sampler + - uniform_candidate_sampler + - affine_grid + - bounding_box_decode + - bounding_box_encode + - col2im + - check_valid + - crop_and_resize + - grid_sample + - interpolate + - iou + - pad + - padding + - pixel_shuffle + - pixel_unshuffle + - upsample + - abs + - absolute + - accumulate_n + - acos + - arccos + - acosh + - add + - addcdiv + - addcmul + - addmv + - addn + - angle + - arccosh + - arcsin + - arcsinh + - arctan + - arctanh + - arctan2 + - asin + - asinh + - atan + - atan2 + - atanh + - atleast_1d + - atleast_2d + - atleast_3d + - 
bessel_i0 + - bessel_i0e + - bessel_i1 + - bessel_i1e + - bessel_j0 + - bessel_j1 + - bessel_k0 + - bessel_k0e + - bessel_k1 + - bessel_k1e + - bessel_y0 + - bessel_y1 + - bitwise_and + - bitwise_left_shift + - bitwise_or + - bitwise_right_shift + - bitwise_xor + - ceil + - clamp + - clip + - combinations + - copysign + - cos + - cosh + - cosine_similarity + - cov + - diag_embed + - diff + - deg2rad + - digamma + - div + - divide + - erf + - erfc + - erfinv + - exp + - exp2 + - expm1 + - floor + - floor_div + - floor_mod + - float_power + - fmod + - frac + - gcd + - hypot + - igamma + - igammac + - imag + - i0 + - inv + - invert + - lcm + - ldexp + - lerp + - log + - log2 + - log10 + - log1p + - logaddexp + - logaddexp2 + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - mul + - multiply + - mvlgamma + - neg + - negative + - nextafter + - polar + - polygamma + - positive + - pow + - rad2deg + - ravel + - real + - reciprocal + - remainder + - rot90 + - round + - rsqrt + - sgn + - sign + - signbit + - sin + - sinc + - sinh + - sqrt + - square + - sub + - subtract + - t + - tan + - tanhshrink + - trapz + - tril_indices + - triu_indices + - true_divide + - trunc + - truncate_div + - truncate_mod + - xdivy + - xlogy + - zeta + - all + - amax + - amin + - aminmax + - any + - argmax + - argmin + - cummax + - cummin + - cumprod + - cumsum + - fmax + - histc + - logsumexp + - max + - mean + - median + - min + - norm + - prod + - std + - std_mean + - var + - var_mean + - argsort + - approximate_equal + - equal + - ge + - greater + - greater_equal + - gt + - intopk + - isclose + - isfinite + - isinf + - isnan + - isneginf + - isposinf + - isreal + - is_complex + - le + - less + - less_equal + - lt + - maximum + - minimum + - msort + - ne + - not_equal + - searchsorted + - topk + - bmm + - addbmm + - addmm + - baddbmm + - addr + - adjoint + - cholesky + - cholesky_solve + - batch_dot + - dot + - eig + - inner + - inverse + - geqrf + - ger + - kron + - 
lu_solve + - lu_unpack + - matmul + - matrix_solve + - matrix_band_part + - matrix_diag + - matrix_diag_part + - matrix_set_diag + - mm + - mv + - outer + - orgqr + - ormqr + - pinv + - svd + - tensor_dot + - logdet + - slogdet + - qr + - trace + - bartlett_window + - blackman_window + - hamming_window + - hann_window + - kaiser_window + - eye + - fill + - full + - full_like + - linspace + - logspace + - one_hot + - arange + - range + - heaviside + - bernoulli + - gamma + - laplace + - multinomial + - multinomial_with_replacement + - rand + - rand_like + - randint + - randint_like + - randn + - randn_like + - random_gamma + - random_poisson + - randperm + - standard_laplace + - standard_normal + - uniform + - argwhere + - batch_to_space_nd + - bincount + - block_diag + - broadcast_to + - cat + - channel_shuffle + - chunk + - column_stack + - concat + - conj + - count_nonzero + - deepcopy + - diag + - diagflat + - diagonal + - dyn_shape + - dsplit + - dstack + - einsum + - expand + - expand_dims + - flip + - fliplr + - flipud + - gather_d + - gather_elements + - gather_nd + - hsplit + - hstack + - index_add + - index_fill + - index_select + - inplace_add + - inplace_index_add + - inplace_sub + - inplace_update + - masked_fill + - masked_select + - meshgrid + - moveaxis + - movedim + - narrow + - nan_to_num + - nansum + - normal + - nonzero + - population_count + - rank + - repeat_elements + - repeat_interleave + - reshape + - reverse + - reverse_sequence + - roll + - scatter + - scatter_nd + - select + - sequence_mask + - shuffle + - size + - slice + - sort + - space_to_batch_nd + - sparse_segment_mean + - split + - squeeze + - stack + - strided_slice + - sum + - swapaxes + - swapdims + - tensor_scatter_add + - tensor_scatter_div + - tensor_scatter_max + - tensor_scatter_min + - tensor_scatter_mul + - tensor_scatter_sub + - tensor_scatter_elements + - tensor_split + - tile + - tril + - triu + - transpose + - unbind + - unique + - unique_consecutive + - 
unique_with_pad + - unsorted_segment_max + - unsorted_segment_min + - unsorted_segment_prod + - unsorted_segment_sum + - unsqueeze + - unstack + - view_as_real + - vsplit + - vstack + - where + - cross + - renorm + - is_tensor + - scalar_cast + - scalar_to_tensor + - tuple_to_array + - clip_by_global_norm + - clip_by_value + - assign + - assign_add + - assign_sub + - scatter_add + - scatter_div + - scatter_max + - scatter_min + - scatter_mul + - scatter_nd_add + - scatter_nd_div + - scatter_nd_max + - scatter_nd_min + - scatter_nd_mul + - scatter_nd_sub + - scatter_update + - derivative + - jet + +tensor: + - __abs__ + - __add__ + - __and__ + - __bool__ + - __eq__ + - __ge__ + - __gt__ + - __iadd__ + - __ifloordiv__ + - __imatmul__ + - __imod__ + - __imul__ + - __isub__ + - __le__ + - __lt__ + - __matmul__ + - __mod__ + - __mul__ + - __ne__ + - __neg__ + - __or__ + - __pow__ + - __radd__ + - __rmatmul__ + - __rmod__ + - __rmul__ + - __rpow__ + - __rsub__ + - __sub__ + - __truediv__ + - __xor__ + - abs + - absolute + - acos + - acosh + - add + - addbmm + - addcdiv + - addcmul + - addmm + - addmv + - addr + - all + - amax + - amin + - any + - arccos + - arccosh + - argmax + - angle + - arcsin + - arcsinh + - arctan + - arctanh + - argmin + - argsort + - asin + - asinh + - atan + - atan2 + - atanh + - baddbmm + - bernoulli + - bincount + - bitwise_and + - bitwise_or + - bitwise_xor + - bmm + - bool + - broadcast_to + - ceil + - cholesky_solve + - cholesky + - clamp + - clip + - conj + - copysign + - cos + - cosh + - cross + - cummax + - cummin + - cumprod + - cumsum + - deg2rad + - diag + - diagflat + - diff + - digamma + - div + - divide + - equal + - erf + - erfc + - erfinv + - exp + - expand_as + - expm1 + - flip + - fliplr + - flipud + - float_power + - floor + - fmod + - frac + - gather_elements + - ge + - geqrf + - ger + - greater + - greater_equal + - gt + - half + - hardshrink + - heaviside + - histc + - hypot + - i0 + - igamma + - igammac + - imag + - 
index_add + - index_fill + - index_put + - index_select + - inner + - int + - inverse + - isclose + - isfinite + - isinf + - isnan + - is_complex + - is_signed + - isneginf + - isposinf + - isreal + - lcm + - ldexp + - le + - lerp + - less + - less_equal + - log + - log10 + - log1p + - log2 + - logaddexp + - logaddexp2 + - logdet + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - logsumexp + - long + - lt + - masked_fill + - masked_scatter + - masked_select + - matmul + - max + - maximum + - mean + - median + - min + - minimum + - moveaxis + - movedim + - msort + - multinomial + - multiply + - mvlgamma + - nan_to_num + - nansum + - narrow + - ne + - neg + - negative + - nelement + - new_ones + - new_zeros + - nextafter + - norm + - nonzero + - not_equal + - ormqr + - permute + - pow + - prod + - qr + - ravel + - real + - reciprocal + - remainder + - renorm + - rad2deg + - tile + - repeat_interleave + - reshape + - reshape + - round + - rot90 + - rsqrt + - sum_to_size + - scatter + - sgn + - short + - sigmoid + - sign + - signbit + - sin + - sinc + - sinh + - slogdet + - sort + - split + - sqrt + - square + - squeeze + - std + - subtract + - subtract + - svd + - swapaxes + - swapdims + - t + - take + - tan + - tanh + - trace + - swapaxes + - tile + - to + - topk + - tril + - tensor_split + - transpose + - true_divide + - trunc + - unbind + - unique_consecutive + - unsqueeze + - var + - view + - where + - xlogy + - from_numpy + - std + - take + - var + - all + - any + - copy + - diagonal + - flatten + - resize + - sum + +mint.ops: + - abs + - absolute_import + - add + - add_ex + - all + - any + - any_ex + - arange + - argmax + - avg_pool2d + - baddbmm + - baddbmm_ex + - batch_norm + - binary_cross_entropy_with_logits + - bitwise_and + - bitwise_or + - bitwise_xor + - bmm + - broadcast_to + - cat + - cat_ex + - ceil + - chunk + - clamp + - conv2d + - conv_transpose2d + - cos + - cross + - cummax + - cummin + - cumsum + - div + - divide + - 
dropout + - embedding + - eq + - erf + - erfinv + - exp + - flatten + - flip + - flip_ex + - fold + - full + - functional + - gather + - gelu + - greater + - grid_sample + - group_norm + - gt + - index_select + - interpolate + - isclose + - isfinite + - layer_norm + - le + - leaky_relu + - less + - less_equal + - linear + - linspace + - log + - logical_and + - logical_not + - logical_or + - lt + - masked_select + - matmul + - max + - max_pool2d + - maximum + - mean + - mean_ex + - min + - minimum + - mul + - ne + - neg + - negative + - nn + - nonzero + - normal + - one_hot + - ones + - ones_ex + - ones_like + - pad + - permute + - permute_ex + - pow + - prod + - reciprocal + - relu + - remainder + - repeat_interleave + - rsqrt + - scatter + - scatter_add + - searchsorted + - sigmoid + - silu + - sin + - softmax + - softplus + - sort + - split + - sqrt + - sqrt_ex + - square + - stack + - sub + - sub_ex + - sum + - tanh + - tile + - topk + - tril + - triu + - unfold + - unique + - where + - xlogy + - zeros + - zeros_ex + - zeros_like + +mint.nn: + - Dropout + - Embedding + - Fold + - LayerNorm + - Linear + - MaxPool2d + - Unfold + - Upsample + +mint.nn.functional: + - absolute_import + - avg_pool2d + - batch_norm + - batch_norm_ex + - bce_with_logits + - binary_cross_entropy_with_logits + - conv_transpose2d + - dense + - dropout + - embedding + - fold + - gelu + - grid_sample + - group_norm + - interpolate + - layer_norm + - leaky_relu + - linear + - max_pool2d + - max_pool2d_ex + - normal + - one_hot + - one_hot_ext + - pad + - relu + - sigmoid + - silu + - softmax + - softmax_ex + - softplus + - tanh + - unfold -- Gitee From 9c6e4c602344dba1f4e4ca07dddaf9734b7f70a2 Mon Sep 17 00:00:00 2001 From: lcw Date: Tue, 23 Jul 2024 09:55:41 +0800 Subject: [PATCH 050/106] =?UTF-8?q?[Bugfix]=20dump=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E6=94=AF=E6=8C=81GPU?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../core/data_dump/data_processor/pytorch_processor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 95be091b21..588d137f98 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -15,8 +15,9 @@ from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow try: import torch_npu + is_gpu = False except ImportError: - pass + is_gpu = True class PytorchDataProcessor(BaseDataProcessor): @@ -213,7 +214,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): def _analyze_maybe_overflow_tensor(self, tensor_json, tensor): data_clone = tensor.detach() - if hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan(): + if is_gpu or (hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan()): if tensor_json['Max'] is None: return if np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']): -- Gitee From 2a4386d567e557ca270864644e4f407b3e436e37 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 23 Jul 2024 10:00:08 +0800 Subject: [PATCH 051/106] fix bug --- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index 1c608e3c7b..77ee4ff83c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -7,9 +7,6 @@ from msprobe.core.common.const import Const from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps -WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) - - class TensorConfig(BaseConfig): def 
__init__(self, json_config): super().__init__(json_config) @@ -67,6 +64,7 @@ class FreeBenchmarkCheckConfig(BaseConfig): class RunUTConfig(BaseConfig): + WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) def __init__(self, json_config): super().__init__(json_config) self.white_list = json_config.get("white_list", Const.DEFAULT_LIST) -- Gitee From 88276fbe2087701dde943abca90275bcb205880a Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 23 Jul 2024 10:16:23 +0800 Subject: [PATCH 052/106] fix --- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index 77ee4ff83c..aae54be110 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -78,7 +78,7 @@ class RunUTConfig(BaseConfig): raise Exception("%s must be a list type" % key) if not all(isinstance(item, str) for item in filter_list): raise Exception("All elements in %s must be string type" % key) - invalid_api = [item for item in filter_list if item not in WrapApi] + invalid_api = [item for item in filter_list if item not in cls.WrapApi] if invalid_api: raise Exception("Invalid api in %s: %s" % (key, invalid_api)) @@ -108,7 +108,7 @@ def parse_task_config(task, json_config): config_dic = json_config.get(Const.FREE_BENCHMARK) if json_config.get(Const.FREE_BENCHMARK) else default_dic return FreeBenchmarkCheckConfig(config_dic) elif task == Const.RUN_UT: - config_dic = json_config.get(Const.RUN_UT) if json_config.get(Const.RUN_UT) else default_dic + config_dic = json_config.get(Const.RUN_UT, default_dic) return RunUTConfig(config_dic) else: return StatisticsConfig(default_dic) -- Gitee From 083acef59d67b44b5143f1e928df90a6da7ebefd Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 23 Jul 2024 10:35:01 +0800 Subject: [PATCH 053/106] fix bug --- 
debug/accuracy_tools/msprobe/pytorch/pt_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index aae54be110..a3d765f3a4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -96,16 +96,16 @@ class RunUTConfig(BaseConfig): def parse_task_config(task, json_config): default_dic = {} if task == Const.TENSOR: - config_dic = json_config.get(Const.TENSOR) if json_config.get(Const.TENSOR) else default_dic + config_dic = json_config.get(Const.TENSOR, default_dic) return TensorConfig(config_dic) elif task == Const.STATISTICS: - config_dic = json_config.get(Const.STATISTICS) if json_config.get(Const.STATISTICS) else default_dic + config_dic = json_config.get(Const.STATISTICS, default_dic) return StatisticsConfig(config_dic) elif task == Const.OVERFLOW_CHECK: - config_dic = json_config.get(Const.OVERFLOW_CHECK) if json_config.get(Const.OVERFLOW_CHECK) else default_dic + config_dic = json_config.get(Const.OVERFLOW_CHECK, default_dic) return OverflowCheckConfig(config_dic) elif task == Const.FREE_BENCHMARK: - config_dic = json_config.get(Const.FREE_BENCHMARK) if json_config.get(Const.FREE_BENCHMARK) else default_dic + config_dic = json_config.get(Const.FREE_BENCHMARK, default_dic) return FreeBenchmarkCheckConfig(config_dic) elif task == Const.RUN_UT: config_dic = json_config.get(Const.RUN_UT, default_dic) -- Gitee From 19b52aa934234a7e58d0799888cbfb37b727b4f3 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Tue, 23 Jul 2024 10:41:58 +0800 Subject: [PATCH 054/106] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcompare=20ut=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch_ut/compare/test_acc_compare.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git 
a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py index 625bafd8e9..b214e3dd79 100644 --- a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py @@ -18,7 +18,7 @@ bench_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_con 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), ('torch.float32', [16])], 'output_struct': [('torch.float32', [1, 16, 28, 28])], - 'summery': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} @@ -119,10 +119,10 @@ aten_result = [ 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_input.3', 'Functional_batch_norm_0_forward_input.3', 'torch.float32', 'torch.float32', [256], [256], 1.763145923614502, -4.398397922515869, -1.0521326325833797, ' ', - '176.3145923614502%', '439.8297922515869%', '105.21326325933797%', ' ', 2.763145923614502, -3.398397922515869, + '176.3145923614502%', '439.8397922515869%', '105.21326325833797%', ' ', 2.763145923614502, -3.398397922515869, -0.052132632583379745, 1.0, 1.0, 1.0, 'Warning', 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_input.4', 'Functional_batch_norm_0_forward_input.4', - 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906693445, ' ', + 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906683445, ' ', 'N/A', 'N/A', 'N/A', ' ', 2.673110008239746, 
-3.149275064468384, 0.01613386906683445, 0.0, 0.0, 0.0, 'Warning', 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.0', 'Functional_batch_norm_0_forward_output', @@ -132,16 +132,16 @@ aten_result = [ 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', - 'Yes', '', None], + 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', - 'Yes', '', None], + 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.3', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', - 'Yes', '', None], + 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', - 'Yes', '', None]] + 'Yes', '', 'None']] highlight_dict = {'red_rows': [], 'yellow_rows': []} @@ -191,16 +191,16 @@ op_name = "Tensor.add_0.0.forward" op_result = [ {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_input.0'}, {'type': 'torch.Tensor', 'dtype': 'torch.float32', 
'shape': [16, 1, 3, 3], - 'Max': 0.003992878366261721, 'Min': -0.008102823048830032,'Mean': -0.0002002553956117481, - 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, - {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_0.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_0.0.forward_input.alpha.0', 'dtype': "", 'shape': '[]', 'md5': None, 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'}] + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_output.0'}] class TestUtilsMethods(unittest.TestCase): @@ -252,7 +252,7 @@ class TestUtilsMethods(unittest.TestCase): result = [line_input, line_1, line_2, line_3] result_df = pd.DataFrame(result) highlight_dict = {'red_rows': [], 'yellow_rows': []} - compare.find_compare_result_error_rows(result_df, highlight_dict, False) + compare.find_compare_result_error_rows(result_df, highlight_dict, False, False) self.assertEqual(highlight_dict, {'red_rows': [num_1, num_3], 'yellow_rows': [num_2]}) def test_rename_api(self): -- Gitee From f251aeba70038e5e2a0ce27306d870b6d90677cc Mon Sep 17 00:00:00 2001 From: h00613304 Date: Mon, 22 Jul 2024 15:52:40 +0800 Subject: [PATCH 055/106] =?UTF-8?q?=E8=A1=A5=E5=85=85=E6=95=B4=E7=BD=91?= =?UTF-8?q?=E6=AF=94=E5=AF=B9compare=E9=83=A8=E5=88=86=E5=86=92=E7=83=9F?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch_ut/compare/test_acc_compare.py | 260 +++++++++++++++++- .../test/pytorch_ut/compare/test_match.py | 20 ++ 2 files changed, 275 insertions(+), 5 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py index fb442941b0..625bafd8e9 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py @@ -1,17 +1,267 @@ # coding=utf-8 import unittest -from msprobe.pytorch.compare.acc_compare import rename_api +from msprobe.pytorch.compare import acc_compare as compare +import pandas as pd + +npu_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', + 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], + 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), + ('torch.float32', [16])], + 'output_struct': [('torch.float32', [1, 16, 28, 28])], + 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], + [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], + [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} + +bench_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', + 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], + 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), + ('torch.float32', [16])], + 'output_struct': [('torch.float32', [1, 16, 28, 28])], + 'summery': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + [0.19919930398464203, -0.19974489510059357, 
0.006269412115216255], + [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], + [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} + +tensor_list = [ + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], 'Max': 0.33033010363578796, + 'Min': -0.331031858921051,'Mean': -0.030964046716690063, 'Norm': 2.2533628940582275, 'requires_grad': True, + 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'} +] + +result_op_dict = {'op_name': ['Tensor.add_.0.forward_input.0', 'Tensor.add_.0.forward_input.1', + 'Tensor.add_.0.forward_input.alpha.0', 'Tensor.add_.0.forward_output.0'], + 'input_struct': [('torch.float32', [16, 1, 3, 3]), ('torch.float32', [16, 1, 3, 3]), + ("", '[]')], + 'output_struct': [('torch.float32', [16, 1, 3, 3])], + 'summary': [[0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275], + [0.003992878366261721, -0.008102823048830032, -0.0002002553956117481, 0.02844562754034996], + [-0.1, -0.1, -0.1, -0.1], + [0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275]], + 'stack_info': []} + +o_result = [ + ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.0', 'torch.float32', 'torch.float32', + [1, 1, 28, 28], 
[1, 1, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 3.029174327850342, -2.926689624786377, + -0.06619918346405029, 3.029174327850342, -2.926689624786377, -0.06619918346405029, '', '', 'None'], + ['Functional_conv2d_0_forward_input.1', 'Functional_conv2d_0_forward_input.1', 'torch.float32', 'torch.float32', + [16, 1, 5, 5], [16, 1, 5, 5], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19919930398464203, -0.19974489510059357, + 0.006269412115216255, 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, '', '', 'None'], + ['Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_input.2', 'torch.float32', 'torch.float32', + [16], [16], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, + 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, '', '', 'None'], + ['Functional_conv2d_0_forward_output', 'Functional_conv2d_0_forward_output', 'torch.float32', 'torch.float32', + [1, 16, 28, 28], [1, 16, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 2.1166646480560303, -2.190781354904175, + -0.003579073818400502, 2.1166646480560303, -2.190781354904175, -0.003579073818400502, '', '', 'None']] + +npu_dict_aten = {'op_name': ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.1', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.2', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.3', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.4', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.0', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.1', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.2', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.3', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.4'], + 'input_struct': 
[('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], + 'output_struct': [('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], + 'summary': [[139.625, -127.5625, -0.0103607177734375], + [2.5276029109954834, -2.1788690090179443, -0.0008259844034910202], + [2.472219944000244, -2.845968723297119, -0.008756577968597412], + [2.763145923614502, -3.398397922515869, -0.052132632583379745], + [2.673110008239746, -3.149275064468384, 0.01613386906683445], + [13.5546875, -10.640625, -0.008758544921875], + [0.30550330877304077, -0.24485322833061218, -0.010361209511756897], + [623.9192504882812, 432.96826171875, 520.2276611328125], + [2.4797861576080322, -3.055997371673584, -0.04795549064874649], + [61.7945556640625, 42.59713363647461, 52.03831481933594]]} + +bench_dict_functional = { + 'op_name': ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.1', + 'Functional_batch_norm_0_forward_input.2', 'Functional_batch_norm_0_forward_input.3', + 'Functional_batch_norm_0_forward_input.4', 'Functional_batch_norm_0_forward_output'], + 'input_struct': [('torch.float32', [256, 256, 14, 14]), ('torch.float32', [256]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256])], + 'output_struct': [('torch.float32', [256, 256, 14, 14])], + 'summary': [[3.061628818511963, -3.22507381439209, 3.634914173744619e-05], + [0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06], + [0.9338104128837585, 0.9277191162109375, 0.930335283279419], + [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], + [5.397906303405762, -5.796811580657959, 2.5283952709287405e-10]] +} + +aten_result = [ + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', + 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 
136.56337118148804, -124.33742618560791, + -0.010397066915174946, ' ', '4460.480981749501%', '3855.335826136584%', '28603.33536971545%', ' ', 139.625, + -127.5625, -0.0103607177734375, 3.061628818511963, -3.22507381439209, 3.634914173744619e-05, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.1', 'Functional_batch_norm_0_forward_input.1', + 'torch.float32', 'torch.float32', [256], [256], 2.527024927258026, -2.1782388387364335, -0.0008296193100250093, + ' ', '437213.84590749856%', '345658.76916858414%', '22823.676544842117%', ' ', 2.5276029109954834, + -2.1788690090179443, -0.0008259844034910202, 0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06, + 'Warning', 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.2', 'Functional_batch_norm_0_forward_input.2', + 'torch.float32', 'torch.float32', [256], [256], 1.5384095311164856, -3.7736878395080566, -0.9390918612480164, ' ', + '164.74538192025793%', '406.7705163736246%', '100.94122819224167%', ' ', 2.472219944000244, -2.845968723297119, + -0.008756577968597412, 0.9338104128837585, 0.9277191162109375, 0.930335283279419, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.3', 'Functional_batch_norm_0_forward_input.3', + 'torch.float32', 'torch.float32', [256], [256], 1.763145923614502, -4.398397922515869, -1.0521326325833797, ' ', + '176.3145923614502%', '439.8297922515869%', '105.21326325933797%', ' ', 2.763145923614502, -3.398397922515869, + -0.052132632583379745, 1.0, 1.0, 1.0, 'Warning', 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.4', 'Functional_batch_norm_0_forward_input.4', + 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906693445, ' ', + 'N/A', 'N/A', 'N/A', ' ', 
2.673110008239746, -3.149275064468384, 0.01613386906683445, 0.0, 0.0, 0.0, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.0', 'Functional_batch_norm_0_forward_output', + 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 8.156781196594238, -4.843813419342041, + -0.008758545174714527, ' ', '151.11009228611078%', '83.55995967687207%', '3464072756.115108%', ' ', 13.5546875, + -10.640625, -0.008758544921875, 5.397906303405762, -5.796811580657959, 2.5283952709287405e-10, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', + 'Yes', '', None], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', + 'Yes', '', None], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.3', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', + 'Yes', '', None], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', + 'Yes', '', None]] + +highlight_dict = {'red_rows': [], 'yellow_rows': []} + +num_0, num_1, num_2, num_3 = 0, 1, 2, 3 +summary_line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.01, 0, 0, 0, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'Yes', ''] +summary_line_1 = 
['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 10, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, + 'Warning', ''] +summary_line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.02, 0, 0, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, + 'Warning', ''] +summary_line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, + 'Warning', ''] +line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 1, 1, 0.95, 1, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'Yes', ''] +line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1, 1, 0.59, 1, 'nan', 0, 1, 1, 19, 1, 1, 1, + 'Warning', ''] +line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 1, 1, 0.8, 1, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, + 'Warning', ''] +line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1.1e+10, 1, 0.85, 1, 9, 0.12, 0, 1, 1, 0.1, 1, + 1, 1, 'Warning', ''] + +op_data = { + 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], 
+ 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False}], + 'input_kwargs': {'alpha': {'type': 'float', 'value': -0.1}}, + 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True}]} + +op_name = "Tensor.add_0.0.forward" + +op_result = [ + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032,'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'}] + class TestUtilsMethods(unittest.TestCase): + def test_check_graph_mode(self): + op1 = "Aten" + op2 = "torch" + self.assertTrue(compare.check_graph_mode(op1, op2)) + self.assertTrue(compare.check_graph_mode(op2, op1)) + self.assertFalse(compare.check_graph_mode(op1, op1)) + self.assertFalse(compare.check_graph_mode(op2, op2)) + + def test_check_op(self): + fuzzy_match = False + result = compare.check_op(npu_dict, bench_dict, fuzzy_match) + self.assertEqual(result, True) + + def test_merge_tensor(self): + 
op_dict = compare.merge_tensor(tensor_list, True, False) + self.assertEqual(op_dict, result_op_dict) + + def test_read_op(self): + result = compare.read_op(op_data, op_name) + self.assertEqual(result, op_result) + + def test_match_op(self): + fuzzy_match = False + a, b = compare.match_op([npu_dict], [bench_dict], fuzzy_match) + self.assertEqual(a, 0) + self.assertEqual(b, 0) + + def test_get_accuracy(self): + result = [] + compare.get_accuracy(result, npu_dict, bench_dict, highlight_dict) + self.assertEqual(result, o_result) + + def test_get_accuracy_graph_mode(self): + result = [] + compare.get_accuracy(result, npu_dict_aten, bench_dict_functional, highlight_dict) + self.assertEqual(result, aten_result) + + def test_find_error_rows(self): + summary_result = [summary_line_input, summary_line_1, summary_line_2, summary_line_3] + highlight_dict = {'red_rows': [], 'yellow_rows': []} + compare.find_error_rows(summary_result, 0, 1, highlight_dict, summary_compare=True) + self.assertEqual(highlight_dict, {'red_rows': [], 'yellow_rows': []}) + + def test_find_compare_result_error_rows(self): + result = [line_input, line_1, line_2, line_3] + result_df = pd.DataFrame(result) + highlight_dict = {'red_rows': [], 'yellow_rows': []} + compare.find_compare_result_error_rows(result_df, highlight_dict, False) + self.assertEqual(highlight_dict, {'red_rows': [num_1, num_3], 'yellow_rows': [num_2]}) + def test_rename_api(self): test_name_1 = "Distributed.broadcast.0.forward.input.0" expect_name_1 = "Distributed.broadcast.input.0" - actual_name_1 = rename_api(test_name_1, "forward") + actual_name_1 = compare.rename_api(test_name_1, "forward") self.assertEqual(actual_name_1, expect_name_1) - + test_name_2 = "Torch.sum.0.backward.output.0" expect_name_2 = "Torch.sum.output.0" - actual_name_2 = rename_api(test_name_2, "backward") + actual_name_2 = compare.rename_api(test_name_2, "backward") self.assertEqual(actual_name_2, expect_name_2) - \ No newline at end of file diff --git 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py new file mode 100644 index 0000000000..ac28e994e9 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py @@ -0,0 +1,20 @@ +# coding=utf-8 +import unittest +from msprobe.pytorch.compare import match + + +class TestMatch(unittest.TestCase): + def test_graph_mapping(self): + op1 = "Aten_convolution_1_forward_0.input.0" + op2 = "Torch_conv2d_0_forward_0.input.0" + op3 = "Torch_batch_norm_0_forward_0.input.0" + op4 = "Aten_convolution.default_1_forward_0.input.0" + op5 = "Aten_foo_1_forward_0.input.0" + self.assertTrue(match.graph_mapping.match(op1, op2)) + self.assertTrue(match.graph_mapping.match(op2, op1)) + self.assertTrue(match.graph_mapping.match(op4, op2)) + self.assertTrue(match.graph_mapping.match(op2, op4)) + self.assertFalse(match.graph_mapping.match(op1, op3)) + self.assertFalse(match.graph_mapping.match(op3, op1)) + self.assertFalse(match.graph_mapping.match(op5, op2)) + self.assertFalse(match.graph_mapping.match(op2, op5)) -- Gitee From e31fec750b6d1da394a14a14180065183ac12f6c Mon Sep 17 00:00:00 2001 From: h00613304 Date: Tue, 23 Jul 2024 10:41:58 +0800 Subject: [PATCH 056/106] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcompare=20ut=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch_ut/compare/test_acc_compare.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py index 625bafd8e9..b214e3dd79 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py @@ -18,7 +18,7 @@ bench_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 
'Functional_con 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), ('torch.float32', [16])], 'output_struct': [('torch.float32', [1, 16, 28, 28])], - 'summery': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} @@ -119,10 +119,10 @@ aten_result = [ 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_input.3', 'Functional_batch_norm_0_forward_input.3', 'torch.float32', 'torch.float32', [256], [256], 1.763145923614502, -4.398397922515869, -1.0521326325833797, ' ', - '176.3145923614502%', '439.8297922515869%', '105.21326325933797%', ' ', 2.763145923614502, -3.398397922515869, + '176.3145923614502%', '439.8397922515869%', '105.21326325833797%', ' ', 2.763145923614502, -3.398397922515869, -0.052132632583379745, 1.0, 1.0, 1.0, 'Warning', 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_input.4', 'Functional_batch_norm_0_forward_input.4', - 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906693445, ' ', + 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906683445, ' ', 'N/A', 'N/A', 'N/A', ' ', 2.673110008239746, -3.149275064468384, 0.01613386906683445, 0.0, 0.0, 0.0, 'Warning', 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.0', 'Functional_batch_norm_0_forward_output', @@ -132,16 +132,16 @@ aten_result = [ 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' 
', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', - 'Yes', '', None], + 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', - 'Yes', '', None], + 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.3', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', - 'Yes', '', None], + 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward_output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', - 'Yes', '', None]] + 'Yes', '', 'None']] highlight_dict = {'red_rows': [], 'yellow_rows': []} @@ -191,16 +191,16 @@ op_name = "Tensor.add_0.0.forward" op_result = [ {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_input.0'}, {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.003992878366261721, 'Min': -0.008102823048830032,'Mean': -0.0002002553956117481, - 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, - {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 
0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_0.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_0.0.forward_input.alpha.0', 'dtype': "", 'shape': '[]', 'md5': None, 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'}] + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_output.0'}] class TestUtilsMethods(unittest.TestCase): @@ -252,7 +252,7 @@ class TestUtilsMethods(unittest.TestCase): result = [line_input, line_1, line_2, line_3] result_df = pd.DataFrame(result) highlight_dict = {'red_rows': [], 'yellow_rows': []} - compare.find_compare_result_error_rows(result_df, highlight_dict, False) + compare.find_compare_result_error_rows(result_df, highlight_dict, False, False) self.assertEqual(highlight_dict, {'red_rows': [num_1, num_3], 'yellow_rows': [num_2]}) def test_rename_api(self): -- Gitee From c1b963b922dc9fa7d55ba48829b751a6a8f4aa42 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 23 Jul 2024 11:44:16 +0800 Subject: [PATCH 057/106] dump data lack bugfix in compare --- debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index a4b6884343..e214910566 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -37,6 +37,7 @@ from msprobe.core.common.utils import check_compare_param, add_time_with_xlsx, C format_value, check_file_not_exists, 
check_configuration_param, task_dumppath_get from msprobe.core.common.file_check import FileChecker, change_mode, FileOpen, create_directory from msprobe.core.common.const import Const, CompareConst, FileCheckConst +from msprobe.core.common.exceptions import FileCheckException def check_graph_mode(a_op_name, b_op_name): @@ -491,6 +492,10 @@ def compare_by_op(op_name, op_name_mapping_dict, input_parma): error_file = error.filename n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True + except FileCheckerException: + error_file = data_name + n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE + error_flag = True n_value, b_value, error_flag = get_error_type(n_value, b_value, error_flag) if not error_flag: -- Gitee From d58bf08ab437683b5f01668f3d54717dd1346bee Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 23 Jul 2024 14:35:58 +0800 Subject: [PATCH 058/106] fix --- .../pytorch/api_accuracy_checker/run_ut/run_overflow_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py index 1051315153..3092c615be 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py @@ -60,7 +60,7 @@ def run_overflow_check(forward_file): try: run_torch_api(api_full_name, api_info_dict, real_data_path) except Exception as err: - [_, api_name, _] = api_full_name.split(Const.SEP) + _, api_name, _ = api_full_name.split(Const.SEP) if "not implemented for 'Half'" in str(err): logger.warning(f"API {api_name} not support half tensor in CPU, please add {api_name} to CONVERT_API " f"'fp16_to_fp32' list in accuracy_tools/api_accuracy_check/common/utils.py file.") @@ -73,7 +73,7 @@ def run_overflow_check(forward_file): def 
run_torch_api(api_full_name, api_info_dict, real_data_path): torch.npu.clear_npu_overflow_flag() - [api_type, api_name, _] = api_full_name.split(Const.SEP) + api_type, api_name, _ = api_full_name.split(Const.SEP) args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path=real_data_path) if not need_grad: logger.warning("%s function with out=... arguments don't support automatic differentiation, skip backward." -- Gitee From 043d28dd3bee6f059fa288dcdc1d526b696df815 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Tue, 23 Jul 2024 14:49:29 +0800 Subject: [PATCH 059/106] =?UTF-8?q?=E5=88=A0=E9=99=A4atat=E9=87=8C?= =?UTF-8?q?=E9=9D=A2=E7=9A=84=E9=87=8D=E5=A4=8D=E6=B5=8B=E8=AF=95=E7=94=A8?= =?UTF-8?q?=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch_ut/compare/test_acc_compare.py | 267 ------------------ .../test/pytorch_ut/compare/test_match.py | 20 -- 2 files changed, 287 deletions(-) delete mode 100644 debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py delete mode 100644 debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py deleted file mode 100644 index b214e3dd79..0000000000 --- a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_acc_compare.py +++ /dev/null @@ -1,267 +0,0 @@ -# coding=utf-8 -import unittest -from msprobe.pytorch.compare import acc_compare as compare -import pandas as pd - -npu_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', - 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], - 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), - ('torch.float32', [16])], - 'output_struct': [('torch.float32', [1, 16, 28, 28])], - 'summary': [[3.029174327850342, -2.926689624786377, 
-0.06619918346405029], - [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], - [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], - [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} - -bench_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', - 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], - 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), - ('torch.float32', [16])], - 'output_struct': [('torch.float32', [1, 16, 28, 28])], - 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], - [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], - [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], - [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} - -tensor_list = [ - {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], 'Max': 0.33033010363578796, - 'Min': -0.331031858921051,'Mean': -0.030964046716690063, 'Norm': 2.2533628940582275, 'requires_grad': True, - 'full_op_name': 'Tensor.add_.0.forward_input.0'}, - {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, - 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, - {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, - 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, - {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'} -] - -result_op_dict = {'op_name': ['Tensor.add_.0.forward_input.0', 
'Tensor.add_.0.forward_input.1', - 'Tensor.add_.0.forward_input.alpha.0', 'Tensor.add_.0.forward_output.0'], - 'input_struct': [('torch.float32', [16, 1, 3, 3]), ('torch.float32', [16, 1, 3, 3]), - ("", '[]')], - 'output_struct': [('torch.float32', [16, 1, 3, 3])], - 'summary': [[0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275], - [0.003992878366261721, -0.008102823048830032, -0.0002002553956117481, 0.02844562754034996], - [-0.1, -0.1, -0.1, -0.1], - [0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275]], - 'stack_info': []} - -o_result = [ - ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.0', 'torch.float32', 'torch.float32', - [1, 1, 28, 28], [1, 1, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 3.029174327850342, -2.926689624786377, - -0.06619918346405029, 3.029174327850342, -2.926689624786377, -0.06619918346405029, '', '', 'None'], - ['Functional_conv2d_0_forward_input.1', 'Functional_conv2d_0_forward_input.1', 'torch.float32', 'torch.float32', - [16, 1, 5, 5], [16, 1, 5, 5], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19919930398464203, -0.19974489510059357, - 0.006269412115216255, 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, '', '', 'None'], - ['Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_input.2', 'torch.float32', 'torch.float32', - [16], [16], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, - 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, '', '', 'None'], - ['Functional_conv2d_0_forward_output', 'Functional_conv2d_0_forward_output', 'torch.float32', 'torch.float32', - [1, 16, 28, 28], [1, 16, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 2.1166646480560303, -2.190781354904175, - -0.003579073818400502, 2.1166646480560303, -2.190781354904175, -0.003579073818400502, '', '', 'None']] - -npu_dict_aten = {'op_name': 
['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', - 'Aten__native_batch_norm_legit_functional.default_0_forward_input.1', - 'Aten__native_batch_norm_legit_functional.default_0_forward_input.2', - 'Aten__native_batch_norm_legit_functional.default_0_forward_input.3', - 'Aten__native_batch_norm_legit_functional.default_0_forward_input.4', - 'Aten__native_batch_norm_legit_functional.default_0_forward_output.0', - 'Aten__native_batch_norm_legit_functional.default_0_forward_output.1', - 'Aten__native_batch_norm_legit_functional.default_0_forward_output.2', - 'Aten__native_batch_norm_legit_functional.default_0_forward_output.3', - 'Aten__native_batch_norm_legit_functional.default_0_forward_output.4'], - 'input_struct': [('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), - ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], - 'output_struct': [('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), - ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], - 'summary': [[139.625, -127.5625, -0.0103607177734375], - [2.5276029109954834, -2.1788690090179443, -0.0008259844034910202], - [2.472219944000244, -2.845968723297119, -0.008756577968597412], - [2.763145923614502, -3.398397922515869, -0.052132632583379745], - [2.673110008239746, -3.149275064468384, 0.01613386906683445], - [13.5546875, -10.640625, -0.008758544921875], - [0.30550330877304077, -0.24485322833061218, -0.010361209511756897], - [623.9192504882812, 432.96826171875, 520.2276611328125], - [2.4797861576080322, -3.055997371673584, -0.04795549064874649], - [61.7945556640625, 42.59713363647461, 52.03831481933594]]} - -bench_dict_functional = { - 'op_name': ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.1', - 'Functional_batch_norm_0_forward_input.2', 'Functional_batch_norm_0_forward_input.3', - 'Functional_batch_norm_0_forward_input.4', 'Functional_batch_norm_0_forward_output'], - 
'input_struct': [('torch.float32', [256, 256, 14, 14]), ('torch.float32', [256]), ('torch.float32', [256]), - ('torch.float32', [256]), ('torch.float32', [256])], - 'output_struct': [('torch.float32', [256, 256, 14, 14])], - 'summary': [[3.061628818511963, -3.22507381439209, 3.634914173744619e-05], - [0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06], - [0.9338104128837585, 0.9277191162109375, 0.930335283279419], - [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], - [5.397906303405762, -5.796811580657959, 2.5283952709287405e-10]] -} - -aten_result = [ - ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', - 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 136.56337118148804, -124.33742618560791, - -0.010397066915174946, ' ', '4460.480981749501%', '3855.335826136584%', '28603.33536971545%', ' ', 139.625, - -127.5625, -0.0103607177734375, 3.061628818511963, -3.22507381439209, 3.634914173744619e-05, 'Warning', - 'Need double check api accuracy.', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_input.1', 'Functional_batch_norm_0_forward_input.1', - 'torch.float32', 'torch.float32', [256], [256], 2.527024927258026, -2.1782388387364335, -0.0008296193100250093, - ' ', '437213.84590749856%', '345658.76916858414%', '22823.676544842117%', ' ', 2.5276029109954834, - -2.1788690090179443, -0.0008259844034910202, 0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06, - 'Warning', 'Need double check api accuracy.', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_input.2', 'Functional_batch_norm_0_forward_input.2', - 'torch.float32', 'torch.float32', [256], [256], 1.5384095311164856, -3.7736878395080566, -0.9390918612480164, ' ', - '164.74538192025793%', '406.7705163736246%', '100.94122819224167%', ' ', 2.472219944000244, -2.845968723297119, - -0.008756577968597412, 0.9338104128837585, 0.9277191162109375, 0.930335283279419, 
'Warning', - 'Need double check api accuracy.', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_input.3', 'Functional_batch_norm_0_forward_input.3', - 'torch.float32', 'torch.float32', [256], [256], 1.763145923614502, -4.398397922515869, -1.0521326325833797, ' ', - '176.3145923614502%', '439.8397922515869%', '105.21326325833797%', ' ', 2.763145923614502, -3.398397922515869, - -0.052132632583379745, 1.0, 1.0, 1.0, 'Warning', 'Need double check api accuracy.', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_input.4', 'Functional_batch_norm_0_forward_input.4', - 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906683445, ' ', - 'N/A', 'N/A', 'N/A', ' ', 2.673110008239746, -3.149275064468384, 0.01613386906683445, 0.0, 0.0, 0.0, 'Warning', - 'Need double check api accuracy.', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_output.0', 'Functional_batch_norm_0_forward_output', - 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 8.156781196594238, -4.843813419342041, - -0.008758545174714527, ' ', '151.11009228611078%', '83.55995967687207%', '3464072756.115108%', ' ', 13.5546875, - -10.640625, -0.008758544921875, 5.397906303405762, -5.796811580657959, 2.5283952709287405e-10, 'Warning', - 'Need double check api accuracy.', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', - 'Yes', '', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', - 'Yes', '', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_output.3', 'Nan', 'torch.float32', 'Nan', [256], 
'Nan', - ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', - 'Yes', '', 'None'], - ['Aten__native_batch_norm_legit_functional.default_0_forward_output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', - 'Yes', '', 'None']] - -highlight_dict = {'red_rows': [], 'yellow_rows': []} - -num_0, num_1, num_2, num_3 = 0, 1, 2, 3 -summary_line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', - 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.01, 0, 0, 0, 1, 1, 1, 1, 1.01, 1, 1, 1, - 'Yes', ''] -summary_line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', - 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 10, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, - 'Warning', ''] -summary_line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', - 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.02, 0, 0, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, - 'Warning', ''] -summary_line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', - 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, - 'Warning', ''] -line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 1, 1, 0.95, 1, 1, 1, 1, 1, 1.01, 1, 1, 1, - 'Yes', ''] -line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1, 1, 0.59, 1, 'nan', 0, 1, 1, 19, 1, 1, 1, - 'Warning', ''] -line_2 = ['Functional_batch_norm_0_forward_output.1', 
'Functional_batch_norm_0_forward_output.1', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 1, 1, 0.8, 1, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, - 'Warning', ''] -line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1.1e+10, 1, 0.85, 1, 9, 0.12, 0, 1, 1, 0.1, 1, - 1, 1, 'Warning', ''] - -op_data = { - 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True}, - {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, - 'Norm': 0.02844562754034996, 'requires_grad': False}], - 'input_kwargs': {'alpha': {'type': 'float', 'value': -0.1}}, - 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True}]} - -op_name = "Tensor.add_0.0.forward" - -op_result = [ - {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_input.0'}, - {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, - 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_0.0.forward_input.1'}, - {'full_op_name': 'Tensor.add_0.0.forward_input.alpha.0', 'dtype': "", 'shape': '[]', 'md5': None, - 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, - {'type': 
'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], - 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, - 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_output.0'}] - - -class TestUtilsMethods(unittest.TestCase): - - def test_check_graph_mode(self): - op1 = "Aten" - op2 = "torch" - self.assertTrue(compare.check_graph_mode(op1, op2)) - self.assertTrue(compare.check_graph_mode(op2, op1)) - self.assertFalse(compare.check_graph_mode(op1, op1)) - self.assertFalse(compare.check_graph_mode(op2, op2)) - - def test_check_op(self): - fuzzy_match = False - result = compare.check_op(npu_dict, bench_dict, fuzzy_match) - self.assertEqual(result, True) - - def test_merge_tensor(self): - op_dict = compare.merge_tensor(tensor_list, True, False) - self.assertEqual(op_dict, result_op_dict) - - def test_read_op(self): - result = compare.read_op(op_data, op_name) - self.assertEqual(result, op_result) - - def test_match_op(self): - fuzzy_match = False - a, b = compare.match_op([npu_dict], [bench_dict], fuzzy_match) - self.assertEqual(a, 0) - self.assertEqual(b, 0) - - def test_get_accuracy(self): - result = [] - compare.get_accuracy(result, npu_dict, bench_dict, highlight_dict) - self.assertEqual(result, o_result) - - def test_get_accuracy_graph_mode(self): - result = [] - compare.get_accuracy(result, npu_dict_aten, bench_dict_functional, highlight_dict) - self.assertEqual(result, aten_result) - - def test_find_error_rows(self): - summary_result = [summary_line_input, summary_line_1, summary_line_2, summary_line_3] - highlight_dict = {'red_rows': [], 'yellow_rows': []} - compare.find_error_rows(summary_result, 0, 1, highlight_dict, summary_compare=True) - self.assertEqual(highlight_dict, {'red_rows': [], 'yellow_rows': []}) - - def test_find_compare_result_error_rows(self): - result = [line_input, line_1, line_2, line_3] - result_df = pd.DataFrame(result) - highlight_dict = {'red_rows': [], 
'yellow_rows': []} - compare.find_compare_result_error_rows(result_df, highlight_dict, False, False) - self.assertEqual(highlight_dict, {'red_rows': [num_1, num_3], 'yellow_rows': [num_2]}) - - def test_rename_api(self): - test_name_1 = "Distributed.broadcast.0.forward.input.0" - expect_name_1 = "Distributed.broadcast.input.0" - actual_name_1 = compare.rename_api(test_name_1, "forward") - self.assertEqual(actual_name_1, expect_name_1) - - test_name_2 = "Torch.sum.0.backward.output.0" - expect_name_2 = "Torch.sum.output.0" - actual_name_2 = compare.rename_api(test_name_2, "backward") - self.assertEqual(actual_name_2, expect_name_2) diff --git a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py b/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py deleted file mode 100644 index ac28e994e9..0000000000 --- a/debug/accuracy_tools/atat/test/pytorch_ut/compare/test_match.py +++ /dev/null @@ -1,20 +0,0 @@ -# coding=utf-8 -import unittest -from msprobe.pytorch.compare import match - - -class TestMatch(unittest.TestCase): - def test_graph_mapping(self): - op1 = "Aten_convolution_1_forward_0.input.0" - op2 = "Torch_conv2d_0_forward_0.input.0" - op3 = "Torch_batch_norm_0_forward_0.input.0" - op4 = "Aten_convolution.default_1_forward_0.input.0" - op5 = "Aten_foo_1_forward_0.input.0" - self.assertTrue(match.graph_mapping.match(op1, op2)) - self.assertTrue(match.graph_mapping.match(op2, op1)) - self.assertTrue(match.graph_mapping.match(op4, op2)) - self.assertTrue(match.graph_mapping.match(op2, op4)) - self.assertFalse(match.graph_mapping.match(op1, op3)) - self.assertFalse(match.graph_mapping.match(op3, op1)) - self.assertFalse(match.graph_mapping.match(op5, op2)) - self.assertFalse(match.graph_mapping.match(op2, op5)) -- Gitee From e76e12d2e105627bf899fd0bf1bc9cf6d7afe345 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 23 Jul 2024 14:55:36 +0800 Subject: [PATCH 060/106] fix bug --- .../pytorch/api_accuracy_checker/run_ut/run_overflow_check.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py index 3092c615be..732745ee8c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py @@ -74,7 +74,7 @@ def run_overflow_check(forward_file): def run_torch_api(api_full_name, api_info_dict, real_data_path): torch.npu.clear_npu_overflow_flag() api_type, api_name, _ = api_full_name.split(Const.SEP) - args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path=real_data_path) + args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path) if not need_grad: logger.warning("%s function with out=... arguments don't support automatic differentiation, skip backward." % api_full_name) -- Gitee From 7366695cdc54a7159425a03429ca18efa2849e0b Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Tue, 23 Jul 2024 15:42:19 +0800 Subject: [PATCH 061/106] =?UTF-8?q?[advisor]=E7=AE=97=E5=AD=90=E9=99=8D?= =?UTF-8?q?=E9=A2=91=E9=97=AE=E9=A2=98=E8=B5=84=E6=96=99=E9=80=82=E9=85=8D?= =?UTF-8?q?=E8=A1=A5=E5=85=85=E7=AE=97=E5=AD=90=E9=99=8D=E9=A2=91=E6=8F=8F?= =?UTF-8?q?=E8=BF=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/advisor/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md index c650f40b3e..b6d157f5a8 100644 --- a/profiler/advisor/README.md +++ b/profiler/advisor/README.md @@ -72,6 +72,7 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 | | block_dim_analysis | block dim算子调优 | | | operator_no_bound_analysis | operator no bound | | | graph | 融合算子图调优 | +| | freq_analysis | AI Core算子降频分析 | | scheduling | 
timeline_fusion_ops | 亲和API替换调优 | | | timeline_op_dispatch | 识别算子下发问题(路径3/路径5) | @@ -152,7 +153,7 @@ torch_npu.npu.config.allow_internal_format = False ![schedule_3](./img/schedule_3.png) -computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 +computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape、AI Core算子降频分析等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 ![computation_1](./img/computation_1.png) -- Gitee From e7e741cad26b5a8265b1f8f371d9a725e97b8660 Mon Sep 17 00:00:00 2001 From: fanhong <2532845962@qq.com> Date: Sat, 20 Jul 2024 14:53:36 +0800 Subject: [PATCH 062/106] =?UTF-8?q?=E9=80=82=E9=85=8Ddisaggregate=5Fperf?= =?UTF-8?q?=E6=96=B0=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../analyzer/overall/overall_analyzer.py | 45 -------- .../overall/overall_summary_analyzer.py | 107 ++++++++---------- profiler/cli/analyze_cli.py | 3 - 3 files changed, 47 insertions(+), 108 deletions(-) delete mode 100644 profiler/advisor/analyzer/overall/overall_analyzer.py diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py deleted file mode 100644 index 916a396b3d..0000000000 --- a/profiler/advisor/analyzer/overall/overall_analyzer.py +++ /dev/null @@ -1,45 +0,0 @@ -import logging -from typing import Dict, List - -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult -from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface - -logger = logging.getLogger() - - -class OverallSummaryAnalyzer(BaseAnalyzer): - - def __init__(self, profiling_path, benchmark_profiling_path=None, **kwargs): - self.benchmark_profiling_path = benchmark_profiling_path or profiling_path - self.profiling_path 
= profiling_path - self.html_render = HTMLRender() - self.result = OptimizeResult() - - def optimize(self, **kwargs): - compare_result = ComparisonInterface(self.benchmark_profiling_path, self.profiling_path).compare( - Constant.OVERALL_COMPARE) - - headers = compare_result.get('Model Profiling Time Distribution').get("headers", []) - rows = compare_result.get('Model Profiling Time Distribution').get("rows", []) - - self.make_record() - self.make_render(headers=headers, rows=rows) - return compare_result - - def make_record(self): - pass - - def make_render(self, **kwargs): - headers = kwargs.get("headers") - rows = kwargs.get("rows") - - if not headers or not rows: - logger.info("Empty headers or rows, skip render overall analysis html") - self.html_render.render_template(key="overall", - template_dir="templates", - template_name="overall_analysis.html", - headers=kwargs.get("headers"), - rows=kwargs.get("rows")) diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index c74ae05103..563a55abb5 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -12,20 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import os import copy +import os -import logging -from typing import Dict, List - +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant as const from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.advisor.common import constant as const from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface -from profiler.advisor.utils.utils import get_file_path_from_directory, load_parameter class OverallSummaryAnalyzer(BaseAnalyzer): @@ -52,16 +48,28 @@ class OverallSummaryAnalyzer(BaseAnalyzer): "Uncovered Communication Time(Wait Time)": [], "Free Time": ['SDMA Time(Num)'] } + time_field_map = { + "Computing Time": ['compute_time', None], + "Uncovered Communication Time(Wait Time)": ['communication_not_overlapped', 'wait_time'], + "Free Time": ['scheduling_time', None], + "Cube Time(Num)": ['cube_time', 'cube_num'], + "Vector Time(Num)": ['vec_time', 'vec_num'], + "Flash Attention Time(Forward)(Num)": ['fa_time_fwd', 'fa_num_fwd'], + "Flash Attention Time(Backward)(Num)": ['fa_time_bwd', 'fa_num_bwd'], + "Other Time": ['other_time', None], + "SDMA Time(Num)": ['sdma_time', 'sdma_num'], + } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): profile_path = get_profile_path(collection_path) super().__init__(profile_path, n_processes, **kwargs) - self.base_collection_path = kwargs.get("base_collection_path", "") + self.base_collection_path = kwargs.get("benchmark_profiling_path", "") self._has_base_collection = False self._is_minimal_profiling = False self.cur_data = {} self.cur_data_table = {} self.cur_bottleneck = {} 
+ self._disaggregate_perf = {} self.cur_advices = "" self._headers = [] self._base_data = [] @@ -71,22 +79,6 @@ class OverallSummaryAnalyzer(BaseAnalyzer): self.bottleneck_str = "" self.bottleneck_table = {} - @staticmethod - def split_duration_and_num(time_value: str) -> tuple: - split_data = time_value.split("s") # time value example: 0.229s(1756) - duration, num = 0.0, None - if len(split_data) >= 2: - try: - num = int(split_data[1].strip("()")) - except ValueError: - pass - if len(split_data) >= 1: - try: - duration = float(split_data[0]) - except ValueError: - print(f"[WARNING] Invalid time value: {time_value}.") - return duration, num - @staticmethod def calculate_ratio(dividend, divisor): if not divisor: @@ -102,49 +94,43 @@ class OverallSummaryAnalyzer(BaseAnalyzer): return os.path.exists(self.collection_path) def process(self): - base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path - result_data = ComparisonInterface(base_collection_path, self.collection_path).compare(Constant.OVERALL_COMPARE) - for data in result_data.values(): - self._headers = data.get("headers", []) - rows = data.get("rows", []) - if len(rows) == 2: - self._base_data = rows[0] - self._comparison_data = rows[1] - if not self._headers or not self._comparison_data: + self._disaggregate_perf = ComparisonInterface(self.collection_path).disaggregate_perf(Constant.OVERALL_COMPARE) + if not self._disaggregate_perf: return - self._is_minimal_profiling = 'E2E Time(Not minimal profiling)' not in self._headers + self._is_minimal_profiling = self._disaggregate_perf.get("minimal_profiling", False) if self._has_base_collection: - self.cur_data["comparison_result"] = result_data + base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path + comparison_result = ComparisonInterface(base_collection_path, self.collection_path).compare( + Constant.OVERALL_COMPARE) + self.cur_data["comparison_result"] = 
comparison_result + time_category_dict = {} for time_category, time_list in self.performance_time_dict.items(): - time_value = self.get_time_value(time_category, self._comparison_data) - if time_value == Constant.INVALID_VALUE: + duration, _ = self.get_duration_and_num(time_category) + if duration == Constant.INVALID_VALUE: continue - duration, _ = self.split_duration_and_num(time_value) time_category = time_category.split("(")[0] time_category_dict[time_category] = duration self.get_sub_category_time(time_category, time_list, duration) self.cur_data["overall_data"] = time_category_dict - def get_time_value(self, header_name: str, data_list: list): - try: - data_index = self._headers.index(header_name) - except ValueError: - return Constant.INVALID_VALUE - try: - time_value = data_list[data_index] - except IndexError: - return Constant.INVALID_VALUE - return time_value + def get_duration_and_num(self, time_category: str): + field_list = self.time_field_map.get(time_category) + if not field_list: + return Constant.INVALID_VALUE, Constant.INVALID_VALUE + duration = round(self._disaggregate_perf.get(field_list[0], 0.0), 3) + num = self._disaggregate_perf.get(field_list[1], None) + if isinstance(num, float): + num = round(num, 3) + return duration, num def get_sub_category_time(self, category: str, time_list: list, total_duration: float): sub_time_dict = {} for time_name in time_list: - time_value = self.get_time_value(time_name, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, "")) - duration, num = self.split_duration_and_num(time_value) + duration, num = self.get_duration_and_num(time_name) + if duration == Constant.INVALID_VALUE or num == Constant.INVALID_VALUE: + continue sub_time_dict.setdefault(f"Duration(s)", []).append(duration) sub_time_dict.setdefault(f"Duration Ratio", []).append( "{:.2%}".format(self.calculate_ratio(duration, 
total_duration))) @@ -171,13 +157,14 @@ class OverallSummaryAnalyzer(BaseAnalyzer): # add comparison bottleneck time_type_origin = "Uncovered Communication Time(Wait Time)" \ if time_type == "Uncovered Communication Time" else time_type - base_duration, _ = self.split_duration_and_num(self.get_time_value(time_type_origin, self._base_data)) + base_duration, _ = self.get_duration_and_num(time_type_origin) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" self.cur_bottleneck["overall_data"] = overall_bottleneck if comparison_bottleneck: self.cur_bottleneck["comparison_result"] = comparison_bottleneck + def optimize(self, **kwargs): if self.path_check(): self.process() @@ -218,7 +205,6 @@ class OverallSummaryAnalyzer(BaseAnalyzer): data_table = {"headers": headers, "data": [data_list]} self.cur_data_table[data_type] = copy.deepcopy(data_table) - def make_record(self): """ make record for what and how to optimize @@ -232,7 +218,7 @@ class OverallSummaryAnalyzer(BaseAnalyzer): ) self.result.add(OptimizeRecord(optimization_item)) - self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) + self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) for data_type, data_dict in self.cur_data_table.items(): if data_dict: self.result.add_detail(const.DATA + data_type, data_dict["headers"], data_dict["data"][0]) @@ -241,9 +227,9 @@ class OverallSummaryAnalyzer(BaseAnalyzer): if not self.bottleneck_str and not self.cur_advices: return result_for_html = { - "Description" : self.bottleneck_str, - "suggestion" : self.cur_advices, - "details" : [self.bottleneck_table] + "Description": self.bottleneck_str, + "suggestion": self.cur_advices, + "details": [self.bottleneck_table] } self.html_render.render_template(key="overall", @@ -254,9 
+240,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): torch_version=self.torch_version, result=result_for_html) + def get_profile_path(collection_path): for root, dirs, files in os.walk(collection_path): for file in files: if file.startswith("profiler_info"): return root - return "" \ No newline at end of file + return "" diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 2e173dc870..f400a265b7 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -83,9 +83,6 @@ def analyze_cli(**kwargs): help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_all(**kwargs) -> None: - # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 - if not kwargs.get("benchmark_profiling_path"): - kwargs["benchmark_profiling_path"] = kwargs.get("profiling_path") try: _analyze(Interface.all_dimension, **kwargs) except RuntimeError as e: -- Gitee From c8838df21f97f5abb1c0f85a94d8bbb6c2b5ac83 Mon Sep 17 00:00:00 2001 From: fanhong <2532845962@qq.com> Date: Tue, 23 Jul 2024 15:38:38 +0800 Subject: [PATCH 063/106] =?UTF-8?q?=E9=80=82=E9=85=8Ddisaggregate=5Fperf?= =?UTF-8?q?=E6=96=B0=E6=8E=A5=E5=8F=A3=EF=BC=8C=E4=BB=85=E4=BF=9D=E7=95=99?= =?UTF-8?q?overall=5Fdata?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../overall/overall_summary_analyzer.py | 100 +++++------------- .../disaggregate/overall_perf_interface.py | 28 ++++- 2 files changed, 56 insertions(+), 72 deletions(-) diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index 563a55abb5..2cb0164eb3 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -42,34 +42,18 @@ class OverallSummaryAnalyzer(BaseAnalyzer): 'Other Time': "Other Computing Time", 'SDMA Time(Num)': 'SDMA Time' } - 
performance_time_dict = { - "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)', - 'Flash Attention Time(Backward)(Num)', 'Other Time'], - "Uncovered Communication Time(Wait Time)": [], - "Free Time": ['SDMA Time(Num)'] - } - time_field_map = { - "Computing Time": ['compute_time', None], - "Uncovered Communication Time(Wait Time)": ['communication_not_overlapped', 'wait_time'], - "Free Time": ['scheduling_time', None], - "Cube Time(Num)": ['cube_time', 'cube_num'], - "Vector Time(Num)": ['vec_time', 'vec_num'], - "Flash Attention Time(Forward)(Num)": ['fa_time_fwd', 'fa_num_fwd'], - "Flash Attention Time(Backward)(Num)": ['fa_time_bwd', 'fa_num_bwd'], - "Other Time": ['other_time', None], - "SDMA Time(Num)": ['sdma_time', 'sdma_num'], - } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): profile_path = get_profile_path(collection_path) super().__init__(profile_path, n_processes, **kwargs) - self.base_collection_path = kwargs.get("benchmark_profiling_path", "") - self._has_base_collection = False + self.benchmark_profiling_path = kwargs.get("benchmark_profiling_path", "") + self._has_benchmark_profiling = False self._is_minimal_profiling = False self.cur_data = {} self.cur_data_table = {} self.cur_bottleneck = {} self._disaggregate_perf = {} + self._disaggregate_benchmark_perf = {} self.cur_advices = "" self._headers = [] self._base_data = [] @@ -85,79 +69,51 @@ class OverallSummaryAnalyzer(BaseAnalyzer): return float("inf") return dividend / divisor + @staticmethod + def get_time_category_dict(overall_dict: dict): + time_category_dict = { + "Computing Time": round(overall_dict.get('computing_time_ms', 0.0), 3), + "Uncovered Communication Time": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), + "Free Time": round(overall_dict.get('free_time_ms', 0.0), 3) + } + return time_category_dict + def path_check(self): - if self.base_collection_path: - if 
os.path.exists(self.base_collection_path): - self._has_base_collection = True + if self.benchmark_profiling_path: + if os.path.exists(self.benchmark_profiling_path): + self._has_benchmark_profiling = True else: - print(f"[WARNING] Invalid path which not exists: {self.base_collection_path}.") + print(f"[WARNING] Invalid path which not exists: {self.benchmark_profiling_path}.") return os.path.exists(self.collection_path) def process(self): self._disaggregate_perf = ComparisonInterface(self.collection_path).disaggregate_perf(Constant.OVERALL_COMPARE) + if self._has_benchmark_profiling: + self._disaggregate_benchmark_perf = (ComparisonInterface(self.benchmark_profiling_path) + .disaggregate_perf(Constant.OVERALL_COMPARE)) if not self._disaggregate_perf: return self._is_minimal_profiling = self._disaggregate_perf.get("minimal_profiling", False) - if self._has_base_collection: - base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path - comparison_result = ComparisonInterface(base_collection_path, self.collection_path).compare( - Constant.OVERALL_COMPARE) - self.cur_data["comparison_result"] = comparison_result - - time_category_dict = {} - for time_category, time_list in self.performance_time_dict.items(): - duration, _ = self.get_duration_and_num(time_category) - if duration == Constant.INVALID_VALUE: - continue - time_category = time_category.split("(")[0] - time_category_dict[time_category] = duration - self.get_sub_category_time(time_category, time_list, duration) - self.cur_data["overall_data"] = time_category_dict - - def get_duration_and_num(self, time_category: str): - field_list = self.time_field_map.get(time_category) - if not field_list: - return Constant.INVALID_VALUE, Constant.INVALID_VALUE - duration = round(self._disaggregate_perf.get(field_list[0], 0.0), 3) - num = self._disaggregate_perf.get(field_list[1], None) - if isinstance(num, float): - num = round(num, 3) - return duration, num - - def 
get_sub_category_time(self, category: str, time_list: list, total_duration: float): - sub_time_dict = {} - for time_name in time_list: - sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, "")) - duration, num = self.get_duration_and_num(time_name) - if duration == Constant.INVALID_VALUE or num == Constant.INVALID_VALUE: - continue - sub_time_dict.setdefault(f"Duration(s)", []).append(duration) - sub_time_dict.setdefault(f"Duration Ratio", []).append( - "{:.2%}".format(self.calculate_ratio(duration, total_duration))) - sub_time_dict.setdefault(f"Kernel Number", []).append(num) - self.cur_data[self.time_name_map.get(category)] = sub_time_dict + self.cur_data["overall_data"] = self.get_time_category_dict(self._disaggregate_perf.get('overall', {})) def identify_bottleneck(self): overall_data = self.cur_data.get("overall_data") if not overall_data: return e2e_time = '%.3f' % sum([data for data in overall_data.values()]) - overall_bottleneck = f"The Model E2E Time is {e2e_time}s.\n" + overall_bottleneck = f"The Model E2E Time is {e2e_time}ms.\n" comparison_bottleneck = "" for time_type, time_value in overall_data.items(): - # add subtype time bottleneck - self.cur_bottleneck[self.time_name_map.get(time_type)] = f"{time_type} is {time_value}s.\n" # add overall bottleneck - overall_bottleneck += f" -- {time_type} is {time_value}s\n" + overall_bottleneck += f" -- {time_type} is {time_value}ms\n" if time_type == "Free Time" and self._is_minimal_profiling and self.calculate_ratio(time_value, e2e_time) > 0.1: overall_bottleneck += "percentage of free time exceed the threshold 10%." 
- if not self._has_base_collection: + if not self._has_benchmark_profiling: continue # add comparison bottleneck - time_type_origin = "Uncovered Communication Time(Wait Time)" \ - if time_type == "Uncovered Communication Time" else time_type - base_duration, _ = self.get_duration_and_num(time_type_origin) + base_duration = self.get_time_category_dict(self._disaggregate_benchmark_perf.get('overall', {})).get( + time_type) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" @@ -183,7 +139,7 @@ class OverallSummaryAnalyzer(BaseAnalyzer): for key, value in self.cur_bottleneck.items(): if not value: continue - result += f'{key}: {value} \n' + result += f'{value} \n' headers.append(key) data.append(value) data_list.append(data) @@ -226,8 +182,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): def make_render(self): if not self.bottleneck_str and not self.cur_advices: return + # 将\n替换为html换行 + bottleneck_str = self.bottleneck_str.replace('\n', '
') result_for_html = { - "Description": self.bottleneck_str, + "Description": bottleneck_str, "suggestion": self.cur_advices, "details": [self.bottleneck_table] } diff --git a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py index c89e845193..7bac2b0335 100644 --- a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py +++ b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py @@ -31,4 +31,30 @@ class OverallPerfInterface: def _generate_result(self): overall_data = self._profiling_data.overall_metrics - self._result_data = getattr(overall_data, "__dict__", {}) + + self._result_data = { + "profiling_type": overall_data.profiling_type, + "minimal_profiling": overall_data.minimal_profiling, + "overall": {"e2e_time_ms": overall_data.e2e_time_ms, + "computing_time_ms": overall_data.compute_time_ms, + "uncovered_communication_time_ms": overall_data.communication_not_overlapped_ms, + "free_time_ms": overall_data.free_time_ms}, + "computing_time_disaggregate": {"fa_time_ms": overall_data.fa_total_time, + "conv_time_ms": overall_data.conv_total_time, + "matmul_time_ms": overall_data.mm_total_time, + "page_attention_time_ms": overall_data.page_attention_time, + "vector_time_ms": overall_data.vector_total_time, + "tensor_move_time_ms": overall_data.sdma_time_tensor_move, + "other_cube_time_ms": overall_data.other_cube_time}, + "computing_num_disaggregate": {"fa_num": overall_data.fa_total_num, + "conv_num": overall_data.conv_total_num, + "matmul_num": overall_data.mm_total_num, + "page_attention_num": overall_data.page_attention_num, + "vector_num": overall_data.vector_total_num, + "tensor_move_num": overall_data.sdma_num_tensor_move, + "other_cube_num": overall_data.other_cube_num}, + "communication_time_disaggregate": {"wait_time_ms": overall_data.wait_time_ms, + "transmit_time_ms": overall_data.transmit_time_ms}, + 
"free_time_disaggregate": {"sdma_time_ms": overall_data.sdma_time_stream, + "free_ms": overall_data.free_time_ms - overall_data.sdma_time_stream} + } -- Gitee From 1fb9a50ed31207da1aa976a2f25d688c9220be73 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Tue, 23 Jul 2024 20:21:17 +0800 Subject: [PATCH 064/106] =?UTF-8?q?=E4=BF=AE=E6=94=B9codecheck?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/test/pytorch_ut/compare/test_acc_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py index b214e3dd79..288e259c0a 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py @@ -1,7 +1,7 @@ # coding=utf-8 import unittest -from msprobe.pytorch.compare import acc_compare as compare import pandas as pd +from msprobe.pytorch.compare import acc_compare as compare npu_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], -- Gitee From b3230c665fb3c62f1f69f21befc025ff4ebf3b0f Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Wed, 24 Jul 2024 14:53:10 +0800 Subject: [PATCH 065/106] =?UTF-8?q?[msprobe\pytorch\dump]=E8=B5=84?= =?UTF-8?q?=E6=96=99=E7=A4=BA=E4=BE=8B=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/pytorch/doc/dump.md | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/dump.md b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md index 7d0763b684..0554736c36 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/dump.md +++ 
b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md @@ -12,7 +12,7 @@ msprobe工具主要通过在训练脚本内添加dump接口并启动训练的方 通过加载dump配置文件的方式来确定dump操作的详细配置。 -可以在from msprobe.pytorch import PrecisionDebugger和模型初始化之间的任意位置添加该接口。 +PrecisionDebugger接口可以在from msprobe.pytorch import PrecisionDebugger之后的位置添加。详细使用可参考“**示例代码**”或“**model配置代码示例**”。 **原型** @@ -125,22 +125,25 @@ debugger.step() ```Python from msprobe.pytorch import PrecisionDebugger + +# 请勿将PrecisionDebugger的初始化流程插入到循环代码中 debugger = PrecisionDebugger(config_path="./config.json", dump_path="./dump_path") -# 请勿将以上初始化流程插入到循环代码中 -# 模型初始化 -# 下面代码也可以用PrecisionDebugger.start()和PrecisionDebugger.stop() -debugger.start() -# 需要dump的代码片段1 +# 模型、损失函数的定义及初始化等操作 +# ... -debugger.stop() -debugger.start() +# 数据集迭代的位置一般为模型训练开始的位置 +for data, label in data_loader: + debugger.start() # 开启数据dump -# 需要dump的代码片段2 + # 如下是模型每个step执行的逻辑 + output = model(data) + #... + loss.backward() -debugger.stop() -debugger.step() + debugger.stop() # 关闭数据dump + debugger.step() # 结束一个step的dump ``` ## dump结果文件介绍 -- Gitee From b2ec87875ea9c327ca447739fc64c6076c2beca5 Mon Sep 17 00:00:00 2001 From: hanqing Date: Wed, 24 Jul 2024 15:23:49 +0800 Subject: [PATCH 066/106] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=8C=87=E5=AE=9A?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=AE=B5=E8=87=AA=E5=8A=A8dump=E5=89=8D?= =?UTF-8?q?=E5=8F=8D=E5=90=91=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/pytorch/debugger/precision_debugger.py | 6 ++++++ debug/accuracy_tools/msprobe/pytorch/service.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py index 1fce5a3035..2c8692d93a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -64,6 +64,12 @@ class PrecisionDebugger: else: 
instance.service.start(instance.model) + # 指定代码段dump前反向结束符,之后的计算过程数据将被忽略,无法被dump + @classmethod + def forward_backward_dump_end(cls): + instance = cls._instance + instance.service.forward_backward_dump_end() + @classmethod def stop(cls): instance = cls._instance diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index e5da444840..adfbddc5ce 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -100,6 +100,10 @@ class Service: self.create_dirs() logger.info_on_rank_0(f"Dump data will be saved in {self.dump_iter_dir}.") + def forward_backward_dump_end(self): + logger.info_on_rank_0("Data needed ends here.") + api_register.api_originality() + def stop(self): if self.config.level == "L2": return -- Gitee From 698018c5faa754ca26a700531893c6f05b539212 Mon Sep 17 00:00:00 2001 From: fanhong <2532845962@qq.com> Date: Tue, 23 Jul 2024 22:46:00 +0800 Subject: [PATCH 067/106] =?UTF-8?q?=E9=80=82=E9=85=8Ddisaggregate=5Fperf?= =?UTF-8?q?=E6=96=B0=E6=8E=A5=E5=8F=A3=EF=BC=8C=E6=96=B0=E5=A2=9Eover=5Fsu?= =?UTF-8?q?mmary=5Fanalysis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../overall/overall_summary_analyzer.py | 131 ++++++++++-------- 1 file changed, 75 insertions(+), 56 deletions(-) diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index 2cb0164eb3..e9f39c60a2 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -12,11 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import copy import os from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant as const from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.result.result import OptimizeResult @@ -26,21 +24,21 @@ from profiler.compare_tools.compare_interface.comparison_interface import Compar class OverallSummaryAnalyzer(BaseAnalyzer): OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis" - advice_map = { - "Computing Time": "if you want more detailed advice please go to att_advisor_*.html", - "Uncovered Communication Time": "if you want more detailed advice please go to att_advisor_*.html", - "Free Time": "if you want more detailed advice please go to att_advisor_*.html" - } - time_name_map = { - "Computing Time": "computing", - "Uncovered Communication Time": "communication", - "Free Time": "free", - 'Cube Time(Num)': 'Cube Time', - 'Vector Time(Num)': 'Vector Time', - 'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)', - 'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)', - 'Other Time': "Other Computing Time", - 'SDMA Time(Num)': 'SDMA Time' + performance_time_dict = { + "Computing Time": "computing_time_ms", + " -- Flash Attention": "fa_time_ms", + " -- Conv": "conv_time_ms", + " -- Matmul": "matmul_time_ms", + " -- Vector": "vector_time_ms", + " -- SDMA(Tensor Move)": "tensor_move_time_ms", + " -- Other Cube": "other_cube_time_ms", + "Uncovered Communication Time": "uncovered_communication_time_ms", + " -- Wait": "wait_time_ms", + " -- Transmit": "transmit_time_ms", + "Free Time": "free_time_ms", + " -- SDMA": "sdma_time_ms", + " -- Free": "free_ms", + "E2E Time": "e2e_time_ms" } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): @@ -50,18 +48,14 @@ class OverallSummaryAnalyzer(BaseAnalyzer): self._has_benchmark_profiling = False self._is_minimal_profiling = 
False self.cur_data = {} - self.cur_data_table = {} self.cur_bottleneck = {} self._disaggregate_perf = {} self._disaggregate_benchmark_perf = {} self.cur_advices = "" - self._headers = [] - self._base_data = [] - self._comparison_data = [] self.html_render = HTMLRender() self.result = OptimizeResult() self.bottleneck_str = "" - self.bottleneck_table = {} + self.over_summary_analysis = {} @staticmethod def calculate_ratio(dividend, divisor): @@ -88,13 +82,13 @@ class OverallSummaryAnalyzer(BaseAnalyzer): def process(self): self._disaggregate_perf = ComparisonInterface(self.collection_path).disaggregate_perf(Constant.OVERALL_COMPARE) - if self._has_benchmark_profiling: - self._disaggregate_benchmark_perf = (ComparisonInterface(self.benchmark_profiling_path) - .disaggregate_perf(Constant.OVERALL_COMPARE)) if not self._disaggregate_perf: return self._is_minimal_profiling = self._disaggregate_perf.get("minimal_profiling", False) self.cur_data["overall_data"] = self.get_time_category_dict(self._disaggregate_perf.get('overall', {})) + if self._has_benchmark_profiling: + self._disaggregate_benchmark_perf = ComparisonInterface( + self.benchmark_profiling_path).disaggregate_perf(Constant.OVERALL_COMPARE) def identify_bottleneck(self): overall_data = self.cur_data.get("overall_data") @@ -112,8 +106,9 @@ class OverallSummaryAnalyzer(BaseAnalyzer): if not self._has_benchmark_profiling: continue # add comparison bottleneck - base_duration = self.get_time_category_dict(self._disaggregate_benchmark_perf.get('overall', {})).get( - time_type) + base_duration = self.get_time_category_dict( + self._disaggregate_benchmark_perf.get('overall', {}) + ).get(time_type) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" @@ -126,40 +121,63 @@ class OverallSummaryAnalyzer(BaseAnalyzer): self.process() self.identify_bottleneck() self.format_bottleneck() 
- self.format_cur_data() + self.format_over_summary_analysis() self.make_record() self.make_render() return self.result def format_bottleneck(self): result = '' - headers = [] - data_list = [] - data = [] - for key, value in self.cur_bottleneck.items(): + for _, value in self.cur_bottleneck.items(): if not value: continue result += f'{value} \n' - headers.append(key) - data.append(value) - data_list.append(data) self.bottleneck_str = result - self.bottleneck_table["headers"] = headers - self.bottleneck_table["data"] = data_list - def format_cur_data(self): - if not self.cur_data: - return - for data_type, data in self.cur_data.items(): - if not data: - continue - if data_type not in list(self.time_name_map.values()): - data_list = list(data.values()) - else: - data_list = [','.join(map(str, value)) for value in data.values()] - headers = list(data.keys()) - data_table = {"headers": headers, "data": [data_list]} - self.cur_data_table[data_type] = copy.deepcopy(data_table) + def format_over_summary_analysis(self): + headers = ['Performance Index', 'Duration(ms)', 'Duration Ratio'] + performance_data = self.get_analysis_data(self._disaggregate_perf) + benchmark_data = self.get_analysis_data(self._disaggregate_benchmark_perf) + if self._has_benchmark_profiling: + headers.append('Diff Duration(ms)') + self.format_analysis_with_benchmark(performance_data, benchmark_data, headers) + else: + self.format_analysis_only(performance_data, headers) + + def get_analysis_data(self, data_dict: dict): + if not data_dict: + return {} + return { + **data_dict.get("overall"), + **data_dict.get("computing_time_disaggregate"), + **data_dict.get("communication_time_disaggregate"), + **data_dict.get("free_time_disaggregate"), + } + + def format_analysis_only(self, performance_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = 
performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, total_duration))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res + + def format_analysis_with_benchmark(self, performance_data: dict, benchmark_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, total_duration))) + row.append("{:.3f}".format(duration - benchmark_data.get(time_key, 0.0))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res def make_record(self): """ @@ -174,10 +192,12 @@ class OverallSummaryAnalyzer(BaseAnalyzer): ) self.result.add(OptimizeRecord(optimization_item)) - self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - for data_type, data_dict in self.cur_data_table.items(): - if data_dict: - self.result.add_detail(const.DATA + data_type, data_dict["headers"], data_dict["data"][0]) + self.result.add_detail( + OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + headers=self.over_summary_analysis["headers"] + ) + for data in self.over_summary_analysis["data"]: + self.result.add_detail(OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, detail=data) def make_render(self): if not self.bottleneck_str and not self.cur_advices: @@ -187,9 +207,8 @@ class OverallSummaryAnalyzer(BaseAnalyzer): result_for_html = { "Description": bottleneck_str, "suggestion": self.cur_advices, - "details": [self.bottleneck_table] + "details": [self.over_summary_analysis] } - self.html_render.render_template(key="overall", title=OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, 
template_dir="templates", -- Gitee From 4872fb2d7e4230a07aa1922f2b79773da49c1a21 Mon Sep 17 00:00:00 2001 From: jiandaobao Date: Wed, 24 Jul 2024 16:22:29 +0800 Subject: [PATCH 068/106] Add threshold for dtype bfloat16. --- .../atat/pytorch/free_benchmark/common/constant.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/constant.py b/debug/accuracy_tools/atat/pytorch/free_benchmark/common/constant.py index 9b72437f22..47ede61ee3 100644 --- a/debug/accuracy_tools/atat/pytorch/free_benchmark/common/constant.py +++ b/debug/accuracy_tools/atat/pytorch/free_benchmark/common/constant.py @@ -51,6 +51,7 @@ class ThresholdConfig: DTYPE_PER_THD = { torch.float16: 1.002, + torch.bfloat16: 1.004, torch.float32: 1.0002, } BENCHMARK_THD_DICT = { -- Gitee From 03c49724e842dfec0c7251343d1154c483bd626b Mon Sep 17 00:00:00 2001 From: l30036321 Date: Wed, 24 Jul 2024 16:05:22 +0800 Subject: [PATCH 069/106] fix norm inner ops bug --- debug/accuracy_tools/msprobe/mindspore/common/log.py | 11 ----------- .../msprobe/mindspore/debugger/precision_debugger.py | 2 +- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 2 +- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/common/log.py b/debug/accuracy_tools/msprobe/mindspore/common/log.py index de76256756..ec027c7501 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/log.py +++ b/debug/accuracy_tools/msprobe/mindspore/common/log.py @@ -34,16 +34,5 @@ class MindsporeLogger(BaseLogger): return current_rank - def _print_log(self, level, msg, end='\n'): - current_rank = self.get_rank() - current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - pid = os.getpid() - if current_rank is not None: - full_msg = f"{current_time} ({pid}) [rank {current_rank}] [{level}] {msg}" - else: - full_msg = f"{current_time} ({pid}) [{level}] {msg}" - print(full_msg, end=end) - sys.stdout.flush() - logger = MindsporeLogger() \ No 
newline at end of file diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index d112fe8d20..30f7162ff5 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -31,7 +31,7 @@ class PrecisionDebugger: instance = cls._instance if not instance: raise Exception("No instance of PrecisionDebugger found.") - if ms.get_context("mode") == 1 and instance.config.level_ori == "L1": + if ms.get_context("mode") == ms.PYNATIVE_MODE and instance.config.level_ori == "L1": instance.service.start() else: handler = TaskHandlerFactory.create(instance.config) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 9c1e5f1ca9..5508416fde 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -83,7 +83,7 @@ class ApiRegistry: self.tensor_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKTensor, attr_name) functional_ops, mint_ops, mint_func_ops = get_functional_ops() - self.store_ori_attr(ms.ops, self.norm_inner_ops, self.functional_ori_attr) + self.store_ori_attr(ms.ops, self.norm_inner_ops, self.norm_inner_ops_ori_attr) self.store_ori_attr(ms.ops, functional_ops, self.functional_ori_attr) self.store_ori_attr(ms.mint, mint_ops, self.mint_ops_ori_attr) self.store_ori_attr(ms.mint.nn.functional, mint_func_ops, self.mint_func_ops_ori_attr) -- Gitee From 71d1db3b8181647875d6f8ea4d369e586c1dbf8b Mon Sep 17 00:00:00 2001 From: zhaolei Date: Tue, 23 Jul 2024 14:19:17 +0800 Subject: [PATCH 070/106] =?UTF-8?q?1.ai=20core=E9=99=8D=E9=A2=91=E5=88=86?= =?UTF-8?q?=E6=9E=90=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- .../computation/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_analyzer.py | 42 +++++ .../ai_core_freq/ai_core_freq_checker.py | 100 ++++++++++++ .../computation/profiling_analyzer.py | 9 +- profiler/advisor/common/analyzer_scopes.py | 1 + profiler/advisor/common/constant.py | 5 +- profiler/advisor/config/config.ini | 1 + profiler/advisor/config/config.py | 7 + .../advisor/dataset/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_dataset.py | 148 ++++++++++++++++++ .../advisor/dataset/profiling/device_info.py | 2 + .../html/templates/ai_core_frequency.html | 27 ++++ profiler/advisor/interface/interface.py | 4 +- profiler/advisor/result/result.py | 14 +- profiler/advisor/utils/utils.py | 50 +++++- .../compute_advice/test_frequency_advice.py | 145 +++++++++++++++++ 16 files changed, 542 insertions(+), 13 deletions(-) create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/__init__.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py create mode 100644 profiler/advisor/dataset/ai_core_freq/__init__.py create mode 100644 profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py create mode 100644 profiler/advisor/display/html/templates/ai_core_frequency.html create mode 100644 profiler/test/ut/advisor/compute_advice/test_frequency_advice.py diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py b/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py new file mode 100644 index 0000000000..cee16cce52 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py @@ -0,0 +1,42 @@ +import 
logging + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqAnalyzer(BaseAnalyzer): + dataset_cls_list = [AICoreFreqDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = AICoreFreqDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + self.html = None + + @BaseAnalyzer.check_data((AICoreFreqDataset.get_key(),)) + def optimize(self, **kwargs): + if not Config().get_config("aic_frequency"): + logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") + return self.result + add_render_list = kwargs.get("add_render_list", True) + ai_core_freq_checker = AICoreFreqChecker() + ai_core_freq_checker.check_ai_core_freq(self.dataset) + if not ai_core_freq_checker.ai_core_freq_issues: + return self.result + ai_core_freq_checker.make_record(self.result) + self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list) + return self.result + + def make_record(self): + pass + + def make_render(self): + pass diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py new file mode 100644 index 0000000000..e2d4a3c263 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py @@ -0,0 +1,100 @@ +import logging + +from 
profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.config.config import Config +from profiler.advisor.utils.utils import convert_to_float + +logger = logging.getLogger() + + +class AICoreFreqChecker: + DEFAULT_FREQ = 1800 + DECREASE_FREQ_RATIO = 0.05 + SHOW_TOPK_OPS = 10 + TOTAL_DURATION_INDEX = 2 + DECREASE_FREQ_RATIO_INDEX = 3 + + def __init__(self): + + self.ai_core_freq_issues = False + self.desc = "" + self.suggestions = "" + self.decrease_freq_ops = [] + self.headers = [] + self.op_freq = None + self.rank_id = None + self.stage = None + + def check_ai_core_freq(self, event_dataset: AICoreFreqDataset, rank_id=None, stage=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + logger.debug("Skip slow ai core frequency checker, " + "because no ai core frequency were recorded in trace_view.json") + return + + self.rank_id = rank_id + self.stage = stage + self.op_freq = event_dataset.op_freq + for op_name, op_info in self.op_freq.items(): + freq_list = op_info.get("freq_list", []) + if not freq_list: + continue + + op_count = op_info.get("count", 0) + op_total_duration = round(op_info.get("dur", 0), 2) + max_freq = max(self.DEFAULT_FREQ, convert_to_float(Config().get_config("aic_frequency"))) + + decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) + if decrease_freq_ratio >= self.DECREASE_FREQ_RATIO: + self.ai_core_freq_issues = True + self.decrease_freq_ops.append([op_name, op_count, op_total_duration, + f"{round(decrease_freq_ratio, 4):.2%}", + round(sum(freq_list) / len(freq_list), 2), + max(freq_list), min(freq_list)]) + + if self.decrease_freq_ops: + # 按算子总耗时和降频比率 降序排列 + self.decrease_freq_ops.sort(key= + lambda x: 
(x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), + reverse=True) + + self.desc = (f"{len(self.decrease_freq_ops)} operators are found during frequency reduction, and the reduction " + f"ratio is larger than {self.DECREASE_FREQ_RATIO}.") + if self.rank_id: + self.desc = f"For rank {self.rank_id}, " + self.desc.lower() + self.suggestions = "Please check the temperature or max power of your machine." + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem("AI Core Frequency", self.desc, [self.suggestions]) + result.add(OptimizeRecord(optimization_item)) + + self.headers = ["Operator name", "Count", "Total duration(us)", "AI CORE frequency decreased ratio", + "Average frequency", "Max frequency", "Min frequency"] + if self.rank_id: + self.headers = ["Rank id"] + self.headers + sub_table_name = "AI Core Frequency" if not self.stage else f"Stage-{self.stage}: AI Core Frequency" + result.add_detail(sub_table_name, headers=self.headers) + + for row in self.decrease_freq_ops: + if self.rank_id: + row = [self.rank_id] + row + result.add_detail(sub_table_name, detail=row) + + def make_render(self, html_render, add_render_list=True): + if self.SHOW_TOPK_OPS: + self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest att_advisor.xlsx for details." 
+ return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_frequency.html", + desc=self.desc, + suggestion=self.suggestions, + headers=self.headers, + data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + add_render_list=add_render_list) diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index 8682617700..2021bcd576 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -1,19 +1,15 @@ import logging from abc import ABC -from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.aicpu.aicpu_checker import AicpuChecker from profiler.advisor.analyzer.computation.bound.block_dim_checker import BlockDimChecker from profiler.advisor.analyzer.computation.bound.operator_bound_checker import OperatorBoundChecker -from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.analyzer.computation.op_compile.dynamic_shape_checker import DynamicShapeChecker from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.utils.utils import get_supported_subclass logger = logging.getLogger() @@ -76,14 +72,15 @@ class BlockDimAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = BlockDimChecker(self.cann_version) - + class OperatorBoundAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = 
OperatorBoundChecker(self.cann_version) + class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) \ No newline at end of file + self.checker = AicpuChecker(self.cann_version) diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 592f9d421e..de31b2160e 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -12,3 +12,4 @@ class SupportedScopes: BLOCK_DIM_ANALYSIS = "block_dim_analysis" OPERATOR_NO_BOUND_ANALYSIS = "operator_no_bound_analysis" TIMELINE_OP_DISPATCH = "timeline_op_dispatch" + FREQ_ANALYSIS = "freq_analysis" diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 697430ee6c..e5318dc0fe 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -138,4 +138,7 @@ CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" CLUSTER_COMM_JSON = "cluster_communication.json" BOTTLENECK = "bottleneck" -DATA = "data" \ No newline at end of file +DATA = "data" + +DISABLE_STREAMING_READER = "DISABLE_STREAMING_READER" +MAX_FILE_SIZE = 10**10 diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini index c56c1dad9f..06e9931601 100644 --- a/profiler/advisor/config/config.ini +++ b/profiler/advisor/config/config.ini @@ -9,6 +9,7 @@ tune_ops_file = operator_tuning_file.cfg [THRESHOLD] # operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker operator_bound_ratio = 0.8 +frequency_threshold = 0.05 [RULE-BUCKET] # region : URL of different regions where can download rule yaml file cn-north-9 = cnnorth9-modelarts-sdk diff --git a/profiler/advisor/config/config.py b/profiler/advisor/config/config.py index 12f4526f8c..4f36dfedfc 100644 --- a/profiler/advisor/config/config.py +++ 
b/profiler/advisor/config/config.py @@ -97,6 +97,13 @@ class Config: """ return float(self.config.get("THRESHOLD", "operator_bound_ratio")) + @property + def frequency_threshold(self) -> float: + """ + frequency_threshold + """ + return float(self.config.get("THRESHOLD", "frequency_threshold")) + def set_log_path(self, result_file: str, log_path: str = None): self.log_path = log_path if log_path is not None else os.path.join(self._work_path, "log") os.makedirs(self.log_path, exist_ok=True) diff --git a/profiler/advisor/dataset/ai_core_freq/__init__.py b/profiler/advisor/dataset/ai_core_freq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py new file mode 100644 index 0000000000..c99baea656 --- /dev/null +++ b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py @@ -0,0 +1,148 @@ +import json +import logging +import math +import os +import traceback + +import ijson +from tqdm import tqdm + +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.utils.utils import get_file_path_from_directory +from profiler.advisor.utils.utils import convert_to_float, parse_json_with_generator +from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqDataset: + + def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None: + + self._profiler_step = [] + self._ai_core_ops = [] + self._ai_core_freq: [TimelineEvent] = [] + self._previous_freq_index = -1 + + self.timeline_dir = collection_path + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) + + self.step = kwargs.get("step") + self.op_freq = {} + info = 
DeviceInfoParser(collection_path) + info.parse_data() + if not Config().get_config("aic_frequency"): + return + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) + + @property + def profiler_step(self): + return self._profiler_step + + @property + def ai_core_freq(self): + return self._ai_core_freq + + @property + def ai_core_ops(self): + return self._ai_core_ops + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', maxsplit=1)[-1] + + def parse(self): + + if len(self.timeline_data_list) == 0: + logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) + return False + + if len(self.timeline_data_list) > 1: + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) + + _ = parse_json_with_generator(sorted(self.timeline_data_list)[0], self._add_event) + + target_ai_core_ops = self._get_target_ai_core_ops() + self._get_op_frequency(target_ai_core_ops) + return True + + def _add_profiler_step(self, event): + if event.name.startswith("ProfilerStep"): + self._profiler_step.append(event) + + def _add_ai_core_ops(self, event): + if event.args.get("Task Type") in ["MIX_AIC", "AI_CORE"]: + self._ai_core_ops.append(event) + + def _add_ai_core_freq(self, event): + if event.name == "AI Core Freq": + if self._previous_freq_index != -1: + self._ai_core_freq[self._previous_freq_index]["end"] = event.get("ts", float(math.inf)) + self._previous_freq_index += 1 + event.setdefault("end", float(math.inf)) + self._ai_core_freq.append(event) + + def _add_event(self, index, event): + event["dataset_index"] = index + if not isinstance(event, TimelineEvent): + event = TimelineEvent(event) + + self._add_profiler_step(event) + self._add_ai_core_ops(event) + self._add_ai_core_freq(event) + + return True + + def _get_target_ai_core_ops(self): + target_ai_core_ops = [] + if 
not self.step or f"ProfilerStep#{self.step}" not in [event.name for event in self._profiler_step]: + target_ai_core_ops = self._ai_core_ops + else: + for step_event in self._profiler_step: + if step_event.name != f"ProfilerStep#{self.step}": + continue + + for ai_core_op_event in self._ai_core_ops: + if step_event.ts_include(ai_core_op_event): + target_ai_core_ops.append(ai_core_op_event) + target_ai_core_ops = sorted(target_ai_core_ops, key=lambda x: float(x.ts)) + return target_ai_core_ops + + def _get_op_frequency(self, ai_core_ops): + ai_core_freq = sorted(self._ai_core_freq, key=lambda x: float(x.ts)) + + op_index, freq_index = 0, 0 + while op_index < len(ai_core_ops) and freq_index < len(ai_core_freq): + op_event = ai_core_ops[op_index] + op_end_time = convert_to_float(op_event.ts) + convert_to_float(op_event.dur) + op_freq_list = [] + while freq_index < len(ai_core_freq): + freq_event = ai_core_freq[freq_index] + if convert_to_float(freq_event.end) < op_end_time: + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + freq_index += 1 + continue + elif convert_to_float(freq_event.ts) < op_end_time: + if op_event.name not in self.op_freq: + self.op_freq[op_event.name] = {"count": 0, "dur": 0, "freq_list": []} + self.op_freq[op_event.name]["count"] += 1 + self.op_freq[op_event.name]["dur"] += convert_to_float(op_event.dur) + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + self.op_freq[op_event.name]["freq_list"].append(min(op_freq_list)) + break + else: + break + + op_index += 1 diff --git a/profiler/advisor/dataset/profiling/device_info.py b/profiler/advisor/dataset/profiling/device_info.py index b58930777f..110cd0794c 100644 --- a/profiler/advisor/dataset/profiling/device_info.py +++ b/profiler/advisor/dataset/profiling/device_info.py @@ -54,6 +54,8 @@ class DeviceInfoParser: config.set_config("device_id", device_info["id"]) if "aiv_num" in device_info: config.set_config("aiv_num", device_info["aiv_num"]) + if "aic_frequency" in 
device_info: + config.set_config("aic_frequency", device_info["aic_frequency"]) if "ai_core_num" in device_info: config.set_config("ai_core_num", device_info["ai_core_num"]) return True diff --git a/profiler/advisor/display/html/templates/ai_core_frequency.html b/profiler/advisor/display/html/templates/ai_core_frequency.html new file mode 100644 index 0000000000..d045142037 --- /dev/null +++ b/profiler/advisor/display/html/templates/ai_core_frequency.html @@ -0,0 +1,27 @@ +{% if data|length > 0 %} +
+

AI CORE Frequency Issues

+
+ Issue: {{ desc }} +
+ Suggestion: {{ suggestion }} +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in data %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
+{% endif %} \ No newline at end of file diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 59bfee77f6..20b7a5f0c2 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -13,6 +13,7 @@ from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyze from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer from profiler.advisor.analyzer.schedule.dispatch.timeline_op_dispatch_analyzer import OpDispatchAnalyzer +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_analyzer import AICoreFreqAnalyzer class Interface: supported_analyzer = { @@ -25,7 +26,8 @@ class Interface: SupportedScopes.AICPU_ANALYSIS: AicpuAnalyzer, SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer, SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, - SupportedScopes.GRAPH: FusionOPAnalyzer + SupportedScopes.GRAPH: FusionOPAnalyzer, + SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer }), "communication": OrderedDict(), "overall": OrderedDict({SupportedScopes.OVER_ALL: OverallSummaryAnalyzer}), diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index c7d7da8663..42b617ca52 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -93,6 +93,9 @@ class SheetRecoder: if data not in self._sheet_data[sheet_name]["data"]: self._sheet_data[sheet_name]["data"].append(data) + def clear(self): + self._sheet_data.clear() + @singleton class OptimizeResult: @@ -110,12 +113,12 @@ class OptimizeResult: def add_tune_op_list(self, tune_op_list) -> None: """ add tune op name to tune op list - :param tune_op_list: tune op name list to be added + :param tune_op_list: list of operators to be optimized :return: None """ - for op_name in tune_op_list: - if op_name not in self._tune_op_list: - 
self._tune_op_list.append(op_name) + for operator_name in tune_op_list: + if operator_name not in self._tune_op_list: + self._tune_op_list.append(operator_name) def add(self, overview_item): sheet_name = "problems" @@ -148,6 +151,9 @@ class OptimizeResult: logger.info("Save problems details file to %s", Config().analysis_result_file) self._save_op_file_list() + def clear(self) -> None: + self.data.clear() + def _save_op_file_list(self) -> None: if not self._tune_op_list: return diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 84419b6708..dd83423205 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -1,5 +1,6 @@ import inspect import json + import logging import multiprocessing as mp import os @@ -11,7 +12,7 @@ import traceback import types from functools import wraps from typing import Any, Set - +import ijson import click import requests from requests.adapters import HTTPAdapter @@ -550,3 +551,50 @@ def get_file_path_by_walk(root, filename): file_path = os.path.join(root, name) return file_path return file_path + + +def check_path_valid(path): + if os.path.islink(os.path.abspath(path)): + logger.error("fThe path is detected as a soft connection. path:%ss", path) + return False + elif not os.access(path, os.R_OK): + logger.error(f"The file is not readable. path:%ss", path) + return False + elif os.path.getsize(path) > const.MAX_FILE_SIZE: + logger.error(f"The file size exceeds the limit. 
path:%ss, MAX_FILE_SIZE:%ss B",path, const.MAX_FILE_SIZE) + return False + return True + + +def parse_json_with_generator(timeline_data_path, func): + result = [] + if not check_path_valid(timeline_data_path): + return result + try: + with open(timeline_data_path, "r") as f: + if os.getenv(const.DISABLE_STREAMING_READER) == "1": + logger.debug("Disable streaming reader.") + file_parser = json.loads(f.read()) + else: + logger.debug("Enable streaming reader.") + file_parser = ijson.items(f, "item") + + for i, event in tqdm(enumerate(file_parser), + leave=False, ncols=100, desc="Building dataset for timeline analysis"): + func_res = func(index=i, event=event) + if func_res is not None: + result.append(func_res) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) + return result + + +def convert_to_float(num): + try: + return float(num) + except (ValueError, FloatingPointError): + logger.error(f"Can not convert %ss to float", num) + pass + return 0 diff --git a/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py new file mode 100644 index 0000000000..51acf3b8e2 --- /dev/null +++ b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py @@ -0,0 +1,145 @@ +import os +import shutil +import stat +import json + +import unittest +from profiler.advisor.interface.interface import Interface +from profiler.advisor.common.analyzer_scopes import SupportedScopes + + +class TestFrequencyAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT" + DEVICE_DIR = "./ascend_pt/PROF_000001_20240415174447255_OAANHDOMMJMHGIFC/device_0" + interface = None + err_interface = None + + def tearDown(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + self.clear_htmls() + + def setUp(self): + if 
os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.TMP_DIR): + os.makedirs(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.OUTPUT_DIR): + os.makedirs(TestFrequencyAdvice.OUTPUT_DIR) + if not os.path.exists(TestFrequencyAdvice.DEVICE_DIR): + os.makedirs(TestFrequencyAdvice.DEVICE_DIR) + self.clear_htmls() + + @classmethod + def clear_htmls(cls): + current_path = os.path.dirname(os.path.abspath(__file__)) + for filename in os.listdir(current_path): + # 检查文件是否以“att”开头 + if filename.startswith("att"): + # 构建文件的完整路径 + file_path = os.path.join(current_path, filename) + # 删除文件 + os.remove(file_path) + + @classmethod + def get_basic_trace_view(cls): + # Python pid + py_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 1, "args": {"name": "Python"}} + # ascend pid + ascend_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 4, "args": {"name": "Ascend Hardware"}} + # ascend pid + cann_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 5, "args": {"name": "CANN"}} + # ascend hardware ops + ah_event1 = {"ph": "X", "name": "Slice1", "ts": "1699529623106750", "dur": 100, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + ah_event2 = {"ph": "X", "name": "Slice2", "ts": "1699529623106888", "dur": 80, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + # flow event + flow_event_s = {"ph": "s", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "200", "args": {}} + flow_event_e = {"ph": "f", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "1699529623106750", "args": {}} + return [py_pid_data, ascend_pid_data, cann_pid_data, ah_event1, ah_event2, flow_event_s, flow_event_e] + + @classmethod + def create_info_json(cls): + info = { + "DeviceInfo": [ + { + "id": 7, + "env_type": 3, + "ctrl_cpu_id": "ARMv8_Cortex_A55", + "ctrl_cpu_core_num": 1, + "ctrl_cpu_endian_little": 1, + "ts_cpu_core_num": 0, + "ai_cpu_core_num": 6, + 
"ai_core_num": 25, + "ai_cpu_core_id": 2, + "ai_core_id": 0, + "aicpu_occupy_bitmap": 252, + "ctrl_cpu": "0", + "ai_cpu": "2,3,4,5,6", + "aiv_num": 50, + "hwts_frequency": "49.999001", + "aic_frequency": "1850", + "aiv_frequency": "1850" + } + ] + } + with os.fdopen(os.open(f"{TestFrequencyAdvice.DEVICE_DIR}/info.json.0", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(info)) + + @classmethod + def create_non_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"ph": "X", "cat": "python_function", "name": "aten::slice", "ts": "200", "dur": 100, "tid": 2, + "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + py_event2 = {"ph": "X", "cat": "python_function", "name": "slice", "ts": "199", "dur": 200, "tid": 2, "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + # with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/msprof_20240415174455.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + @classmethod + def create_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"name": "AI Core Freq", "ts": "1699529623106000.061", "pid": 682820896, "tid": 0, + "args": {"MHz": 1850}, "ph": "C"} + py_event2 = {"name": "AI Core Freq", "ts": "1699529623106770.541", "pid": 682820896, "tid": 0, + "args": {"MHz": 800}, "ph": "C"} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + def test_run_should_run_success_when_msprof_not_contain_frequency_data(self): + self.create_info_json() + 
self.create_non_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(0, len(result.data.get("AI Core Frequency", []))) + result.clear() + + def test_run_should_run_success_when_trace_view_contain_frequency_data(self): + self.create_info_json() + self.create_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(2, len(result.data.get("AI Core Frequency", dict).get("data", []))) + result.clear() -- Gitee From 96632320df063cf3340d6bc60baec2082f4df3eb Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Mon, 22 Jul 2024 15:57:02 +0800 Subject: [PATCH 071/106] =?UTF-8?q?dataloader,=20syncBatchNorm,=20synchron?= =?UTF-8?q?izeStream=20=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- debug/__init__.py | 0 debug/accuracy_tools/LICENSE | 201 ++++ debug/accuracy_tools/MANIFEST.in | 7 +- debug/accuracy_tools/grad_tool/README.md | 4 +- .../grad_tool/common/constant.py | 8 + .../grad_tool/grad_ms/global_context.py | 13 +- .../grad_tool/grad_ms/grad_stat_csv.py | 130 +++ .../accuracy_tools/grad_tool/grad_ms/hook.py | 84 +- .../accuracy_tools/grad_tool/grad_ms/utils.py | 42 + debug/accuracy_tools/msprobe/README.md | 33 + .../accuracy_tools/msprobe/config/config.json | 5 + .../msprobe/core/common/const.py | 15 +- .../msprobe/core/common/exceptions.py | 36 +- .../msprobe/core/common_config.py | 22 +- .../core/data_dump/data_processor/base.py | 4 +- .../core/data_dump/data_processor/factory.py | 6 +- .../data_processor/mindspore_processor.py | 131 +++ 
.../data_processor/pytorch_processor.py | 4 +- .../msprobe/mindspore/common/log.py | 38 + .../msprobe/mindspore/common/utils.py | 31 + .../mindspore/debugger/debugger_config.py | 8 +- .../mindspore/debugger/precision_debugger.py | 25 +- .../mindspore/dump/hook_cell/api_registry.py | 104 ++ .../mindspore/dump/hook_cell/hook_cell.py | 57 ++ .../dump/hook_cell/support_wrap_ops.yaml | 925 ++++++++++++++++++ .../dump/hook_cell/wrap_functional.py | 94 ++ .../mindspore/dump/hook_cell/wrap_tensor.py | 66 ++ .../msprobe/mindspore/ms_config.py | 11 +- .../msprobe/mindspore/service.py | 138 +++ .../api_accuracy_checker/common/config.py | 18 +- .../api_accuracy_checker/common/utils.py | 1 + .../pytorch/api_accuracy_checker/config.yaml | 1 + .../run_ut/run_overflow_check.py | 16 +- .../api_accuracy_checker/run_ut/run_ut.py | 53 +- .../msprobe/pytorch/common/utils.py | 1 - .../msprobe/pytorch/compare/acc_compare.py | 5 + .../pytorch/debugger/precision_debugger.py | 6 +- .../msprobe/pytorch/functional/dump_module.py | 6 +- .../pytorch/online_dispatch/compare.py | 2 +- .../msprobe/pytorch/pt_config.py | 43 +- .../accuracy_tools/msprobe/pytorch/service.py | 6 +- .../test/core_ut/test_common_config.py | 22 +- .../test/mindspore_ut/test_debugger_config.py | 2 +- .../mindspore_ut/test_precision_debugger.py | 2 +- .../common/test_config.py | 2 +- .../pytorch_ut/compare/test_acc_compare.py | 260 ++++- .../test/pytorch_ut/compare/test_match.py | 20 + .../msprobe/test/pytorch_ut/test_pt_config.py | 15 + debug/accuracy_tools/setup.py | 70 +- profiler/advisor/README.md | 6 +- profiler/advisor/analyzer/base_analyzer.py | 22 +- .../analyzer/cluster/slow_link_analyser.py | 18 +- .../analyzer/cluster/slow_rank_analyser.py | 6 +- .../computation/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_analyzer.py | 42 + .../ai_core_freq/ai_core_freq_checker.py | 100 ++ .../computation/aicpu/aicpu_checker.py | 6 +- .../computation/bound/block_dim_checker.py | 1 - 
.../computation/profiling_analyzer.py | 9 +- .../dataloader/dataloader_analyzer.py | 30 + .../analyzer/dataloader/dataloader_checker.py | 84 ++ .../graph_fusion/graph_fusion_checker.py | 2 +- .../analyzer/overall/overall_analyzer.py | 45 - .../overall/overall_summary_analyzer.py | 240 +++-- .../analyzer/schedule/syncbn/__init__.py | 0 .../schedule/syncbn/syncbn_analyzer.py | 30 + .../schedule/syncbn/syncbn_checker.py | 70 ++ .../schedule/synchronize_stream/__init__.py | 0 .../synchronize_stream_analyzer.py | 32 + .../synchronize_stream_checker.py | 89 ++ .../schedule/timeline_base_checker.py | 91 ++ profiler/advisor/common/analyzer_scopes.py | 4 + profiler/advisor/common/constant.py | 9 +- profiler/advisor/common/graph/graph_parser.py | 9 +- profiler/advisor/common/timeline/event.py | 5 +- .../advisor/common/timeline/fusion_ops_db.py | 6 +- profiler/advisor/config/config.ini | 1 + profiler/advisor/config/config.py | 7 + .../advisor/dataset/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_dataset.py | 148 +++ .../dataset/cluster/cluster_dataset.py | 14 +- .../advisor/dataset/profiling/device_info.py | 2 + .../dataset/profiling/profiling_dataset.py | 4 +- .../advisor/dataset/timeline_event_dataset.py | 165 +++- profiler/advisor/display/html/render.py | 5 +- .../html/templates/ai_core_frequency.html | 27 + .../html/templates/slow_dataloader.html | 18 + .../html/templates/sync_batchnorm.html | 30 + .../html/templates/synchronize_stream.html | 57 ++ profiler/advisor/interface/interface.py | 18 +- profiler/advisor/result/item.py | 2 +- profiler/advisor/result/result.py | 18 +- profiler/advisor/rules/dataloader.yaml | 9 + profiler/advisor/rules/sync_batchnorm.yaml | 41 + profiler/advisor/rules/synchronize.yaml | 8 + profiler/advisor/utils/utils.py | 52 +- profiler/cli/__init__.py | 2 +- profiler/cli/analyze_cli.py | 3 - profiler/cluster_analyse/README.md | 12 +- .../common_func/file_manager.py | 19 + profiler/compare_tools/README.md | 2 +- 
.../comparator/base_comparator.py | 2 +- .../comparator/overall_metrics_comparator.py | 50 + .../origin_data_bean/kernel_details_bean.py | 33 +- .../origin_data_bean/trace_event_bean.py | 50 +- .../compare_bean/overall_metrics_bean.py | 255 +++++ .../compare_bean/profiling_info.py | 184 +++- .../disaggregate/overall_perf_interface.py | 28 +- .../generator/detail_performance_generator.py | 15 +- .../profiling_parser/base_profiling_parser.py | 97 ++ .../profiling_parser/gpu_profiling_parser.py | 27 +- .../profiling_parser/npu_profiling_parser.py | 36 +- .../compare_backend/utils/constant.py | 8 + .../compare_backend/utils/excel_config.py | 79 +- .../view/work_sheet_creator.py | 29 +- profiler/test/run_ut.py | 2 + .../test_dataloader_checker.py | 65 ++ .../timeline_advice/test_syncbn_checker.py | 62 ++ .../test_synchronize_stream.py | 55 ++ .../compute_advice/test_frequency_advice.py | 145 +++ .../test_kernel_details_bean.py | 4 +- .../test_gpu_profiling_parser.py | 1 + 123 files changed, 5150 insertions(+), 492 deletions(-) create mode 100644 debug/__init__.py create mode 100644 debug/accuracy_tools/LICENSE create mode 100644 debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py create mode 100644 debug/accuracy_tools/grad_tool/grad_ms/utils.py create mode 100644 debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/common/log.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/common/utils.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_functional.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_tensor.py create mode 100644 
debug/accuracy_tools/msprobe/mindspore/service.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/__init__.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py create mode 100644 profiler/advisor/analyzer/dataloader/dataloader_analyzer.py create mode 100644 profiler/advisor/analyzer/dataloader/dataloader_checker.py delete mode 100644 profiler/advisor/analyzer/overall/overall_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/__init__.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py create mode 100644 profiler/advisor/analyzer/schedule/timeline_base_checker.py create mode 100644 profiler/advisor/dataset/ai_core_freq/__init__.py create mode 100644 profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py create mode 100644 profiler/advisor/display/html/templates/ai_core_frequency.html create mode 100644 profiler/advisor/display/html/templates/slow_dataloader.html create mode 100644 profiler/advisor/display/html/templates/sync_batchnorm.html create mode 100644 profiler/advisor/display/html/templates/synchronize_stream.html create mode 100644 profiler/advisor/rules/dataloader.yaml create mode 100644 profiler/advisor/rules/sync_batchnorm.yaml create mode 100644 profiler/advisor/rules/synchronize.yaml create mode 100644 profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py create 
mode 100644 profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py create mode 100644 profiler/test/ut/advisor/compute_advice/test_frequency_advice.py diff --git a/README.md b/README.md index 014a4d59f0..dd25d20158 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ MindStudio Training Tools,MindStudio训练工具链。针对训练&大模型 2. [cluster_analyse(集群分析工具)](https://gitee.com/ascend/mstt/tree/master/profiler/cluster_analyse) - 提供多机多卡的集群分析能力(基于通信域的通信分析和迭代耗时分析), 当前需要配合Ascend Insight的集群分析功能使用。 + 提供多机多卡的集群分析能力(基于通信域的通信分析和迭代耗时分析), 当前需要配合MindStudio Insight的集群分析功能使用。 3. [affinity_cpu_bind (亲和性cpu绑核工具) ](https://gitee.com/ascend/mstt/tree/master/profiler/affinity_cpu_bind) diff --git a/debug/__init__.py b/debug/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/debug/accuracy_tools/LICENSE b/debug/accuracy_tools/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/debug/accuracy_tools/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/debug/accuracy_tools/MANIFEST.in b/debug/accuracy_tools/MANIFEST.in index 547864a6c8..7997215ffd 100644 --- a/debug/accuracy_tools/MANIFEST.in +++ b/debug/accuracy_tools/MANIFEST.in @@ -1,2 +1,5 @@ -recursive-include msprobe/ * -recursive-exclude msprobe/test * \ No newline at end of file +include README.md +include LICENSE +recursive-include msprobe * +recursive-exclude msprobe/test * + diff --git a/debug/accuracy_tools/grad_tool/README.md b/debug/accuracy_tools/grad_tool/README.md index a3f683b411..a7929ca818 100644 --- a/debug/accuracy_tools/grad_tool/README.md +++ b/debug/accuracy_tools/grad_tool/README.md @@ -54,7 +54,7 @@ **不同级别的level的导出数据** -- PyTorch不同level数据 +- PyTorch/MindSpore动态图不同level数据 | 级别 | 特征数据表头 | 是否有方向数据 | | ---- | ------------------------------------------------------------ | -------------- | @@ -62,7 +62,7 @@ | L1 | ("param_name", "max", "min", "norm", "shape") | 是 | | L2 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | -- MindSpore不同level数据 +- MindSpore静态图不同level数据 | 级别 | 特征数据表头 | 是否有方向数据 | | ---- | ------------------------------------------------------------ | -------------- | diff --git a/debug/accuracy_tools/grad_tool/common/constant.py b/debug/accuracy_tools/grad_tool/common/constant.py index 902f54f5e6..d569d47c16 100644 --- a/debug/accuracy_tools/grad_tool/common/constant.py +++ 
b/debug/accuracy_tools/grad_tool/common/constant.py @@ -46,3 +46,11 @@ class GradConst: STEP_FINISH = "step_finish" SUMMARY = "summary" + + # csv header entry + MD5 = "MD5" + DISTRIBUTION = "distribution" + SHAPE = "shape" + MAX = "max" + MIN = "min" + NORM = "norm" \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index 02d1f74454..d44bea52c7 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -14,6 +14,8 @@ class GlobalContext: _setting = { GradConst.LEVEL: GradConst.LEVEL0, GradConst.PARAM_LIST: None, + GradConst.STEP: None, + GradConst.RANK: None, GradConst.CURRENT_STEP: 0, GradConst.BOUNDS: [-1., 0., 1.], GradConst.OUTPUT_PATH: "./grad_stat" @@ -33,6 +35,8 @@ class GlobalContext: print_warn_log("Invalid level set in config yaml file, use L0 instead.") self._set_input_list(config_dict, GradConst.PARAM_LIST, str) self._set_input_list(config_dict, GradConst.BOUNDS, float) + self._set_input_list(config_dict, GradConst.STEP, int) + self._set_input_list(config_dict, GradConst.RANK, int) output_path = config_dict.get(GradConst.OUTPUT_PATH) if output_path: try: @@ -55,6 +59,14 @@ class GlobalContext: def update_step(self): self._setting[GradConst.CURRENT_STEP] += 1 + def step_need_dump(self, step): + dump_step_list = self.get_context(GradConst.STEP) + return (not dump_step_list) or (step in dump_step_list) + + def rank_need_dump(self, rank): + dump_rank_list = self.get_context(GradConst.RANK) + return (not dump_rank_list) or (rank in dump_rank_list) + def _set_input_list(self, config_dict: Dict, name: str, dtype: Union[int, str, float]): value = config_dict.get(name) if dtype == int: @@ -72,5 +84,4 @@ class GlobalContext: else: print_warn_log(f"{name} is None or not a list with valid items, use default value.") - grad_context = GlobalContext() diff --git 
a/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py new file mode 100644 index 0000000000..11c2fc8205 --- /dev/null +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_stat_csv.py @@ -0,0 +1,130 @@ +from abc import ABC, abstractmethod +import hashlib + +import mindspore +from mindspore import ops, Tensor +from grad_tool.common.constant import GradConst + + +class CsvInput: + def __init__(self, param_name, grad, bounds): + self.param_name = param_name + self.grad = grad + self.bounds = bounds + +class GradStatCsv: + csv = {} + + @staticmethod + def get_csv_header(level, csv_input): + header = ["param_name"] + for key in level["header"]: + header.extend(GradStatCsv.csv[key].generate_csv_header(csv_input)) + return header + + @staticmethod + def get_csv_line(level, csv_input): + line = [csv_input.param_name] + for key in level["header"]: + line.extend(GradStatCsv.csv[key].generate_csv_content(csv_input)) + return line + + +def register_csv_item(key, cls=None): + if cls is None: + # 无参数时,返回装饰器函数 + return lambda cls: register_csv_item(key, cls) + GradStatCsv.csv[key] = cls + return cls + + +class CsvItem(ABC): + @staticmethod + @abstractmethod + def generate_csv_header(csv_input): + pass + + @staticmethod + @abstractmethod + def generate_csv_content(csv_input): + pass + + +@register_csv_item(GradConst.MD5) +class CsvMd5(CsvItem): + def generate_csv_header(csv_input): + return ["MD5"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + tensor_bytes = grad.float().numpy().tobytes() + md5_hash = hashlib.md5(tensor_bytes) + return [md5_hash.hexdigest()] + + +@register_csv_item(GradConst.DISTRIBUTION) +class CsvDistribution(CsvItem): + def generate_csv_header(csv_input): + bounds = csv_input.bounds + intervals = [] + for i, _ in enumerate(bounds): + if i == 0: + intervals.append(f"(-inf, {bounds[i]}]") + else: + intervals.append(f"({bounds[i-1]}, {bounds[i]}]") + intervals.extend([f"({bounds[-1]}, 
inf)", "=0"]) + return intervals + + def generate_csv_content(csv_input): + grad = csv_input.grad + bounds = csv_input.bounds + if grad.dtype == mindspore.bfloat16: + grad = grad.to(mindspore.float32) + element_num = grad.numel() + grad_equal_0_num = (grad == 0).sum().item() + bucketsize_result = ops.bucketize(grad.float(), bounds) + bucketsize_result = bucketsize_result.astype(mindspore.int8) + interval_nums = [(bucketsize_result == i).sum().item() for i in range(len(bounds) + 1)] + interval_nums.append(grad_equal_0_num) + return_list = [x / element_num if element_num != 0 else 0 for x in interval_nums] + return return_list + + +@register_csv_item(GradConst.MAX) +class CsvMax(CsvItem): + def generate_csv_header(csv_input): + return ["max"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [ops.amax(grad).float().numpy().tolist()] + + +@register_csv_item(GradConst.MIN) +class CsvMin(CsvItem): + def generate_csv_header(csv_input): + return ["min"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [ops.amin(grad).float().numpy().tolist()] + + +@register_csv_item(GradConst.NORM) +class CsvNorm(CsvItem): + def generate_csv_header(csv_input): + return ["norm"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [ops.norm(grad).float().numpy().tolist()] + + +@register_csv_item(GradConst.SHAPE) +class CsvShape(CsvItem): + def generate_csv_header(csv_input): + return ["shape"] + + def generate_csv_content(csv_input): + grad = csv_input.grad + return [list(grad.shape)] \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/hook.py b/debug/accuracy_tools/grad_tool/grad_ms/hook.py index ceadfee614..f0d4798182 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/hook.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/hook.py @@ -1,4 +1,4 @@ -from functools import wraps + import os import shutil @@ -10,38 +10,82 @@ from mindspore.common.parameter import Parameter from 
mindspore.common.initializer import initializer from grad_tool.common.constant import GradConst -from grad_tool.common.utils import print_warn_log +from grad_tool.common.utils import print_warn_log, write_csv from grad_tool.grad_ms.global_context import grad_context from grad_tool.grad_ms.grad_analyzer import grad_dump, get_rank_id from grad_tool.grad_ms.grad_analyzer import csv_generator +from grad_tool.grad_ms.grad_stat_csv import GradStatCsv, CsvInput +from grad_tool.grad_ms.utils import save_grad_direction, get_adapted_level +class HookInput: -def hook_optimizer(opt: Optimizer): - func = opt.construct - g_names = [param.name for param in opt._parameters] - param_list = grad_context.get_context(GradConst.PARAM_LIST) - rank_id = get_rank_id() - output_path = grad_context.get_context(GradConst.OUTPUT_PATH) - dump_dir = f"{output_path}/rank_{rank_id}/Dump/" - save_dir = f"{output_path}/rank_{rank_id}/" - step_finish_flag = f"{output_path}/rank_{rank_id}/Dump/{GradConst.STEP_FINISH}" - if os.path.exists(save_dir): - print_warn_log(f"Delete existing path {save_dir}.") - shutil.rmtree(save_dir) - level = grad_context.get_context(GradConst.LEVEL) - bounds = grad_context.get_context(GradConst.BOUNDS) + ''' + HookInput is a class wrapping all the variables used for hooking optimizer + ''' + + def __init__(self, opt) -> None: + self.func = opt.construct + self.g_names = [param.name for param in opt._parameters] + self.param_list = grad_context.get_context(GradConst.PARAM_LIST) + self.rank_id = get_rank_id() + output_path = grad_context.get_context(GradConst.OUTPUT_PATH) + self.dump_dir = os.path.join(output_path, f"rank_{self.rank_id}", "Dump") + self.save_dir = os.path.join(output_path, f"rank_{self.rank_id}") + self.step_finish_flag = os.path.join(self.dump_dir, GradConst.STEP_FINISH) + if os.path.exists(self.save_dir): + print_warn_log(f"Delete existing path {self.save_dir}.") + shutil.rmtree(self.save_dir) + self.level = grad_context.get_context(GradConst.LEVEL) + 
self.bounds = grad_context.get_context(GradConst.BOUNDS) + self.mode = mindspore.get_context("mode") +def hook_graph_mode_optimizer(opt, hook_input): @jit def new_construct(self, gradients): for index, grad_value in enumerate(gradients): - if param_list and g_names[index] not in param_list: + if hook_input.param_list and hook_input.g_names[index] not in hook_input.param_list: continue - grad_dump(dump_dir, g_names[index], self.dump_step, grad_value, level, bounds) - ms.ops.TensorDump()(step_finish_flag, self.dump_step) + grad_dump(hook_input.dump_dir, hook_input.g_names[index], self.dump_step, + grad_value, hook_input.level, hook_input.bounds) + ms.ops.TensorDump()(hook_input.step_finish_flag, self.dump_step) self.assignadd(self.dump_step, self.global_step_increase_tensor) - out = func(gradients) + out = hook_input.func(gradients) return out opt.dump_step = Parameter(initializer(0, [1], ms.int32), name="dump_step") opt.construct = new_construct.__get__(opt, type(opt)) csv_generator.start() + +def hook_pynative_optimizer(opt, hook_input): + level_adapted = get_adapted_level(hook_input.level) + + def hook_fn(cell, input): + gradients, = input + cur_step = grad_context.get_context(GradConst.CURRENT_STEP) + if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id): + output_lines = [] + for index, grad_value in enumerate(gradients): + param_name = hook_input.g_names[index] + if hook_input.param_list and param_name not in hook_input.param_list: + continue + csv_input = CsvInput(param_name, grad_value, hook_input.bounds) + grad_info = GradStatCsv.get_csv_line(level_adapted, csv_input) + output_lines.append(grad_info) + if level_adapted["have_grad_direction"]: + save_grad_direction(param_name, grad_value, os.path.join(hook_input.save_dir, f'step_{cur_step}')) + output_csv_path = os.path.join(hook_input.save_dir, f"grad_summary_{cur_step}.csv") + dummy_csv_input = CsvInput(None, None, hook_input.bounds) + write_csv(output_csv_path, 
output_lines, + GradStatCsv.get_csv_header(level_adapted, dummy_csv_input)) + grad_context.update_step() + + opt.register_forward_pre_hook(hook_fn) + + +def hook_optimizer(opt: Optimizer): + hook_input = HookInput(opt) + + if hook_input.mode == mindspore.GRAPH_MODE: + hook_graph_mode_optimizer(opt, hook_input) + else: + hook_pynative_optimizer(opt, hook_input) \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py new file mode 100644 index 0000000000..23703f2820 --- /dev/null +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -0,0 +1,42 @@ +import os + +import numpy as np +import mindspore +from grad_tool.common.constant import GradConst +from grad_tool.common.utils import print_warn_log, create_directory, change_mode, check_file_or_directory_path + +level_adp = { + "L0": { + "header": [GradConst.MD5, GradConst.MAX, GradConst.MIN, GradConst.NORM, GradConst.SHAPE], + "have_grad_direction": False + }, + "L1": { + "header": [GradConst.MAX, GradConst.MIN, GradConst.NORM, GradConst.SHAPE], + "have_grad_direction": True + }, + "L2": { + "header": [GradConst.DISTRIBUTION, GradConst.MAX, GradConst.MIN, GradConst.NORM, GradConst.SHAPE], + "have_grad_direction": True + }, + } + +def save_grad_direction(param_name, grad, save_path): + if not os.path.exists(save_path): + create_directory(save_path) + save_filepath = os.path.join(save_path, f"{param_name}.npy") + check_file_or_directory_path(save_filepath) + + if grad.dtype == mindspore.bfloat16: + grad = grad.to(mindspore.float32) + grad_direction_tensor = grad > 0 + grad_direction_ndarray = grad_direction_tensor.numpy() + + np.save(save_filepath, grad_direction_ndarray) + change_mode(save_filepath, 0o640) + +def get_adapted_level(level: str): + if level == GradConst.LEVEL3: + print_warn_log(f"In mindpsore pynative mode, only 'L0', 'L1' and 'L2' are supported, use L0 instead") + level = GradConst.LEVEL0 + level_adapted = 
level_adp.get(level) + return level_adapted \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/README.md b/debug/accuracy_tools/msprobe/README.md index 84c531995a..1e8c1a1f08 100644 --- a/debug/accuracy_tools/msprobe/README.md +++ b/debug/accuracy_tools/msprobe/README.md @@ -6,6 +6,16 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud 精度工具合一软件包名称:`mindstudio_probe-{version}-py3-none-any.whl` +### pip安装 + ```shell + pip install mindstudio-probe + ``` + 说明 + 1. 使用`pip install mindstudio-probe==版本号`可安装指定版本的包 + 2. pip命令会自动安装包及其依赖 + 3. 安装成功后,日志会显示`Successfully installed mindstudio-probe-版本号` + +### 下载whl包安装 1. 使用pip命令安装numpy、openpyxl、pandas、PyYAML、rich、torch、tqdm依赖。 若环境中已安装部分依赖,不需要重复安装。 @@ -58,6 +68,29 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud Successfully installed mindstudio_probe-{version} ``` +### 从源码安装 +1. 克隆或者下载项目源代码 + + ```shell + git clone https://gitee.com/ascend/mstt.git + cd debug/accuracy_tools + ``` + +2. 安装setuptools和wheel + + ```shell + pip install setuptools wheel + ``` + +3. 
安装msprobe + + ```shell + python setup.py install + ``` + 提示出现如下信息则表示源码安装成功。 + ```shell + Finished processing dependencies for mindstudio-probe=={version} + ``` ## 工具使用 diff --git a/debug/accuracy_tools/msprobe/config/config.json b/debug/accuracy_tools/msprobe/config/config.json index 70a630a40a..c6077b75ae 100644 --- a/debug/accuracy_tools/msprobe/config/config.json +++ b/debug/accuracy_tools/msprobe/config/config.json @@ -24,5 +24,10 @@ "overflow_check": { "overflow_nums": 1, "check_mode":"all" + }, + "run_ut": { + "white_list": [], + "black_list": [], + "error_data_path": "./" } } \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index dea829c3ff..df82455a67 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -2,6 +2,7 @@ import os import stat import numpy as np + class Const: """ Class for const @@ -15,6 +16,10 @@ class Const: OFF = 'OFF' BACKWARD = 'backward' FORWARD = 'forward' + DEFAULT_LIST = [] + DEFAULT_PATH = './' + WHITE_LIST = 'white_list' + BLACK_LIST = 'black_list' # dump mode ALL = "all" @@ -25,6 +30,8 @@ class Const: API_LIST = "api_list" API_STACK = "api_stack" DUMP_MODE = [ALL, LIST, RANGE, STACK, ACL, API_LIST, API_STACK] + AUTO = "auto" + ONLINE_DUMP_MODE = [ALL, LIST, AUTO, OFF] SUMMARY = "summary" MD5 = "md5" SUMMARY_MODE = [ALL, SUMMARY, MD5] @@ -35,6 +42,7 @@ class Const: PKL_SUFFIX = ".pkl" NUMPY_SUFFIX = ".npy" + PT_SUFFIX = ".pt" ONE_GB = 1073741824 # 1 * 1024 * 1024 * 1024 TEN_GB = 10737418240 # 10 * 1024 * 1024 * 1024 FILE_PATTERN = r'^[a-zA-Z0-9_./-]+$' @@ -52,13 +60,15 @@ class Const: ENV_ENABLE = "1" ENV_DISABLE = "0" MAX_SEED_VALUE = 4294967295 # 2**32 - 1 - TASK_LIST = ["tensor", "statistics", "overflow_check", "free_benchmark"] + TASK_LIST = ["tensor", "statistics", "overflow_check", "free_benchmark", "run_ut"] LEVEL_LIST = ["L0", "L1", "L2", "mix"] STATISTICS = 
"statistics" TENSOR = "tensor" OVERFLOW_CHECK = "overflow_check" FREE_BENCHMARK = "free_benchmark" + RUN_UT = "run_ut" ATTR_NAME_PREFIX = "wrap_" + ATTR_NAME_PREFIX_LEN = len(ATTR_NAME_PREFIX) KERNEL_DUMP = "kernel_dump" DATA = "data" PT_FRAMEWORK = "pytorch" @@ -84,6 +94,7 @@ class Const: "int32_to_int64": ["cross_entropy"] } + class CompareConst: """ Class for compare module const @@ -196,6 +207,7 @@ class CompareConst: MAX_RELATIVE_OUT_YELLOW = 0.1 MAX_RELATIVE_IN_YELLOW = 0.01 + class FileCheckConst: """ Class for file check const @@ -232,6 +244,7 @@ class FileCheckConst: YAML_SUFFIX: MAX_YAML_SIZE } + class OverflowConst: """ Class for Overflow diff --git a/debug/accuracy_tools/msprobe/core/common/exceptions.py b/debug/accuracy_tools/msprobe/core/common/exceptions.py index df89699ce8..ea61f8cd58 100644 --- a/debug/accuracy_tools/msprobe/core/common/exceptions.py +++ b/debug/accuracy_tools/msprobe/core/common/exceptions.py @@ -8,13 +8,13 @@ class CodedException(Exception): return self.error_info -class MsaccException(CodedException): +class MsprobeException(CodedException): INVALID_PARAM_ERROR = 0 OVERFLOW_NUMS_ERROR = 1 err_strs = { - INVALID_PARAM_ERROR: "[msacc] 无效参数: ", - OVERFLOW_NUMS_ERROR: "[msacc] 超过预设溢出次数 当前溢出次数:" + INVALID_PARAM_ERROR: "[msprobe] 无效参数: ", + OVERFLOW_NUMS_ERROR: "[msprobe] 超过预设溢出次数 当前溢出次数:" } @@ -27,12 +27,12 @@ class FileCheckException(CodedException): FILE_TOO_LARGE_ERROR = 5 err_strs = { - SOFT_LINK_ERROR: "[msacc] 检测到软链接: ", - FILE_PERMISSION_ERROR: "[msacc] 文件权限错误: ", - INVALID_FILE_ERROR: "[msacc] 无效文件: ", - ILLEGAL_PATH_ERROR: "[msacc] 非法文件路径: ", - ILLEGAL_PARAM_ERROR: "[msacc] 非法打开方式: ", - FILE_TOO_LARGE_ERROR: "[msacc] 文件过大: " + SOFT_LINK_ERROR: "[msprobe] 检测到软链接: ", + FILE_PERMISSION_ERROR: "[msprobe] 文件权限错误: ", + INVALID_FILE_ERROR: "[msprobe] 无效文件: ", + ILLEGAL_PATH_ERROR: "[msprobe] 非法文件路径: ", + ILLEGAL_PARAM_ERROR: "[msprobe] 非法打开方式: ", + FILE_TOO_LARGE_ERROR: "[msprobe] 文件过大: " } @@ -40,8 +40,8 @@ class 
ParseJsonException(CodedException): UnexpectedNameStruct = 0 InvalidDumpJson = 1 err_strs = { - UnexpectedNameStruct: "[msacc] Unexpected name in json: ", - InvalidDumpJson: "[msacc] json格式不正确: ", + UnexpectedNameStruct: "[msprobe] Unexpected name in json: ", + InvalidDumpJson: "[msprobe] json格式不正确: ", } @@ -50,23 +50,23 @@ class ScopeException(CodedException): InvalidScope = 1 ArgConflict = 2 err_strs = { - InvalidApiStr: "[msacc] Invalid api_list: ", - InvalidScope: "[msacc] Invalid scope: ", - ArgConflict: "[msacc] Scope and api_list conflict: ", + InvalidApiStr: "[msprobe] Invalid api_list: ", + InvalidScope: "[msprobe] Invalid scope: ", + ArgConflict: "[msprobe] Scope and api_list conflict: ", } class RepairException(CodedException): InvalidRepairType = 0 err_strs = { - InvalidRepairType: "[msacc] Invalid repair_type: " + InvalidRepairType: "[msprobe] Invalid repair_type: " } class StepException(CodedException): InvalidPostProcess = 0 err_strs = { - InvalidPostProcess: "[msacc] 错误的step后处理配置: ", + InvalidPostProcess: "[msprobe] 错误的step后处理配置: ", } @@ -74,8 +74,8 @@ class FreeBenchmarkException(CodedException): UnsupportedType = 0 InvalidGrad = 1 err_strs = { - UnsupportedType: "[msacc] Free benchmark get unsupported type: ", - InvalidGrad: "[msacc] Free benchmark gradient invalid: ", + UnsupportedType: "[msprobe] Free benchmark get unsupported type: ", + InvalidGrad: "[msprobe] Free benchmark gradient invalid: ", } diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py index b7d446ce8e..ed38eba008 100644 --- a/debug/accuracy_tools/msprobe/core/common_config.py +++ b/debug/accuracy_tools/msprobe/core/common_config.py @@ -1,6 +1,6 @@ from msprobe.core.common.const import Const from msprobe.core.common.log import logger -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException class CommonConfig: @@ -19,22 +19,22 @@ class CommonConfig: def 
_check_config(self): if self.task and self.task not in Const.TASK_LIST: logger.error_log_with_exp( - "task is invalid, it should be one of {}".format(Const.TASK_LIST), MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "task is invalid, it should be one of {}".format(Const.TASK_LIST), MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.rank is not None and not isinstance(self.rank, list): - logger.error_log_with_exp("rank is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("rank is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.step is not None and not isinstance(self.step, list): - logger.error_log_with_exp("step is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("step is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.level and self.level not in Const.LEVEL_LIST: logger.error_log_with_exp( - "level is invalid, it should be one of {}".format(Const.LEVEL_LIST), MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "level is invalid, it should be one of {}".format(Const.LEVEL_LIST), MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.seed is not None and not isinstance(self.seed, int): - logger.error_log_with_exp("seed is invalid, it should be an integer", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("seed is invalid, it should be an integer", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if not isinstance(self.is_deterministic, bool): logger.error_log_with_exp( - "is_deterministic is invalid, it should be a boolean", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "is_deterministic is invalid, it should be a boolean", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if not isinstance(self.enable_dataloader, bool): logger.error_log_with_exp( - "enable_dataloader is 
invalid, it should be a boolean", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + "enable_dataloader is invalid, it should be a boolean", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) class BaseConfig: @@ -50,9 +50,9 @@ class BaseConfig: def check_config(self): if self.scope is not None and not isinstance(self.scope, list): - logger.error_log_with_exp("scope is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("scope is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.list is not None and not isinstance(self.list, list): - logger.error_log_with_exp("list is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("list is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.data_mode is not None and not isinstance(self.data_mode, list): - logger.error_log_with_exp("data_mode is invalid, it should be a list", MsaccException(MsaccException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("data_mode is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 430d13634c..5d90129197 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -238,8 +238,8 @@ class BaseDataProcessor: return api_info_struct def get_save_file_path(self, suffix): - file_format = "pt" if self.config.framework == Const.PT_FRAMEWORK else "npy" + file_format = Const.PT_SUFFIX if self.config.framework == Const.PT_FRAMEWORK else Const.NUMPY_SUFFIX dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP + - suffix + Const.SEP + file_format) + suffix + file_format) 
file_path = os.path.join(self.data_writer.dump_tensor_data_dir, dump_data_name) return dump_data_name, file_path \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py index 2c536ba577..86ef2115fb 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py @@ -51,11 +51,7 @@ class DataProcessorFactory: elif framework == Const.MS_FRAMEWORK: from .mindspore_processor import ( StatisticsDataProcessor as MindsporeStatisticsDataProcessor, - TensorDataProcessor as MindsporeTensorDataProcessor, - OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor, - FreeBenchmarkDataProcessor as MindsporeFreeBenchmarkDataProcessor + TensorDataProcessor as MindsporeTensorDataProcessor ) cls.register_processor(Const.MS_FRAMEWORK, Const.STATISTICS, MindsporeStatisticsDataProcessor) cls.register_processor(Const.MS_FRAMEWORK, Const.TENSOR, MindsporeTensorDataProcessor) - cls.register_processor(Const.MS_FRAMEWORK, Const.OVERFLOW_CHECK, MindsporeOverflowCheckDataProcessor) - cls.register_processor(Const.MS_FRAMEWORK, Const.FREE_BENCHMARK, MindsporeFreeBenchmarkDataProcessor) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py new file mode 100644 index 0000000000..7533e2ee0d --- /dev/null +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -0,0 +1,131 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import zlib +import mindspore as ms +from mindspore import ops +import numpy as np + +from msprobe.core.common.const import Const +from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, TensorStatInfo +from msprobe.core.common.file_check import path_len_exceeds_limit, change_mode, FileCheckConst +from msprobe.mindspore.dump.hook_cell.wrap_functional import load_ops_functions +from msprobe.mindspore.common.utils import convert_bf16_to_fp32 +from msprobe.mindspore.common.log import logger +from msprobe.mindspore.dump.hook_cell.api_registry import api_register + + +class MindsporeDataProcessor(BaseDataProcessor): + mindspore_special_type = tuple([ms.Tensor]) + ops_func, mint_ops_func, _ = load_ops_functions() + + def __init__(self, config, data_writer): + super().__init__(config, data_writer) + self.mindspore_object_key = { + "dtype": self.analyze_dtype_in_kwargs + } + + @staticmethod + def get_md5_for_tensor(x): + x = convert_bf16_to_fp32(x) + tensor_bytes = x.asnumpy().tobytes() + crc32_hash = zlib.crc32(tensor_bytes) + return f"{crc32_hash:08x}" + + @staticmethod + def analyze_dtype_in_kwargs(element): + return {"type": "mindspore.dtype", "value": str(element)} + + @classmethod + def get_special_types(cls): + return super().get_special_types() + cls.mindspore_special_type + + def get_stat_info(self, data): + tensor_stat = TensorStatInfo() + if data.numel() == 0: + return tensor_stat + elif data.dtype == ms.bool_: + tensor_stat.max = 
self.mint_ops_func["max"](data).item() + tensor_stat.min = self.mint_ops_func["min"](data).item() + elif not data.shape: + tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() + elif data.dtype == ms.complex64 or data.dtype == ms.complex128: + data_abs = np.abs(data.asnumpy()) + tensor_stat.max = np.max(data_abs) + tensor_stat.min = np.min(data_abs) + tensor_stat.mean = np.mean(data_abs) + tensor_stat.norm = np.linalg.norm(data_abs) + else: + if data.dtype == ms.bfloat16 or not ops.is_floating_point(data): + data = data.to(ms.float32) + api_register.norm_inner_op_set_ori_func() + tensor_stat.max = self.mint_ops_func["max"](data).item() + tensor_stat.min = self.mint_ops_func["min"](data).item() + tensor_stat.mean = self.mint_ops_func["mean"](data).item() + tensor_stat.norm = self.ops_func["norm"](data).item() + api_register.norm_inner_op_set_hook_func() + return tensor_stat + + def analyze_single_element(self, element, suffix_stack): + if suffix_stack and suffix_stack[-1] in self.mindspore_object_key: + return self.mindspore_object_key[suffix_stack[-1]](element) + + converted_numpy, numpy_type = self._convert_numpy_to_builtin(element) + if converted_numpy is not element: + return self._analyze_numpy(converted_numpy, numpy_type) + if isinstance(element, ms.Tensor): + return self._analyze_tensor(element, Const.SEP.join(suffix_stack)) + + if isinstance(element, (bool, int, float, str, slice)): + return self._analyze_builtin(element) + return None + + def analyze_element(self, element): + return self.recursive_apply_transform(element, self.analyze_single_element) + + def _analyze_tensor(self, tensor, suffix): + tensor_stat = self.get_stat_info(tensor) + tensor_json = { + 'type': 'mindspore.Tensor', + 'dtype': str(tensor.dtype), + 'shape': tensor.shape, + 'Max': tensor_stat.max, + 'Min': tensor_stat.min, + 'Mean': tensor_stat.mean, + 'Norm': tensor_stat.norm + } + if self.config.summary_mode == Const.MD5: + tensor_md5 = 
self.get_md5_for_tensor(tensor) + tensor_json.update({Const.MD5: tensor_md5}) + return tensor_json + + +class StatisticsDataProcessor(MindsporeDataProcessor): + pass + + +class TensorDataProcessor(MindsporeDataProcessor): + def _analyze_tensor(self, tensor, suffix): + dump_data_name, file_path = self.get_save_file_path(suffix) + single_arg = super()._analyze_tensor(tensor, suffix) + single_arg.update({"data_name": dump_data_name}) + if not path_len_exceeds_limit(file_path): + tensor = convert_bf16_to_fp32(tensor) + saved_tensor = tensor.asnumpy() + np.save(file_path, saved_tensor) + change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) + else: + logger.warning(f'The file path {file_path} length exceeds limit.') + return single_arg diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 95be091b21..f307909a41 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -5,7 +5,7 @@ from typing import List import numpy as np import torch -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common.file_check import path_len_exceeds_limit, change_mode from msprobe.core.common.log import logger from msprobe.core.common.const import Const, OverflowConst, FileCheckConst @@ -191,7 +191,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): if self.overflow_nums == -1: return if self.real_overflow_dump_times >= self.overflow_nums: - raise MsaccException(MsaccException.OVERFLOW_NUMS_ERROR, str(self.real_overflow_dump_times)) + raise MsprobeException(MsprobeException.OVERFLOW_NUMS_ERROR, str(self.real_overflow_dump_times)) def check_overflow_npu(self): if self.overflow_debug_mode_enalbe(): diff --git 
a/debug/accuracy_tools/msprobe/mindspore/common/log.py b/debug/accuracy_tools/msprobe/mindspore/common/log.py new file mode 100644 index 0000000000..ec027c7501 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/common/log.py @@ -0,0 +1,38 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import time +import sys + +from msprobe.mindspore.common.utils import get_rank_if_initialized +from msprobe.core.common.log import BaseLogger +from msprobe.core.common.exceptions import DistributedNotInitializedError + + +class MindsporeLogger(BaseLogger): + def __init__(self): + super().__init__() + + def get_rank(self): + try: + current_rank = get_rank_if_initialized() + except DistributedNotInitializedError: + current_rank = None + + return current_rank + + +logger = MindsporeLogger() \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/mindspore/common/utils.py b/debug/accuracy_tools/msprobe/mindspore/common/utils.py new file mode 100644 index 0000000000..d02f381953 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/common/utils.py @@ -0,0 +1,31 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import mindspore as ms +from msprobe.core.common.exceptions import DistributedNotInitializedError + + +def get_rank_if_initialized(): + if ms.communication.GlobalComm.INITED: + return ms.communication.get_rank() + else: + raise DistributedNotInitializedError("mindspore distributed environment is not initialized") + + +def convert_bf16_to_fp32(tensor): + if tensor.dtype == ms.bfloat16: + tensor = tensor.to(ms.float32) + return tensor + diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py index 56a4b9bf75..04d66d6a26 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py @@ -1,4 +1,5 @@ import os +from msprobe.core.common.utils import Const class DebuggerConfig: @@ -16,17 +17,20 @@ class DebuggerConfig: if not common_config.level: common_config.level = "L1" self.level = DebuggerConfig.convert_map[common_config.level] + self.level_ori = common_config.level self.list = [] if not task_config.list else task_config.list + self.scope =[] if not task_config.scope else task_config.scope self.data_mode = [] if not task_config.data_mode else task_config.data_mode self.file_format = task_config.file_format self.check_mode = task_config.check_mode - + self.framework = Const.MS_FRAMEWORK + self.summary_mode = task_config.summary_mode self.check() def check(self): if not self.dump_path: raise 
Exception("Dump path is empty.") - if not os.path.isabs(self.dump_path): + if self.level_ori != "L1" and not os.path.isabs(self.dump_path): raise Exception("Dump path must be absolute path.") if not self.task: self.task = "statistics" diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 358d0d6f7d..30f7162ff5 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -1,4 +1,6 @@ import os +import mindspore as ms +from msprobe.mindspore.service import Service from msprobe.mindspore.ms_config import parse_json_config from msprobe.mindspore.debugger.debugger_config import DebuggerConfig from msprobe.mindspore.task_handler_factory import TaskHandlerFactory @@ -22,11 +24,28 @@ class PrecisionDebugger: common_config, task_config = parse_json_config(config_path) self.config = DebuggerConfig(common_config, task_config) self.initialized = True + self.service = Service(self.config) @classmethod - def start(cls, target=None): + def start(cls): instance = cls._instance if not instance: raise Exception("No instance of PrecisionDebugger found.") - handler = TaskHandlerFactory.create(instance.config) - handler.handle() + if ms.get_context("mode") == ms.PYNATIVE_MODE and instance.config.level_ori == "L1": + instance.service.start() + else: + handler = TaskHandlerFactory.create(instance.config) + handler.handle() + + @classmethod + def stop(cls): + instance = cls._instance + if not instance: + raise Exception("PrecisionDebugger instance is not created.") + instance.service.stop() + + @classmethod + def step(cls): + if not cls._instance: + raise Exception("PrecisionDebugger instance is not created.") + cls._instance.service.step() \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py 
b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py new file mode 100644 index 0000000000..5508416fde --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -0,0 +1,104 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import mindspore as ms +from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ + HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP +from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor +from msprobe.core.common.utils import Const + + +class ApiRegistry: + def __init__(self): + self.tensor_ori_attr = {} + self.functional_ori_attr = {} + self.mint_ops_ori_attr = {} + self.mint_func_ops_ori_attr = {} + self.norm_inner_ops_ori_attr = {} + + self.tensor_hook_attr = {} + self.functional_hook_attr = {} + self.mint_ops_hook_attr = {} + self.mint_func_ops_hook_attr = {} + self.norm_inner_ops_hook_attr = {} + + self.norm_inner_ops = ["norm", "square", "sqrt", "is_complex"] + + @staticmethod + def store_ori_attr(ori_api_group, api_list, api_ori_attr): + for api in api_list: + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(ori_api_group, sub_module_name) + api_ori_attr[api] = getattr(sub_module, sub_op) + else: + api_ori_attr[api] = 
getattr(ori_api_group, api) + + @staticmethod + def set_api_attr(api_group, attr_dict): + for api, api_attr in attr_dict.items(): + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(api_group, sub_module_name, None) + if sub_module is not None: + setattr(sub_module, sub_op, api_attr) + else: + setattr(api_group, api, api_attr) + + def norm_inner_op_set_hook_func(self): + self.set_api_attr(ms.ops, self.norm_inner_ops_hook_attr) + + def norm_inner_op_set_ori_func(self): + self.set_api_attr(ms.ops, self.norm_inner_ops_ori_attr) + + def api_set_hook_func(self): + self.set_api_attr(ms.Tensor, self.tensor_hook_attr) + self.set_api_attr(ms.ops, self.functional_hook_attr) + self.set_api_attr(ms.mint, self.mint_ops_hook_attr) + self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_hook_attr) + + def api_set_ori_func(self): + self.set_api_attr(ms.Tensor, self.tensor_ori_attr) + self.set_api_attr(ms.ops, self.functional_ori_attr) + self.set_api_attr(ms.mint, self.mint_ops_ori_attr) + self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_ori_attr) + + def initialize_hook(self, hook): + self.store_ori_attr(ms.Tensor, get_tensor_ops(), self.tensor_ori_attr) + wrap_tensor_ops_and_bind(hook) + for attr_name in dir(HOOKTensor): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.tensor_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKTensor, attr_name) + + functional_ops, mint_ops, mint_func_ops = get_functional_ops() + self.store_ori_attr(ms.ops, self.norm_inner_ops, self.norm_inner_ops_ori_attr) + self.store_ori_attr(ms.ops, functional_ops, self.functional_ori_attr) + self.store_ori_attr(ms.mint, mint_ops, self.mint_ops_ori_attr) + self.store_ori_attr(ms.mint.nn.functional, mint_func_ops, self.mint_func_ops_ori_attr) + setup_hooks(hook) + for attr_name in dir(HOOKFunctionalOP): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.functional_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = 
getattr(HOOKFunctionalOP, attr_name) + if attr_name[Const.ATTR_NAME_PREFIX_LEN:] in self.norm_inner_ops: + self.norm_inner_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKFunctionalOP, attr_name) + for attr_name in dir(HOOKMintOP): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.mint_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintOP, attr_name) + for attr_name in dir(HOOKMintNNFunctionalOP): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) + + +api_register = ApiRegistry() diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py new file mode 100644 index 0000000000..bcb80dd226 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py @@ -0,0 +1,57 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from collections import defaultdict + +from mindspore import nn +from msprobe.core.common.const import Const + + +cell_count = defaultdict(int) +g_stop_hook = False + + +class HOOKCell(nn.Cell): + + def __init__(self, build_hook) -> None: + super(HOOKCell, self).__init__() + self.changed_status = False + self.input_kwargs = {} + self.prefix = "" + global g_stop_hook + if not g_stop_hook: + g_stop_hook = True + self.changed_status = True + if hasattr(self, "prefix_op_name_"): + self.prefix = self.prefix_op_name_ + + cell_count[self.prefix] += 1 + self.prefix = self.prefix + str(cell_count[self.prefix] - 1) + Const.SEP + forward_hook, backward_hook = build_hook(self.prefix) + self.register_forward_hook(forward_hook) + self.register_backward_hook(backward_hook) + + # 重载call,加全局标志。 + def __call__(self, *args, **kwargs): + try: + self.input_kwargs = kwargs + out = super(HOOKCell, self).__call__(*args, **kwargs) + except Exception as e: + raise e + finally: + if self.changed_status: + self.changed_status = False + global g_stop_hook + g_stop_hook = False + return out diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml new file mode 100644 index 0000000000..089f444b61 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml @@ -0,0 +1,925 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# List of ops that register hooks + + +ops: + - adaptive_avg_pool1d + - adaptive_avg_pool2d + - adaptive_avg_pool3d + - adaptive_max_pool1d + - adaptive_max_pool2d + - avg_pool1d + - avg_pool2d + - avg_pool3d + - batch_norm + - bias_add + - ctc_greedy_decoder + - conv1d + - conv2d + - conv3d + - deformable_conv2d + - dense + - dropout + - dropout1d + - dropout2d + - dropout3d + - flatten + - fold + - fractional_max_pool3d + - lp_pool1d + - lp_pool2d + - lrn + - max_pool2d + - max_pool3d + - max_unpool1d + - max_unpool2d + - max_unpool3d + - unfold + - binary_cross_entropy + - binary_cross_entropy_with_logits + - cosine_embedding_loss + - cross_entropy + - ctc_loss + - gaussian_nll_loss + - hinge_embedding_loss + - huber_loss + - kl_div + - l1_loss + - margin_ranking_loss + - mse_loss + - multi_margin_loss + - multilabel_margin_loss + - multilabel_soft_margin_loss + - nll_loss + - smooth_l1_loss + - triplet_margin_loss + - elu + - fast_gelu + - gelu + - glu + - gumbel_softmax + - hardshrink + - hardsigmoid + - hardswish + - hardtanh + - leaky_relu + - log_softmax + - logsigmoid + - mish + - prelu + - relu + - relu6 + - celu + - rrelu + - selu + - sigmoid + - silu + - softmax + - softmin + - softshrink + - softsign + - tanh + - threshold + - cdist + - dist + - pdist + - choice_with_mask + - random_categorical + - log_uniform_candidate_sampler + - uniform_candidate_sampler + - affine_grid + - bounding_box_decode + - bounding_box_encode + - col2im + - check_valid + - crop_and_resize + - grid_sample + - interpolate + - iou + - pad + - padding + - pixel_shuffle + - pixel_unshuffle + - upsample + - abs + - absolute + - accumulate_n + - acos + - arccos + - acosh + - add + - addcdiv + - addcmul + - addmv + - addn + - angle + - arccosh + - arcsin + - arcsinh + - arctan + - arctanh + - 
arctan2 + - asin + - asinh + - atan + - atan2 + - atanh + - atleast_1d + - atleast_2d + - atleast_3d + - bessel_i0 + - bessel_i0e + - bessel_i1 + - bessel_i1e + - bessel_j0 + - bessel_j1 + - bessel_k0 + - bessel_k0e + - bessel_k1 + - bessel_k1e + - bessel_y0 + - bessel_y1 + - bitwise_and + - bitwise_left_shift + - bitwise_or + - bitwise_right_shift + - bitwise_xor + - ceil + - clamp + - clip + - combinations + - copysign + - cos + - cosh + - cosine_similarity + - cov + - diag_embed + - diff + - deg2rad + - digamma + - div + - divide + - erf + - erfc + - erfinv + - exp + - exp2 + - expm1 + - floor + - floor_div + - floor_mod + - float_power + - fmod + - frac + - gcd + - hypot + - igamma + - igammac + - imag + - i0 + - inv + - invert + - lcm + - ldexp + - lerp + - log + - log2 + - log10 + - log1p + - logaddexp + - logaddexp2 + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - mul + - multiply + - mvlgamma + - neg + - negative + - nextafter + - polar + - polygamma + - positive + - pow + - rad2deg + - ravel + - real + - reciprocal + - remainder + - rot90 + - round + - rsqrt + - sgn + - sign + - signbit + - sin + - sinc + - sinh + - sqrt + - square + - sub + - subtract + - t + - tan + - tanhshrink + - trapz + - tril_indices + - triu_indices + - true_divide + - trunc + - truncate_div + - truncate_mod + - xdivy + - xlogy + - zeta + - all + - amax + - amin + - aminmax + - any + - argmax + - argmin + - cummax + - cummin + - cumprod + - cumsum + - fmax + - histc + - logsumexp + - max + - mean + - median + - min + - norm + - prod + - std + - std_mean + - var + - var_mean + - argsort + - approximate_equal + - equal + - ge + - greater + - greater_equal + - gt + - intopk + - isclose + - isfinite + - isinf + - isnan + - isneginf + - isposinf + - isreal + - is_complex + - le + - less + - less_equal + - lt + - maximum + - minimum + - msort + - ne + - not_equal + - searchsorted + - topk + - bmm + - addbmm + - addmm + - baddbmm + - addr + - adjoint + - 
cholesky + - cholesky_solve + - batch_dot + - dot + - eig + - inner + - inverse + - geqrf + - ger + - kron + - lu_solve + - lu_unpack + - matmul + - matrix_solve + - matrix_band_part + - matrix_diag + - matrix_diag_part + - matrix_set_diag + - mm + - mv + - outer + - orgqr + - ormqr + - pinv + - svd + - tensor_dot + - logdet + - slogdet + - qr + - trace + - bartlett_window + - blackman_window + - hamming_window + - hann_window + - kaiser_window + - eye + - fill + - full + - full_like + - linspace + - logspace + - one_hot + - arange + - range + - heaviside + - bernoulli + - gamma + - laplace + - multinomial + - multinomial_with_replacement + - rand + - rand_like + - randint + - randint_like + - randn + - randn_like + - random_gamma + - random_poisson + - randperm + - standard_laplace + - standard_normal + - uniform + - argwhere + - batch_to_space_nd + - bincount + - block_diag + - broadcast_to + - cat + - channel_shuffle + - chunk + - column_stack + - concat + - conj + - count_nonzero + - deepcopy + - diag + - diagflat + - diagonal + - dyn_shape + - dsplit + - dstack + - einsum + - expand + - expand_dims + - flip + - fliplr + - flipud + - gather_d + - gather_elements + - gather_nd + - hsplit + - hstack + - index_add + - index_fill + - index_select + - inplace_add + - inplace_index_add + - inplace_sub + - inplace_update + - masked_fill + - masked_select + - meshgrid + - moveaxis + - movedim + - narrow + - nan_to_num + - nansum + - normal + - nonzero + - population_count + - rank + - repeat_elements + - repeat_interleave + - reshape + - reverse + - reverse_sequence + - roll + - scatter + - scatter_nd + - select + - sequence_mask + - shuffle + - size + - slice + - sort + - space_to_batch_nd + - sparse_segment_mean + - split + - squeeze + - stack + - strided_slice + - sum + - swapaxes + - swapdims + - tensor_scatter_add + - tensor_scatter_div + - tensor_scatter_max + - tensor_scatter_min + - tensor_scatter_mul + - tensor_scatter_sub + - tensor_scatter_elements + - 
tensor_split + - tile + - tril + - triu + - transpose + - unbind + - unique + - unique_consecutive + - unique_with_pad + - unsorted_segment_max + - unsorted_segment_min + - unsorted_segment_prod + - unsorted_segment_sum + - unsqueeze + - unstack + - view_as_real + - vsplit + - vstack + - where + - cross + - renorm + - is_tensor + - scalar_cast + - scalar_to_tensor + - tuple_to_array + - clip_by_global_norm + - clip_by_value + - assign + - assign_add + - assign_sub + - scatter_add + - scatter_div + - scatter_max + - scatter_min + - scatter_mul + - scatter_nd_add + - scatter_nd_div + - scatter_nd_max + - scatter_nd_min + - scatter_nd_mul + - scatter_nd_sub + - scatter_update + - derivative + - jet + +tensor: + - __abs__ + - __add__ + - __and__ + - __bool__ + - __eq__ + - __ge__ + - __gt__ + - __iadd__ + - __ifloordiv__ + - __imatmul__ + - __imod__ + - __imul__ + - __isub__ + - __le__ + - __lt__ + - __matmul__ + - __mod__ + - __mul__ + - __ne__ + - __neg__ + - __or__ + - __pow__ + - __radd__ + - __rmatmul__ + - __rmod__ + - __rmul__ + - __rpow__ + - __rsub__ + - __sub__ + - __truediv__ + - __xor__ + - abs + - absolute + - acos + - acosh + - add + - addbmm + - addcdiv + - addcmul + - addmm + - addmv + - addr + - all + - amax + - amin + - any + - arccos + - arccosh + - argmax + - angle + - arcsin + - arcsinh + - arctan + - arctanh + - argmin + - argsort + - asin + - asinh + - atan + - atan2 + - atanh + - baddbmm + - bernoulli + - bincount + - bitwise_and + - bitwise_or + - bitwise_xor + - bmm + - bool + - broadcast_to + - ceil + - cholesky_solve + - cholesky + - clamp + - clip + - conj + - copysign + - cos + - cosh + - cross + - cummax + - cummin + - cumprod + - cumsum + - deg2rad + - diag + - diagflat + - diff + - digamma + - div + - divide + - equal + - erf + - erfc + - erfinv + - exp + - expand_as + - expm1 + - flip + - fliplr + - flipud + - float_power + - floor + - fmod + - frac + - gather_elements + - ge + - geqrf + - ger + - greater + - greater_equal + - gt + - 
half + - hardshrink + - heaviside + - histc + - hypot + - i0 + - igamma + - igammac + - imag + - index_add + - index_fill + - index_put + - index_select + - inner + - int + - inverse + - isclose + - isfinite + - isinf + - isnan + - is_complex + - is_signed + - isneginf + - isposinf + - isreal + - lcm + - ldexp + - le + - lerp + - less + - less_equal + - log + - log10 + - log1p + - log2 + - logaddexp + - logaddexp2 + - logdet + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - logsumexp + - long + - lt + - masked_fill + - masked_scatter + - masked_select + - matmul + - max + - maximum + - mean + - median + - min + - minimum + - moveaxis + - movedim + - msort + - multinomial + - multiply + - mvlgamma + - nan_to_num + - nansum + - narrow + - ne + - neg + - negative + - nelement + - new_ones + - new_zeros + - nextafter + - norm + - nonzero + - not_equal + - ormqr + - permute + - pow + - prod + - qr + - ravel + - real + - reciprocal + - remainder + - renorm + - rad2deg + - tile + - repeat_interleave + - reshape + - reshape + - round + - rot90 + - rsqrt + - sum_to_size + - scatter + - sgn + - short + - sigmoid + - sign + - signbit + - sin + - sinc + - sinh + - slogdet + - sort + - split + - sqrt + - square + - squeeze + - std + - subtract + - subtract + - svd + - swapaxes + - swapdims + - t + - take + - tan + - tanh + - trace + - swapaxes + - tile + - to + - topk + - tril + - tensor_split + - transpose + - true_divide + - trunc + - unbind + - unique_consecutive + - unsqueeze + - var + - view + - where + - xlogy + - from_numpy + - std + - take + - var + - all + - any + - copy + - diagonal + - flatten + - resize + - sum + +mint.ops: + - abs + - absolute_import + - add + - add_ex + - all + - any + - any_ex + - arange + - argmax + - avg_pool2d + - baddbmm + - baddbmm_ex + - batch_norm + - binary_cross_entropy_with_logits + - bitwise_and + - bitwise_or + - bitwise_xor + - bmm + - broadcast_to + - cat + - cat_ex + - ceil + - chunk + - clamp + - conv2d 
+ - conv_transpose2d + - cos + - cross + - cummax + - cummin + - cumsum + - div + - divide + - dropout + - embedding + - eq + - erf + - erfinv + - exp + - flatten + - flip + - flip_ex + - fold + - full + - functional + - gather + - gelu + - greater + - grid_sample + - group_norm + - gt + - index_select + - interpolate + - isclose + - isfinite + - layer_norm + - le + - leaky_relu + - less + - less_equal + - linear + - linspace + - log + - logical_and + - logical_not + - logical_or + - lt + - masked_select + - matmul + - max + - max_pool2d + - maximum + - mean + - mean_ex + - min + - minimum + - mul + - ne + - neg + - negative + - nn + - nonzero + - normal + - one_hot + - ones + - ones_ex + - ones_like + - pad + - permute + - permute_ex + - pow + - prod + - reciprocal + - relu + - remainder + - repeat_interleave + - rsqrt + - scatter + - scatter_add + - searchsorted + - sigmoid + - silu + - sin + - softmax + - softplus + - sort + - split + - sqrt + - sqrt_ex + - square + - stack + - sub + - sub_ex + - sum + - tanh + - tile + - topk + - tril + - triu + - unfold + - unique + - where + - xlogy + - zeros + - zeros_ex + - zeros_like + +mint.nn: + - Dropout + - Embedding + - Fold + - LayerNorm + - Linear + - MaxPool2d + - Unfold + - Upsample + +mint.nn.functional: + - absolute_import + - avg_pool2d + - batch_norm + - batch_norm_ex + - bce_with_logits + - binary_cross_entropy_with_logits + - conv_transpose2d + - dense + - dropout + - embedding + - fold + - gelu + - grid_sample + - group_norm + - interpolate + - layer_norm + - leaky_relu + - linear + - max_pool2d + - max_pool2d_ex + - normal + - one_hot + - one_hot_ext + - pad + - relu + - sigmoid + - silu + - softmax + - softmax_ex + - softplus + - tanh + - unfold diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_functional.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_functional.py new file mode 100644 index 0000000000..be3d1bd254 --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_functional.py @@ -0,0 +1,94 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import yaml +import mindspore as ms +from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell +from msprobe.core.common.utils import Const +from msprobe.core.common.file_check import FileOpen + + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") + + +def load_ops_functions(): + ops_func = {f: getattr(ms.ops, f) for f in dir(ms.ops)} + mint_ops_func = {f: getattr(ms.mint, f) for f in dir(ms.mint)} + mint_func_ops_func = {f: getattr(ms.mint.nn.functional, f) for f in dir(ms.mint.nn.functional)} + return ops_func, mint_ops_func, mint_func_ops_func + + +def get_functional_ops(): + ops_func, mint_ops_func, mint_func_ops_func = load_ops_functions() + with FileOpen(yaml_path, 'r') as f: + config = yaml.safe_load(f) + WrapFunctionalOps = config.get("ops") + WrapMintOps = config.get("mint.ops") + WrapMintFunctionalOps = config.get("mint.nn.functional") + return ( + set(WrapFunctionalOps) & set(ops_func.keys()), + set(WrapMintOps) & set(mint_ops_func.keys()), + set(WrapMintFunctionalOps) & set(mint_func_ops_func.keys()) + ) + + +class HOOKFunctionalOP(object): + pass + + +class HOOKMintOP(object): + pass + + +class 
HOOKMintNNFunctionalOP(object): + pass + + +class FunctionalOPTemplate(HOOKCell): + def __init__(self, op_name, op_dict, prefix, hook): + self.op_name = op_name + self.op_func = op_dict[op_name] + self.prefix_op_name_ = prefix + str(op_name.split(Const.SEP)[-1]) + Const.SEP + super().__init__(hook) + + def construct(self, *args, **kwargs): + if self.op_name.startswith('dropout'): + return args[0] if args else kwargs.get('input') + return self.op_func(*args, **kwargs) + + +def wrap_functional_op(op_name, op_dict, prefix, hook): + def op_template(*args, **kwargs): + return FunctionalOPTemplate(op_name, op_dict, prefix, hook)(*args, **kwargs) + return op_template + + +def wrap_functional_ops_and_bind(ops, op_dict, prefix, hook, hook_class): + for op_name in ops: + if callable(op_dict[op_name]): + setattr(hook_class, Const.ATTR_NAME_PREFIX + op_name, wrap_functional_op(op_name, op_dict, prefix, hook)) + + +def setup_hooks(hook): + functional_ops, mint_ops, mint_func_ops = get_functional_ops() + wrap_functional_ops_and_bind( + functional_ops, {f: getattr(ms.ops, f) for f in dir(ms.ops)}, "Functional.", hook, HOOKFunctionalOP) + wrap_functional_ops_and_bind( + mint_ops, {f: getattr(ms.mint, f) for f in dir(ms.mint)}, "Mint.", hook, HOOKMintOP) + wrap_functional_ops_and_bind( + mint_func_ops, {f: getattr(ms.mint.nn.functional, f) for f in dir(ms.mint.nn.functional)}, "MintFunctional.", hook, HOOKMintNNFunctionalOP) + diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_tensor.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_tensor.py new file mode 100644 index 0000000000..ae6a9a979d --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_tensor.py @@ -0,0 +1,66 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import yaml +import mindspore as ms + +from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell +from msprobe.core.common.utils import Const +from msprobe.core.common.file_check import FileOpen + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapTensorOps = yaml.safe_load(f).get('tensor') + +TensorFunc = {} +for f in dir(ms.Tensor): + TensorFunc[f] = getattr(ms.Tensor, f) + + +def get_tensor_ops(): + global WrapTensorOps + _tensor_ops = dir(ms.Tensor) + return set(WrapTensorOps) & set(_tensor_ops) + + +class HOOKTensor(object): + pass + + +class TensorOPTemplate(HOOKCell): + + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Tensor." 
+ str(op_name) + Const.SEP + super().__init__(hook) + + def construct(self, *args, **kwargs): + return TensorFunc[str(self.op_name_)](*args, **kwargs) + + +def wrap_tensor_op(op_name, hook): + def tensor_op_template(*args, **kwargs): + return TensorOPTemplate(op_name, hook)(*args, **kwargs) + + return tensor_op_template + + +def wrap_tensor_ops_and_bind(hook): + _tensor_ops = get_tensor_ops() + for op_name in _tensor_ops: + if callable(TensorFunc[op_name]): + setattr(HOOKTensor, Const.ATTR_NAME_PREFIX + str(op_name), wrap_tensor_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index 2b390ae9e4..49ce4cf2c0 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -1,6 +1,7 @@ import json from msprobe.core.common_config import CommonConfig, BaseConfig from msprobe.core.common.file_check import FileOpen +from msprobe.core.common.const import Const class TensorConfig(BaseConfig): @@ -31,6 +32,8 @@ class StatisticsConfig(BaseConfig): if self.data_mode is not None and len(self.data_mode) > 0: if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]: raise Exception("data_mode must be all, input or output") + if self.summary_mode and self.summary_mode not in ["statistics", "md5"]: + raise Exception("summary_mode is invalid") class OverflowCheck(BaseConfig): @@ -56,11 +59,11 @@ def parse_task_config(task, json_config): task_map = json_config[task] if not task_map: task_map = dict() - if task == "tensor": + if task == Const.TENSOR: return TensorConfig(task_map) - elif task == "statistics": + elif task == Const.STATISTICS: return StatisticsConfig(task_map) - elif task == "overflow_check": + elif task == Const.OVERFLOW_CHECK: return OverflowCheck(task_map) else: raise Exception("task is invalid.") @@ -73,6 +76,6 @@ def parse_json_config(json_file_path): json_config = json.load(file) common_config 
= parse_common_config(json_config) if not common_config.task: - common_config.task = "statistics" + common_config.task = Const.STATISTICS task_config = parse_task_config(common_config.task, json_config) return common_config, task_config diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py new file mode 100644 index 0000000000..e8aa34dc4f --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -0,0 +1,138 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +from pathlib import Path +import functools + +from msprobe.core.data_dump.data_collector import build_data_collector +from msprobe.core.data_dump.scope import BaseScope +from msprobe.mindspore.common.utils import get_rank_if_initialized +from msprobe.core.common.file_check import FileChecker, FileCheckConst, check_path_before_create +from msprobe.mindspore.common.log import logger +from msprobe.core.common.utils import Const +from msprobe.core.common.exceptions import DistributedNotInitializedError +from msprobe.mindspore.dump.hook_cell.api_registry import api_register +from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs + + +class Service: + def __init__(self, config): + self.model = None + self.config = config + self.config.level = self.config.level_ori + self.data_collector = build_data_collector(config) + self.switch = False + self.current_iter = 0 + self.first_start = True + self.current_rank = None + self.dump_iter_dir = None + + def build_hook(self, module_type, name): + def forward_hook(api_or_module_name, module, input, output): + self.data_collector.visit_and_clear_overflow_status(api_or_module_name) + if not self.switch: + return None + if self.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=input, kwargs=module.input_kwargs, output=output) + self.data_collector.forward_data_collect(api_or_module_name, module, pid, module_input_output) + if self.data_collector.if_return_forward_new_output(): + return self.data_collector.get_forward_new_output() + return output + + def backward_hook(api_or_module_name, module, grad_input, grad_output): + self.data_collector.visit_and_clear_overflow_status(api_or_module_name) + if not self.switch: + return + if self.data_collector: + module_input_output = ModuleBackwardInputsOutputs(grad_input=grad_input, grad_output=grad_output) + 
self.data_collector.backward_data_collect(api_or_module_name, module, pid, module_input_output) + + pid = os.getpid() + forward_name_template = name + Const.FORWARD + backward_name_template = name + Const.BACKWARD + forward_hook = functools.partial(forward_hook, forward_name_template) + backward_hook = functools.partial(backward_hook, backward_name_template) + + def wrap_forward_hook(*args, **kwargs): + return forward_hook(*args, **kwargs) + + def wrap_backward_hook(*args, **kwargs): + return backward_hook(*args, **kwargs) + + return wrap_forward_hook, wrap_backward_hook + + def step(self): + self.current_iter += 1 + self.data_collector.update_iter(self.current_iter) + + def start(self, model=None): + self.model = model + if self.config.step and self.current_iter > max(self.config.step): + self.stop() + raise Exception("msprobe: exit after iteration {}".format(max(self.config.step))) + if self.config.step and self.current_iter not in self.config.step: + return + if self.first_start: + try: + self.current_rank = get_rank_if_initialized() + except DistributedNotInitializedError: + self.current_rank = None + + if self.config.rank and self.current_rank not in self.config.rank: + return + self.register_hook_new() + self.first_start = False + self.switch = True + logger.info_on_rank_0(f"Dump switch is turned on at step {self.current_iter}. 
") + self.create_dirs() + logger.info_on_rank_0(f"Dump data will be saved in {self.dump_iter_dir}.") + + def stop(self): + if self.config.step and self.current_iter not in self.config.step: + return + if self.config.rank and self.current_rank not in self.config.rank: + return + self.switch = False + self.data_collector.write_json() + + def create_dirs(self): + check_path_before_create(self.config.dump_path) + if not os.path.exists(self.config.dump_path): + Path(self.config.dump_path).mkdir(mode=0o750, exist_ok=True) + file_check = FileChecker(self.config.dump_path, FileCheckConst.DIR) + file_check.common_check() + self.dump_iter_dir = os.path.join(self.config.dump_path, f"step{self.current_iter}") + cur_rank = self.current_rank if self.current_rank is not None else '' + dump_dir = os.path.join(self.dump_iter_dir, f"rank{cur_rank}") + if not os.path.exists(dump_dir): + Path(dump_dir).mkdir(mode=0o750, parents=True, exist_ok=True) + if self.config.task in self.data_collector.tasks_need_tensor_data: + dump_data_dir = os.path.join(dump_dir, "dump_tensor_data") + Path(dump_data_dir).mkdir(mode=0o750, exist_ok=True) + else: + dump_data_dir = None + + dump_file_path = os.path.join(dump_dir, "dump.json") + stack_file_path = os.path.join(dump_dir, "stack.json") + construct_file_path = os.path.join(dump_dir, "construct.json") + self.data_collector.update_dump_paths( + dump_file_path, stack_file_path, construct_file_path, dump_data_dir, None) + + def register_hook_new(self): + logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) + if self.config.level == "L1": + api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + api_register.api_set_hook_func() diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py index 8e8ceda947..760e7c862d 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/config.py @@ -1,10 +1,8 @@ import os import yaml from msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path -from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps from msprobe.core.common.file_check import FileOpen - -WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) +from msprobe.pytorch.pt_config import RunUTConfig class Config: @@ -24,6 +22,7 @@ class Config: def validate(key, value): validators = { 'white_list': list, + 'black_list': list, 'error_data_path': str, 'precision': int } @@ -34,14 +33,11 @@ class Config: if key == 'precision' and value < 0: raise ValueError("precision must be greater than 0") if key == 'white_list': - if not isinstance(value, list): - raise ValueError("white_list must be a list type") - if not all(isinstance(i, str) for i in value): - raise ValueError("All elements in white_list must be of str type") - invalid_api = [i for i in value if i not in WrapApi] - if invalid_api: - raise ValueError( - f"{', '.join(invalid_api)} is not in support_wrap_ops.yaml, please check the white_list") + RunUTConfig.check_filter_list_config(key, value) + if key == 'black_list': + RunUTConfig.check_filter_list_config(key, value) + if key == 'error_data_path': + RunUTConfig.check_error_data_path_config(value) return value diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py index d5d08818a9..b6e8932960 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/common/utils.py @@ -166,6 +166,7 @@ def initialize_save_path(save_path, dir_name): os.mkdir(data_path, mode=FileCheckConst.DATA_DIR_AUTHORITY) data_path_checker = 
FileChecker(data_path, FileCheckConst.DIR) data_path_checker.common_check() + return data_path def write_pt(file_path, tensor): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml index 7f26c72aa3..2dac535dc0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml @@ -1,4 +1,5 @@ white_list: [] +black_list: [] error_data_path: './' precision: 14 \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py index e38b4e6b24..732745ee8c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py @@ -14,6 +14,9 @@ from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import exec_api, generat from msprobe.pytorch.api_accuracy_checker.common.utils import get_json_contents from msprobe.core.common.file_check import check_link from msprobe.pytorch.common.log import logger +from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward +from msprobe.core.common.const import Const + def check_tensor_overflow(x): if isinstance(x, torch.Tensor) and x.numel() != 0 and x.dtype != torch.bool: @@ -52,12 +55,12 @@ def check_data_overflow(x): def run_overflow_check(forward_file): logger.info("start UT test") - forward_content = get_json_contents(forward_file) + forward_content, _, real_data_path = parse_json_info_forward_backward(forward_file) for api_full_name, api_info_dict in tqdm(forward_content.items()): try: - run_torch_api(api_full_name, api_info_dict) + run_torch_api(api_full_name, api_info_dict, real_data_path) except Exception as err: - api_name = 
api_full_name.split("_", 1)[1].rsplit("_", 2)[0] + _, api_name, _ = api_full_name.split(Const.SEP) if "not implemented for 'Half'" in str(err): logger.warning(f"API {api_name} not support half tensor in CPU, please add {api_name} to CONVERT_API " f"'fp16_to_fp32' list in accuracy_tools/api_accuracy_check/common/utils.py file.") @@ -68,11 +71,10 @@ def run_overflow_check(forward_file): logger.error(f"Run {api_full_name} UT Error: %s" % str(err)) -def run_torch_api(api_full_name, api_info_dict): +def run_torch_api(api_full_name, api_info_dict, real_data_path): torch.npu.clear_npu_overflow_flag() - api_type = api_full_name.split(".")[0] - api_name = api_full_name.split(".", 1)[1].rsplit(".", 2)[0] - args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path='') + api_type, api_name, _ = api_full_name.split(Const.SEP) + args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path) if not need_grad: logger.warning("%s function with out=... arguments don't support automatic differentiation, skip backward." 
% api_full_name) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 6295245a26..30994f7094 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -32,6 +32,7 @@ from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward from msprobe.core.common.file_check import FileOpen, FileChecker, \ change_mode, check_file_suffix, check_link, check_path_before_create, create_directory from msprobe.pytorch.common.log import logger +from msprobe.pytorch.pt_config import parse_json_config from msprobe.core.common.const import Const, FileCheckConst, CompareConst current_time = time.strftime("%Y%m%d%H%M%S") @@ -39,7 +40,8 @@ UT_ERROR_DATA_DIR = 'ut_error_data' + current_time RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', - 'save_error_data', 'is_continue_run_ut', 'real_data_path']) + 'save_error_data', 'is_continue_run_ut', 'real_data_path', 'white_list', + 'black_list', 'error_data_path']) not_backward_list = ['repeat_interleave'] not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'} not_raise_dtype_set = {'type_as'} @@ -176,8 +178,7 @@ def run_ut(config): logger.info(f"UT task result will be saved in {config.result_csv_path}") logger.info(f"UT task details will be saved in {config.details_csv_path}") if config.save_error_data: - error_data_path = os.path.abspath(os.path.join(msCheckerConfig.error_data_path, UT_ERROR_DATA_DIR)) - logger.info(f"UT task error_datas will be saved in {error_data_path}") + logger.info(f"UT task error_datas will be saved in 
{config.error_data_path}") compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut) with FileOpen(config.result_csv_path, 'r') as file: csv_reader = csv.reader(file) @@ -188,17 +189,17 @@ def run_ut(config): continue if is_unsupported_api(api_full_name): # TODO run_ut does not support to the npu fusion api and distributed api continue + [_, api_name, _] = api_full_name.split(Const.SEP) try: - if msCheckerConfig.white_list: - [_, api_name, _] = api_full_name.split(Const.SEP) - if api_name not in set(msCheckerConfig.white_list): - continue + if config.black_list and api_name in config.black_list: + continue + if config.white_list and api_name not in config.white_list: + continue data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict) is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info) if config.save_error_data: - do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) + do_save_error_data(api_full_name, data_info, config.error_data_path, is_fwd_success, is_bwd_success) except Exception as err: - [_, api_name, _] = api_full_name.split(Const.SEP) if "expected scalar type Long" in str(err): logger.warning(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API " f"'int32_to_int64' list in accuracy_tools/api_accuracy_check/common/utils.py file.") @@ -227,16 +228,16 @@ def is_unsupported_api(api_name): return flag -def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success): +def do_save_error_data(api_full_name, data_info, error_data_path, is_fwd_success, is_bwd_success): if not is_fwd_success or not is_bwd_success: - processor = UtDataProcessor(os.path.join(msCheckerConfig.error_data_path, UT_ERROR_DATA_DIR)) + processor = UtDataProcessor(error_data_path) for element in data_info.in_fwd_data_list: processor.save_tensors_in_element(api_full_name + '.forward.input', element) - 
processor.save_tensors_in_element(api_full_name + '.forward.output.bench', data_info.bench_out) - processor.save_tensors_in_element(api_full_name + '.forward.output.device', data_info.device_out) + processor.save_tensors_in_element(api_full_name + '.forward.output.bench', data_info.bench_output) + processor.save_tensors_in_element(api_full_name + '.forward.output.device', data_info.device_output) processor.save_tensors_in_element(api_full_name + '.backward.input', data_info.grad_in) - processor.save_tensors_in_element(api_full_name + '.backward.output.bench', data_info.bench_grad_out) - processor.save_tensors_in_element(api_full_name + '.backward.output.device', data_info.device_grad_out) + processor.save_tensors_in_element(api_full_name + '.backward.output.bench', data_info.bench_grad) + processor.save_tensors_in_element(api_full_name + '.backward.output.device', data_info.device_grad) def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict): @@ -314,14 +315,14 @@ def run_backward(args, grad, grad_index, out): return grad_out -def initialize_save_error_data(): - error_data_path = msCheckerConfig.error_data_path +def initialize_save_error_data(error_data_path): check_path_before_create(error_data_path) create_directory(error_data_path) - error_data_path_checker = FileChecker(msCheckerConfig.error_data_path, FileCheckConst.DIR, + error_data_path_checker = FileChecker(error_data_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE) error_data_path = error_data_path_checker.common_check() - initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) + error_data_path =initialize_save_path(error_data_path, UT_ERROR_DATA_DIR) + return error_data_path def get_validated_result_csv_path(result_csv_path, mode): @@ -384,6 +385,8 @@ def _run_ut_parser(parser): required=False) parser.add_argument("-f", "--filter_api", dest="filter_api", action="store_true", help=" Whether to filter the api in the api_info_file.", required=False) + 
parser.add_argument("-config", "--config_path", dest="config_path", default="", type=str, + help=" The path of config.json", required=False) def preprocess_forward_content(forward_content): @@ -464,14 +467,22 @@ def run_ut_command(args): if args.result_csv_path: result_csv_path = get_validated_result_csv_path(args.result_csv_path, 'result') details_csv_path = get_validated_details_csv_path(result_csv_path) + white_list = msCheckerConfig.white_list + black_list = msCheckerConfig.black_list + error_data_path = msCheckerConfig.error_data_path + if args.config_path: + _, task_config = parse_json_config(args.config_path, Const.RUN_UT) + white_list = task_config.white_list + black_list = task_config.black_list + error_data_path = task_config.error_data_path if save_error_data: if args.result_csv_path: time_info = result_csv_path.split('.')[0].split('_')[-1] global UT_ERROR_DATA_DIR UT_ERROR_DATA_DIR = 'ut_error_data' + time_info - initialize_save_error_data() + error_data_path = initialize_save_error_data(error_data_path) run_ut_config = RunUTConfig(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, - args.result_csv_path, real_data_path) + args.result_csv_path, real_data_path, set(white_list), set(black_list), error_data_path) run_ut(run_ut_config) diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index a3118e21c2..acc1de1051 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -29,7 +29,6 @@ except ImportError: else: is_gpu = False - torch_without_guard_version_list = ['2.1', '2.2'] for version in torch_without_guard_version_list: if torch.__version__.startswith(version): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index a4b6884343..e214910566 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -37,6 +37,7 @@ from msprobe.core.common.utils import check_compare_param, add_time_with_xlsx, C format_value, check_file_not_exists, check_configuration_param, task_dumppath_get from msprobe.core.common.file_check import FileChecker, change_mode, FileOpen, create_directory from msprobe.core.common.const import Const, CompareConst, FileCheckConst +from msprobe.core.common.exceptions import FileCheckException def check_graph_mode(a_op_name, b_op_name): @@ -491,6 +492,10 @@ def compare_by_op(op_name, op_name_mapping_dict, input_parma): error_file = error.filename n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True + except FileCheckerException: + error_file = data_name + n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE + error_flag = True n_value, b_value, error_flag = get_error_type(n_value, b_value, error_flag) if not error_flag: diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py index 1fce5a3035..e28e588c5f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -4,7 +4,7 @@ from msprobe.pytorch.debugger.debugger_config import DebuggerConfig from msprobe.pytorch.service import Service from msprobe.pytorch.common.log import logger from msprobe.pytorch.pt_config import parse_json_config -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException class PrecisionDebugger: @@ -50,8 +50,8 @@ class PrecisionDebugger: def check_model_valid(model): if not model or isinstance(model, torch.nn.Module): return model - raise MsaccException( - MsaccException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" + raise MsprobeException( + 
MsprobeException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。" ) @classmethod diff --git a/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py index 7e72aab8ae..efb95c3369 100644 --- a/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/functional/dump_module.py @@ -3,7 +3,7 @@ from msprobe.pytorch.common.log import logger from msprobe.core.common.const import Const from msprobe.pytorch.hook_module.api_registry import api_register from msprobe.pytorch.debugger.precision_debugger import PrecisionDebugger -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException from msprobe.core.data_dump.scope import BaseScope module_count = {} @@ -12,10 +12,10 @@ module_count = {} def module_dump(module, dump_name): if not isinstance(module, nn.Module): logger.error("The parameter:module in module_dump is not a Module subclass.") - raise MsaccException(MsaccException.INVALID_PARAM_ERROR) + raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) if not isinstance(dump_name, str): logger.error("The parameter:dump_name in module_dump is not a str type.") - raise MsaccException(MsaccException.INVALID_PARAM_ERROR) + raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) api_register.api_originality() if dump_name not in module_count: module_count[dump_name] = 0 diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py index 19c18c124b..048ab3f901 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/compare.py @@ -228,7 +228,7 @@ class Comparator: else: is_bwd_success, bwd_compare_alg_results = True, None if is_bwd_success and bwd_compare_alg_results is None: - self.saver.record_results(ResultInfo(api_name, 
is_fwd_success, CompareConst.NA, fwd_compare_alg_results, + self.saver.record_results(ResultInfo(api_name, is_fwd_success, CompareConst.NAN, fwd_compare_alg_results, bwd_compare_alg_results)) else: self.saver.record_results(ResultInfo(api_name, is_fwd_success, is_bwd_success, fwd_compare_alg_results, diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index ff09bfd8e9..a3d765f3a4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -4,6 +4,7 @@ import os from msprobe.core.common_config import CommonConfig, BaseConfig from msprobe.core.common.file_check import FileOpen from msprobe.core.common.const import Const +from msprobe.pytorch.hook_module.utils import WrapFunctionalOps, WrapTensorOps, WrapTorchOps class TensorConfig(BaseConfig): @@ -61,20 +62,54 @@ class FreeBenchmarkCheckConfig(BaseConfig): if self.preheat_step and self.preheat_step == 0: raise Exception("preheat_step cannot be 0") + +class RunUTConfig(BaseConfig): + WrapApi = set(WrapFunctionalOps) | set(WrapTensorOps) | set(WrapTorchOps) + def __init__(self, json_config): + super().__init__(json_config) + self.white_list = json_config.get("white_list", Const.DEFAULT_LIST) + self.black_list = json_config.get("black_list", Const.DEFAULT_LIST) + self.error_data_path = json_config.get("error_data_path", Const.DEFAULT_PATH) + self.check_run_ut_config() + + @classmethod + def check_filter_list_config(cls, key, filter_list): + if not isinstance(filter_list, list): + raise Exception("%s must be a list type" % key) + if not all(isinstance(item, str) for item in filter_list): + raise Exception("All elements in %s must be string type" % key) + invalid_api = [item for item in filter_list if item not in cls.WrapApi] + if invalid_api: + raise Exception("Invalid api in %s: %s" % (key, invalid_api)) + + @classmethod + def check_error_data_path_config(cls, error_data_path): + if not 
os.path.exists(error_data_path): + raise Exception("error_data_path: %s does not exist" % error_data_path) + + def check_run_ut_config(self): + RunUTConfig.check_filter_list_config(Const.WHITE_LIST, self.white_list) + RunUTConfig.check_filter_list_config(Const.BLACK_LIST, self.black_list) + RunUTConfig.check_error_data_path_config(self.error_data_path) + + def parse_task_config(task, json_config): default_dic = {} if task == Const.TENSOR: - config_dic = json_config.get(Const.TENSOR) if json_config.get(Const.TENSOR) else default_dic + config_dic = json_config.get(Const.TENSOR, default_dic) return TensorConfig(config_dic) elif task == Const.STATISTICS: - config_dic = json_config.get(Const.STATISTICS) if json_config.get(Const.STATISTICS) else default_dic + config_dic = json_config.get(Const.STATISTICS, default_dic) return StatisticsConfig(config_dic) elif task == Const.OVERFLOW_CHECK: - config_dic = json_config.get(Const.OVERFLOW_CHECK) if json_config.get(Const.OVERFLOW_CHECK) else default_dic + config_dic = json_config.get(Const.OVERFLOW_CHECK, default_dic) return OverflowCheckConfig(config_dic) elif task == Const.FREE_BENCHMARK: - config_dic = json_config.get(Const.FREE_BENCHMARK) if json_config.get(Const.FREE_BENCHMARK) else default_dic + config_dic = json_config.get(Const.FREE_BENCHMARK, default_dic) return FreeBenchmarkCheckConfig(config_dic) + elif task == Const.RUN_UT: + config_dic = json_config.get(Const.RUN_UT, default_dic) + return RunUTConfig(config_dic) else: return StatisticsConfig(default_dic) diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index e5da444840..0ab7d0c58a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -5,7 +5,7 @@ from pathlib import Path from msprobe.pytorch.common.log import logger from msprobe.core.common.file_check import FileChecker, check_path_before_create from msprobe.core.common.const import Const, 
FileCheckConst -from msprobe.core.common.exceptions import DistributedNotInitializedError, MsaccException +from msprobe.core.common.exceptions import DistributedNotInitializedError, MsprobeException from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import BaseScope from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs @@ -138,7 +138,7 @@ class Service: logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level in ["L0", "mix"]: if self.model is None: - logger.error_log_with_exp("The model is None.", MsaccException.INVALID_PARAM_ERROR) + logger.error_log_with_exp("The model is None.", MsprobeException.INVALID_PARAM_ERROR) logger.info_on_rank_0("The init dump mode is enabled, and the module dump function will not be available") for name, module in self.model.named_modules(): if module == self.model: @@ -164,4 +164,4 @@ class Service: api_register.api_modularity() if Const.STATISTICS == self.config.task or Const.TENSOR == self.config.task: - remove_dropout() + remove_dropout() \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py index 15957af217..06c7378ed3 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py @@ -19,7 +19,7 @@ from unittest.mock import patch from msprobe.core.common.log import logger from msprobe.core.common.const import Const -from msprobe.core.common.exceptions import MsaccException +from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common_config import CommonConfig, BaseConfig @@ -44,7 +44,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "task is invalid, it should be one of 
{}".format(Const.TASK_LIST)) self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": 0}) @@ -52,7 +52,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "rank is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -61,7 +61,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "step is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -71,7 +71,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "level is invalid, it should be one of {}".format(Const.LEVEL_LIST)) self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -82,7 +82,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "seed is invalid, it should be an integer") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) 
json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -94,7 +94,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "is_deterministic is invalid, it should be a boolean") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"task": Const.TENSOR}) json_config.update({"rank": [0]}) @@ -107,7 +107,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "enable_dataloader is invalid, it should be a boolean") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) @patch.object(logger, "error_log_with_exp") def test_base_config(self, mock_error_log_with_exp): @@ -130,7 +130,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "scope is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"scope": ["Tensor_Add"]}) json_config.update({"list": "Tensor_Add"}) @@ -139,7 +139,7 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "list is invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) json_config.update({"scope": ["Tensor_Add"]}) json_config.update({"list": ["Tensor_Add"]}) @@ -149,4 +149,4 @@ class TestCommonConfig(TestCase): self.assertEqual(mock_error_log_with_exp.call_args[0][0], "data_mode is 
invalid, it should be a list") self.assertEqual(str(mock_error_log_with_exp.call_args[0][1]), - MsaccException.err_strs.get(MsaccException.INVALID_PARAM_ERROR)) + MsprobeException.err_strs.get(MsprobeException.INVALID_PARAM_ERROR)) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py index 54bc1393aa..5187d3951c 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_debugger_config.py @@ -27,7 +27,7 @@ class TestDebuggerConfig(TestCase): "dump_path": "/absolute_path", "rank": [], "step": [], - "level": "L1" + "level": "L0" } common_config = CommonConfig(json_config) task_config = BaseConfig(json_config) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py index b33167dc7b..425ed3040d 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_precision_debugger.py @@ -35,7 +35,7 @@ class TestPrecisionDebugger(TestCase): "dump_path": "/absolute_path", "rank": [], "step": [], - "level": "L1" + "level": "L0" } common_config = CommonConfig(json_config) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py index ec606d9aa1..35fc616476 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py @@ -35,5 +35,5 @@ class TestConfig(unittest.TestCase): validate_white_list = ['conv1d', 'max_pool1d', 'dropout', '__add__'] self.assertEqual(self.cfg.validate('white_list', validate_white_list), validate_white_list) - with 
self.assertRaises(ValueError): + with self.assertRaises(Exception): self.cfg.validate('white_list', ['invalid_api1', 'invalid_api2']) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py index fb442941b0..288e259c0a 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_acc_compare.py @@ -1,17 +1,267 @@ # coding=utf-8 import unittest -from msprobe.pytorch.compare.acc_compare import rename_api +import pandas as pd +from msprobe.pytorch.compare import acc_compare as compare + +npu_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', + 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], + 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), + ('torch.float32', [16])], + 'output_struct': [('torch.float32', [1, 16, 28, 28])], + 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], + [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], + [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} + +bench_dict = {'op_name': ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.1', + 'Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_output'], + 'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), + ('torch.float32', [16])], + 'output_struct': [('torch.float32', [1, 16, 28, 28])], + 'summary': [[3.029174327850342, -2.926689624786377, -0.06619918346405029], + [0.19919930398464203, -0.19974489510059357, 0.006269412115216255], + [0.19734230637550354, -0.18177609145641327, 0.007903944700956345], + [2.1166646480560303, -2.190781354904175, -0.003579073818400502]], 'stack_info': []} 
+ +tensor_list = [ + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], 'Max': 0.33033010363578796, + 'Min': -0.331031858921051,'Mean': -0.030964046716690063, 'Norm': 2.2533628940582275, 'requires_grad': True, + 'full_op_name': 'Tensor.add_.0.forward_input.0'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_.0.forward_input.alpha.0', 'dtype': "", "shape": '[]', 'md5': None, + 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_.0.forward_output.0'} +] + +result_op_dict = {'op_name': ['Tensor.add_.0.forward_input.0', 'Tensor.add_.0.forward_input.1', + 'Tensor.add_.0.forward_input.alpha.0', 'Tensor.add_.0.forward_output.0'], + 'input_struct': [('torch.float32', [16, 1, 3, 3]), ('torch.float32', [16, 1, 3, 3]), + ("", '[]')], + 'output_struct': [('torch.float32', [16, 1, 3, 3])], + 'summary': [[0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275], + [0.003992878366261721, -0.008102823048830032, -0.0002002553956117481, 0.02844562754034996], + [-0.1, -0.1, -0.1, -0.1], + [0.33033010363578796, -0.331031858921051, -0.030964046716690063, 2.2533628940582275]], + 'stack_info': []} + +o_result = [ + ['Functional_conv2d_0_forward_input.0', 'Functional_conv2d_0_forward_input.0', 'torch.float32', 'torch.float32', + [1, 1, 28, 28], [1, 1, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 3.029174327850342, -2.926689624786377, + -0.06619918346405029, 3.029174327850342, -2.926689624786377, 
-0.06619918346405029, '', '', 'None'], + ['Functional_conv2d_0_forward_input.1', 'Functional_conv2d_0_forward_input.1', 'torch.float32', 'torch.float32', + [16, 1, 5, 5], [16, 1, 5, 5], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19919930398464203, -0.19974489510059357, + 0.006269412115216255, 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, '', '', 'None'], + ['Functional_conv2d_0_forward_input.2', 'Functional_conv2d_0_forward_input.2', 'torch.float32', 'torch.float32', + [16], [16], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, + 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, '', '', 'None'], + ['Functional_conv2d_0_forward_output', 'Functional_conv2d_0_forward_output', 'torch.float32', 'torch.float32', + [1, 16, 28, 28], [1, 16, 28, 28], 0.0, 0.0, 0.0, ' ', '0.0%', '0.0%', '0.0%', ' ', 2.1166646480560303, -2.190781354904175, + -0.003579073818400502, 2.1166646480560303, -2.190781354904175, -0.003579073818400502, '', '', 'None']] + +npu_dict_aten = {'op_name': ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.1', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.2', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.3', + 'Aten__native_batch_norm_legit_functional.default_0_forward_input.4', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.0', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.1', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.2', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.3', + 'Aten__native_batch_norm_legit_functional.default_0_forward_output.4'], + 'input_struct': [('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], + 'output_struct': 
[('torch.float16', [256, 256, 14, 14]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256]), ('torch.float32', [256])], + 'summary': [[139.625, -127.5625, -0.0103607177734375], + [2.5276029109954834, -2.1788690090179443, -0.0008259844034910202], + [2.472219944000244, -2.845968723297119, -0.008756577968597412], + [2.763145923614502, -3.398397922515869, -0.052132632583379745], + [2.673110008239746, -3.149275064468384, 0.01613386906683445], + [13.5546875, -10.640625, -0.008758544921875], + [0.30550330877304077, -0.24485322833061218, -0.010361209511756897], + [623.9192504882812, 432.96826171875, 520.2276611328125], + [2.4797861576080322, -3.055997371673584, -0.04795549064874649], + [61.7945556640625, 42.59713363647461, 52.03831481933594]]} + +bench_dict_functional = { + 'op_name': ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.1', + 'Functional_batch_norm_0_forward_input.2', 'Functional_batch_norm_0_forward_input.3', + 'Functional_batch_norm_0_forward_input.4', 'Functional_batch_norm_0_forward_output'], + 'input_struct': [('torch.float32', [256, 256, 14, 14]), ('torch.float32', [256]), ('torch.float32', [256]), + ('torch.float32', [256]), ('torch.float32', [256])], + 'output_struct': [('torch.float32', [256, 256, 14, 14])], + 'summary': [[3.061628818511963, -3.22507381439209, 3.634914173744619e-05], + [0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06], + [0.9338104128837585, 0.9277191162109375, 0.930335283279419], + [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], + [5.397906303405762, -5.796811580657959, 2.5283952709287405e-10]] +} + +aten_result = [ + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', + 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 136.56337118148804, -124.33742618560791, + -0.010397066915174946, ' ', '4460.480981749501%', '3855.335826136584%', '28603.33536971545%', ' ', 139.625, + -127.5625, 
-0.0103607177734375, 3.061628818511963, -3.22507381439209, 3.634914173744619e-05, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.1', 'Functional_batch_norm_0_forward_input.1', + 'torch.float32', 'torch.float32', [256], [256], 2.527024927258026, -2.1782388387364335, -0.0008296193100250093, + ' ', '437213.84590749856%', '345658.76916858414%', '22823.676544842117%', ' ', 2.5276029109954834, + -2.1788690090179443, -0.0008259844034910202, 0.0005779837374575436, -0.0006301702815108001, 3.634906533989124e-06, + 'Warning', 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.2', 'Functional_batch_norm_0_forward_input.2', + 'torch.float32', 'torch.float32', [256], [256], 1.5384095311164856, -3.7736878395080566, -0.9390918612480164, ' ', + '164.74538192025793%', '406.7705163736246%', '100.94122819224167%', ' ', 2.472219944000244, -2.845968723297119, + -0.008756577968597412, 0.9338104128837585, 0.9277191162109375, 0.930335283279419, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.3', 'Functional_batch_norm_0_forward_input.3', + 'torch.float32', 'torch.float32', [256], [256], 1.763145923614502, -4.398397922515869, -1.0521326325833797, ' ', + '176.3145923614502%', '439.8397922515869%', '105.21326325833797%', ' ', 2.763145923614502, -3.398397922515869, + -0.052132632583379745, 1.0, 1.0, 1.0, 'Warning', 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_input.4', 'Functional_batch_norm_0_forward_input.4', + 'torch.float32', 'torch.float32', [256], [256], 2.673110008239746, -3.149275064468384, 0.01613386906683445, ' ', + 'N/A', 'N/A', 'N/A', ' ', 2.673110008239746, -3.149275064468384, 0.01613386906683445, 0.0, 0.0, 0.0, 'Warning', + 'Need double check api accuracy.', 'None'], + 
['Aten__native_batch_norm_legit_functional.default_0_forward_output.0', 'Functional_batch_norm_0_forward_output', + 'torch.float16', 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 8.156781196594238, -4.843813419342041, + -0.008758545174714527, ' ', '151.11009228611078%', '83.55995967687207%', '3464072756.115108%', ' ', 13.5546875, + -10.640625, -0.008758544921875, 5.397906303405762, -5.796811580657959, 2.5283952709287405e-10, 'Warning', + 'Need double check api accuracy.', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', + 'Yes', '', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', + 'Yes', '', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.3', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', + 'Yes', '', 'None'], + ['Aten__native_batch_norm_legit_functional.default_0_forward_output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', + ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', + 'Yes', '', 'None']] + +highlight_dict = {'red_rows': [], 'yellow_rows': []} + +num_0, num_1, num_2, num_3 = 0, 1, 2, 3 +summary_line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.01, 0, 0, 0, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'Yes', ''] +summary_line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], 
[256, 256, 14, 14], 10, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, + 'Warning', ''] +summary_line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.02, 0, 0, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, + 'Warning', ''] +summary_line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', + 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, + 'Warning', ''] +line_input = ['Functional_batch_norm_0_forward_input.0', 'Functional_batch_norm_0_forward_input.0', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 1, 1, 0.95, 1, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'Yes', ''] +line_1 = ['Functional_batch_norm_0_forward_output.0', 'Functional_batch_norm_0_forward_output.0', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1, 1, 0.59, 1, 'nan', 0, 1, 1, 19, 1, 1, 1, + 'Warning', ''] +line_2 = ['Functional_batch_norm_0_forward_output.1', 'Functional_batch_norm_0_forward_output.1', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 1, 1, 0.8, 1, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, + 'Warning', ''] +line_3 = ['Functional_batch_norm_0_forward_output.2', 'Functional_batch_norm_0_forward_output.2', 'torch.float16', + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1.1e+10, 1, 0.85, 1, 9, 0.12, 0, 1, 1, 0.1, 1, + 1, 1, 'Warning', ''] + +op_data = { + 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': 
False}], + 'input_kwargs': {'alpha': {'type': 'float', 'value': -0.1}}, + 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051,'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True}]} + +op_name = "Tensor.add_0.0.forward" + +op_result = [ + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_input.0'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.003992878366261721, 'Min': -0.008102823048830032, 'Mean': -0.0002002553956117481, + 'Norm': 0.02844562754034996, 'requires_grad': False, 'full_op_name': 'Tensor.add_0.0.forward_input.1'}, + {'full_op_name': 'Tensor.add_0.0.forward_input.alpha.0', 'dtype': "", 'shape': '[]', 'md5': None, + 'Max': -0.1, 'Min': -0.1, 'Mean': -0.1, 'Norm': -0.1, 'data_name': '-1'}, + {'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], + 'Max': 0.33033010363578796, 'Min': -0.331031858921051, 'Mean': -0.030964046716690063, + 'Norm': 2.2533628940582275, 'requires_grad': True, 'full_op_name': 'Tensor.add_0.0.forward_output.0'}] + class TestUtilsMethods(unittest.TestCase): + def test_check_graph_mode(self): + op1 = "Aten" + op2 = "torch" + self.assertTrue(compare.check_graph_mode(op1, op2)) + self.assertTrue(compare.check_graph_mode(op2, op1)) + self.assertFalse(compare.check_graph_mode(op1, op1)) + self.assertFalse(compare.check_graph_mode(op2, op2)) + + def test_check_op(self): + fuzzy_match = False + result = compare.check_op(npu_dict, bench_dict, fuzzy_match) + self.assertEqual(result, True) + + def test_merge_tensor(self): + op_dict = compare.merge_tensor(tensor_list, True, False) + self.assertEqual(op_dict, result_op_dict) + + def test_read_op(self): + result = 
compare.read_op(op_data, op_name) + self.assertEqual(result, op_result) + + def test_match_op(self): + fuzzy_match = False + a, b = compare.match_op([npu_dict], [bench_dict], fuzzy_match) + self.assertEqual(a, 0) + self.assertEqual(b, 0) + + def test_get_accuracy(self): + result = [] + compare.get_accuracy(result, npu_dict, bench_dict, highlight_dict) + self.assertEqual(result, o_result) + + def test_get_accuracy_graph_mode(self): + result = [] + compare.get_accuracy(result, npu_dict_aten, bench_dict_functional, highlight_dict) + self.assertEqual(result, aten_result) + + def test_find_error_rows(self): + summary_result = [summary_line_input, summary_line_1, summary_line_2, summary_line_3] + highlight_dict = {'red_rows': [], 'yellow_rows': []} + compare.find_error_rows(summary_result, 0, 1, highlight_dict, summary_compare=True) + self.assertEqual(highlight_dict, {'red_rows': [], 'yellow_rows': []}) + + def test_find_compare_result_error_rows(self): + result = [line_input, line_1, line_2, line_3] + result_df = pd.DataFrame(result) + highlight_dict = {'red_rows': [], 'yellow_rows': []} + compare.find_compare_result_error_rows(result_df, highlight_dict, False, False) + self.assertEqual(highlight_dict, {'red_rows': [num_1, num_3], 'yellow_rows': [num_2]}) + def test_rename_api(self): test_name_1 = "Distributed.broadcast.0.forward.input.0" expect_name_1 = "Distributed.broadcast.input.0" - actual_name_1 = rename_api(test_name_1, "forward") + actual_name_1 = compare.rename_api(test_name_1, "forward") self.assertEqual(actual_name_1, expect_name_1) - + test_name_2 = "Torch.sum.0.backward.output.0" expect_name_2 = "Torch.sum.output.0" - actual_name_2 = rename_api(test_name_2, "backward") + actual_name_2 = compare.rename_api(test_name_2, "backward") self.assertEqual(actual_name_2, expect_name_2) - \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py new 
file mode 100644 index 0000000000..ac28e994e9 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py @@ -0,0 +1,20 @@ +# coding=utf-8 +import unittest +from msprobe.pytorch.compare import match + + +class TestMatch(unittest.TestCase): + def test_graph_mapping(self): + op1 = "Aten_convolution_1_forward_0.input.0" + op2 = "Torch_conv2d_0_forward_0.input.0" + op3 = "Torch_batch_norm_0_forward_0.input.0" + op4 = "Aten_convolution.default_1_forward_0.input.0" + op5 = "Aten_foo_1_forward_0.input.0" + self.assertTrue(match.graph_mapping.match(op1, op2)) + self.assertTrue(match.graph_mapping.match(op2, op1)) + self.assertTrue(match.graph_mapping.match(op4, op2)) + self.assertTrue(match.graph_mapping.match(op2, op4)) + self.assertFalse(match.graph_mapping.match(op1, op3)) + self.assertFalse(match.graph_mapping.match(op3, op1)) + self.assertFalse(match.graph_mapping.match(op5, op2)) + self.assertFalse(match.graph_mapping.match(op2, op5)) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py index 53b4e66c1b..c344f0b66b 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py @@ -67,3 +67,18 @@ class TestPtConfig(TestCase): self.assertEqual(result.handler_type, "check") self.assertEqual(result.preheat_step, 15) self.assertEqual(result.max_sample, 20) + + run_ut_config = { + "run_ut": { + "white_list": ["conv2d"], + "black_list": ["matmul"], + "error_data_path": '/home/dump_path' + + } + } + with patch('os.path.exists', return_value=True) as mocked_exists: + result = parse_task_config(Const.RUN_UT, run_ut_config) + self.assertEqual(result.white_list, ["conv2d"]) + self.assertEqual(result.black_list, ["matmul"]) + self.assertEqual(result.error_data_path, '/home/dump_path') + mocked_exists.assert_called_once_with('/home/dump_path') diff --git a/debug/accuracy_tools/setup.py 
b/debug/accuracy_tools/setup.py index 3d2c3bb870..4e0eaa1f37 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" # Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,28 +10,59 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -from setuptools import setup, find_packages +import setuptools -setup( - name='mindstudio_probe', - version='1.0', - description='This is a pytorch precision comparison tools', - long_description='This is a pytorch precision comparison tools, include ptdbg and api accuracy checker', - packages=find_packages(), - install_requires=[ - "wheel", - "numpy", - "pandas >= 1.3.5", - "pyyaml", - "rich", - "tqdm", - "openpyxl" - ], + +__version__ = '1.0.0' + +INSTALL_REQUIRED = [ + "wheel", + "numpy", + "pandas >= 1.3.5", + "pyyaml", + "rich", + "tqdm", + "openpyxl" +] + +EXCLUDE_PKGS = [ + "api_accuracy_checker*", + "grad_tool*", + "kj600*", + "ptdbg_ascend*", + "msprobe.test*", +] + +setuptools.setup( + name="mindstudio-probe", + version=__version__, + description="Pytorch Ascend Probe Utils", + long_description="MindStudio-Probe is a set of tools for diagnosing and improving model accuracy on Ascend NPU, " + "including API acc checker, ptdbg, grad tool etc.", + url="https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe", + author="Ascend Team", + author_email="pmail_mindstudio@huawei.com", + packages=setuptools.find_namespace_packages(exclude=EXCLUDE_PKGS, include=["msprobe", "msprobe*"]), include_package_data=True, + python_requires=">=3.6.2", + install_requires=INSTALL_REQUIRED, + classifiers=[ + 'Intended Audience :: Developers', + 'Intended 
Audience :: Education', + 'Intended Audience :: Science/Research', + 'Programming Language :: Python :: 3', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Mathematics', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + license='Apache License 2.0', + keywords='pytorch msprobe ascend', ext_modules=[], zip_safe=False, entry_points={ 'console_scripts': ['msprobe=msprobe.msprobe:main'], - },) \ No newline at end of file + },) diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md index c650f40b3e..47e64a90ba 100644 --- a/profiler/advisor/README.md +++ b/profiler/advisor/README.md @@ -36,11 +36,11 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 3. 查看结果。 - 分析结果输出相关简略建议到执行终端中,并生成`att_advisor_{timestamp}.html`和`att_advisor_{timestamp}.xlsx`文件供用户预览。 + 分析结果输出相关简略建议到执行终端中,并生成`mstt_advisor_{timestamp}.html`和`mstt_advisor_{timestamp}.xlsx`文件供用户预览。 - `att_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 + `mstt_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 - `att_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 + `mstt_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 执行终端输出示例如下: diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index 5f4bd3202c..ada1b0bf4f 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import logging from functools import wraps from typing import Dict, List, Union @@ -59,14 +73,6 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def optimize(self, **kwargs): pass - @abstractmethod - def make_record(self): - pass - - @abstractmethod - def make_render(self): - pass - def init_dataset_list(self)->None: dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py index 846b79a50f..0b585cbc7c 100644 --- a/profiler/advisor/analyzer/cluster/slow_link_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -19,7 +19,7 @@ from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataSet +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataset class SlowLinkAnalyzer(BaseAnalyzer): @@ -35,11 +35,11 @@ class SlowLinkAnalyzer(BaseAnalyzer): SDMA = "SDMA" RDMA = "RDMA" SLOW_LINK_ANALYSIS = "slow_link_analysis" - dataset_cls_list = [ClusterCommunicationDataSet] + dataset_cls_list = [ClusterCommunicationDataset] def __init__(self, collection_path, n_processes: int = 1, **kwargs): super().__init__(collection_path, n_processes, **kwargs) - key = ClusterCommunicationDataSet.get_key() + key = 
ClusterCommunicationDataset.get_key() self.communication_data_class = self.get_first_data_by_key(self.dataset_list, key) self.rank_bw_dict = self.communication_data_class.get_data() self.result = OptimizeResult() @@ -49,8 +49,9 @@ class SlowLinkAnalyzer(BaseAnalyzer): def optimize(self, **kwargs): if self.rank_bw_dict is None: - print("slow_link 分析失败,原因是数据加载失败,请检查你的cluster_analysis_outpu文件夹, \ - 如不关心这类数据请忽略") + print("Slow link analysis failed due to data loading failure. \ + Please check your cluster_analysis_output folder. \ + If you are not concerned about this type of data, please ignore this message.") return self.result self.process() self.format_datas = self.format_details() @@ -65,8 +66,11 @@ class SlowLinkAnalyzer(BaseAnalyzer): def produce_bottleneck(self, link_type: str): data_list = [rank_dict.get(link_type, 0) for rank_id, rank_dict in self.rank_bw_dict.items()] - avg_bw = round(sum(data_list) / len(data_list), 3) - if avg_bw == 0: + if len(data_list) > 0: + avg_bw = round(sum(data_list) / len(data_list), 3) + else: + print("The slow link (identified bottleneck) cannot provide a bottleneck \ + because the analysis data is missing bandwidth information.") return self.bottelneck += f'{link_type}: \n' \ f' The average is {avg_bw}, \n' \ diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py index aa0ddad507..f439b31f77 100644 --- a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -19,7 +19,7 @@ from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataset class 
SlowRankAnalyzer(BaseAnalyzer): @@ -27,11 +27,11 @@ class SlowRankAnalyzer(BaseAnalyzer): RANK = "rank" RATIO_THRESHOLD = 0.05 BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] - dataset_cls_list = [ClusterStepTraceTimeDataSet] + dataset_cls_list = [ClusterStepTraceTimeDataset] def __init__(self, collection_path, n_processes: int = 1, **kwargs): super().__init__(collection_path, n_processes, **kwargs) - key = ClusterStepTraceTimeDataSet.get_key() + key = ClusterStepTraceTimeDataset.get_key() self.step_trace_class = self.get_first_data_by_key(self.dataset_list, key) self.step_trace_dict = self.step_trace_class.get_data() self.result = OptimizeResult() diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py b/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py new file mode 100644 index 0000000000..cee16cce52 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py @@ -0,0 +1,42 @@ +import logging + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqAnalyzer(BaseAnalyzer): + dataset_cls_list = [AICoreFreqDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = AICoreFreqDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + 
class AICoreFreqChecker:
    """Find operators whose recorded AI Core frequencies are noticeably reduced.

    For every operator in the dataset the average shortfall of its recorded
    frequencies against a reference frequency is computed; operators whose
    shortfall ratio reaches ``DECREASE_FREQ_RATIO`` are collected as table rows
    for the result record and the HTML report.
    """

    # Lower bound for the reference frequency (the configured value is used when larger).
    DEFAULT_FREQ = 1800
    # Minimum average shortfall ratio for an operator to be reported.
    DECREASE_FREQ_RATIO = 0.05
    # Number of rows passed to the HTML template.
    SHOW_TOPK_OPS = 10
    # Column positions inside a row of ``decrease_freq_ops``.
    TOTAL_DURATION_INDEX = 2
    DECREASE_FREQ_RATIO_INDEX = 3

    def __init__(self):
        self.ai_core_freq_issues = False
        self.desc = ""
        self.suggestions = ""
        self.decrease_freq_ops = []
        self.headers = []
        self.op_freq = None
        self.rank_id = None
        self.stage = None

    @classmethod
    def _sort_key(cls, row):
        """Return the numeric ``(total duration, decrease ratio)`` sort key of a row.

        The ratio column holds a formatted percentage such as ``"12.50%"``; it is
        converted back to a float so that "10.00%" ranks above "9.00%" instead of
        being compared character by character.
        """
        ratio_text = row[cls.DECREASE_FREQ_RATIO_INDEX]
        return row[cls.TOTAL_DURATION_INDEX], float(ratio_text.rstrip("%"))

    def check_ai_core_freq(self, event_dataset: AICoreFreqDataset, rank_id=None, stage=None):
        """Collect the operators whose frequency shortfall reaches the threshold.

        :param event_dataset: dataset exposing ``op_freq``, a mapping of operator name
            to a dict with the keys ``freq_list``, ``count`` and ``dur``
        :param rank_id: optional rank the dataset belongs to, used in the description
            and as an extra table column
        :param stage: optional stage identifier, used in the detail table name
        """
        if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"):
            logger.debug("Skip slow ai core frequency checker, "
                         "because no ai core frequency were recorded in trace_view.json")
            return

        self.rank_id = rank_id
        self.stage = stage
        self.op_freq = event_dataset.op_freq

        # The reference frequency does not depend on the operator, so read it once.
        max_freq = max(self.DEFAULT_FREQ, convert_to_float(Config().get_config("aic_frequency")))

        for op_name, op_info in self.op_freq.items():
            freq_list = op_info.get("freq_list", [])
            if not freq_list:
                continue

            op_count = op_info.get("count", 0)
            op_total_duration = round(op_info.get("dur", 0), 2)

            # Average relative shortfall of the recorded frequencies against max_freq.
            decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list))
            if decrease_freq_ratio >= self.DECREASE_FREQ_RATIO:
                self.ai_core_freq_issues = True
                self.decrease_freq_ops.append([op_name, op_count, op_total_duration,
                                               f"{round(decrease_freq_ratio, 4):.2%}",
                                               round(sum(freq_list) / len(freq_list), 2),
                                               max(freq_list), min(freq_list)])

        if self.decrease_freq_ops:
            # Sort by total operator duration, then by decrease ratio, both descending.
            self.decrease_freq_ops.sort(key=self._sort_key, reverse=True)

        self.desc = (f"{len(self.decrease_freq_ops)} operators are found during frequency reduction, and the reduction "
                     f"ratio is larger than {self.DECREASE_FREQ_RATIO}.")
        # Compare against None so that rank 0 is still reported.
        if self.rank_id is not None:
            self.desc = f"For rank {self.rank_id}, " + self.desc.lower()
        self.suggestions = "Please check the temperature or max power of your machine."

    def make_record(self, result: OptimizeResult):
        """
        make record for what and how to optimize

        :param result: result object receiving the optimization record and one detail
            row per reported operator
        """
        optimization_item = OptimizeItem("AI Core Frequency", self.desc, [self.suggestions])
        result.add(OptimizeRecord(optimization_item))

        self.headers = ["Operator name", "Count", "Total duration(us)", "AI CORE frequency decreased ratio",
                        "Average frequency", "Max frequency", "Min frequency"]
        has_rank = self.rank_id is not None  # rank 0 is a valid rank id
        if has_rank:
            self.headers = ["Rank id"] + self.headers
        sub_table_name = "AI Core Frequency" if not self.stage else f"Stage-{self.stage}: AI Core Frequency"
        result.add_detail(sub_table_name, headers=self.headers)

        for row in self.decrease_freq_ops:
            if has_rank:
                row = [self.rank_id] + row
            result.add_detail(sub_table_name, detail=row)

    def make_render(self, html_render, add_render_list=True):
        """Render the top ``SHOW_TOPK_OPS`` rows with the frequency template.

        The "Only show ..." note is added to a local copy of the description, so
        calling this method repeatedly does not keep growing ``self.desc``.

        :param html_render: renderer providing ``render_template``
        :param add_render_list: forwarded unchanged to ``render_template``
        :return: whatever ``render_template`` returns
        """
        desc = self.desc
        if self.SHOW_TOPK_OPS:
            desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details."
        return html_render.render_template(key="computation",
                                           template_dir="templates",
                                           template_name="ai_core_frequency.html",
                                           desc=desc,
                                           suggestion=self.suggestions,
                                           headers=self.headers,
                                           data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS],
                                           add_render_list=add_render_list)
profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.cluster_analyse.common_func.file_manager import FileManager class AicpuChecker(OperatorChecker): @@ -47,8 +47,8 @@ class AicpuChecker(OperatorChecker): if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) return {} - with open(rule_path, 'r') as f: - self.aicpu_rules = yaml.safe_load(f) + + self.aicpu_rules = FileManager.read_yaml_file(rule_path) self.filter_aicpu_rules(self.aicpu_rules) for checker_name, check_rule in self.aicpu_rules.items(): if not isinstance(check_rule, (list, dict,)): diff --git a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py index a7d7ddd93c..7a873c6563 100644 --- a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py +++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py @@ -1,5 +1,4 @@ import logging - from typing import List from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index 8682617700..2021bcd576 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -1,19 +1,15 @@ import logging from abc import ABC -from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.aicpu.aicpu_checker import AicpuChecker from profiler.advisor.analyzer.computation.bound.block_dim_checker import BlockDimChecker from profiler.advisor.analyzer.computation.bound.operator_bound_checker 
import OperatorBoundChecker -from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.analyzer.computation.op_compile.dynamic_shape_checker import DynamicShapeChecker from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from profiler.advisor.utils.utils import get_supported_subclass logger = logging.getLogger() @@ -76,14 +72,15 @@ class BlockDimAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = BlockDimChecker(self.cann_version) - + class OperatorBoundAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = OperatorBoundChecker(self.cann_version) + class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) \ No newline at end of file + self.checker = AicpuChecker(self.cann_version) diff --git a/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py new file mode 100644 index 0000000000..291c3a1f94 --- /dev/null +++ b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py @@ -0,0 +1,30 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.dataloader.dataloader_checker import DataloaderChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class DataloaderAnalyzer(BaseAnalyzer): + 
class DataloaderChecker:
    """Report dataloader events whose duration reaches a threshold read from a rule file."""

    def __init__(self):
        self.dataloader_issues = False
        self.optimization_item = []
        self.desc = ""
        self.suggestions = []
        self.dataloader_duration_threshold = None
        self._init_rule()

    def check_slow_dataloader(self, event_dataset: TimelineEventDataset):
        """Flag the dataset when a dataloader event is at least as long as the threshold.

        :param event_dataset: dataset of timeline event; its ``dataloader`` attribute is
            iterated and every event must provide ``dur`` and ``name``
        """
        if not hasattr(event_dataset, "dataloader") or not getattr(event_dataset, "dataloader"):
            logger.debug("Skip slow dataloader checker, because no dataloader duration larger than %s",
                         self.dataloader_duration_threshold)
            return
        if self.dataloader_duration_threshold is None:
            # Without a threshold the comparison below would raise a TypeError.
            logger.warning("Skip slow dataloader checker, because no dataloader duration threshold is configured")
            return

        suggestions_filtered = False
        for event in event_dataset.dataloader:
            # `dur` is divided by 1000 before being compared with the threshold.
            dataloader_duration = float(event.dur) / 1000
            if dataloader_duration < self.dataloader_duration_threshold:
                continue

            if not self.dataloader_issues:
                # Fill the description template once, with the first slow event found.
                self.desc = self.desc.format(dataloader_duration=dataloader_duration,
                                             dataloader_duration_threshold=self.dataloader_duration_threshold)
                self.dataloader_issues = True

            if not suggestions_filtered and "singleprocess" in event.name.lower():
                # Narrow the advice to the entries about I/O and num_workers.
                self.suggestions = self._reset_suggestions(["I/O", "num_workers"])
                suggestions_filtered = True

    def make_record(self, result: OptimizeResult):
        """
        make record for what and how to optimize

        Nothing is added when no slow dataloader was found.
        """
        if not self.dataloader_issues:
            return

        self.optimization_item.append(OptimizeItem("Slow dataloader", self.desc, self.suggestions))
        for optimization in self.optimization_item:
            result.add(OptimizeRecord(optimization))

    def make_render(self, html_render):
        """Render the slow-dataloader template; does nothing when no issue was found."""
        if not self.dataloader_issues:
            return
        html_render.render_template(key="dataloader",
                                    template_dir="templates",
                                    template_name="slow_dataloader.html",
                                    desc=self.desc,
                                    suggestions=self.suggestions)

    def _init_rule(self):
        """Load threshold, description template and solutions from ``rules/dataloader.yaml``."""
        dataloader_rule_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))),
            "rules",
            "dataloader.yaml"
        )
        dataloader_rule = FileManager.read_yaml_file(dataloader_rule_path) or {}

        self.dataloader_duration_threshold = dataloader_rule.get("dataloader_duration_threshold")
        # Fall back to empty values so later formatting and iteration cannot fail on None.
        self.desc = dataloader_rule.get("problem") or ""
        self.suggestions = dataloader_rule.get("solutions") or []

    def _reset_suggestions(self, suggestion_pattern_list):
        """Return the current suggestions that match at least one of the given patterns.

        Each suggestion is returned at most once, in its original order, even when it
        matches several patterns.

        :param suggestion_pattern_list: regular expression patterns passed to ``re.search``
        """
        return [
            solution for solution in self.suggestions
            if any(re.search(pattern, solution) for pattern in suggestion_pattern_list)
        ]
@@ class GraphFusionRules: optimization_item = OptimizeItem( "fusion issue", f"Found {len(self.candidates)} fusion issues", - ["Check fusion issues detail in att_advisor*.html"] + ["Check fusion issues detail in mstt_advisor*.html"] ) total_time = 0.0 for candidate in self.task_duration_list: diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py deleted file mode 100644 index 916a396b3d..0000000000 --- a/profiler/advisor/analyzer/overall/overall_analyzer.py +++ /dev/null @@ -1,45 +0,0 @@ -import logging -from typing import Dict, List - -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult -from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface - -logger = logging.getLogger() - - -class OverallSummaryAnalyzer(BaseAnalyzer): - - def __init__(self, profiling_path, benchmark_profiling_path=None, **kwargs): - self.benchmark_profiling_path = benchmark_profiling_path or profiling_path - self.profiling_path = profiling_path - self.html_render = HTMLRender() - self.result = OptimizeResult() - - def optimize(self, **kwargs): - compare_result = ComparisonInterface(self.benchmark_profiling_path, self.profiling_path).compare( - Constant.OVERALL_COMPARE) - - headers = compare_result.get('Model Profiling Time Distribution').get("headers", []) - rows = compare_result.get('Model Profiling Time Distribution').get("rows", []) - - self.make_record() - self.make_render(headers=headers, rows=rows) - return compare_result - - def make_record(self): - pass - - def make_render(self, **kwargs): - headers = kwargs.get("headers") - rows = kwargs.get("rows") - - if not headers or not rows: - logger.info("Empty headers or rows, skip render overall analysis html") - 
self.html_render.render_template(key="overall", - template_dir="templates", - template_name="overall_analysis.html", - headers=kwargs.get("headers"), - rows=kwargs.get("rows")) diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index c74ae05103..8e93dbda77 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -13,27 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -import copy - -import logging -from typing import Dict, List +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.advisor.common import constant as const from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface -from profiler.advisor.utils.utils import get_file_path_from_directory, load_parameter class OverallSummaryAnalyzer(BaseAnalyzer): OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis" advice_map = { - "Computing Time": "if you want more detailed advice please go to att_advisor_*.html", - "Uncovered Communication Time": "if you want more detailed advice please go to att_advisor_*.html", - "Free Time": "if you want more detailed advice please go to att_advisor_*.html" + "Computing Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Uncovered Communication Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Free Time": "if you want more detailed advice 
please go to mstt_advisor_*.html" } time_name_map = { "Computing Time": "computing", @@ -47,45 +41,37 @@ class OverallSummaryAnalyzer(BaseAnalyzer): 'SDMA Time(Num)': 'SDMA Time' } performance_time_dict = { - "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)', - 'Flash Attention Time(Backward)(Num)', 'Other Time'], - "Uncovered Communication Time(Wait Time)": [], - "Free Time": ['SDMA Time(Num)'] + "Computing Time": "computing_time_ms", + " -- Flash Attention": "fa_time_ms", + " -- Conv": "conv_time_ms", + " -- Matmul": "matmul_time_ms", + " -- Vector": "vector_time_ms", + " -- SDMA(Tensor Move)": "tensor_move_time_ms", + " -- Other Cube": "other_cube_time_ms", + "Uncovered Communication Time": "uncovered_communication_time_ms", + " -- Wait": "wait_time_ms", + " -- Transmit": "transmit_time_ms", + "Free Time": "free_time_ms", + " -- SDMA": "sdma_time_ms", + " -- Free": "free_ms", + "E2E Time": "e2e_time_ms" } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): profile_path = get_profile_path(collection_path) super().__init__(profile_path, n_processes, **kwargs) - self.base_collection_path = kwargs.get("base_collection_path", "") - self._has_base_collection = False + self.benchmark_profiling_path = kwargs.get("benchmark_profiling_path", "") + self._has_benchmark_profiling = False self._is_minimal_profiling = False self.cur_data = {} - self.cur_data_table = {} self.cur_bottleneck = {} + self._disaggregate_perf = {} + self._disaggregate_benchmark_perf = {} self.cur_advices = "" - self._headers = [] - self._base_data = [] - self._comparison_data = [] self.html_render = HTMLRender() self.result = OptimizeResult() self.bottleneck_str = "" - self.bottleneck_table = {} - - @staticmethod - def split_duration_and_num(time_value: str) -> tuple: - split_data = time_value.split("s") # time value example: 0.229s(1756) - duration, num = 0.0, None - if len(split_data) >= 2: - try: - num = 
int(split_data[1].strip("()")) - except ValueError: - pass - if len(split_data) >= 1: - try: - duration = float(split_data[0]) - except ValueError: - print(f"[WARNING] Invalid time value: {time_value}.") - return duration, num + self.over_summary_analysis = {} @staticmethod def calculate_ratio(dividend, divisor): @@ -93,131 +79,121 @@ class OverallSummaryAnalyzer(BaseAnalyzer): return float("inf") return dividend / divisor + @staticmethod + def get_time_category_dict(overall_dict: dict): + time_category_dict = { + "Computing Time": round(overall_dict.get('computing_time_ms', 0.0), 3), + "Uncovered Communication Time": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), + "Free Time": round(overall_dict.get('free_time_ms', 0.0), 3) + } + return time_category_dict + def path_check(self): - if self.base_collection_path: - if os.path.exists(self.base_collection_path): - self._has_base_collection = True + if self.benchmark_profiling_path: + if os.path.exists(self.benchmark_profiling_path): + self._has_benchmark_profiling = True else: - print(f"[WARNING] Invalid path which not exists: {self.base_collection_path}.") + print(f"[WARNING] Invalid path which not exists: {self.benchmark_profiling_path}.") return os.path.exists(self.collection_path) def process(self): - base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path - result_data = ComparisonInterface(base_collection_path, self.collection_path).compare(Constant.OVERALL_COMPARE) - for data in result_data.values(): - self._headers = data.get("headers", []) - rows = data.get("rows", []) - if len(rows) == 2: - self._base_data = rows[0] - self._comparison_data = rows[1] - if not self._headers or not self._comparison_data: + self._disaggregate_perf = ComparisonInterface(self.collection_path).disaggregate_perf(Constant.OVERALL_COMPARE) + if not self._disaggregate_perf: return - self._is_minimal_profiling = 'E2E Time(Not minimal profiling)' not in self._headers - 
if self._has_base_collection: - self.cur_data["comparison_result"] = result_data - time_category_dict = {} - for time_category, time_list in self.performance_time_dict.items(): - time_value = self.get_time_value(time_category, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - duration, _ = self.split_duration_and_num(time_value) - time_category = time_category.split("(")[0] - time_category_dict[time_category] = duration - self.get_sub_category_time(time_category, time_list, duration) - self.cur_data["overall_data"] = time_category_dict - - def get_time_value(self, header_name: str, data_list: list): - try: - data_index = self._headers.index(header_name) - except ValueError: - return Constant.INVALID_VALUE - try: - time_value = data_list[data_index] - except IndexError: - return Constant.INVALID_VALUE - return time_value - - def get_sub_category_time(self, category: str, time_list: list, total_duration: float): - sub_time_dict = {} - for time_name in time_list: - time_value = self.get_time_value(time_name, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, "")) - duration, num = self.split_duration_and_num(time_value) - sub_time_dict.setdefault(f"Duration(s)", []).append(duration) - sub_time_dict.setdefault(f"Duration Ratio", []).append( - "{:.2%}".format(self.calculate_ratio(duration, total_duration))) - sub_time_dict.setdefault(f"Kernel Number", []).append(num) - self.cur_data[self.time_name_map.get(category)] = sub_time_dict + self._is_minimal_profiling = self._disaggregate_perf.get("minimal_profiling", False) + self.cur_data["overall_data"] = self.get_time_category_dict(self._disaggregate_perf.get('overall', {})) + if self._has_benchmark_profiling: + self._disaggregate_benchmark_perf = ComparisonInterface( + self.benchmark_profiling_path).disaggregate_perf(Constant.OVERALL_COMPARE) def identify_bottleneck(self): 
overall_data = self.cur_data.get("overall_data") if not overall_data: return e2e_time = '%.3f' % sum([data for data in overall_data.values()]) - overall_bottleneck = f"The Model E2E Time is {e2e_time}s.\n" + overall_bottleneck = f"The Model E2E Time is {e2e_time}ms.\n" comparison_bottleneck = "" for time_type, time_value in overall_data.items(): - # add subtype time bottleneck - self.cur_bottleneck[self.time_name_map.get(time_type)] = f"{time_type} is {time_value}s.\n" # add overall bottleneck - overall_bottleneck += f" -- {time_type} is {time_value}s\n" + overall_bottleneck += f" -- {time_type} is {time_value}ms\n" if time_type == "Free Time" and self._is_minimal_profiling and self.calculate_ratio(time_value, e2e_time) > 0.1: overall_bottleneck += "percentage of free time exceed the threshold 10%." - if not self._has_base_collection: + if not self._has_benchmark_profiling: continue # add comparison bottleneck - time_type_origin = "Uncovered Communication Time(Wait Time)" \ - if time_type == "Uncovered Communication Time" else time_type - base_duration, _ = self.split_duration_and_num(self.get_time_value(time_type_origin, self._base_data)) + base_duration = self.get_time_category_dict( + self._disaggregate_benchmark_perf.get('overall', {}) + ).get(time_type) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" self.cur_bottleneck["overall_data"] = overall_bottleneck if comparison_bottleneck: self.cur_bottleneck["comparison_result"] = comparison_bottleneck + def optimize(self, **kwargs): if self.path_check(): self.process() self.identify_bottleneck() self.format_bottleneck() - self.format_cur_data() + self.format_over_summary_analysis() self.make_record() self.make_render() return self.result def format_bottleneck(self): result = '' - headers = [] - data_list = [] - data = [] - for key, value in self.cur_bottleneck.items(): + 
for _, value in self.cur_bottleneck.items(): if not value: continue - result += f'{key}: {value} \n' - headers.append(key) - data.append(value) - data_list.append(data) + result += f'{value} \n' self.bottleneck_str = result - self.bottleneck_table["headers"] = headers - self.bottleneck_table["data"] = data_list - def format_cur_data(self): - if not self.cur_data: - return - for data_type, data in self.cur_data.items(): - if not data: - continue - if data_type not in list(self.time_name_map.values()): - data_list = list(data.values()) - else: - data_list = [','.join(map(str, value)) for value in data.values()] - headers = list(data.keys()) - data_table = {"headers": headers, "data": [data_list]} - self.cur_data_table[data_type] = copy.deepcopy(data_table) + def format_over_summary_analysis(self): + headers = ['Performance Index', 'Duration(ms)', 'Duration Ratio'] + performance_data = self.get_analysis_data(self._disaggregate_perf) + benchmark_data = self.get_analysis_data(self._disaggregate_benchmark_perf) + if self._has_benchmark_profiling: + headers.append('Diff Duration(ms)') + self.format_analysis_with_benchmark(performance_data, benchmark_data, headers) + else: + self.format_analysis_only(performance_data, headers) + + def get_analysis_data(self, data_dict: dict): + if not data_dict: + return {} + return { + **data_dict.get("overall"), + **data_dict.get("computing_time_disaggregate"), + **data_dict.get("communication_time_disaggregate"), + **data_dict.get("free_time_disaggregate"), + } + def format_analysis_only(self, performance_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, total_duration))) + res.append(row) + self.over_summary_analysis["headers"] = headers + 
self.over_summary_analysis["data"] = res + + def format_analysis_with_benchmark(self, performance_data: dict, benchmark_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, total_duration))) + row.append("{:.3f}".format(duration - benchmark_data.get(time_key, 0.0))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res def make_record(self): """ @@ -232,20 +208,23 @@ class OverallSummaryAnalyzer(BaseAnalyzer): ) self.result.add(OptimizeRecord(optimization_item)) - self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - for data_type, data_dict in self.cur_data_table.items(): - if data_dict: - self.result.add_detail(const.DATA + data_type, data_dict["headers"], data_dict["data"][0]) + self.result.add_detail( + OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + headers=self.over_summary_analysis["headers"] + ) + for data in self.over_summary_analysis["data"]: + self.result.add_detail(OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, detail=data) def make_render(self): if not self.bottleneck_str and not self.cur_advices: return + # 将\n替换为html换行 + bottleneck_str = self.bottleneck_str.replace('\n', '
') result_for_html = { - "Description" : self.bottleneck_str, - "suggestion" : self.cur_advices, - "details" : [self.bottleneck_table] + "Description": bottleneck_str, + "suggestion": self.cur_advices, + "details": [self.over_summary_analysis] } - self.html_render.render_template(key="overall", title=OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, template_dir="templates", @@ -254,9 +233,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): torch_version=self.torch_version, result=result_for_html) + def get_profile_path(collection_path): for root, dirs, files in os.walk(collection_path): for file in files: if file.startswith("profiler_info"): return root - return "" \ No newline at end of file + return "" diff --git a/profiler/advisor/analyzer/schedule/syncbn/__init__.py b/profiler/advisor/analyzer/schedule/syncbn/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py new file mode 100644 index 0000000000..2786a78408 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py @@ -0,0 +1,30 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class SyncBNAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, **kwargs): + super().__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render = HTMLRender() + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + 
@BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + syncbn_checker = SyncBNChecker() + syncbn_checker.check_syncbn(self.timeline_event_dataset) + syncbn_checker.make_record(self.result) + syncbn_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py new file mode 100644 index 0000000000..c0e10448f3 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py @@ -0,0 +1,70 @@ +import logging +import os + +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.cluster_analyse.common_func.file_manager import FileManager + +logger = logging.getLogger() + + +class SyncBNChecker: + + def __init__(self): + self.optimization_item = [] + self.syncbn_issues = False + self.desc = "" + self.suggestions = [] + self.solutions = None + self.max_syncbn_num = None + self._init_rule() + + def check_syncbn(self, event_dataset: TimelineEventDataset): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "sync_batchnorm") or not getattr(event_dataset, "sync_batchnorm"): + logger.debug("Skip syncbn checker, because no syncbn found") + return + + syncbn_num = len(event_dataset.sync_batchnorm) + self.syncbn_issues = syncbn_num >= self.max_syncbn_num + self.desc = self.desc.format(syncbn_num=syncbn_num) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.syncbn_issues: + return + + self.optimization_item.append(OptimizeItem("SyncBatchNorm", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not 
self.syncbn_issues: + return + html_render.render_template(key="schedule", + template_dir="templates", + template_name="sync_batchnorm.html", + desc=self.desc, + solutions=self.solutions) + + def _init_rule(self): + syncbn_rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))), + "rules", + "sync_batchnorm.yaml" + ) + + syncbn_rule = FileManager.read_yaml_file(syncbn_rule_path) + + self.max_syncbn_num = syncbn_rule.get("max_syncbn_num") + self.desc = syncbn_rule.get("problem") + + self.solutions = syncbn_rule.get("solutions") + for solution in self.solutions: + for key, val in solution.items(): + self.suggestions.append(f"{key}, {val.get('desc')}") diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py b/profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py new file mode 100644 index 0000000000..d8906504c3 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py @@ -0,0 +1,32 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class SynchronizeStreamAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, **kwargs): + super().__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render 
= HTMLRender() + + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + @BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + + synchronize_stream_checker = SynchronizeStreamChecker() + synchronize_stream_checker.check_synchronize(self.timeline_event_dataset, kwargs.get("profiling_with_stack")) + synchronize_stream_checker.make_record(self.result) + synchronize_stream_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py new file mode 100644 index 0000000000..83ddd80a0f --- /dev/null +++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py @@ -0,0 +1,89 @@ +import logging + +from profiler.advisor.common import constant as const +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.analyzer.schedule.timeline_base_checker import TimelineBaseChecker +from profiler.advisor.utils.utils import format_timeline_result + +logger = logging.getLogger() + + +class SynchronizeStreamChecker(TimelineBaseChecker): + + def __init__(self): + super().__init__(n_processes=1) + self.optimization_item = [] + self.synchronize_issues = False + self.desc = "" + self.suggestions = [] + self.solutions = [] + self.max_synchronize_num = None + + def check_synchronize(self, event_dataset: TimelineEventDataset, profiling_with_stack=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "synchronize_stream") or not getattr(event_dataset, "synchronize_stream"): + logger.debug("Skip synchronize stream checker, because no synchronize stream found") + 
return + + synchronize_num = event_dataset.synchronize_stream.total_count + slow_synchronize_stream = event_dataset.synchronize_stream.slow_synchronize_stream + total_slow_synchronize_time = sum((float(sync_stream.dur) for sync_stream in slow_synchronize_stream)) + + synchronize_stream_rule = event_dataset.synchronize_stream.rule + self.max_synchronize_num = synchronize_stream_rule.get("max_synchronize_num") + self.synchronize_issues = synchronize_num >= self.max_synchronize_num and len(slow_synchronize_stream) > 0 + if not self.synchronize_issues: + return + + for sync_stream in slow_synchronize_stream: + if sync_stream.name not in self._matched_op_index: + self._matched_op_index[sync_stream.name] = [] + self._matched_op_index[sync_stream.name].append(sync_stream.dataset_index) + self.query_stack(event_dataset, profiling_with_stack) + + self.desc = synchronize_stream_rule.get("problem") + self.desc = self.desc.format(synchronize_num=synchronize_num, + slow_synchronize_num=len(slow_synchronize_stream), + total_synchronize_stream_time=total_slow_synchronize_time) + + solutions = synchronize_stream_rule.get("solutions") + for solution in solutions: + renderer_solution = {} + for key, val in solution.items(): + if self.empty_stacks and self.framework_black_list: + # 如果堆栈源于torch, torch_npu等框架,则不提示修改的代码 + if "modify code" in key.lower(): + continue + self.suggestions.append(f"{key}, {val.get('desc')}") + renderer_solution.update({key: val}) + self.solutions.append(renderer_solution) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.synchronize_issues: + return + + self.optimization_item.append(OptimizeItem("SynchronizeStream", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not self.synchronize_issues: + return + + format_result_for_html = 
format_timeline_result(dict(self.matched_op_stacks), dump_html=True) + html_render.render_template(key="schedule", + template_dir="templates", + template_name="synchronize_stream.html", + desc=self.desc, + solutions=self.solutions, + result=format_result_for_html, + with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, + empty_stacks=self.empty_stacks, + framework_black_list=self.framework_black_list) diff --git a/profiler/advisor/analyzer/schedule/timeline_base_checker.py b/profiler/advisor/analyzer/schedule/timeline_base_checker.py new file mode 100644 index 0000000000..8bc6915026 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/timeline_base_checker.py @@ -0,0 +1,91 @@ +from abc import ABC, abstractmethod +import multiprocessing +import logging + +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult + +logger = logging.getLogger() + + +class TimelineBaseChecker(ABC): + + def __init__(self, n_processes: int = 1): + self.n_processes = n_processes + self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() + self.matched_op_stacks = {} + self.empty_stacks = True + self.framework_black_list = False + + @abstractmethod + def make_record(self, result: OptimizeResult): + pass + + @abstractmethod + def make_render(self, html_render): + pass + + def query_stack(self, event_dataset: TimelineEventDataset = None, profiling_with_stack: str = None): + if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): + return + + event_dataset = event_dataset if not profiling_with_stack else TimelineEventDataset( + collection_path=profiling_with_stack, data={}, _datasets={}, analysis_mode="fusion_ops", + build_dataset=False) + + op_stack_list = 
event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) + for op_stack in op_stack_list: + for op, stack in op_stack.items(): + if op not in self.matched_op_stacks: + self.matched_op_stacks[op] = {} + if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: + continue + if stack not in self.matched_op_stacks[op]: + self.matched_op_stacks[op][stack] = 0 + self.matched_op_stacks[op][stack] += 1 + + def _query_stack_by_matched_index(self, index, event): + stack_record = {} + event = TimelineEvent(event) + + matched_ops = [] + for op, matched_index in self._matched_op_index.items(): + if index not in matched_index: + continue + + matched_ops.append(op) + stack = event.args.get(const.CALL_STACKS) + + if not stack: + logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) + continue + + if not self._is_keep_stack(stack): + self.framework_black_list = True + logger.debug("Drop stack from framework %s", const.FRAMEWORK_STACK_BLACK_LIST) + continue + + if self.empty_stacks and stack: + self.empty_stacks = False + + stack_record[op] = stack + + if matched_ops and not stack_record: + for op in matched_ops: + stack_record[op] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG + + return stack_record + + def _is_keep_stack(self, stack): + # 过滤掉torch, torch_npu, megatron, deepspeed等框架下的堆栈,这些源码基本是不能被修改的 + stack_list = stack.replace("\\r\\n", ";").split(";") + if not stack_list: + return False + + final_called_stack = stack_list[0] + for framework in const.FRAMEWORK_STACK_BLACK_LIST: + if framework in final_called_stack.split("/"): + return False + return True diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 592f9d421e..52e3e07554 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -12,3 +12,7 @@ class SupportedScopes: BLOCK_DIM_ANALYSIS = "block_dim_analysis" OPERATOR_NO_BOUND_ANALYSIS = "operator_no_bound_analysis" TIMELINE_OP_DISPATCH = 
"timeline_op_dispatch" + DATALOADER = "dataloader" + SYNCBN = "syncbn" + SYNCHRONIZE_STREAM = "synchronize_stream" + FREQ_ANALYSIS = "freq_analysis" diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 697430ee6c..87245a43ea 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -26,6 +26,7 @@ ENQUEUE = "enqueue" TORCH_TO_NPU = "torch_to_npu" OP_COMPILE_NAME = "AscendCL@aclopCompileAndExecute" OP_COMPILE_ID = "aclopCompileAndExecute" +SYNC_STREAM = "AscendCL@aclrtSynchronizeStream" MAX_OP_COMPILE_NUM = 20 ACL_TO_NPU = "acl_to_npu" TASK_TYPE = "Task Type" @@ -111,7 +112,7 @@ HTTP_PREFIXES = "http://" HTTPS_PREFIXES = "https://" COMMON_YAML_DIR = "modelarts/solution/ma_advisor_rules/" COMMON_ENDPOINT_SUFFIX = "obs.{}.myhuaweicloud.com" -INNER_ENDPOINT_SUFFIX= "obs.{}.ulanqab.huawei.com" +INNER_ENDPOINT_SUFFIX = "obs.{}.ulanqab.huawei.com" AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" FUSION_PASS_YAML_NAME = "op_fusion_pass.yaml" @@ -138,4 +139,8 @@ CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" CLUSTER_COMM_JSON = "cluster_communication.json" BOTTLENECK = "bottleneck" -DATA = "data" \ No newline at end of file +DATA = "data" + +FRAMEWORK_STACK_BLACK_LIST = ["torch", "torch_npu", "megatron", "deepspeed"] +DISABLE_STREAMING_READER = "DISABLE_STREAMING_READER" +MAX_FILE_SIZE = 10**10 diff --git a/profiler/advisor/common/graph/graph_parser.py b/profiler/advisor/common/graph/graph_parser.py index d4c67fc191..ef4dc4d681 100644 --- a/profiler/advisor/common/graph/graph_parser.py +++ b/profiler/advisor/common/graph/graph_parser.py @@ -1,11 +1,12 @@ import os import logging -import yaml import itertools from collections import deque from dataclasses import dataclass from typing import List, Tuple, Dict +from profiler.cluster_analyse.common_func.file_manager import FileManager + logger = logging.getLogger() @@ -344,9 +345,9 @@ class QueryGraphParser: if not os.path.exists(rule_database): raise 
FileNotFoundError(f"Path {rule_database} does not exist.") - with open(rule_database, 'r') as f: - database = yaml.safe_load(f) - self.parse_yaml(database) + + database = FileManager.read_yaml_file(rule_database) + self.parse_yaml(database) def parse_yaml(self, yaml_database): fusion_strategy_list = yaml_database.get("GraphFusion", []) diff --git a/profiler/advisor/common/timeline/event.py b/profiler/advisor/common/timeline/event.py index 6001ac8872..e24d983a02 100644 --- a/profiler/advisor/common/timeline/event.py +++ b/profiler/advisor/common/timeline/event.py @@ -1,3 +1,4 @@ +from decimal import Decimal class AdvisorDict(dict): def __getstate__(self): return self.__dict__ @@ -18,6 +19,6 @@ class AdvisorDict(dict): class TimelineEvent(AdvisorDict): def ts_include(self, event): - - return float(self.ts) <= float(event.ts) and float(self.ts) + float(self.dur) >= float(event.ts) + float( + return Decimal(self.ts) <= Decimal(event.ts) and Decimal(self.ts) + Decimal(self.dur) >= Decimal( + event.ts) + Decimal( event.dur) \ No newline at end of file diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py index 8637befd1a..64cc849295 100644 --- a/profiler/advisor/common/timeline/fusion_ops_db.py +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -1,13 +1,12 @@ import logging import os -import yaml - from profiler.advisor.common import constant from profiler.advisor.common.timeline.fusion_ops_rule import OpRule from profiler.advisor.common.timeline.fusion_ops_rule_handler import TimelineOpRuleHandler from profiler.advisor.utils.log import get_log_level from profiler.advisor.utils.utils import get_file_path_by_walk +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() logger.setLevel(get_log_level()) @@ -241,8 +240,7 @@ class FusionOperatorDB: logger.debug("The rule yaml file is successfully found in path: %s", os.path.abspath(file_path)) - with 
open(file_path, "rb") as file: - db_content = yaml.safe_load(file) + db_content = FileManager.read_yaml_file(file_path) if not self._is_version_supported(db_content): self.is_empty = True diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini index c56c1dad9f..06e9931601 100644 --- a/profiler/advisor/config/config.ini +++ b/profiler/advisor/config/config.ini @@ -9,6 +9,7 @@ tune_ops_file = operator_tuning_file.cfg [THRESHOLD] # operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker operator_bound_ratio = 0.8 +frequency_threshold = 0.05 [RULE-BUCKET] # region : URL of different regions where can download rule yaml file cn-north-9 = cnnorth9-modelarts-sdk diff --git a/profiler/advisor/config/config.py b/profiler/advisor/config/config.py index 12f4526f8c..4f36dfedfc 100644 --- a/profiler/advisor/config/config.py +++ b/profiler/advisor/config/config.py @@ -97,6 +97,13 @@ class Config: """ return float(self.config.get("THRESHOLD", "operator_bound_ratio")) + @property + def frequency_threshold(self) -> float: + """ + frequency_threshold + """ + return float(self.config.get("THRESHOLD", "frequency_threshold")) + def set_log_path(self, result_file: str, log_path: str = None): self.log_path = log_path if log_path is not None else os.path.join(self._work_path, "log") os.makedirs(self.log_path, exist_ok=True) diff --git a/profiler/advisor/dataset/ai_core_freq/__init__.py b/profiler/advisor/dataset/ai_core_freq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py new file mode 100644 index 0000000000..c99baea656 --- /dev/null +++ b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py @@ -0,0 +1,148 @@ +import json +import logging +import math +import os +import traceback + +import ijson +from tqdm import tqdm + +from 
profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.utils.utils import get_file_path_from_directory +from profiler.advisor.utils.utils import convert_to_float, parse_json_with_generator +from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqDataset: + + def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None: + + self._profiler_step = [] + self._ai_core_ops = [] + self._ai_core_freq: [TimelineEvent] = [] + self._previous_freq_index = -1 + + self.timeline_dir = collection_path + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) + + self.step = kwargs.get("step") + self.op_freq = {} + info = DeviceInfoParser(collection_path) + info.parse_data() + if not Config().get_config("aic_frequency"): + return + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) + + @property + def profiler_step(self): + return self._profiler_step + + @property + def ai_core_freq(self): + return self._ai_core_freq + + @property + def ai_core_ops(self): + return self._ai_core_ops + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', maxsplit=1)[-1] + + def parse(self): + + if len(self.timeline_data_list) == 0: + logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) + return False + + if len(self.timeline_data_list) > 1: + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) + + _ = parse_json_with_generator(sorted(self.timeline_data_list)[0], self._add_event) + + target_ai_core_ops = self._get_target_ai_core_ops() + self._get_op_frequency(target_ai_core_ops) + 
return True + + def _add_profiler_step(self, event): + if event.name.startswith("ProfilerStep"): + self._profiler_step.append(event) + + def _add_ai_core_ops(self, event): + if event.args.get("Task Type") in ["MIX_AIC", "AI_CORE"]: + self._ai_core_ops.append(event) + + def _add_ai_core_freq(self, event): + if event.name == "AI Core Freq": + if self._previous_freq_index != -1: + self._ai_core_freq[self._previous_freq_index]["end"] = event.get("ts", float(math.inf)) + self._previous_freq_index += 1 + event.setdefault("end", float(math.inf)) + self._ai_core_freq.append(event) + + def _add_event(self, index, event): + event["dataset_index"] = index + if not isinstance(event, TimelineEvent): + event = TimelineEvent(event) + + self._add_profiler_step(event) + self._add_ai_core_ops(event) + self._add_ai_core_freq(event) + + return True + + def _get_target_ai_core_ops(self): + target_ai_core_ops = [] + if not self.step or f"ProfilerStep#{self.step}" not in [event.name for event in self._profiler_step]: + target_ai_core_ops = self._ai_core_ops + else: + for step_event in self._profiler_step: + if step_event.name != f"ProfilerStep#{self.step}": + continue + + for ai_core_op_event in self._ai_core_ops: + if step_event.ts_include(ai_core_op_event): + target_ai_core_ops.append(ai_core_op_event) + target_ai_core_ops = sorted(target_ai_core_ops, key=lambda x: float(x.ts)) + return target_ai_core_ops + + def _get_op_frequency(self, ai_core_ops): + ai_core_freq = sorted(self._ai_core_freq, key=lambda x: float(x.ts)) + + op_index, freq_index = 0, 0 + while op_index < len(ai_core_ops) and freq_index < len(ai_core_freq): + op_event = ai_core_ops[op_index] + op_end_time = convert_to_float(op_event.ts) + convert_to_float(op_event.dur) + op_freq_list = [] + while freq_index < len(ai_core_freq): + freq_event = ai_core_freq[freq_index] + if convert_to_float(freq_event.end) < op_end_time: + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + freq_index += 1 + continue + elif 
convert_to_float(freq_event.ts) < op_end_time: + if op_event.name not in self.op_freq: + self.op_freq[op_event.name] = {"count": 0, "dur": 0, "freq_list": []} + self.op_freq[op_event.name]["count"] += 1 + self.op_freq[op_event.name]["dur"] += convert_to_float(op_event.dur) + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + self.op_freq[op_event.name]["freq_list"].append(min(op_freq_list)) + break + else: + break + + op_index += 1 diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index 09fda2d4dc..e1163f1cdd 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -25,9 +25,9 @@ class ClusterDataset(Dataset): """ for file in os.listdir(self.collection_path): if file == 'cluster_analysis_output': - print("[INFO]Cluster has been analyzed " - "because of the existence of cluster analysis output directory.") - print("[INFO]Skip Cluster analyze backend.") + logger.info("[INFO]Cluster has been analyzed " + "because of the existence of cluster analysis output directory.") + logger.info("[INFO]Skip Cluster analyze backend.") return True return False @@ -62,7 +62,7 @@ class ClusterDataset(Dataset): @singleton -class ClusterStepTraceTimeDataSet(ClusterDataset): +class ClusterStepTraceTimeDataset(ClusterDataset): RANK = "rank" def __init__(self, collection_path: str, data: dict, **kwargs): @@ -77,10 +77,10 @@ class ClusterStepTraceTimeDataSet(ClusterDataset): print("捕获到异常:", e) self._step_dict = None return False - self._step_dict = self.formate_data(step_data) + self._step_dict = self.format_data(step_data) return True - def formate_data(self, step_data: list): + def format_data(self, step_data: list): step_dict = defaultdict(lambda: [0, 0, 0]) for step_bean in step_data: if step_bean.type == self.RANK: @@ -94,7 +94,7 @@ class ClusterStepTraceTimeDataSet(ClusterDataset): @singleton -class 
ClusterCommunicationDataSet(ClusterDataset): +class ClusterCommunicationDataset(ClusterDataset): RDMA_TIME_MS = "RDMA time(ms)" RDMA_SIZE_MB = "RDMA size(mb)" SDMA_TIME_MS = "SDMA time(ms)" diff --git a/profiler/advisor/dataset/profiling/device_info.py b/profiler/advisor/dataset/profiling/device_info.py index b58930777f..110cd0794c 100644 --- a/profiler/advisor/dataset/profiling/device_info.py +++ b/profiler/advisor/dataset/profiling/device_info.py @@ -54,6 +54,8 @@ class DeviceInfoParser: config.set_config("device_id", device_info["id"]) if "aiv_num" in device_info: config.set_config("aiv_num", device_info["aiv_num"]) + if "aic_frequency" in device_info: + config.set_config("aic_frequency", device_info["aic_frequency"]) if "ai_core_num" in device_info: config.set_config("ai_core_num", device_info["ai_core_num"]) return True diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 46d4a4fe8b..99a19d3b60 100644 --- a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -10,6 +10,7 @@ from profiler.advisor.common.profiling.tasktime import TaskTime from profiler.advisor.dataset.dataset import Dataset from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser from profiler.advisor.utils.utils import join_prof_path +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -69,8 +70,7 @@ class ProfilingDataset(Dataset): logger.warning("Skip parse profiling dataset, because %s does not exist.", config_path) return [] - with open(config_path, 'r') as f: - patterns = yaml.safe_load(f) + patterns = FileManager.read_yaml_file(config_path) return patterns diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index 94b6fdfef7..1504e65f54 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ 
b/profiler/advisor/dataset/timeline_event_dataset.py @@ -1,14 +1,17 @@ +import json import logging -from typing import List +import os +from typing import List, Any +import traceback import ijson -from profiler.advisor.dataset.dataset import Dataset from tqdm import tqdm +import yaml from profiler.advisor.common import constant as const from profiler.advisor.common.timeline.event import TimelineEvent -from profiler.advisor.utils.utils import get_file_path_from_directory -from profiler.advisor.utils.utils import singleton +from profiler.advisor.utils.utils import get_file_path_from_directory, check_path_valid, singleton +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -38,37 +41,76 @@ class OpCompileCollector: self._total_op_compile_time = 0.0 +class SynchronizeStreamCollector: + + def __init__(self): + self._synchronize_stream_count = 0 + self._slow_synchronize_stream = [] + self.rule = SynchronizeStreamCollector._load_rule() + + @property + def total_count(self): + return self._synchronize_stream_count + + @property + def slow_synchronize_stream(self): + return self._slow_synchronize_stream + + @staticmethod + def _load_rule(): + sync_stream_rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "rules", + "synchronize.yaml") + + sync_stream_rule = FileManager.read_yaml_file(sync_stream_rule_path) + return sync_stream_rule + + def update_sync_stream_count(self): + self._synchronize_stream_count += 1 + + def append_slow_sync_stream(self, event): + if float(event.dur) / 1000 >= self.rule.get("slow_synchronize_threshold", 10): + self._slow_synchronize_stream.append(event) + + def unset(self): + self._synchronize_stream_count = 0 + self._slow_synchronize_stream = [] + + @singleton -class TimelineEventDataset(Dataset): +class TimelineEventDataset: - def __init__(self, collection_path, data: dict, **kwargs) -> None: + def __init__(self, collection_path, data: dict, 
build_dataset=True, **kwargs) -> None: self._ops_with_task_type = {} self._ops_with_stack = {} self._ops_compile = OpCompileCollector() self._torch_to_npu = {} self._acl_to_npu = set() - self._aten: List[str] = [] - self._optimizer: List[str] = [] + self._aten: List[Any] = [] + self._optimizer: List[Any] = [] + self._dataloader: List[Any] = [] + self._sync_batchnorm: List[Any] = [] + self._synchronize_stream = SynchronizeStreamCollector() self.timeline_dir = collection_path - self.timeline_data_list = get_file_path_from_directory(collection_path, lambda file: file.endswith("trace_view.json")) + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) self.dataset_len = None self.analysis_mode = kwargs.get("analysis_mode") self.task_type = kwargs.get("task_type") - self.cann_version = kwargs.get("cann_version") - self.torch_version = kwargs.get("torch_version") - if self.analysis_mode in ["fusion_ops", "all"]: - logger.info("Load fusion operators database for cann version '%s' and torch version '%s'", - self.cann_version, self.torch_version) + if not build_dataset: + return - super().__init__(collection_path, data) + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) if self.analysis_mode in ["op_stack", "all"]: self._task_op_names = list(set([event_key.split("-")[0] for event_key in self._ops_with_task_type.keys()])) self._post_process() - @property def ops_with_stack(self): return self._ops_with_stack @@ -101,36 +143,60 @@ class TimelineEventDataset(Dataset): def aten(self): return self._aten - def _parse(self): + @property + def dataloader(self): + return self._dataloader + + @property + def sync_batchnorm(self): + return self._sync_batchnorm + + @property + def synchronize_stream(self): + return self._synchronize_stream + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', 
maxsplit=1)[-1] + + def parse(self): if len(self.timeline_data_list) == 0: logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) return False if len(self.timeline_data_list) > 1: - logger.warning("Please ensure only one trace_view.json in %s, there will analyze first timeline profiling data.", self.timeline_dir) - self.timeline_data_list = [self.timeline_data_list[0]] + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) result = self.parse_data_with_generator(self._add_event) if not self.dataset_len: self.dataset_len = len(result) - return True def parse_data_with_generator(self, func): result = [] + timeline_data_path = sorted(self.timeline_data_list)[0] + if not check_path_valid(timeline_data_path): + return result + try: - with open(self.timeline_data_list[0], "r") as f: + with open(timeline_data_path, "r") as f: for i, event in tqdm(enumerate(ijson.items(f, "item")), leave=False, ncols=100, desc="Building dataset for timeline analysis", total=self.dataset_len): func_res = func(index=i, event=event) if func_res is not None: result.append(func_res) - except Exception as e: - logger.warning("Error %s while parsing file %s, continue to timeline analysis", e, - self.timeline_data_list[0]) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) return result def _add_ops_with_task_type(self, event): @@ -168,12 +234,40 @@ class TimelineEventDataset(Dataset): "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur })) + def _add_dataloader(self, event: TimelineEvent): + if "dataloader" in event.name.lower(): + self._dataloader.append(TimelineEvent({ + "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur, + "stack": event.args.get("Call stack") + })) + + def _add_sync_batchnorm(self, event: 
TimelineEvent): + if event.name.lower() == "syncbatchnorm": + self._sync_batchnorm.append(TimelineEvent({ + "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur + })) + + def _add_synchronize(self, event: TimelineEvent): + if event.name.startswith(const.SYNC_STREAM): + self._synchronize.append(TimelineEvent({ + "name": event.name, "ts": event.ts, "dur": event.dur + })) + + def _add_specific_operator(self, event): + # for analysis of operator aclOpCompile, enable jit_compILE=False + self._add_op_compile(event) + # for analysis of slow dataloader.__next__ + self._add_dataloader(event) + # for analysis of syncBatchNorm operator, prompt users to replace source code of torch_npu's syncbn + self._add_sync_batchnorm(event) + def _add_event(self, index, event): event["dataset_index"] = index if not isinstance(event, TimelineEvent): event = TimelineEvent(event) - self._add_op_compile(event) + self._add_specific_operator(event) + if self.analysis_mode == "fusion_ops": self._add_event_for_fusion_ops(event) elif self.analysis_mode == "op_stack": @@ -189,6 +283,10 @@ class TimelineEventDataset(Dataset): self._add_aten(event) return + # 检查cann层同步操作,根据时间窗口索引到host侧的aten算子并给出堆栈 + if event.name.startswith(const.SYNC_STREAM): + self._add_aten(event) + if event.name.startswith(f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"): self._add_optimizer(event) return @@ -214,7 +312,18 @@ class TimelineEventDataset(Dataset): # eliminate sub aten operator of the first level aten operator by 'ts' and 'dur', # keep the first level aten operator contiguous formated_atens = [] - for aten_event in sorted(self._aten, key=lambda x: x.get("ts", -1)): - if not formated_atens or not formated_atens[-1].ts_include(aten_event): - formated_atens.append(aten_event) + for event in sorted(self._aten, key=lambda x: x.get("ts", -1)): + if event.name.startswith(const.ATEN): + if not formated_atens or not formated_atens[-1].ts_include(event): + 
formated_atens.append(event) + + elif event.name.startswith(const.SYNC_STREAM): + self._synchronize_stream.update_sync_stream_count() + if formated_atens[-1].ts_include(event): + # 使用aten算子的索引,用于查询堆栈 + event["dataset_index"] = formated_atens[-1].get("dataset_index") + self._synchronize_stream.append_slow_sync_stream(event) + + else: + continue self._aten = formated_atens diff --git a/profiler/advisor/display/html/render.py b/profiler/advisor/display/html/render.py index 8ea7c9e0fc..3984fa8f34 100644 --- a/profiler/advisor/display/html/render.py +++ b/profiler/advisor/display/html/render.py @@ -1,6 +1,7 @@ import os import logging from typing import List, Dict +from collections import defaultdict from jinja2 import Environment, FileSystemLoader from profiler.advisor.common import constant @@ -15,7 +16,7 @@ logger = logging.getLogger() class HTMLRender: def __init__(self): self.html = "" - self.render_list: Dict[str, List] = {} + self.render_list = defaultdict(list) def render_html(self, template_dir: str = "templates", template_name: str = "main.html", template_header=constant.DEFAULT_TEMPLATE_HEADER): @@ -30,8 +31,6 @@ class HTMLRender: autoescape=True) template = env.get_template(template_name) rendered_html = template.render(**kwargs) - if key not in self.render_list: - self.render_list[key] = [] self.render_list[key].append(rendered_html) return rendered_html diff --git a/profiler/advisor/display/html/templates/ai_core_frequency.html b/profiler/advisor/display/html/templates/ai_core_frequency.html new file mode 100644 index 0000000000..d045142037 --- /dev/null +++ b/profiler/advisor/display/html/templates/ai_core_frequency.html @@ -0,0 +1,27 @@ +{% if data|length > 0 %} +
+

AI CORE Frequency Issues

+
+ Issue: {{ desc }} +
+ Suggestion: {{ suggestion }} +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in data %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
+{% endif %} \ No newline at end of file diff --git a/profiler/advisor/display/html/templates/slow_dataloader.html b/profiler/advisor/display/html/templates/slow_dataloader.html new file mode 100644 index 0000000000..bf71a7085b --- /dev/null +++ b/profiler/advisor/display/html/templates/slow_dataloader.html @@ -0,0 +1,18 @@ +
+

Slow Dataloader Issues

+
+ {{ desc }} + + + + + + {% for suggestion in suggestions %} + + + + {% endfor %} +
Suggestions
{{ loop.index }}. {{ suggestion|safe }}
+ +
+
diff --git a/profiler/advisor/display/html/templates/sync_batchnorm.html b/profiler/advisor/display/html/templates/sync_batchnorm.html new file mode 100644 index 0000000000..bb46c1f06d --- /dev/null +++ b/profiler/advisor/display/html/templates/sync_batchnorm.html @@ -0,0 +1,30 @@ + +
+

SyncBatchNorm Issues

+
+ {{ desc }} + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ + More efficient code of syncbn forward as follows: + {% for item in solutions %} + {% for key, value in item.items() %} + {% if 'efficient_code' in value %} +
{{ value.efficient_code|safe }}
+ {% endif %} + {% endfor %} + {% endfor %} + +
+
diff --git a/profiler/advisor/display/html/templates/synchronize_stream.html b/profiler/advisor/display/html/templates/synchronize_stream.html new file mode 100644 index 0000000000..1832f9406d --- /dev/null +++ b/profiler/advisor/display/html/templates/synchronize_stream.html @@ -0,0 +1,57 @@ +
+

Synchronize Stream Issues

+
+ {{ desc }} + + + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ +
+ {% if not empty_stacks %} + Please click on the collapsible box below to view the detailed code stack that triggers synchronizeStream + {% elif not framework_black_list %} + Suggestion: + These operators have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to + Ascend PyTorch Profiler to set + 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. + {% endif %} + + {% for api_name, stacks in result.items() %} + + {% if empty_stacks %} +
{{api_name|safe}}
+ + {% elif stacks | length > 0 %} + +
{{api_name|safe}}
+
+
+ {% for stack in stacks %} +
No.{{loop.index|safe}} code stack, called {{stack[1]|safe}} times
+ + {% endfor %} +
+
+ {% endif %} + + {% endfor %} + +
+ +
+
diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 59bfee77f6..1d3872a178 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -13,23 +13,31 @@ from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyze from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer from profiler.advisor.analyzer.schedule.dispatch.timeline_op_dispatch_analyzer import OpDispatchAnalyzer +from profiler.advisor.analyzer.schedule.syncbn.syncbn_analyzer import SyncBNAnalyzer +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_analyzer import SynchronizeStreamAnalyzer +from profiler.advisor.analyzer.dataloader.dataloader_analyzer import DataloaderAnalyzer +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_analyzer import AICoreFreqAnalyzer + class Interface: supported_analyzer = { "schedule": OrderedDict({ - SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer, - SupportedScopes.TIMELINE_OP_DISPATCH: OpDispatchAnalyzer + SupportedScopes.SYNCBN: SyncBNAnalyzer, + SupportedScopes.TIMELINE_OP_DISPATCH: OpDispatchAnalyzer, + SupportedScopes.SYNCHRONIZE_STREAM: SynchronizeStreamAnalyzer, + SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer }), "computation": OrderedDict({ SupportedScopes.DYNAMIC_SHAPE_ANALYSIS: DynamicShapeAnalyzer, SupportedScopes.AICPU_ANALYSIS: AicpuAnalyzer, SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer, SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, - SupportedScopes.GRAPH: FusionOPAnalyzer + SupportedScopes.GRAPH: FusionOPAnalyzer, + SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer }), "communication": OrderedDict(), "overall": OrderedDict({SupportedScopes.OVER_ALL: OverallSummaryAnalyzer}), - "dataloader": OrderedDict(), + "dataloader": 
OrderedDict({SupportedScopes.DATALOADER: DataloaderAnalyzer}), "cluster": OrderedDict({ SupportedScopes.SLOW_RANK: SlowRankAnalyzer, SupportedScopes.SLOW_LINK: SlowLinkAnalyzer @@ -66,7 +74,7 @@ class Interface: if render_html and result.data: if hasattr(analyzer, "html_render"): analyzer.html_render.render_html() - analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + analyzer.html_render.save_to_file(f'mstt_advisor_{Timer().strftime}.html') return result if not output_dict else dict(result.data) diff --git a/profiler/advisor/result/item.py b/profiler/advisor/result/item.py index fa0ffb5b1c..02db7fdd00 100644 --- a/profiler/advisor/result/item.py +++ b/profiler/advisor/result/item.py @@ -15,7 +15,7 @@ class OptimizeItem: @property def headers(self): - return ["problem", "description", "suggestion"] + return ["category", "description", "suggestion"] class StatisticsItem: diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index c7d7da8663..0d0602ee56 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -93,6 +93,9 @@ class SheetRecoder: if data not in self._sheet_data[sheet_name]["data"]: self._sheet_data[sheet_name]["data"].append(data) + def clear(self): + self._sheet_data.clear() + @singleton class OptimizeResult: @@ -110,12 +113,12 @@ class OptimizeResult: def add_tune_op_list(self, tune_op_list) -> None: """ add tune op name to tune op list - :param tune_op_list: tune op name list to be added + :param tune_op_list: list of operators to be optimized :return: None """ - for op_name in tune_op_list: - if op_name not in self._tune_op_list: - self._tune_op_list.append(op_name) + for operator_name in tune_op_list: + if operator_name not in self._tune_op_list: + self._tune_op_list.append(operator_name) def add(self, overview_item): sheet_name = "problems" @@ -148,6 +151,9 @@ class OptimizeResult: logger.info("Save problems details file to %s", Config().analysis_result_file) 
self._save_op_file_list() + def clear(self) -> None: + self.data.clear() + def _save_op_file_list(self) -> None: if not self._tune_op_list: return @@ -173,9 +179,9 @@ class TerminalResult: def __init__(self): self.width, _ = self.get_terminal_size() if self.width is None: - self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"]) + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"]) else: - self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"], + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"], max_table_width=max(self.width - 20, 180)) self.table.hrules = ALL self.result_list = [] diff --git a/profiler/advisor/rules/dataloader.yaml b/profiler/advisor/rules/dataloader.yaml new file mode 100644 index 0000000000..a84abcfdfe --- /dev/null +++ b/profiler/advisor/rules/dataloader.yaml @@ -0,0 +1,9 @@ +# unit is milliseconds +dataloader_duration_threshold: 10 +problem: "Found slow dataloader, cost {dataloader_duration} milliseconds for one step while profiling, normally less than {dataloader_duration_threshold} milliseconds." +solutions: + - "Please check the disk I/O of your data directory. If you are training model in ModelArts, please move data to '/cache' or mount a more efficient cloud disk for better I/O." + - "Please check if there are any other multiprocess operations in runtime that may have affected the dataloader, such as training process core binding command 'taskset ...' used for launching the training job." + - "Please check the format of your data, avoid file format like tar, tar.gz, zip." + - "Please set 'pin_memory=True' for your dataloader." + - "Try to adjust dataloader parameter 'num_workers'." 
\ No newline at end of file diff --git a/profiler/advisor/rules/sync_batchnorm.yaml b/profiler/advisor/rules/sync_batchnorm.yaml new file mode 100644 index 0000000000..0f702af6ea --- /dev/null +++ b/profiler/advisor/rules/sync_batchnorm.yaml @@ -0,0 +1,41 @@ +problem: "Found {syncbn_num} SyncBatchNorm, which can lead to slow python task dispatch and frequent communication between devices and finally reducing training efficiency." +max_syncbn_num: 20 +solutions: + - enable batchnorm: + desc: "disable SyncBatchNorm by remove the code like 'torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)' if possible." + - enable efficient SyncBatchNorm: + desc: "replace the 'forward' method of python script 'torch_npu/utils/syncbatchnorm.py' in your runtime environment." + efficient_code: | + @staticmethod + def forward(self, input_tensor, weight, bias, running_mean, running_var, eps, momentum, process_group, world_size): + input_tensor = input_tensor.contiguous() + input_shape = input_tensor.shape + input_tensor_ = input_tensor.reshape(input_shape[0], input_shape[1], 1, -1) + sum_val, sum_square_val = torch.batch_norm_reduce(input_tensor_, eps) + + count = torch.full((1,), + input_tensor.numel() // input_tensor.size(1), + dtype=sum_val.dtype, + device=sum_val.device) + + num_channels = input_tensor.shape[1] + combined = torch.cat([sum_val, sum_square_val, count], dim=0) + combined_list = torch.empty((world_size,) + combined.shape, dtype=combined.dtype, device=combined.device) + dist.all_gather_togather(combined_list, combined, process_group, async_op=False) + sum_all, square_sum_all, count_all = torch.split(combined_list, num_channels, dim=1) + size = count_all.view(-1).sum() + if size == 1: + raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size)) + + mean, invstd = torch.batch_norm_gather_stats_update(input_tensor, + sum_all, + square_sum_all, + running_mean, + running_var, + momentum, + eps, + count_all.view(-1)) + 
self.save_for_backward(input_tensor, weight, mean, invstd, count_all.to(torch.int32)) + self.process_group = process_group + out = torch.batch_norm_elemt(input_tensor, weight, bias, mean, invstd, eps) + return out \ No newline at end of file diff --git a/profiler/advisor/rules/synchronize.yaml b/profiler/advisor/rules/synchronize.yaml new file mode 100644 index 0000000000..3bd518d003 --- /dev/null +++ b/profiler/advisor/rules/synchronize.yaml @@ -0,0 +1,8 @@ +problem: "SynchronizeStream will reduce training efficiency. Found {synchronize_num} SynchronizeStream, {slow_synchronize_num} slow SynchronizeStream cost {total_synchronize_stream_time} us." +max_synchronize_num: 20 +slow_synchronize_threshold: 10 #ms +solutions: + - disable ascend launch blocking: + desc: "please check your env 'ASCEND_LAUNCH_BLOCKING', if ASCEND_LAUNCH_BLOCKING=1, please execute 'unset ASCEND_LAUNCH_BLOCKING' and then start your training job." + - modify code to avoid synchronize stream: + desc: "please try to modify your training code to avoid synchronize stream between cpu and npu." 
\ No newline at end of file diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 84419b6708..b373d7bada 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -1,5 +1,6 @@ import inspect import json + import logging import multiprocessing as mp import os @@ -11,7 +12,7 @@ import traceback import types from functools import wraps from typing import Any, Set - +import ijson import click import requests from requests.adapters import HTTPAdapter @@ -43,7 +44,7 @@ class ContextObject(object): def debug_option(f): - return click.option('--debug', '-D', + return click.option('--debug', is_flag=True, expose_value=False, is_eager=True, @@ -550,3 +551,50 @@ def get_file_path_by_walk(root, filename): file_path = os.path.join(root, name) return file_path return file_path + + +def check_path_valid(path): + if os.path.islink(os.path.abspath(path)): + logger.error("fThe path is detected as a soft connection. path:%ss", path) + return False + elif not os.access(path, os.R_OK): + logger.error(f"The file is not readable. path:%ss", path) + return False + elif os.path.getsize(path) > const.MAX_FILE_SIZE: + logger.error(f"The file size exceeds the limit. 
path:%ss, MAX_FILE_SIZE:%ss B",path, const.MAX_FILE_SIZE) + return False + return True + + +def parse_json_with_generator(timeline_data_path, func): + result = [] + if not check_path_valid(timeline_data_path): + return result + try: + with open(timeline_data_path, "r") as f: + if os.getenv(const.DISABLE_STREAMING_READER) == "1": + logger.debug("Disable streaming reader.") + file_parser = json.loads(f.read()) + else: + logger.debug("Enable streaming reader.") + file_parser = ijson.items(f, "item") + + for i, event in tqdm(enumerate(file_parser), + leave=False, ncols=100, desc="Building dataset for timeline analysis"): + func_res = func(index=i, event=event) + if func_res is not None: + result.append(func_res) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) + return result + + +def convert_to_float(num): + try: + return float(num) + except (ValueError, FloatingPointError): + logger.error(f"Can not convert %ss to float", num) + pass + return 0 diff --git a/profiler/cli/__init__.py b/profiler/cli/__init__.py index eab13571c5..e768e4cb86 100644 --- a/profiler/cli/__init__.py +++ b/profiler/cli/__init__.py @@ -1,4 +1,4 @@ from profiler.advisor.config.config import Config from profiler.advisor.utils.utils import Timer -Config().set_log_path(f"att_advisor_{Timer().strftime}.xlsx") +Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 2e173dc870..f400a265b7 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -83,9 +83,6 @@ def analyze_cli(**kwargs): help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_all(**kwargs) -> None: - # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 - if not kwargs.get("benchmark_profiling_path"): - kwargs["benchmark_profiling_path"] = 
kwargs.get("profiling_path") try: _analyze(Interface.all_dimension, **kwargs) except RuntimeError as e: diff --git a/profiler/cluster_analyse/README.md b/profiler/cluster_analyse/README.md index deaebb6cde..fdd43ca965 100644 --- a/profiler/cluster_analyse/README.md +++ b/profiler/cluster_analyse/README.md @@ -86,7 +86,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( ### 交付件 -集群分析工具的交付件通过Ascend Insight工具展示,详见《[MindStudio Ascend Insight用户指南](https://www.hiascend.com/document/detail/zh/mindstudio/70RC1/GUI-baseddevelopmenttool/msascendinsightug/AscendInsight_0002.html)》。 +集群分析工具的交付件通过MindStudio Insight工具展示,详见《[MindStudio Insight用户指南](https://www.hiascend.com/document/detail/zh/mindstudio/70RC2/GUI-baseddevelopmenttool/msascendinsightug/AscendInsight_0002.html)》。 #### cluster_step_trace_time.csv @@ -156,25 +156,25 @@ L列:Preparing,指迭代开始到首个计算或通信算子运行的时间 #### cluster_analysis.db -解析analysis.db或ascend_pytorch_profiler_{rank_id}.db生成的交付件,根据数据解析模式不同而解析不同的数据,可以使用Ascend Insight工具展示。 +解析analysis.db或ascend_pytorch_profiler_{rank_id}.db生成的交付件,根据数据解析模式不同而解析不同的数据,可以使用MindStudio Insight工具展示。 #### stats.ipynb - 数据解析模式为cann_api_sum时生成,保存在cluster_analysis_output/CannApiSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群API耗时信息。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群API耗时信息。 - 数据解析模式为compute_op_sum时生成,保存在cluster_analysis_output/ComputeOpSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群计算算子耗时分析(将集群所有计算算子进行汇总并以图表展示),集群Rank计算算子耗时分析(将每个Rank的计算算子进行各自汇总)。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群计算算子耗时分析(将集群所有计算算子进行汇总并以图表展示),集群Rank计算算子耗时分析(将每个Rank的计算算子进行各自汇总)。 - 数据解析模式为hccl_sum时生成,保存在cluster_analysis_output/HcclSum目录下。 - 可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群通信算子耗时分析(将集群所有通信算子进行汇总并以图表展示),集群Rank通信算子耗时分析(将每个Rank的通信算子进行各自汇总)、Top通信算子信息展示。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群通信算子耗时分析(将集群所有通信算子进行汇总并以图表展示),集群Rank通信算子耗时分析(将每个Rank的通信算子进行各自汇总)、Top通信算子信息展示。 - 数据解析模式为mstx_sum时生成,保存在cluster_analysis_output/MstxSum目录下。 - 
可使用jupyter notebook工具或Ascend Insight工具打开,主要展示集群场景mstx打点信息,分为框架侧、CANN侧和Device侧三部分的打点信息。 + 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群场景mstx打点信息,分为框架侧、CANN侧和Device侧三部分的打点信息。 diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index e7e2d5adca..380192f87b 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -17,6 +17,8 @@ import os import csv import json +import yaml + from common_func.constant import Constant from common_func.path_manager import PathManager @@ -60,6 +62,23 @@ class FileManager: raise RuntimeError(f"Failed to read the file: {base_name}") from e return result_data + @classmethod + def read_yaml_file(cls, file_path: str) -> dict: + PathManager.check_path_readable(file_path) + base_name = os.path.basename(file_path) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return {} + if file_size > Constant.MAX_JSON_SIZE: + raise RuntimeError(f"The file({base_name}) size exceeds the preset max value.") + + try: + with open(file_path, "r") as yaml_file: + result_data = yaml.safe_load(yaml_file) + except Exception as e: + raise RuntimeError(f"Failed to read the file: {base_name}") from e + return result_data + @classmethod def create_csv_file(cls, profiler_path: str, data: list, file_name: str, headers: list = None) -> None: if not data: diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index d81ce05f44..78ea5d8971 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ -213,7 +213,7 @@ activities配置仅采集NPU数据,不配置experimental_config参数以及其 - 当Computing Time耗时增大,分析**算子性能**。 - 当Uncovered Communication Time耗时增大,分析**通信性能**,若通信性能分析没有劣化的通信算子,代表通信与计算的并行度较差,继续进行NPU的集群性能分析。 -- 当Mem Usage增大,分析**算子内存**,若没有明显占用较大的算子,则代表算子内存申请量没有差异,问题在于内存的释放(持有时间过久),可以使用tensorboard或ascend insight继续进行NPU内存的分析。 +- 当Mem 
Usage增大,分析**算子内存**,若没有明显占用较大的算子,则代表算子内存申请量没有差异,问题在于内存的释放(持有时间过久),可以使用TensorBoard或MindStudio insight继续进行NPU内存的分析。 ### 算子性能 diff --git a/profiler/compare_tools/compare_backend/comparator/base_comparator.py b/profiler/compare_tools/compare_backend/comparator/base_comparator.py index 330fb871ee..8012dfae94 100644 --- a/profiler/compare_tools/compare_backend/comparator/base_comparator.py +++ b/profiler/compare_tools/compare_backend/comparator/base_comparator.py @@ -21,4 +21,4 @@ class BaseComparator(ABC): @abstractmethod def _compare(self): - raise NotImplementedError("Function _compare need to be implemented.") + raise NotImplementedError("Function _compare need to be implemented.") \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py b/profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py new file mode 100644 index 0000000000..d438dc41d5 --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/overall_metrics_comparator.py @@ -0,0 +1,50 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from math import isclose + +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class OverallMetricsComparator(BaseComparator): + + def __init__(self, origin_data: dict, bean: any): + super().__init__(origin_data, bean) + self._row_style = [] + + @property + def base_info(self): + return self._origin_data.get(Constant.BASE_DATA) + + @property + def comp_info(self): + return self._origin_data.get(Constant.COMPARISON_DATA) + + def generate_data(self) -> dict: + self._compare() + return {self._sheet_name: { + "headers": self._headers, + "rows": self._rows, + "overhead": self._overhead, + "row_style": self._row_style + }} + + def _compare(self): + if isclose(self.base_info.e2e_time_ms, 0) or isclose(self.comp_info.e2e_time_ms, 0): + return + self._rows.extend(self._bean(self.base_info, self.comp_info).rows) + for row in self._rows: + self._row_style.append(ExcelConfig.ROW_STYLE_MAP.get(row[0], {})) # index 0 for metric index name diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index 122009b904..9c4825c0e8 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -1,8 +1,9 @@ import math +from decimal import Decimal import pandas as pd -from compare_backend.utils.common_func import convert_to_float +from compare_backend.utils.common_func import convert_to_float, convert_to_decimal from compare_backend.utils.constant import Constant @@ -12,8 +13,10 @@ class KernelDetailsBean: self._op_type = "" self._name = "" self._aiv_vec_time = 0.0 + self._aicore_time = 0.0 self._mac_time = 0.0 self._duration = 0.0 + self._start_time = Decimal("0") 
self.init() @property @@ -30,6 +33,12 @@ class KernelDetailsBean: return float("nan") return convert_to_float(self._aiv_vec_time) + @property + def aicore_time(self) -> float: + if self._aicore_time == "" or self._aicore_time == "N/A": + return float("nan") + return convert_to_float(self._aicore_time) + @property def mac_time(self) -> float: if self._mac_time == "" or self._mac_time == "N/A": @@ -40,6 +49,18 @@ class KernelDetailsBean: def duration(self) -> float: return convert_to_float(self._duration) + @property + def dur(self) -> float: + return convert_to_float(self._duration) + + @property + def start_time(self) -> Decimal: + return convert_to_decimal(self._start_time) + + @property + def end_time(self) -> Decimal: + return self.start_time + convert_to_decimal(self._duration) + def is_hide_op_pmu(self): if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys(): return False @@ -66,7 +87,7 @@ class KernelDetailsBean: def is_flash_attention(self): return "flashattention" in self.op_type.lower() - def is_cube(self): + def is_matmul(self): return "matmul" in self.op_type.lower() def is_conv(self): @@ -79,9 +100,17 @@ class KernelDetailsBean: def is_page_attention(self): return "pagedattention" in self.op_type.lower() + def is_trans(self): + return any(trans_mask in self.name.lower() for trans_mask in Constant.KERNEL_TRANS_MASK) + + def is_cube_kernel_cat(self): + return self.mac_time > 0 or self.aicore_time > 0 + def init(self): self._op_type = self._data.get('Type', "") self._name = self._data.get('Name', "") self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") + self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") self._duration = self._data.get('Duration(us)', 0) + self._start_time = Decimal(self._data.get("Start Time(us)", "0")) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py 
b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py index cef6bb0712..245b51d105 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py @@ -114,6 +114,21 @@ class TraceEventBean: def is_torch_op(self, value: bool): self._is_torch_op = value + @classmethod + def is_sdma(cls): + return False + + @classmethod + def is_page_attention(cls): + return False + + @classmethod + def is_trans(cls) -> bool: + """ + 暂时没找到GPU判断trans的方法,暂时都是notrans + """ + return False + def is_m_mode(self) -> bool: return self._ph == "M" @@ -199,11 +214,44 @@ class TraceEventBean: self._name = name def is_conv(self): - return self.name.lower().startswith("aten::conv") + return self.lower_name.startswith("aten::conv") def is_lccl(self): return self.lower_name == "kernel_aivec" + def is_fa_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return any(cube_mask in self.lower_name for cube_mask in Constant.CPU_OP_FA_MASK) + + def is_conv_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return self.lower_name.startswith(Constant.CPU_OP_CONV) + + def is_matmul_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return any(bwd_mask in self.lower_name for bwd_mask in Constant.CPU_OP_MATMUL_MASK) + + def is_bwd_for_cpu_op(self) -> bool: + """ + 这个类在cpu op和gpu中均有用到,这里是在cpu op阶段判断 + """ + return any(bwd_mask in self.lower_name for bwd_mask in Constant.BWD_LIST) + + def is_cpu_cube_op(self) -> bool: + return self.is_matmul_for_cpu_op() or self.is_fa_for_cpu_op() or self.is_conv_for_cpu_op() + + def is_vector(self): + return not any(cube_mask in self.lower_name for cube_mask in Constant.KERNEL_CUBE_MASK) + + def is_cube_kernel_cat(self): + return any(cube_mask in self.lower_name for cube_mask in Constant.KERNEL_CUBE_MASK) + def init(self): if 
isinstance(self._event, dict): self._pid = self._event.get("pid", 0) diff --git a/profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py b/profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py new file mode 100644 index 0000000000..544f8f5234 --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/overall_metrics_bean.py @@ -0,0 +1,255 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from math import isclose + +from compare_backend.compare_bean.profiling_info import ProfilingInfo +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class OverallMetricsBean: + TABLE_NAME = Constant.OVERALL_METRICS_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, base_info: ProfilingInfo, comparison_info: ProfilingInfo): + self._base_data = OverallMetricsInfo(base_info).overall_metrics + self._comparison_data = OverallMetricsInfo(comparison_info).overall_metrics + + @property + def rows(self): + rows_data = [] + for index, base_data in self._base_data.items(): + comparison_data = self._comparison_data.get(index) + row = self.get_row_data(index, base_data, comparison_data) + if row: + rows_data.append(row) + return rows_data + + @staticmethod + def get_row_data(index, base_data, comparison_data): + if isclose(base_data[0], 0) and isclose(comparison_data[0], 0): + return [] + row_data = [index] + row_data.extend(base_data) + row_data.extend(comparison_data) + row_data.extend(calculate_diff_ratio(base_data[0], comparison_data[0])) + return row_data + + +class OverallMetricsInfo: + def __init__(self, profiling_info: ProfilingInfo): + self._profiling_info = profiling_info + self._overall_metrics_data_map = { + ExcelConfig.COMPUTING: self.computing_data, + ExcelConfig.FA: self.fa_data, + ExcelConfig.FA_FWD_CUBE: self.fa_fwd_cube_data, + ExcelConfig.FA_FWD_VECTOR: self.fa_fwd_vector_data, + ExcelConfig.FA_BWD_CUBE: self.fa_bwd_cube_data, + ExcelConfig.FA_BWD_VECTOR: self.fa_bwd_vector_data, + ExcelConfig.CONV: self.conv_data, + ExcelConfig.CONV_FWD_CUBE: self.conv_fwd_cube_data, + ExcelConfig.CONV_FWD_VECTOR: self.conv_fwd_vector_data, + ExcelConfig.CONV_BWD_CUBE: self.conv_bwd_cube_data, + ExcelConfig.CONV_BWD_VECTOR: self.conv_bwd_vector_data, + 
ExcelConfig.MM: self.mm_data, + ExcelConfig.MM_CUBE: self.mm_cube_data, + ExcelConfig.MM_VECTOR: self.mm_vector_data, + ExcelConfig.PA: self.pa_data, + ExcelConfig.VECTOR: self.vector_data, + ExcelConfig.VECTOR_TRANS: self.vector_trans_data, + ExcelConfig.VECTOR_NO_TRANS: self.vector_no_trans_data, + ExcelConfig.CUBE: self.cube_data, + ExcelConfig.SDMA_TM: self.sdma_tm_data, + ExcelConfig.OTHER: self.other_data, + ExcelConfig.COMMUNICATION_TIME: self.communication_data, + ExcelConfig.WAIT: self.wait_data, + ExcelConfig.TRANSMIT: self.transmit_data, + ExcelConfig.FREE_TIME: self.free_time_data, + ExcelConfig.SDMA: self.sdma_data, + ExcelConfig.FREE: self.free_data, + ExcelConfig.E2E_TIME: self.e2e_time_data + } + + @property + def overall_metrics(self): + return self._overall_metrics_data_map + + @property + def computing_data(self): + return [self._profiling_info.compute_time_ms, + self._profiling_info.compute_time_ms / self._profiling_info.e2e_time_ms, + sum((self._profiling_info.fa_total_num, self._profiling_info.conv_total_num, + self._profiling_info.mm_total_num, self._profiling_info.vector_total_num, + self._profiling_info.sdma_num_tensor_move, self._profiling_info.other_cube_num, + self._profiling_info.page_attention_num))] + + @property + def fa_data(self): + return [self._profiling_info.fa_total_time, + self._profiling_info.fa_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_total_num] + + @property + def fa_fwd_cube_data(self): + return [self._profiling_info.fa_time_fwd_cube, + self._profiling_info.fa_time_fwd_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_fwd_cube] + + @property + def fa_fwd_vector_data(self): + return [self._profiling_info.fa_time_fwd_vector, + self._profiling_info.fa_time_fwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_fwd_vector] + + @property + def fa_bwd_cube_data(self): + return [self._profiling_info.fa_time_bwd_cube, + self._profiling_info.fa_time_bwd_cube 
/ self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_bwd_cube] + + @property + def fa_bwd_vector_data(self): + return [self._profiling_info.fa_time_bwd_vector, + self._profiling_info.fa_time_bwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.fa_num_bwd_vector] + + @property + def conv_data(self): + return [self._profiling_info.conv_total_time, + self._profiling_info.conv_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_total_num] + + @property + def conv_fwd_cube_data(self): + return [self._profiling_info.conv_time_fwd_cube, + self._profiling_info.conv_time_fwd_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_fwd_cube] + + @property + def conv_fwd_vector_data(self): + return [self._profiling_info.conv_time_fwd_vector, + self._profiling_info.conv_time_fwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_fwd_vector] + + @property + def conv_bwd_cube_data(self): + return [self._profiling_info.conv_time_bwd_cube, + self._profiling_info.conv_time_bwd_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_bwd_cube] + + @property + def conv_bwd_vector_data(self): + return [self._profiling_info.conv_time_bwd_vector, + self._profiling_info.conv_time_bwd_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.conv_num_bwd_vector] + + @property + def mm_data(self): + return [self._profiling_info.mm_total_time, + self._profiling_info.mm_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.mm_total_num] + + @property + def mm_cube_data(self): + return [self._profiling_info.matmul_time_cube, + self._profiling_info.matmul_time_cube / self._profiling_info.e2e_time_ms, + self._profiling_info.matmul_num_cube] + + @property + def mm_vector_data(self): + return [self._profiling_info.matmul_time_vector, + self._profiling_info.matmul_time_vector / self._profiling_info.e2e_time_ms, + self._profiling_info.matmul_num_vector] + + @property 
+ def pa_data(self): + return [self._profiling_info.page_attention_time, + self._profiling_info.page_attention_time / self._profiling_info.e2e_time_ms, + self._profiling_info.page_attention_num] + + @property + def vector_data(self): + return [self._profiling_info.vector_total_time, + self._profiling_info.vector_total_time / self._profiling_info.e2e_time_ms, + self._profiling_info.vector_total_num] + + @property + def vector_trans_data(self): + return [self._profiling_info.vector_time_trans, + self._profiling_info.vector_time_trans / self._profiling_info.e2e_time_ms, + self._profiling_info.vector_num_trans] + + @property + def vector_no_trans_data(self): + return [self._profiling_info.vector_time_notrans, + self._profiling_info.vector_time_notrans / self._profiling_info.e2e_time_ms, + self._profiling_info.vector_num_notrans] + + @property + def cube_data(self): + return [self._profiling_info.other_cube_time, + self._profiling_info.other_cube_time / self._profiling_info.e2e_time_ms, + self._profiling_info.other_cube_num] + + @property + def sdma_tm_data(self): + return [self._profiling_info.sdma_time_tensor_move, + self._profiling_info.sdma_time_tensor_move / self._profiling_info.e2e_time_ms, + self._profiling_info.sdma_num_tensor_move] + + @property + def other_data(self): + other_time = max((0, + self._profiling_info.compute_time_ms - self._profiling_info.fa_total_time - + self._profiling_info.conv_total_time - self._profiling_info.mm_total_time - + self._profiling_info.vector_total_time - self._profiling_info.sdma_time_tensor_move - + self._profiling_info.other_cube_time - self._profiling_info.page_attention_time)) + return [other_time, other_time / self._profiling_info.e2e_time_ms, "/"] + + @property + def communication_data(self): + return [self._profiling_info.communication_not_overlapped_ms, + self._profiling_info.communication_not_overlapped_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def wait_data(self): + return 
[self._profiling_info.wait_time_ms, + self._profiling_info.wait_time_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def transmit_data(self): + return [self._profiling_info.transmit_time_ms, + self._profiling_info.transmit_time_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def free_time_data(self): + return [self._profiling_info.free_time_ms, + self._profiling_info.free_time_ms / self._profiling_info.e2e_time_ms, "/"] + + @property + def sdma_data(self): + return [self._profiling_info.sdma_time_stream, + self._profiling_info.sdma_time_stream / self._profiling_info.e2e_time_ms, "/"] + + @property + def free_data(self): + free = self._profiling_info.free_time_ms - self._profiling_info.sdma_time_stream + return [free, free / self._profiling_info.e2e_time_ms, "/"] + + @property + def e2e_time_data(self): + return [self._profiling_info.e2e_time_ms, 1, "/"] diff --git a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py index e5d9bf26e9..e0a80a4d30 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py +++ b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py @@ -37,6 +37,105 @@ class ProfilingInfo: self.hide_op_details = False self.is_level0 = False + # 性能拆解新指标 + self.fa_time_fwd_cube = 0.0 + self.fa_num_fwd_cube = 0 + self.fa_time_bwd_cube = 0.0 + self.fa_num_bwd_cube = 0 + self.fa_time_fwd_vector = 0.0 + self.fa_num_fwd_vector = 0 + self.fa_time_bwd_vector = 0.0 + self.fa_num_bwd_vector = 0 + + self.conv_time_fwd_cube = 0.0 + self.conv_num_fwd_cube = 0 + self.conv_time_bwd_cube = 0.0 + self.conv_num_bwd_cube = 0 + self.conv_time_fwd_vector = 0.0 + self.conv_num_fwd_vector = 0 + self.conv_time_bwd_vector = 0.0 + self.conv_num_bwd_vector = 0 + + self.matmul_time_cube = 0.0 + self.matmul_num_cube = 0 + self.matmul_time_vector = 0.0 + self.matmul_num_vector = 0 + + self.page_attention_time = 0.0 + 
self.page_attention_num = 0 + + self.vector_time_trans = 0.0 + self.vector_num_trans = 0 + self.vector_time_notrans = 0.0 + self.vector_num_notrans = 0 + + self.sdma_time_tensor_move = 0.0 + self.sdma_num_tensor_move = 0 + self.sdma_time_stream = 0.0 + self.sdma_num_stream = 0 + + self.other_cube_time = 0.0 + self.other_cube_num = 0 + + @property + def e2e_time_ms(self): + return self.e2e_time * 10 ** 3 + + @property + def compute_time_ms(self): + return self.compute_time * 10 ** 3 + + @property + def free_time_ms(self): + return self.scheduling_time * 10 ** 3 + + @property + def communication_not_overlapped_ms(self): + return self.communication_not_overlapped * 10 ** 3 + + @property + def wait_time_ms(self): + return self.wait_time * 10 ** 3 + + @property + def transmit_time_ms(self): + return (self.communication_not_overlapped - self.wait_time) * 10 ** 3 + + @property + def fa_total_time(self): + return sum((self.fa_time_fwd_cube, self.fa_time_fwd_vector, self.fa_time_bwd_cube, self.fa_time_bwd_vector)) + + @property + def fa_total_num(self): + return sum((self.fa_num_fwd_cube, self.fa_num_fwd_vector, self.fa_num_bwd_cube, self.fa_num_bwd_vector)) + + @property + def conv_total_time(self): + return sum( + (self.conv_time_fwd_cube, self.conv_time_fwd_vector, self.conv_time_bwd_cube, + self.conv_time_bwd_vector)) + + @property + def conv_total_num(self): + return sum((self.conv_num_fwd_cube, self.conv_num_fwd_vector, self.conv_num_bwd_cube, + self.conv_num_bwd_vector)) + + @property + def mm_total_time(self): + return sum((self.matmul_time_cube, self.matmul_time_vector)) + + @property + def mm_total_num(self): + return sum((self.matmul_num_cube, self.matmul_num_vector)) + + @property + def vector_total_time(self): + return sum((self.vector_time_trans, self.vector_time_notrans)) + + @property + def vector_total_num(self): + return sum((self.vector_num_trans, self.vector_num_notrans)) + def trans_time_to_s(self): self.cube_time = self.cube_time / 10 ** 6 
self.other_time = self.other_time / 10 ** 6 @@ -54,6 +153,24 @@ class ProfilingInfo: self.conv_time_fwd = self.conv_time_fwd / 10 ** 6 self.conv_time_bwd = self.conv_time_bwd / 10 ** 6 + # 新指标单位为ms + self.fa_time_fwd_cube /= 10 ** 3 + self.fa_time_bwd_cube /= 10 ** 3 + self.fa_time_fwd_vector /= 10 ** 3 + self.fa_time_bwd_vector /= 10 ** 3 + self.conv_time_fwd_cube /= 10 ** 3 + self.conv_time_bwd_cube /= 10 ** 3 + self.conv_time_fwd_vector /= 10 ** 3 + self.conv_time_bwd_vector /= 10 ** 3 + self.matmul_time_cube /= 10 ** 3 + self.matmul_time_vector /= 10 ** 3 + self.vector_time_trans /= 10 ** 3 + self.vector_time_notrans /= 10 ** 3 + self.sdma_time_tensor_move /= 10 ** 3 + self.sdma_time_stream /= 10 ** 3 + self.page_attention_time /= 10 ** 3 + self.other_cube_time /= 10 ** 3 + def calculate_other_time(self): self.other_time = max( [0, self.compute_time - self.cube_time - self.fa_time_fwd - self.fa_time_bwd - @@ -64,8 +181,7 @@ class ProfilingInfo: - self.conv_time_fwd - self.conv_time_bwd def calculate_schedule_time(self): - self.scheduling_time = (self.e2e_time - self.compute_time - self.lccl_time \ - - self.communication_not_overlapped) + self.scheduling_time = (self.e2e_time - self.compute_time - self.lccl_time - self.communication_not_overlapped) def update_fa_fwd_info(self, time: float): self.fa_time_fwd += time @@ -75,6 +191,30 @@ class ProfilingInfo: self.fa_time_bwd += time self.fa_num_bwd += 1 + def update_fa_fwd_cube_info(self, time: float): + self.fa_time_fwd_cube += time + self.fa_num_fwd_cube += 1 + + def update_fa_bwd_cube_info(self, time: float): + self.fa_time_bwd_cube += time + self.fa_num_bwd_cube += 1 + + def update_fa_fwd_vector_info(self, time: float): + self.fa_time_fwd_vector += time + self.fa_num_fwd_vector += 1 + + def update_fa_bwd_vector_info(self, time: float): + self.fa_time_bwd_vector += time + self.fa_num_bwd_vector += 1 + + def update_sdma_tensor_move_info(self, time: float): + self.sdma_time_tensor_move += time + 
self.sdma_num_tensor_move += 1 + + def update_sdma_stream_info(self, time: float, num: int = 1): + self.sdma_time_stream += time + self.sdma_num_stream += num + def update_pa_info(self, time: float): self.pa_time += time self.pa_num += 1 @@ -91,6 +231,42 @@ class ProfilingInfo: self.conv_time_bwd += time self.conv_num_bwd += 1 + def update_conv_bwd_cube_info(self, time: float): + self.conv_time_bwd_cube += time + self.conv_num_bwd_cube += 1 + + def update_conv_fwd_cube_info(self, time: float): + self.conv_time_fwd_cube += time + self.conv_num_fwd_cube += 1 + + def update_conv_bwd_vector_info(self, time: float): + self.conv_time_bwd_vector += time + self.conv_num_bwd_vector += 1 + + def update_conv_fwd_vector_info(self, time: float): + self.conv_time_fwd_vector += time + self.conv_num_fwd_vector += 1 + + def update_matmul_cube_info(self, time: float): + self.matmul_time_cube += time + self.matmul_num_cube += 1 + + def update_matmul_vector_info(self, time: float): + self.matmul_time_vector += time + self.matmul_num_vector += 1 + + def update_page_attention_info(self, time: float): + self.page_attention_time += time + self.page_attention_num += 1 + + def update_vector_trans_info(self, time: float): + self.vector_time_trans += time + self.vector_num_trans += 1 + + def update_vector_notrans_info(self, time: float): + self.vector_time_notrans += time + self.vector_num_notrans += 1 + def update_sdma_info(self, time: float, num: int = 1): self.sdma_time += time self.sdma_num += num @@ -103,6 +279,10 @@ class ProfilingInfo: self.vec_time += time self.vec_num += 1 + def update_other_cube_info(self, time: float): + self.other_cube_time += time + self.other_cube_num += 1 + def set_compute_time(self, time: float): self.compute_time = time diff --git a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py index c89e845193..7bac2b0335 100644 --- 
a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py +++ b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py @@ -31,4 +31,30 @@ class OverallPerfInterface: def _generate_result(self): overall_data = self._profiling_data.overall_metrics - self._result_data = getattr(overall_data, "__dict__", {}) + + self._result_data = { + "profiling_type": overall_data.profiling_type, + "minimal_profiling": overall_data.minimal_profiling, + "overall": {"e2e_time_ms": overall_data.e2e_time_ms, + "computing_time_ms": overall_data.compute_time_ms, + "uncovered_communication_time_ms": overall_data.communication_not_overlapped_ms, + "free_time_ms": overall_data.free_time_ms}, + "computing_time_disaggregate": {"fa_time_ms": overall_data.fa_total_time, + "conv_time_ms": overall_data.conv_total_time, + "matmul_time_ms": overall_data.mm_total_time, + "page_attention_time_ms": overall_data.page_attention_time, + "vector_time_ms": overall_data.vector_total_time, + "tensor_move_time_ms": overall_data.sdma_time_tensor_move, + "other_cube_time_ms": overall_data.other_cube_time}, + "computing_num_disaggregate": {"fa_num": overall_data.fa_total_num, + "conv_num": overall_data.conv_total_num, + "matmul_num": overall_data.mm_total_num, + "page_attention_num": overall_data.page_attention_num, + "vector_num": overall_data.vector_total_num, + "tensor_move_num": overall_data.sdma_num_tensor_move, + "other_cube_num": overall_data.other_cube_num}, + "communication_time_disaggregate": {"wait_time_ms": overall_data.wait_time_ms, + "transmit_time_ms": overall_data.transmit_time_ms}, + "free_time_disaggregate": {"sdma_time_ms": overall_data.sdma_time_stream, + "free_ms": overall_data.free_time_ms - overall_data.sdma_time_stream} + } diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 5b93d888a4..292e312815 100644 --- 
a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -8,6 +8,7 @@ from compare_backend.comparator.module_comparetor import ModuleComparator from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator from compare_backend.comparator.operator_comparator import OperatorComparator from compare_backend.comparator.operator_statistic_comparator import OperatorStatisticComparator +from compare_backend.comparator.overall_metrics_comparator import OverallMetricsComparator from compare_backend.compare_bean.communication_bean import CommunicationBean from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean from compare_backend.compare_bean.memory_statistic_bean import MemoryStatisticBean @@ -15,6 +16,7 @@ from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean +from compare_backend.compare_bean.overall_metrics_bean import OverallMetricsBean from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare from compare_backend.generator.base_generator import BaseGenerator @@ -41,8 +43,16 @@ class DetailPerformanceGenerator(BaseGenerator): self._args.enable_communication_compare: print("[INFO] Start to compare performance detail data, please wait.") comparator_list = self._create_comparator() - for comparator in comparator_list: - self._result_data.update(comparator.generate_data()) + else: + comparator_list = [] + if self._args.enable_profiling_compare: + overall_data = {Constant.BASE_DATA: 
self._profiling_data_dict.get(Constant.BASE_DATA).overall_metrics, + Constant.COMPARISON_DATA: self._profiling_data_dict.get( + Constant.COMPARISON_DATA).overall_metrics} + # overall 数据在最前面 + comparator_list.insert(0, OverallMetricsComparator(overall_data, OverallMetricsBean)) + for comparator in comparator_list: + self._result_data.update(comparator.generate_data()) def generate_view(self): if not self._result_data: @@ -57,6 +67,7 @@ class DetailPerformanceGenerator(BaseGenerator): comparator_list = [] op_compare_result = [] + if self._args.enable_operator_compare: module_compare_result = self.match_nn_module() if self._profiling_data_dict.get( Constant.BASE_DATA).python_function_data and self._profiling_data_dict.get( diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 2127ff5e75..6ee07a6569 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABC from decimal import Decimal from compare_backend.compare_bean.origin_data_bean.compare_event import KernelEvent, MemoryEvent +from compare_backend.compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean from compare_backend.compare_bean.profiling_info import ProfilingInfo from compare_backend.utils.constant import Constant @@ -66,6 +67,18 @@ class BaseProfilingParser(ABC): self._comm_list = [] self._read_trace_event() self._cur_func_index = 0 + self._categorize_performance_index = 0 + self._cpu_cube_op = None + self._bwd_tid = None + + @property + def cpu_cube_op(self): + if self._cpu_cube_op is not None: + return self._cpu_cube_op + cpu_cube_op = [op for op in self._result_data.torch_op_data if 
op.is_cpu_cube_op()] + cpu_cube_op.sort(key=lambda x: x.start_time) + self._cpu_cube_op = cpu_cube_op + return self._cpu_cube_op @abstractmethod def _update_memory_list(self): @@ -102,6 +115,90 @@ class BaseProfilingParser(ABC): self._check_result_data() return self._result_data + def categorize_computing_performance_data(self, tk: (TraceEventBean, KernelDetailsBean), flow_dict_new: dict): + if tk.is_page_attention(): + self._result_data.overall_metrics.update_page_attention_info(tk.dur) + return + if tk.is_sdma(): + self._result_data.overall_metrics.update_sdma_tensor_move_info(tk.dur) + return + flow_start_time = flow_dict_new.get(tk.start_time) + if flow_start_time: + while self._categorize_performance_index < len(self.cpu_cube_op): + cur_op = self.cpu_cube_op[self._categorize_performance_index] + if cur_op.end_time < flow_start_time: + self._categorize_performance_index += 1 + continue + if cur_op.start_time <= flow_start_time: + self._categorize_cube_performance_data(cur_op, tk) + return + break + if self._profiling_type == Constant.NPU: + # 缺失torch至npu连线的算子,判断fa/conv/matmul使用kernel_details.csv的op_type字段 + if tk.is_flash_attention(): + if tk.is_fa_bwd(): + self._result_data.overall_metrics.update_fa_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_fa_fwd_cube_info(tk.dur) + return + elif tk.is_conv(): + if tk.is_conv_bwd(): + self._result_data.overall_metrics.update_conv_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_conv_fwd_cube_info(tk.dur) + return + elif tk.is_matmul(): + self._result_data.overall_metrics.update_matmul_cube_info(tk.dur) + return + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_other_cube_info(tk.dur) + elif tk.is_trans(): + self._result_data.overall_metrics.update_vector_trans_info(tk.dur) + else: + self._result_data.overall_metrics.update_vector_notrans_info(tk.dur) + + def _categorize_cube_performance_data(self, cpu_op: TraceEventBean, tk: (TraceEventBean, 
KernelDetailsBean)): + """ + 判断fa/conv/matmul/vector使用cpu_op + """ + if cpu_op.is_fa_for_cpu_op(): + if self._is_backward(cpu_op): + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_fa_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_fa_bwd_vector_info(tk.dur) + else: + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_fa_fwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_fa_fwd_vector_info(tk.dur) + elif cpu_op.is_conv_for_cpu_op(): + if self._is_backward(cpu_op): + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_conv_bwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_conv_bwd_vector_info(tk.dur) + else: + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_conv_fwd_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_conv_fwd_vector_info(tk.dur) + elif cpu_op.is_matmul_for_cpu_op(): # matmul + if tk.is_cube_kernel_cat(): + self._result_data.overall_metrics.update_matmul_cube_info(tk.dur) + else: + self._result_data.overall_metrics.update_matmul_vector_info(tk.dur) + + def _is_backward(self, event: TraceEventBean): + return event.tid == self._bwd_tid or event.is_bwd_for_cpu_op() + + def _get_flow_time_dict(self): + return { + flow_event["end"].start_time: flow_event["start"].start_time + for flow_event in self._flow_dict.values() + if flow_event.get("end") and flow_event.get("start") + } + def _dispatch_events(self): if not self._dispatch_func: return diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index c4089aec9b..7b1ae1a5a1 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -20,6 +20,7 @@ class GPUProfilingParser(BaseProfilingParser): 
self._compute_stream_id = self._infer_compute_stream_id() self._marks = defaultdict(int) self._aten_index = 0 + self._find_bwd_tid() @classmethod def __is_flash_attention(cls, name: str): @@ -30,10 +31,7 @@ class GPUProfilingParser(BaseProfilingParser): @classmethod def __is_sdma_time(cls, name: str): - for mark in cls.SDMA_MARK_LIST: - if mark in name.lower(): - return True - return False + return any(mask in name.lower() for mask in cls.SDMA_MARK_LIST) def _update_memory_list(self): if not self._enable_memory_compare: @@ -68,19 +66,15 @@ class GPUProfilingParser(BaseProfilingParser): min_ts = sys.float_info.max max_ts = sys.float_info.min self._trace_events.sort(key=lambda x: x.start_time) - aten_events = list(filter(lambda x: x.name.startswith("aten::"), self._trace_events)) - flow_dict_new = {} - for flow_event in self._flow_dict.values(): - start_event = flow_event.get("start") - end_event = flow_event.get("end") - if start_event and end_event: - flow_dict_new[end_event.start_time] = start_event.start_time + aten_events = [event for event in self._trace_events if event.name.startswith("aten::")] + flow_dict_new = self._get_flow_time_dict() for event in self._trace_events: if event.stream: min_ts = min(event.start_time, min_ts) max_ts = max(event.end_time, max_ts) if event.stream == self._compute_stream_id and self.__is_sdma_time(event.name): self._result_data.overall_metrics.update_sdma_info(event.dur) + self._result_data.overall_metrics.update_sdma_stream_info(event.dur) continue if not event.is_kernel_cat(): continue @@ -88,6 +82,7 @@ class GPUProfilingParser(BaseProfilingParser): if event.is_nccl_name(): continue self.__add_compute_time(event, aten_events, flow_dict_new) + self.categorize_computing_performance_data(event, flow_dict_new) self._aten_events = None self._result_data.overall_metrics.set_e2e_time(float(max_ts - min_ts)) self.__add_compute_and_overlap_time() @@ -162,7 +157,7 @@ class GPUProfilingParser(BaseProfilingParser): def 
_get_dispatch_func(self): func_set = set() - if self._enable_memory_compare or self._enable_operator_compare: + if self._enable_memory_compare or self._enable_operator_compare or self._enable_profiling_compare: func_set.add(self._picking_torch_op_event) if self._enable_communication_compare: func_set.add(self._picking_kernel_event) @@ -174,6 +169,8 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_flow_event) if self._enable_memory_compare or self._enable_profiling_compare: func_set.add(self._picking_memory_event) + if self._enable_profiling_compare: + func_set.add(self._picking_flow_event) return list(func_set) def _infer_compute_stream_id(self): @@ -187,3 +184,9 @@ class GPUProfilingParser(BaseProfilingParser): raise RuntimeError('[ERROR] The profiling data does not contain kernel running data.') counter = Counter(kernel_stream_ids) return counter.most_common(1)[0][0] + + def _find_bwd_tid(self): + for event in self._trace_events: + if event.is_fwdbwd() and event.is_flow_end(): + self._bwd_tid = event.tid + break diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 70ce44b44e..457a3b6be5 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -36,7 +36,7 @@ class NPUProfilingParser(BaseProfilingParser): def _get_dispatch_func(self): func_list = set() - if self._enable_memory_compare or self._enable_operator_compare: + if self._enable_memory_compare or self._enable_operator_compare or self._enable_profiling_compare: func_list.add(self._picking_torch_op_event) if self._enable_operator_compare or self._args.max_kernel_num: func_list.add(self._picking_kernel_event) @@ -52,6 +52,7 @@ class NPUProfilingParser(BaseProfilingParser): func_list.add(self._picking_overlap_analysis_data) 
func_list.add(self._picking_kernel_event) func_list.add(self._picking_hccl_event) + func_list.add(self._picking_flow_event) return list(func_list) def _update_memory_list(self): @@ -205,6 +206,8 @@ class NPUProfilingParser(BaseProfilingParser): def _filter_meta_id(self): for event in self._trace_events: + if event.is_fwdbwd() and event.is_flow_end(): + self._bwd_tid = event.tid if not event.is_process_meta(): continue if event.is_hccl_process_name(): @@ -244,17 +247,7 @@ class NPUProfilingParser(BaseProfilingParser): self._result_data.overall_metrics.update_lccl_info(event.dur) def __parse_kernel_csv(self): - try: - kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) - except Exception: - print('[WARNING] Npu kernel details csv file is not available.') - return - if not kernel_details or kernel_details[0].is_hide_op_pmu(): - self._result_data.overall_metrics.hide_op_details = True - return - for kernel in kernel_details: - if kernel.is_invalid(): - continue + def __screen_data(kernel: KernelDetailsBean): if kernel.is_flash_attention(): if kernel.is_fa_bwd(): self._result_data.overall_metrics.update_fa_bwd_info(kernel.duration) @@ -265,7 +258,7 @@ class NPUProfilingParser(BaseProfilingParser): self._result_data.overall_metrics.update_conv_bwd_info(kernel.duration) else: self._result_data.overall_metrics.update_conv_fwd_info(kernel.duration) - elif kernel.is_cube(): + elif kernel.is_matmul(): self._result_data.overall_metrics.update_cube_info(kernel.duration) elif kernel.is_sdma(): self._result_data.overall_metrics.update_sdma_info(kernel.duration) @@ -276,6 +269,22 @@ class NPUProfilingParser(BaseProfilingParser): else: self._result_data.overall_metrics.update_cube_info(kernel.duration) + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except Exception: + print('[WARNING] Npu kernel details csv file is not available.') + return + if not kernel_details or 
kernel_details[0].is_hide_op_pmu(): + self._result_data.overall_metrics.hide_op_details = True + return + flow_dict_new = self._get_flow_time_dict() + kernel_details.sort(key=lambda x: x.start_time) + for kernel in kernel_details: + if kernel.is_invalid(): + continue + __screen_data(kernel) + self.categorize_computing_performance_data(kernel, flow_dict_new) + def __parse_mem_csv(self): try: memory_record = FileReader.read_csv_file(self._memory_record_path, MemoryRecordBean) @@ -321,3 +330,4 @@ class NPUProfilingParser(BaseProfilingParser): for stream in compute_stream: dur_list = sdma_dict.get(stream, []) self._result_data.overall_metrics.update_sdma_info(sum(dur_list), len(dur_list)) + self._result_data.overall_metrics.update_sdma_stream_info(sum(dur_list), len(dur_list)) diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py index e2854692ae..e200258802 100644 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ b/profiler/compare_tools/compare_backend/utils/constant.py @@ -11,6 +11,7 @@ class Constant(object): GREEN_COLOR = "00FF00" RED_COLOR = "FF0000" BLUE_COLOR = "00BFFF" + LIGHT_BLUE_COLOR = "87CEFA" US_TO_MS = 1000 KB_TO_MB = 1024 INVALID_VALUE = -1 @@ -55,6 +56,7 @@ class Constant(object): PERFORMANCE_TABLE = "Model Profiling Time Distribution" MODULE_TABLE = "ModuleCompare" MODULE_TOP_TABLE = "ModuleCompareStatistic" + OVERALL_METRICS_TABLE = "OverallMetrics" # memory SIZE = "Size(KB)" @@ -78,3 +80,9 @@ class Constant(object): OVERALL_COMPARE = "overall" BWD_LIST = ["bwd", "backward", "back"] + + CPU_OP_FA_MASK = ("flash_attention", "fusion_attention", "flashattn", "xformers_flash", "efficient_attention") + CPU_OP_CONV = "aten::conv" + CPU_OP_MATMUL_MASK = ("aten::addmm", "aten::bmm", "aten::mm", "aten::matmul") + KERNEL_CUBE_MASK = ("gemm", "conv", "cutlass", "wgrad") + KERNEL_TRANS_MASK = ("cast", "transdata", "transpose") diff --git 
a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py index 306abcdfec..ae808863e7 100644 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ b/profiler/compare_tools/compare_backend/utils/excel_config.py @@ -18,6 +18,8 @@ class CellFormatType: 'valign': 'vcenter', 'bold': True, 'border': True} # 绿色背景,加粗 YELLOW_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.YELLOW_COLOR, 'align': 'left', 'valign': 'vcenter', 'bold': True, 'border': True} # 黄色背景,加粗 + BLUE_NORMAL = {'fg_color': Constant.BLUE_COLOR} # 蓝色背景,主要用于行样式 + LIGHT_BLUE_NORMAL = {'fg_color': Constant.LIGHT_BLUE_COLOR} # 淡蓝色背景,主要用于行样式 class ExcelConfig(object): @@ -65,6 +67,10 @@ class ExcelConfig(object): MODULE_LEVEL = "Module Level" BASE_CALL_STACK = "Base Call Stack" COMPARISON_CALL_STACK = "Comparison Call Stack" + INDEX = "Index" + DURATION = "Duration(ms)" + DURATION_RATIO = "Duration Ratio" + DIFF_DUR_MS = "Diff Duration(ms)" HEADERS = { Constant.OPERATOR_TABLE: [ @@ -176,10 +182,81 @@ class ExcelConfig(object): {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 15}, {"name": BASE_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30}, {"name": COMPARISON_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30} + ], + Constant.OVERALL_METRICS_TABLE: [ + {"name": INDEX, "type": CellFormatType.DEFAULT, "width": 40}, + {"name": DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DURATION_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20}, + {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DURATION_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20}, + {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": DIFF_DUR_MS, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, + {"name": DIFF_RATIO, "type": 
CellFormatType.DEFAULT_RATIO, "width": 10}, + ] } OVERHEAD = {Constant.OPERATOR_TABLE: ["B1:F1", "G1:K1"], Constant.MEMORY_TABLE: ["B1:F1", "G1:K1"], Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], - Constant.MODULE_TABLE: ["E1:H1", "I1:L1"]} + Constant.MODULE_TABLE: ["E1:H1", "I1:L1"], + Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"]} + + # overall metrics index + # computing time + COMPUTING = "Computing Time" + + FA = "\tFlash Attention" + FA_FWD_CUBE = "\t\tFlash Attention (Forward) (Cube)" + FA_FWD_VECTOR = "\t\tFlash Attention (Forward) (Vector)" + FA_BWD_CUBE = "\t\tFlash Attention (Backward) (Cube)" + FA_BWD_VECTOR = "\t\tFlash Attention (Backward) (Vector)" + + CONV = "\tConv" + CONV_FWD_CUBE = "\t\tConv (Forward) (Cube)" + CONV_FWD_VECTOR = "\t\tConv (Forward) (Vector)" + CONV_BWD_CUBE = "\t\tConv (Backward) (Cube)" + CONV_BWD_VECTOR = "\t\tConv (Backward) (Vector)" + + MM = "\tMatmul" + MM_CUBE = "\t\tMatmul (Cube)" + MM_VECTOR = "\t\tMatmul (Vector)" + + PA = "\tPage Attention" + + VECTOR = "\tVector" + VECTOR_TRANS = "\t\tVector (Trans)" + VECTOR_NO_TRANS = "\t\tVector (No Trans)" + + CUBE = "\tCube" + SDMA_TM = "\tSDMA (Tensor Move)" + OTHER = "\tOther" + + # communication time + COMMUNICATION_TIME = "Uncovered Communication Time" + WAIT = "\tWait" + TRANSMIT = "\tTransmit" + + # free time + FREE_TIME = "Free Time" + SDMA = "\tSDMA" + FREE = "\tFree" + + # e2e time + E2E_TIME = "E2E Time" + + ROW_STYLE_MAP = { + COMPUTING: CellFormatType.BLUE_NORMAL, + COMMUNICATION_TIME: CellFormatType.BLUE_NORMAL, + FREE_TIME: CellFormatType.BLUE_NORMAL, + E2E_TIME: CellFormatType.BLUE_NORMAL, + FA: CellFormatType.LIGHT_BLUE_NORMAL, + CONV: CellFormatType.LIGHT_BLUE_NORMAL, + MM: CellFormatType.LIGHT_BLUE_NORMAL, + PA: CellFormatType.LIGHT_BLUE_NORMAL, + VECTOR: CellFormatType.LIGHT_BLUE_NORMAL, + CUBE: 
CellFormatType.LIGHT_BLUE_NORMAL, + SDMA_TM: CellFormatType.LIGHT_BLUE_NORMAL, + OTHER: CellFormatType.LIGHT_BLUE_NORMAL + } diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py index 7a33168da3..dffb7549fc 100644 --- a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py @@ -20,7 +20,10 @@ class WorkSheetCreator: return self._work_sheet = self._work_book.add_worksheet(self._sheet_name) self._write_headers() - self._write_data() + if "row_style" in self._data: + self._write_data_with_row_style() + else: + self._write_data() def _write_headers(self): base_header_format = self._work_book.add_format(CellFormatType.GREEN_BOLD) @@ -43,7 +46,7 @@ class WorkSheetCreator: col_id = self._col_ids[index] self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) - self._field_format[index] = self._work_book.add_format(header.get("type")) + self._field_format[index] = header.get("type") if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): self._diff_ratio_index = index self._row_id += 1 @@ -52,7 +55,27 @@ class WorkSheetCreator: red_ratio_format = self._work_book.add_format(CellFormatType.RED_RATIO) for data in self._data.get("rows"): for index, cell_data in enumerate(data): - cell_format = self._field_format.get(index) + cell_format = self._work_book.add_format(self._field_format.get(index)) + if index == self._diff_ratio_index and cell_data and cell_data > 1: + cell_format = red_ratio_format + cell_data = "INF" if cell_data == float('inf') else cell_data + self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) + self._row_id += 1 + + def _write_data_with_row_style(self): + """ + 带行样式及缩进的sheet + """ + red_ratio_format = 
self._work_book.add_format(CellFormatType.RED_RATIO) + rows = self._data.get("rows") + row_style = self._data.get("row_style") # 行样式 + + for data, row_style in zip(rows, row_style): + for index, cell_data in enumerate(data): + cell_style = {**self._field_format.get(index), **row_style} + if index == 0: # 0 for Index field + cell_style["indent"] = cell_data.count("\t") + cell_format = self._work_book.add_format(cell_style) if index == self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data diff --git a/profiler/test/run_ut.py b/profiler/test/run_ut.py index ee27abaace..6ab208dc29 100644 --- a/profiler/test/run_ut.py +++ b/profiler/test/run_ut.py @@ -13,6 +13,7 @@ def set_python_path(): os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "advisor") advisor_backend_root = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "advisor", "advisor_backend") + profiler_parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Update PYTHONPATH python_path = os.environ.get("PYTHONPATH", "") if not python_path: @@ -22,6 +23,7 @@ def set_python_path(): python_path += f":{compare_tools_root}" python_path += f":{advisor_root}" python_path += f":{advisor_backend_root}" + python_path += f":{profiler_parent_dir}" os.environ["PYTHONPATH"] = python_path diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py new file mode 100644 index 0000000000..3d8e22b7c6 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py @@ -0,0 +1,65 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.dataloader.dataloader_checker import DataloaderChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from 
profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestDataloaderChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "dataloader.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=True) + + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_no_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_found_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") + 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertTrue(checker.dataloader_issues) + + desc = self.rule.get("problem").format(dataloader_duration=dataloader_duration / 1000, + dataloader_duration_threshold=self.rule.get( + "dataloader_duration_threshold")) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, dur, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["dataloader"] = [TimelineEvent({"dur": dur, "name": "dataloader"})] + return dataset + + +if __name__ == '__main__': + tester = TestDataloaderChecker() + tester.test_no_dataloader() + 
tester.test_no_slow_dataloader() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py new file mode 100644 index 0000000000..d1df810a0e --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py @@ -0,0 +1,62 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSyncBNChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "sync_batchnorm.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_syncbn(self): + dataset = self._get_mock_dataset(1, is_empty_dataset=True) + + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_syncbn_not_reach_threshold(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") - 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_found_slow_dataloader(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") + 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertTrue(checker.syncbn_issues) + + desc = self.rule.get("problem").format(syncbn_num=self.rule.get("max_syncbn_num") + 1) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, syncbn_num, is_empty_dataset=False): + 
dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["sync_batchnorm"] = [] + for _ in range(syncbn_num): + dataset["sync_batchnorm"].append(TimelineEvent({"name": "SyncBatchNorm"})) + return dataset + + +if __name__ == '__main__': + tester = TestSyncBNChecker() + tester.test_no_syncbn() + tester.test_syncbn_not_reach_threshold() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py new file mode 100644 index 0000000000..e87efcc216 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py @@ -0,0 +1,55 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSynchronizeChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "synchronize.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_synchronize_stream(self): + dataset = self._get_mock_dataset(1, [], is_empty_dataset=True) + + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def test_max_synchronize_stream(self): + dataset = self._get_mock_dataset(100, [], is_empty_dataset=False) + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertTrue(checker.synchronize_issues) + + def _get_mock_dataset(self, 
total_count, slow_synchronize_stream, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["synchronize_stream"] = TimelineEvent( + dict( + total_count=total_count, + slow_synchronize_stream=slow_synchronize_stream, + rule=dict(max_synchronize_num=10, problem="", solutions=[]), + ) + ) + return dataset + + +if __name__ == '__main__': + tester = TestSynchronizeChecker() + tester.test_no_synchronize_stream() + tester.test_max_synchronize_stream() diff --git a/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py new file mode 100644 index 0000000000..51acf3b8e2 --- /dev/null +++ b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py @@ -0,0 +1,145 @@ +import os +import shutil +import stat +import json + +import unittest +from profiler.advisor.interface.interface import Interface +from profiler.advisor.common.analyzer_scopes import SupportedScopes + + +class TestFrequencyAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT" + DEVICE_DIR = "./ascend_pt/PROF_000001_20240415174447255_OAANHDOMMJMHGIFC/device_0" + interface = None + err_interface = None + + def tearDown(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + self.clear_htmls() + + def setUp(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.TMP_DIR): + os.makedirs(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.OUTPUT_DIR): + os.makedirs(TestFrequencyAdvice.OUTPUT_DIR) + if not os.path.exists(TestFrequencyAdvice.DEVICE_DIR): + os.makedirs(TestFrequencyAdvice.DEVICE_DIR) + self.clear_htmls() + + @classmethod + def clear_htmls(cls): + current_path = os.path.dirname(os.path.abspath(__file__)) + for filename in os.listdir(current_path): + # 检查文件是否以“att”开头 
+ if filename.startswith("att"): + # 构建文件的完整路径 + file_path = os.path.join(current_path, filename) + # 删除文件 + os.remove(file_path) + + @classmethod + def get_basic_trace_view(cls): + # Python pid + py_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 1, "args": {"name": "Python"}} + # ascend pid + ascend_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 4, "args": {"name": "Ascend Hardware"}} + # ascend pid + cann_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 5, "args": {"name": "CANN"}} + # ascend hardware ops + ah_event1 = {"ph": "X", "name": "Slice1", "ts": "1699529623106750", "dur": 100, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + ah_event2 = {"ph": "X", "name": "Slice2", "ts": "1699529623106888", "dur": 80, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + # flow event + flow_event_s = {"ph": "s", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "200", "args": {}} + flow_event_e = {"ph": "f", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "1699529623106750", "args": {}} + return [py_pid_data, ascend_pid_data, cann_pid_data, ah_event1, ah_event2, flow_event_s, flow_event_e] + + @classmethod + def create_info_json(cls): + info = { + "DeviceInfo": [ + { + "id": 7, + "env_type": 3, + "ctrl_cpu_id": "ARMv8_Cortex_A55", + "ctrl_cpu_core_num": 1, + "ctrl_cpu_endian_little": 1, + "ts_cpu_core_num": 0, + "ai_cpu_core_num": 6, + "ai_core_num": 25, + "ai_cpu_core_id": 2, + "ai_core_id": 0, + "aicpu_occupy_bitmap": 252, + "ctrl_cpu": "0", + "ai_cpu": "2,3,4,5,6", + "aiv_num": 50, + "hwts_frequency": "49.999001", + "aic_frequency": "1850", + "aiv_frequency": "1850" + } + ] + } + with os.fdopen(os.open(f"{TestFrequencyAdvice.DEVICE_DIR}/info.json.0", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(info)) + + @classmethod + def create_non_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"ph": "X", "cat": 
"python_function", "name": "aten::slice", "ts": "200", "dur": 100, "tid": 2, + "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + py_event2 = {"ph": "X", "cat": "python_function", "name": "slice", "ts": "199", "dur": 200, "tid": 2, "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + # with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/msprof_20240415174455.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + @classmethod + def create_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"name": "AI Core Freq", "ts": "1699529623106000.061", "pid": 682820896, "tid": 0, + "args": {"MHz": 1850}, "ph": "C"} + py_event2 = {"name": "AI Core Freq", "ts": "1699529623106770.541", "pid": 682820896, "tid": 0, + "args": {"MHz": 800}, "ph": "C"} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + def test_run_should_run_success_when_msprof_not_contain_frequency_data(self): + self.create_info_json() + self.create_non_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(0, len(result.data.get("AI Core Frequency", []))) + result.clear() + + def test_run_should_run_success_when_trace_view_contain_frequency_data(self): + self.create_info_json() + self.create_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = 
SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(2, len(result.data.get("AI Core Frequency", dict).get("data", []))) + result.clear() diff --git a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py index 7abf8da647..869ee85570 100644 --- a/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py +++ b/profiler/test/ut/compare_tools/compare_bean/origin_data_bean/test_kernel_details_bean.py @@ -47,5 +47,5 @@ class TestKernelDetailsBean(unittest.TestCase): self.assertFalse(self.kernel_bean2.is_flash_attention()) def test_is_cube(self): - self.assertTrue(self.kernel_bean2.is_cube()) - self.assertFalse(self.kernel_bean3.is_cube()) + self.assertTrue(self.kernel_bean2.is_matmul()) + self.assertFalse(self.kernel_bean3.is_matmul()) diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py index 0446872150..d7cb3d0588 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_gpu_profiling_parser.py @@ -68,6 +68,7 @@ class TestGpuProfilingParser(unittest.TestCase): patch("compare_backend.profiling_parser.gpu_profiling_parser.GPUProfilingParser.__init__", return_value=None): res = GPUProfilingParser({}, {}) + res._profiling_type = "GPU" res._trace_events = [TraceEventBean(event) for event in self.trace_events] res._result_data = ProfilingResult("GPU") res._compute_stream_id = 3 -- Gitee From b9a14f9a4bc7f3940fdd7082ca219c34f1cf51e1 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 25 Jul 2024 15:35:01 +0800 Subject: [PATCH 072/106] input,output kwargs bugfix --- .../msprobe/pytorch/compare/acc_compare.py | 
9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index e214910566..cf7ad912e0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -764,9 +764,14 @@ def op_item_parse(item, op_name, index, item_list=None, top_bool=True): else: full_op_name = op_name else: - full_op_name = op_name + '.' + str(index) + full_op_name = op_name + Const.SEP + str(index) if isinstance(item, dict): - if 'dtype' in item: + if 'type' not in item: + for kwarg in item: + kwarg_parsed_list = op_item_parse(item[kwarg], op_name + Const.SEP + kwarg, None) + item_list += kwarg_parsed_list + kwarg_parsed_list.clear() + elif 'dtype' in item: parsed_item = item parsed_item['full_op_name'] = full_op_name item_list.append(parsed_item) -- Gitee From 1ad93cf00a318b5b8daf3e99db1dbbac2c3fa26b Mon Sep 17 00:00:00 2001 From: stby <295887736@qq.com> Date: Wed, 24 Jul 2024 16:46:12 +0800 Subject: [PATCH 073/106] feature: api compare and kernel compare --- .../comparator/api_compare_comparator.py | 32 ++++++++ .../comparator/kernel_compare_comparator.py | 35 +++++++++ .../compare_bean/api_compare_bean.py | 47 ++++++++++++ .../compare_bean/kernel_compare_bean.py | 75 +++++++++++++++++++ .../origin_data_bean/kernel_details_bean.py | 6 ++ .../data_prepare/operator_data_prepare.py | 17 +++++ .../generator/detail_performance_generator.py | 22 +++++- .../profiling_parser/base_profiling_parser.py | 16 ++++ .../profiling_parser/gpu_profiling_parser.py | 3 + .../profiling_parser/npu_profiling_parser.py | 24 ++++++ .../compare_backend/utils/args_manager.py | 13 +++- .../compare_backend/utils/compare_args.py | 4 + .../compare_backend/utils/constant.py | 7 +- .../compare_backend/utils/excel_config.py | 48 +++++++++++- .../compare_backend/utils/torch_op_node.py | 8 ++ 
.../compare_backend/utils/tree_builder.py | 3 +- .../view/work_sheet_creator.py | 12 +-- profiler/compare_tools/performance_compare.py | 2 + .../test_base_profiling_parser.py | 5 ++ 19 files changed, 366 insertions(+), 13 deletions(-) create mode 100644 profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py diff --git a/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py new file mode 100644 index 0000000000..bc5810068b --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py @@ -0,0 +1,32 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class ApiCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_api_by_name(cls, ops: list): + ops_dict = {} + for op in ops: + ops_dict.setdefault(op.name, []).append(op) + return ops_dict + + def _compare(self): + if not self._origin_data: + return + base_ops = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_ops = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_ops or not comparison_ops: + return + base_aggregated_ops = self._aggregated_api_by_name(base_ops) + comparison_aggregated_ops = self._aggregated_api_by_name(comparison_ops) + for op_name, base_data in base_aggregated_ops.items(): + comparsion_data = comparison_aggregated_ops.pop(op_name, []) + self._rows.append(self._bean(op_name, base_data, 
comparsion_data).row) + if comparison_aggregated_ops: + for op_name, comparison_data in comparison_aggregated_ops.items(): + self._rows.append(self._bean(op_name, [], comparison_data).row) + update_order_id(self._rows) diff --git a/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py new file mode 100644 index 0000000000..13c0f776af --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py @@ -0,0 +1,35 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class KernelCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_kernel_by_type_and_shape(cls, kernels: dict): + result_dict = {} + for type_shape, shape_values in kernels.items(): + for shape, kernel_data in shape_values.items(): + kernel = [single[1] for single in kernel_data] + result_list = [type_shape, shape, sum(kernel), len(kernel), max(kernel), min(kernel)] + result_dict.setdefault(f"{type_shape}{shape}", []).extend(result_list) + return result_dict + + def _compare(self): + if not self._origin_data: + return + base_kernels = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_kernels = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_kernels or not comparison_kernels: + return + base_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(base_kernels) + comparison_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(comparison_kernels) + for type_shape, base_data in base_aggregated_kernels.items(): + comparsion_data = comparison_aggregated_kernels.pop(type_shape, []) + self._rows.append(self._bean(base_data, comparsion_data).row) + if comparison_aggregated_kernels: + for _, 
comparison_data in comparison_aggregated_kernels.items(): + self._rows.append(self._bean([], comparison_data).row) + update_order_id(self._rows) \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py new file mode 100644 index 0000000000..55e08a86be --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py @@ -0,0 +1,47 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class ApiInfo: + def __init__(self, op_name: str, data_list: list): + self._data_list = data_list + self.name = op_name + self.total_dur = 0.0 + self.self_time = 0.0 + self.avg_dur = 0.0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for data in self._data_list: + self.total_dur += data.api_dur + self.self_time += data.api_self_time + self.total_dur /= 1000.0 + self.self_time /= 1000.0 + self.avg_dur = self.total_dur / self.number if self.number else 0.0 + + +class ApiCompareBean: + TABLE_NAME = Constant.API_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, op_name: str, base_api: list, comparison_api: list): + self._name = op_name + self._base_api = ApiInfo(op_name, base_api) + self._comparison_api = ApiInfo(op_name, comparison_api) + + @property + def row(self): + row = [None, self._name, + self._base_api.total_dur, self._base_api.self_time, self._base_api.avg_dur, self._base_api.number, + self._comparison_api.total_dur, self._comparison_api.self_time, + self._comparison_api.avg_dur, self._comparison_api.number] + diff_fields = [calculate_diff_ratio(self._base_api.total_dur, self._comparison_api.total_dur)[1], + calculate_diff_ratio(self._base_api.self_time, self._comparison_api.self_time)[1], 
+ calculate_diff_ratio(self._base_api.avg_dur, self._comparison_api.avg_dur)[1], + calculate_diff_ratio(self._base_api.number, self._comparison_api.number)[1]] + row.extend(diff_fields) + return row + diff --git a/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py new file mode 100644 index 0000000000..df96addc4f --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py @@ -0,0 +1,75 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class KernelCompareInfo: + def __init__(self, data_list: list): + self._kernel_type = None + self._input_shapes = None + self._total_dur = None + self._number = None + self._max_dur = None + self._min_dur = None + if not data_list: + return + self._kernel_type = data_list[0] + self._input_shapes = data_list[1] + self._total_dur = data_list[2] + self._number = data_list[3] + self._max_dur = data_list[4] + self._min_dur = data_list[5] + + @property + def kernel_type(self): + return self._kernel_type + + @property + def input_shapes(self): + return self._input_shapes + + @property + def total_dur(self): + return self._total_dur if self._total_dur else 0.0 + + @property + def number(self): + return self._number + + @property + def max_dur(self): + return self._max_dur + + @property + def min_dur(self): + return self._min_dur + + @property + def avg_dur(self): + return self._total_dur / self._number if self._total_dur and self._number else 0.0 + + +class KernelCompareBean: + TABLE_NAME = Constant.KERNEL_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, base_kernel: list, comparison_kernel: list): + self._base_kernel = KernelCompareInfo(base_kernel) + self._comparison_kernel = 
KernelCompareInfo(comparison_kernel) + self._kernel_type = self._base_kernel.kernel_type \ + if self._base_kernel.kernel_type else self._comparison_kernel.kernel_type + self._input_shapes = self._base_kernel.input_shapes \ + if self._base_kernel.input_shapes else self._comparison_kernel.input_shapes + + @property + def row(self): + row = [None, self._kernel_type, self._input_shapes, + self._base_kernel.total_dur, self._base_kernel.avg_dur, + self._base_kernel.max_dur, self._base_kernel.min_dur, self._base_kernel.number, + self._comparison_kernel.total_dur, self._comparison_kernel.avg_dur, + self._comparison_kernel.max_dur, self._comparison_kernel.min_dur, self._comparison_kernel.number] + diff_fields = [calculate_diff_ratio(self._base_kernel.total_dur, self._comparison_kernel.total_dur)[1], + calculate_diff_ratio(self._base_kernel.avg_dur, self._comparison_kernel.avg_dur)[1]] + row.extend(diff_fields) + return row \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index 9c4825c0e8..c15396e9c5 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -12,6 +12,7 @@ class KernelDetailsBean: self._data = data self._op_type = "" self._name = "" + self._input_shapes = "" self._aiv_vec_time = 0.0 self._aicore_time = 0.0 self._mac_time = 0.0 @@ -27,6 +28,10 @@ class KernelDetailsBean: def name(self) -> str: return self._name + @property + def input_shapes(self) -> str: + return self._input_shapes + @property def aiv_vec_time(self) -> float: if self._aiv_vec_time == "" or self._aiv_vec_time == "N/A": @@ -109,6 +114,7 @@ class KernelDetailsBean: def init(self): self._op_type = self._data.get('Type', "") self._name = self._data.get('Name', "") + 
self._input_shapes = self._data.get('Input Shapes', "") self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py index fdce23c6ab..3106527c41 100644 --- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py +++ b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py @@ -17,3 +17,20 @@ class OperatorDataPrepare: else: result_data.append(level1_node) return result_data + + def get_all_layer_ops(self) -> any: + root_node = TreeBuilder.build_tree(self.profiling_data.torch_op_data, [], []) + level1_child_nodes = root_node.child_nodes + node_queue = [] + result_data = [] + for level1_node in level1_child_nodes: + if level1_node.is_step_profiler(): + node_queue.extend(level1_node.child_nodes) + else: + node_queue.append(level1_node) + while len(node_queue) > 0: + node = node_queue.pop(0) + result_data.append(node) + if node.child_nodes: + node_queue.extend(node.child_nodes) + return result_data \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 292e312815..6fe693fb06 100644 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -8,6 +8,8 @@ from compare_backend.comparator.module_comparetor import ModuleComparator from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator from compare_backend.comparator.operator_comparator import OperatorComparator from compare_backend.comparator.operator_statistic_comparator import 
OperatorStatisticComparator +from compare_backend.comparator.api_compare_comparator import ApiCompareComparator +from compare_backend.comparator.kernel_compare_comparator import KernelCompareComparator from compare_backend.comparator.overall_metrics_comparator import OverallMetricsComparator from compare_backend.compare_bean.communication_bean import CommunicationBean from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean @@ -16,6 +18,8 @@ from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean +from compare_backend.compare_bean.api_compare_bean import ApiCompareBean +from compare_backend.compare_bean.kernel_compare_bean import KernelCompareBean from compare_backend.compare_bean.overall_metrics_bean import OverallMetricsBean from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare @@ -39,8 +43,10 @@ class DetailPerformanceGenerator(BaseGenerator): return op_compare_result def compare(self): - if self._args.enable_operator_compare or self._args.enable_memory_compare or \ - self._args.enable_communication_compare: + enable_compare = [self._args.enable_operator_compare, self._args.enable_memory_compare, + self._args.enable_communication_compare, self._args.enable_api_compare, + self._args.enable_kernel_compare] + if any(enable_compare): print("[INFO] Start to compare performance detail data, please wait.") comparator_list = self._create_comparator() else: @@ -97,6 +103,18 @@ class DetailPerformanceGenerator(BaseGenerator): comparator_list.append(OperatorStatisticComparator(op_compare_result, MemoryStatisticBean)) if not self._args.disable_details: 
comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) + if self._args.enable_api_compare: + api_compare_result = { + Constant.BASE_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.BASE_DATA)).get_all_layer_ops(), + Constant.COMPARISON_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.COMPARISON_DATA)).get_all_layer_ops()} + comparator_list.append(ApiCompareComparator(api_compare_result, ApiCompareBean)) + if self._args.enable_kernel_compare: + kernel_compare_result = { + Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).kernel_details, + Constant.COMPARISON_DATA: self._profiling_data_dict.get(Constant.COMPARISON_DATA).kernel_details} + comparator_list.append(KernelCompareComparator(kernel_compare_result, KernelCompareBean)) return comparator_list def match_torch_op(self) -> list: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 6ee07a6569..625eee7c60 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -20,6 +20,7 @@ class ProfilingResult: self.overall_metrics = ProfilingInfo(profiling_type) self.python_function_data = [] self.fwdbwd_dict = {} + self.kernel_details = {} def update_torch_op_data(self, event: TraceEventBean): event.is_torch_op = True @@ -43,6 +44,9 @@ class ProfilingResult: def update_comm_task_data(self, comm_name: str, task_event: TraceEventBean): self.communication_dict.setdefault(comm_name, {}).setdefault("comm_task", {}).setdefault( task_event.name, []).append(task_event.dur) + + def update_kernel_details(self, kernels: dict): + self.kernel_details = kernels class BaseProfilingParser(ABC): @@ -57,6 +61,8 @@ class BaseProfilingParser(ABC): self._enable_operator_compare = args.enable_operator_compare 
self._enable_memory_compare = args.enable_memory_compare self._enable_communication_compare = args.enable_communication_compare + self._enable_api_compare = args.enable_api_compare + self._enable_kernel_compare = args.enable_kernel_compare self._dispatch_func = self._get_dispatch_func() self._result_data = ProfilingResult(self._profiling_type) self._memory_events = [] @@ -80,6 +86,10 @@ class BaseProfilingParser(ABC): self._cpu_cube_op = cpu_cube_op return self._cpu_cube_op + @abstractmethod + def _update_kernel_details(self): + raise NotImplementedError("Function _update_kernel_details need to be implemented.") + @abstractmethod def _update_memory_list(self): raise NotImplementedError("Function _update_memory_list need to be implemented.") @@ -112,6 +122,8 @@ class BaseProfilingParser(ABC): self._update_memory_list() if self._enable_profiling_compare: self._update_overall_metrics() + if self._enable_kernel_compare: + self._update_kernel_details() self._check_result_data() return self._result_data @@ -300,6 +312,10 @@ class BaseProfilingParser(ABC): print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") if self._enable_communication_compare and not self._result_data.communication_dict: print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") + if self._enable_api_compare and not self._result_data.torch_op_data: + print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") + if self._enable_kernel_compare and not self._result_data.kernel_details: + print(f"[WARNING] Can't find any kernel details in the file: {self._profiling_path}") def _read_trace_event(self): try: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index 7b1ae1a5a1..ea732a60e9 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ 
b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -33,6 +33,9 @@ class GPUProfilingParser(BaseProfilingParser): def __is_sdma_time(cls, name: str): return any(mask in name.lower() for mask in cls.SDMA_MARK_LIST) + def _update_kernel_details(self): + pass + def _update_memory_list(self): if not self._enable_memory_compare: return diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 457a3b6be5..cb25c252c6 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -53,8 +53,32 @@ class NPUProfilingParser(BaseProfilingParser): func_list.add(self._picking_kernel_event) func_list.add(self._picking_hccl_event) func_list.add(self._picking_flow_event) + if self._enable_api_compare: + func_list.add(self._picking_torch_op_event) return list(func_list) + def _update_kernel_details(self): + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except FileNotFoundError: + print("[WARNING] The file kernel_details.csv does not exist.") + except Exception: + print("[ERROR] Failed to read kernel_details.csv.") + return + if not kernel_details: + return + kernels_dict = {} + for kernel in kernel_details: + if kernel.is_invalid(): + continue + input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A' + kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append( + [kernel.name, kernel.duration]) + if len(kernels_dict) == 1: + print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") + return + self._result_data.update_kernel_details(kernels_dict) + def _update_memory_list(self): try: memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) diff --git 
a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py index 4b5947fa7b..ab9fb43a96 100644 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ b/profiler/compare_tools/compare_backend/utils/args_manager.py @@ -69,6 +69,14 @@ class ArgsManager: def enable_communication_compare(self): return self._args.enable_communication_compare + @property + def enable_api_compare(self): + return self._args.enable_api_compare + + @property + def enable_kernel_compare(self): + return self._args.enable_kernel_compare + @classmethod def check_profiling_path(cls, file_path: str): PathManager.input_path_common_check(file_path) @@ -119,11 +127,14 @@ class ArgsManager: raise RuntimeError(msg) if not any([self._args.enable_profiling_compare, self._args.enable_operator_compare, - self._args.enable_memory_compare, self._args.enable_communication_compare]): + self._args.enable_memory_compare, self._args.enable_communication_compare, + self._args.enable_api_compare, self._args.enable_kernel_compare]): self._args.enable_profiling_compare = True self._args.enable_operator_compare = True self._args.enable_memory_compare = True self._args.enable_communication_compare = True + self._args.enable_api_compare = True + self._args.enable_kernel_compare = True base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py index ab9bc364f4..9e6291e89e 100644 --- a/profiler/compare_tools/compare_backend/utils/compare_args.py +++ b/profiler/compare_tools/compare_backend/utils/compare_args.py @@ -6,6 +6,8 @@ class Args: enable_operator_compare: bool = False, enable_memory_compare: bool = False, enable_communication_compare: bool = False, + enable_api_compare: bool = False, + enable_kernel_compare: bool = False, 
output_path: str = "", max_kernel_num: int = None, op_name_map: dict = {}, @@ -17,6 +19,8 @@ class Args: self.enable_operator_compare = enable_operator_compare self.enable_memory_compare = enable_memory_compare self.enable_communication_compare = enable_communication_compare + self.enable_api_compare = enable_api_compare + self.enable_kernel_compare = enable_kernel_compare self.output_path = output_path self.max_kernel_num = max_kernel_num self.op_name_map = op_name_map diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py index e200258802..252aa536e1 100644 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ b/profiler/compare_tools/compare_backend/utils/constant.py @@ -39,13 +39,16 @@ class Constant(object): # compare type OPERATOR_COMPARE = "OperatorCompare" MEMORY_COMPARE = "MemoryCompare" - + API_COMPARE = "ApiCompare" + KERNEL_COMPARE = "KernelCompare" # sheet name OPERATOR_SHEET = "OperatorCompare" MEMORY_SHEET = "MemoryCompare" OPERATOR_TOP_SHEET = "OperatorCompareStatistic" MEMORY_TOP_SHEET = "MemoryCompareStatistic" COMMUNICATION_SHEET = "CommunicationCompare" + API_SHEET = "ApiCompare" + KERNEL_SHEET = "KernelCompare" # table name OPERATOR_TABLE = "OperatorCompare" @@ -57,6 +60,8 @@ class Constant(object): MODULE_TABLE = "ModuleCompare" MODULE_TOP_TABLE = "ModuleCompareStatistic" OVERALL_METRICS_TABLE = "OverallMetrics" + API_TABLE = "ApiCompare" + KERNEL_TABLE = "KernelCompare" # memory SIZE = "Size(KB)" diff --git a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py index ae808863e7..b6be0ae2eb 100644 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ b/profiler/compare_tools/compare_backend/utils/excel_config.py @@ -57,7 +57,7 @@ class ExcelConfig(object): DEVICE_SELF_TIME = "Device Self Time(ms)" DEVICE_TOTAL_TIME = "Device Total Time(ms)" DIFF_SELF_TIME = "Device Self 
Time Diff(ms)" - DIFF_TOTAL_RATIO = "Total Diff Ratio" + DIFF_TOTAL_RATIO = "Diff Total Ratio" DIFF_TOTAL_TIME = "Device Total Time Diff(ms)" DEVICE_SELF_TIME_US = "Device Self Time(us)" DEVICE_TOTAL_TIME_US = "Device Total Time(us)" @@ -71,6 +71,14 @@ class ExcelConfig(object): DURATION = "Duration(ms)" DURATION_RATIO = "Duration Ratio" DIFF_DUR_MS = "Diff Duration(ms)" + API_NAME = "api name" + TOTAL_DURATION_MS = "Total Duration(ms)" + AVG_DURATION_MS = "Avg Duration(ms)" + SELF_TIME_MS = "Self Time(ms)" + DIFF_SELF_RATIO = "Diff Self Ratio" + DIFF_AVG_RATIO = "Diff Avg Ratio" + DIFF_CALLS_RATIO = "Diff Calls Ratio" + KERNEL = "Kernel" HEADERS = { Constant.OPERATOR_TABLE: [ @@ -193,7 +201,39 @@ class ExcelConfig(object): {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, {"name": DIFF_DUR_MS, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 10}, - + ], + Constant.API_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": API_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_SELF_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_CALLS_RATIO, "type": 
CellFormatType.DEFAULT_FLOAT,"width": 20}, + ], + Constant.KERNEL_COMPARE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": KERNEL, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, ] } @@ -201,7 +241,9 @@ class ExcelConfig(object): Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], Constant.MODULE_TABLE: ["E1:H1", "I1:L1"], - Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"]} + Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"], + Constant.API_TABLE: ["C1:F1", "G1:J1"], + Constant.KERNEL_TABLE: ["D1:H1", "I1:M1"]} # overall metrics index # computing time diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py index 690c46cd51..69ee92d123 100644 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ 
b/profiler/compare_tools/compare_backend/utils/torch_op_node.py @@ -64,6 +64,14 @@ class TorchOpNode: def device_dur(self): return sum([kernel.device_dur for kernel in self._kernel_list]) + @property + def api_dur(self): + return self._event.dur + + @property + def api_self_time(self): + return self.api_dur - sum(child.api_dur for child in self._child_nodes) + def add_child_node(self, child_node): self._child_nodes.append(child_node) diff --git a/profiler/compare_tools/compare_backend/utils/tree_builder.py b/profiler/compare_tools/compare_backend/utils/tree_builder.py index 34c1fe1a1f..d5aa787ac2 100644 --- a/profiler/compare_tools/compare_backend/utils/tree_builder.py +++ b/profiler/compare_tools/compare_backend/utils/tree_builder.py @@ -23,7 +23,8 @@ class TreeBuilder: tree_node = TorchOpNode(event, last_node) last_node.add_child_node(tree_node) last_node = tree_node - tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) + if kernel_dict: + tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) else: event.set_name(last_node.name) last_node.set_memory_allocated(event) diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py index dffb7549fc..58bad621b0 100644 --- a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py @@ -12,7 +12,7 @@ class WorkSheetCreator: self._work_sheet = None self._row_id = 1 self._field_format = {} - self._diff_ratio_index = None + self._diff_ratio_index = [] self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" def create_sheet(self): @@ -47,8 +47,10 @@ class WorkSheetCreator: self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) self._field_format[index] = header.get("type") - if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): 
- self._diff_ratio_index = index + ratio_white_list = [ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO, + ExcelConfig.DIFF_AVG_RATIO, ExcelConfig.DIFF_CALLS_RATIO, ExcelConfig.DIFF_SELF_RATIO] + if header.get("name") in ratio_white_list: + self._diff_ratio_index.append(index) self._row_id += 1 def _write_data(self): @@ -56,7 +58,7 @@ class WorkSheetCreator: for data in self._data.get("rows"): for index, cell_data in enumerate(data): cell_format = self._work_book.add_format(self._field_format.get(index)) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) @@ -76,7 +78,7 @@ class WorkSheetCreator: if index == 0: # 0 for Index field cell_style["indent"] = cell_data.count("\t") cell_format = self._work_book.add_format(cell_style) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) diff --git a/profiler/compare_tools/performance_compare.py b/profiler/compare_tools/performance_compare.py index 8de0a72cbd..7c9d60aac0 100644 --- a/profiler/compare_tools/performance_compare.py +++ b/profiler/compare_tools/performance_compare.py @@ -18,6 +18,8 @@ def main(): parser.add_argument("--enable_operator_compare", default=False, action='store_true', help="开启算子性能比较") parser.add_argument("--enable_memory_compare", default=False, action='store_true', help="开启算子内存比较") parser.add_argument("--enable_communication_compare", default=False, action='store_true', help="开启通信性能比较") + parser.add_argument("--enable_api_compare", default=False, action='store_true', 
help="开启host api性能比较") + parser.add_argument("--enable_kernel_compare", default=False, action='store_true', help="开启kernel性能比较") parser.add_argument("--disable_details", default=False, action='store_true', help="不展示比对明细") parser.add_argument("--output_path", type=str, default='', help="性能数据比对结果的存放路径") parser.add_argument("--max_kernel_num", type=int, help="每个torch op的kernel数量限制") diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py index 44d97b248e..8073463592 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py @@ -24,6 +24,11 @@ class ProfilingParser(BaseProfilingParser): self._enable_operator_compare = True self._enable_memory_compare = True self._enable_communication_compare = True + self._enable_kernel_compare = True + self._enable_api_compare = True + + def _update_kernel_details(self): + pass def _update_memory_list(self): pass -- Gitee From 2612e37388fa6c84c059af81e38d80bc8e931702 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Thu, 25 Jul 2024 16:43:45 +0800 Subject: [PATCH 074/106] bugfix --- debug/accuracy_tools/grad_tool/common/constant.py | 1 + debug/accuracy_tools/grad_tool/common/utils.py | 4 ++++ debug/accuracy_tools/grad_tool/grad_ms/utils.py | 7 +++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/common/constant.py b/debug/accuracy_tools/grad_tool/common/constant.py index d569d47c16..7f2a708aba 100644 --- a/debug/accuracy_tools/grad_tool/common/constant.py +++ b/debug/accuracy_tools/grad_tool/common/constant.py @@ -40,6 +40,7 @@ class GradConst: DIRECTORY_LENGTH = 4096 FILE_NAME_LENGTH = 255 FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" + PARAM_VALID_PATTERN = r"^[a-zA-Z0-9.]+$" DIR = "dir" FILE = "file" diff --git a/debug/accuracy_tools/grad_tool/common/utils.py 
b/debug/accuracy_tools/grad_tool/common/utils.py index cdce3fda7e..a1e639558d 100644 --- a/debug/accuracy_tools/grad_tool/common/utils.py +++ b/debug/accuracy_tools/grad_tool/common/utils.py @@ -220,3 +220,7 @@ def change_mode(path, mode): except PermissionError as ex: print_error_log(f'Failed to change {path} authority. {str(ex)}') raise ex + +def check_param(param_name): + if not re.match(GradConst.PARAM_VALID_PATTERN, param_name): + raise RuntimeError("The parameter name contains special characters.") \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py index 23703f2820..64829bdeb5 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/utils.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -3,7 +3,8 @@ import os import numpy as np import mindspore from grad_tool.common.constant import GradConst -from grad_tool.common.utils import print_warn_log, create_directory, change_mode, check_file_or_directory_path +from grad_tool.common.utils import (print_warn_log, create_directory, change_mode, check_file_or_directory_path, + path_valid_check, check_param) level_adp = { "L0": { @@ -23,8 +24,10 @@ level_adp = { def save_grad_direction(param_name, grad, save_path): if not os.path.exists(save_path): create_directory(save_path) + check_file_or_directory_path(save_path, file_type=GradConst.DIR) + check_param(param_name) save_filepath = os.path.join(save_path, f"{param_name}.npy") - check_file_or_directory_path(save_filepath) + path_valid_check(save_filepath) if grad.dtype == mindspore.bfloat16: grad = grad.to(mindspore.float32) -- Gitee From abddd175a7b559a95b236949fe9762fe554989d6 Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 25 Jul 2024 17:11:55 +0800 Subject: [PATCH 075/106] update branch number --- debug/accuracy_tools/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 
4e0eaa1f37..1cc36f2a80 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -14,7 +14,7 @@ import setuptools -__version__ = '1.0.0' +__version__ = '1.1.0' INSTALL_REQUIRED = [ "wheel", -- Gitee From 5d2e493f771e1fc240157c3e661a158bb6afb0bf Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 25 Jul 2024 17:31:39 +0800 Subject: [PATCH 076/106] change branch number --- debug/accuracy_tools/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 1cc36f2a80..afbf8feb3a 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -14,7 +14,7 @@ import setuptools -__version__ = '1.1.0' +__version__ = '1.0.1' INSTALL_REQUIRED = [ "wheel", -- Gitee From b7ffc4b1675b4797be51f3fe6d5f3a7baf5431a3 Mon Sep 17 00:00:00 2001 From: l30036321 Date: Thu, 25 Jul 2024 10:42:39 +0800 Subject: [PATCH 077/106] check dump start --- debug/accuracy_tools/msprobe/mindspore/service.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index e8aa34dc4f..cb7a6f2a75 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -39,6 +39,7 @@ class Service: self.first_start = True self.current_rank = None self.dump_iter_dir = None + self.start_call = False def build_hook(self, module_type, name): def forward_hook(api_or_module_name, module, input, output): @@ -80,6 +81,8 @@ class Service: def start(self, model=None): self.model = model + self.start_call = True + logger.info_on_rank_0("msprobe: debugger.start() is set successfully") if self.config.step and self.current_iter > max(self.config.step): self.stop() raise Exception("msprobe: exit after iteration {}".format(max(self.config.step))) @@ -101,11 +104,16 @@ class Service: logger.info_on_rank_0(f"Dump data will be saved in {self.dump_iter_dir}.") 
def stop(self): + if not self.start_call: + logger.error_on_rank_0("msprobe: debugger.start() is not set in the current scope.") + raise Exception("debugger.start() is not set in the current scope.") if self.config.step and self.current_iter not in self.config.step: return if self.config.rank and self.current_rank not in self.config.rank: return self.switch = False + self.start_call = False + logger.info_on_rank_0(f"msprobe: debugger.stop() is set successfully. Please set debugger.start() to turn on the dump switch again. ") self.data_collector.write_json() def create_dirs(self): -- Gitee From bf629891ec4bd60610dd2f8b0a1e08994f5de79a Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Thu, 25 Jul 2024 18:46:23 +0800 Subject: [PATCH 078/106] =?UTF-8?q?=E3=80=90feature=E3=80=91=E6=97=A0?= =?UTF-8?q?=E6=A0=87=E6=9D=86=E6=98=BE=E5=AD=98=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 1 + .../pytorch/free_benchmark/common/constant.py | 2 + .../pytorch/free_benchmark/common/utils.py | 4 ++ .../result_handlers/base_handler.py | 72 +++++++++++-------- 4 files changed, 50 insertions(+), 29 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index df82455a67..85d5c65e51 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -45,6 +45,7 @@ class Const: PT_SUFFIX = ".pt" ONE_GB = 1073741824 # 1 * 1024 * 1024 * 1024 TEN_GB = 10737418240 # 10 * 1024 * 1024 * 1024 + ONE_MB = 1048576 # 1 * 1024 * 1024 FILE_PATTERN = r'^[a-zA-Z0-9_./-]+$' DISTRIBUTED_PREFIX_LENGTH = 60 # env dump path diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py index e737e7b217..08ac3dc668 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py @@ -60,6 +60,8 @@ class ThresholdConfig: torch.bfloat16: BenchmarkThd(2**-8, 1.0, 2**-8, 1e-4), } + TENSOR_SPLIT_MAX_CHUNK = 128 + class PreheatConfig: IF_PREHEAT = "if_preheat" diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py index ddcbd9d0f5..1aa0998642 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py @@ -96,3 +96,7 @@ class TorchC: add = torch._C._VariableFunctionsClass.add bitwise_xor = torch._C._VariableFunctionsClass.bitwise_xor clone = torch._C._VariableFunctionsClass.clone + clamp = torch._C._VariableFunctionsClass.clamp + tensor_split = torch._C._VariableFunctionsClass.tensor_split + tensor = torch._C._VariableFunctionsClass.tensor + reshape = torch._C._VariableFunctionsClass.reshape diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py index 1728b096f5..945a9c43ae 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py @@ -1,6 +1,7 @@ import math from abc import ABC, abstractmethod from typing import Any, Optional, Tuple +import numpy as np import torch from msprobe.core.common.const import Const @@ -34,15 +35,36 @@ class FuzzHandler(ABC): origin_ouput = origin_ouput.values perturbed_output = perturbed_output.values if hasattr(perturbed_output, "dtype"): - abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(perturbed_output.dtype) + abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(perturbed_output.dtype, FuzzThreshold.F32_THD) else: - abs_tol = 
FuzzThreshold.F32_THD.value + abs_tol = FuzzThreshold.F32_THD return ( origin_ouput.to(perturbed_output.dtype).to(perturbed_output.device), perturbed_output, abs_tol, ) + @staticmethod + def tensor_split_for_error_calculate(origin_output, perturbed_output): + """ + 对将投入误差值计算的扰动前后输出张量进行分块 + :param origin_output: 原始输出 + :param perturbed_output: 扰动后输出 + :return origin_output_chunks: 切块后原始输出列表 + :return perturbed_output_chunks: 切块后扰动后输出列表 + """ + single_output_mem = origin_output.element_size() * origin_output.nelement() / Const.ONE_MB + if single_output_mem == 0 or origin_output.ndim == 0: + return [origin_output], [perturbed_output] + # 张量大小和批数之间的关系:chunks_exp=math.log(M,2)-4, chunks=2**chunks_exp (M为对比张量数据大小[Mb]) + chunks_exp = int(math.log(single_output_mem, 2)) - 4 + chunks = 2 ** chunks_exp + chunks = max(chunks, 1) + chunks = min(chunks, ThresholdConfig.TENSOR_SPLIT_MAX_CHUNK) + origin_output_chunks = TorchC.tensor_split(TorchC.reshape(origin_output, (-1,)), chunks) + perturbed_output_chunks = TorchC.tensor_split(TorchC.reshape(perturbed_output, (-1,)), chunks) + return origin_output_chunks, perturbed_output_chunks + @staticmethod def convert_overflow_ratio_to_consistent(ratio): if math.isnan(ratio) or math.isinf(ratio): @@ -61,36 +83,28 @@ class FuzzHandler(ABC): self, origin_output, perturbed_output, norm_type, abs_tol ): if norm_type == NormType.ENDLESS_NORM: - return self.get_endless_norm(origin_output, perturbed_output, abs_tol) + return self.calculate_error(origin_output, perturbed_output, abs_tol) return ThresholdConfig.COMP_CONSISTENT - def get_endless_norm(self, origin_output, perturbed_output, abs_tol): - ratio_tensor1 = TorchC.where( - TorchC.gt(TorchC.abs(perturbed_output), abs_tol), - TorchC.div( - TorchC.abs(origin_output), - TorchC.add(TorchC.abs(perturbed_output), abs_tol), - ), - 1, - ) - ratio_tensor2 = TorchC.where( - TorchC.gt(TorchC.abs(origin_output), abs_tol), - TorchC.div( - TorchC.abs(perturbed_output), - 
TorchC.add(TorchC.abs(origin_output), abs_tol), - ), - 1, - ) + def calculate_error(self, origin_output, perturbed_output, abs_tol): + origin_output_chunks, perturbed_output_chunks = self.tensor_split_for_error_calculate(origin_output, perturbed_output) + norm1 = -np.inf + norm2 = -np.inf + norm3 = np.inf + for i, chunk_origin in enumerate(origin_output_chunks): + if chunk_origin.nelement() == 0: + break + chunk_perturbed = perturbed_output_chunks[i] + ratio_tensor1 = TorchC.where(TorchC.abs(chunk_perturbed) > abs_tol, + TorchC.div(TorchC.clamp(chunk_origin, min=abs_tol), TorchC.clamp(chunk_perturbed, min=abs_tol)), 1) + ratio_tensor2 = TorchC.where(TorchC.abs(chunk_origin) > abs_tol, + TorchC.div(TorchC.clamp(chunk_perturbed, min=abs_tol), TorchC.clamp(chunk_origin, min=abs_tol)), 1) + norm_values = TorchC.tensor([TorchC.max(ratio_tensor1), TorchC.max(ratio_tensor2)]) + max_ratio1, max_ratio2 = norm_values.tolist() + norm1 = max(norm1, self.convert_overflow_ratio_to_consistent(max_ratio1)) + norm2 = max(norm2, self.convert_overflow_ratio_to_consistent(max_ratio2)) + norm3 = min(norm3, self.convert_overflow_ratio_to_consistent(max_ratio1)) - norm1 = self.convert_overflow_ratio_to_consistent( - TorchC.max(ratio_tensor1).item() - ) - norm2 = self.convert_overflow_ratio_to_consistent( - TorchC.max(ratio_tensor2).item() - ) - norm3 = self.convert_overflow_ratio_to_consistent( - TorchC.min(ratio_tensor1).item() - ) if norm3 < 0: ratio = ThresholdConfig.SYMBOL_FLIPPING else: -- Gitee From ed264708ccd0f66ea194d3266e10c8d347caadb6 Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Thu, 25 Jul 2024 20:32:05 +0800 Subject: [PATCH 079/106] =?UTF-8?q?[msprobe]=E8=BF=AD=E4=BB=A3=E4=B8=80?= =?UTF-8?q?=E5=8F=91=E5=8C=85=E9=93=BE=E6=8E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/README.md 
b/debug/accuracy_tools/msprobe/README.md index 1e8c1a1f08..fb47846c35 100644 --- a/debug/accuracy_tools/msprobe/README.md +++ b/debug/accuracy_tools/msprobe/README.md @@ -26,6 +26,7 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud | 版本 | 发布日期 | 支持PyTorch版本 | 下载链接 | 校验码 | | ----- | ---------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | + | 1.0.1 | 2024-07-25 | 2.0/2.1/2.2 | [mindstudio_probe-1.0.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.0/mindstudio_probe-1.0.1-py3-none-any.whl) | b699e224e4d4e3bcf9412c54fa858a1ee370f0d7a2bc69cb3f1273ac14a6dc82 | | 1.0 | 2024-07-09 | 2.0/2.1/2.2 | [ascend_training_accuracy_tools-1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/att/1.0/ascend_training_accuracy_tools-1.0-py3-none-any.whl) | 5016dfe886c5d340ec6f60a959673355855f313c91f100680da814efb49f8e81 | | 0.0.3 | 2024-06-11 | 2.0/2.1/2.2 | [ascend_training_accuracy_tools-0.0.3-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/att/0.0/ascend_training_accuracy_tools-0.0.3-py3-none-any.whl) | f46d9714704859e2d67861a65bbb3c76b0a250cf6e238b978b5b959ab1fe125a | | 0.0.2 | 2024-05-23 | 2.0/2.1/2.2 | [ascend_training_accuracy_tools-0.0.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/att/0.0/ascend_training_accuracy_tools-0.0.2-py3-none-any.whl) | 2e35809bde559e9c4d2f16a02ccde779ed9e436bb65fded0b7ebaf6ac2c88d93 | -- Gitee From b2d607d1fb9ad9c494f8d1a9dae7ac808c828abf Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Thu, 25 Jul 2024 20:41:08 +0800 Subject: [PATCH 080/106] =?UTF-8?q?=E8=AF=86=E5=88=ABAICPU=E4=BC=98?= =?UTF-8?q?=E5=85=88=E8=AF=BB=E5=8F=96kernel=5Fdetails.csv,=20=E5=85=B6?= =?UTF-8?q?=E6=AC=A1=E8=AF=BB=E5=8F=96op=5Fsummary=5Fxxx.csv?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../config/profiling_data_version_config.yaml | 17 ++++++------- 
.../dataset/profiling/profiling_dataset.py | 9 +++++-- .../dataset/profiling/profiling_parser.py | 24 ++++++++++++------- profiler/advisor/utils/utils.py | 12 +++++++++- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/profiler/advisor/config/profiling_data_version_config.yaml b/profiler/advisor/config/profiling_data_version_config.yaml index 4ef76105a0..b8c92fe074 100644 --- a/profiler/advisor/config/profiling_data_version_config.yaml +++ b/profiler/advisor/config/profiling_data_version_config.yaml @@ -1,18 +1,19 @@ versions: - version: 8.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: - mindstudio_profiler_output: - [ op_summary, msprof ] + mindstudio_profiler_output: [ op_summary, msprof ] class_attr: op_summary: OpSummary msprof: Msprof file_attr: - op_summary: ^op_summary_\d{14}\.csv$ msprof: ^msprof_\d{14}\.json$ + op_summary: [ kernel_details.csv, '^op_summary_\d{14}\.csv$' ] - version: 7.0.0 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -28,13 +29,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 7.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -50,13 +52,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 6.3.RC2 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -72,9 +75,7 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: 
^op_summary_\d+_\d+\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+\.csv$'] task_time: ^task_time_\d+_\d+\.json$ msprof: ^msprof_\d+_\d+\.json$ ge_info: ge_info.db - - diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 99a19d3b60..4f7eb305bc 100644 --- a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -43,14 +43,19 @@ class ProfilingDataset(Dataset): self.build_from_pattern(value, join_prof_path(current_path, key)) elif isinstance(dirs_pattern, list): for item in dirs_pattern: + if hasattr(self, item) and getattr(self, item): + # 避免重复构建kernel_details.csv, op_summary.csv的数据对象 + continue + file_pattern = self.current_version_pattern.get('file_attr').get(item) data_class = globals()[self.current_version_pattern.get('class_attr').get(item)] - data_class.FILE_PATTERN = self.current_version_pattern.get('file_attr').get(item) + data_class.FILE_PATTERN = file_pattern data_object = data_class(current_path) is_success = data_object.parse_data() if is_success: setattr(self, item, data_object) else: - logger.warning("Skip parse %s from local path %s", self.current_version_pattern.get('class_attr').get(item), current_path) + logger.warning("Skip parse %s with file pattern %s from local path %s", + self.current_version_pattern.get('class_attr').get(item), file_pattern, current_path) else: logger.warning(f"Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__) diff --git a/profiler/advisor/dataset/profiling/profiling_parser.py b/profiler/advisor/dataset/profiling/profiling_parser.py index bb4caeb29e..3fe7bcd676 100644 --- a/profiler/advisor/dataset/profiling/profiling_parser.py +++ b/profiler/advisor/dataset/profiling/profiling_parser.py @@ -37,15 +37,21 @@ class ProfilingParser: return False def _parse_from_file(self): - file_list = get_file_path_from_directory(self._path, 
self.file_match_func(self.FILE_PATTERN)) - if not file_list: - return False - ## get last file - file = file_list[-1] - self.FILE_PATH = file - if len(file_list) > 1: - logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, file) - return self.parse_from_file(file) + + if not isinstance(self.FILE_PATTERN, list): + self.FILE_PATTERN = [self.FILE_PATTERN] + + for file_pattern in self.FILE_PATTERN: + file_list = get_file_path_from_directory(self._path, self.file_match_func(file_pattern)) + if not file_list: + continue + ## get last file + file = file_list[-1] + self.FILE_PATH = file + if len(file_list) > 1: + logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, file) + return self.parse_from_file(file) + return False @staticmethod def get_float(data) -> float: diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index b373d7bada..3488e7dfff 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -414,7 +414,17 @@ def format_excel_title(title: str) -> str: title = title.replace("(ns)", '') title = title.replace("(%)", '') title = title.replace(" ", "_") - return title + + # 将kernel_details中的列名转为与op_summary_x.csv中一致 + kernel_details_col_name_map = dict( + name="op_name", + type="op_type", + accelerator_core="task_type", + start_time="task_start_time", + duration="task_duration", + wait_time="wait_time" + ) + return kernel_details_col_name_map.get(title, title) def format_float(num: float) -> float: -- Gitee From 69c391d1067bbb0c988927a7494fd44e59416341 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Fri, 26 Jul 2024 09:54:37 +0800 Subject: [PATCH 081/106] graph mode grad_tool incorrect output bugfix --- debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py index 75280b3194..eb6a28aa95 100644 
--- a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py @@ -35,7 +35,7 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, level2: [step, max, min, norm, shape_dim, shape] + grad_bool_data level3: [step, max, min, norm, shape_dim, shape, dist_dim, dist] + grad_bool_data ''' - dump_path = dump_dir + g_name + dump_path = os.path.join(dump_dir, g_name) dump_dir_path = dump_path + "_dir" save_op = ms.ops.TensorDump() -- Gitee From d49b5e1c7cb3015fb55f277a7fb03ff830a22051 Mon Sep 17 00:00:00 2001 From: gitee Date: Fri, 26 Jul 2024 10:28:21 +0800 Subject: [PATCH 082/106] fix bug --- .../pytorch/api_accuracy_checker/run_ut/multi_run_ut.py | 8 ++++---- .../msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py | 8 ++------ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index 9c96a52d8b..8f1aa5b73a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -68,7 +68,7 @@ signal.signal(signal.SIGTERM, signal_handler) ParallelUTConfig = namedtuple('ParallelUTConfig', ['api_files', 'out_path', 'num_splits', 'save_error_data_flag', 'jit_compile_flag', 'device_id', - 'result_csv_path', 'total_items', 'real_data_path']) + 'result_csv_path', 'total_items', 'config_path']) def run_parallel_ut(config): @@ -90,7 +90,7 @@ def run_parallel_ut(config): *(['-j'] if config.jit_compile_flag else []), *(['-save_error_data'] if config.save_error_data_flag else []), '-csv_path', config.result_csv_path, - *(['-real_data_path', config.real_data_path] if config.real_data_path else []) + *(['-config', config.config_path] if config.config_path else []) ] return cmd @@ -175,7 +175,7 @@ def 
prepare_config(args): out_path_checker = FileChecker(out_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE) out_path = out_path_checker.common_check() split_files, total_items = split_json_file(api_info, args.num_splits, args.filter_api) - + config_path = os.path.realpath(args.config_path) if args.config_path else None result_csv_path = args.result_csv_path or os.path.join(out_path, f"accuracy_checking_result_{time.strftime('%Y%m%d%H%M%S')}.csv") if not args.result_csv_path: details_csv_path = os.path.join(out_path, f"accuracy_checking_details_{time.strftime('%Y%m%d%H%M%S')}.csv") @@ -187,7 +187,7 @@ def prepare_config(args): logger.info(f"UT task details will be saved in {details_csv_path}") return ParallelUTConfig(split_files, out_path, args.num_splits, args.save_error_data, args.jit_compile, args.device_id, result_csv_path, - total_items, args.real_data_path) + total_items, config_path) def main(): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 30994f7094..a8ff9b599e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -379,10 +379,6 @@ def _run_ut_parser(parser): help=" The path of accuracy_checking_result_{timestamp}.csv, " "when run ut is interrupted, enter the file path to continue run ut.", required=False) - parser.add_argument("-real_data_path", dest="real_data_path", nargs="?", const="", default="", type=str, - help=" In real data mode, the root directory for storing real data " - "must be configured.", - required=False) parser.add_argument("-f", "--filter_api", dest="filter_api", action="store_true", help=" Whether to filter the api in the api_info_file.", required=False) parser.add_argument("-config", "--config_path", dest="config_path", default="", type=str, @@ -400,9 +396,9 @@ def 
preprocess_forward_content(forward_content): if key not in arg_cache: filtered_new_args = [ {k: v for k, v in arg.items() if k not in ['Max', 'Min']} - for arg in value['args'] if isinstance(arg, dict) + for arg in value['input_args'] if isinstance(arg, dict) ] - arg_cache[key] = (filtered_new_args, value['kwargs']) + arg_cache[key] = (filtered_new_args, value['input_kwargs']) filtered_new_args, new_kwargs = arg_cache[key] -- Gitee From 29a07c6876ca53216f220c7ee99b880c86ef8580 Mon Sep 17 00:00:00 2001 From: gitee Date: Fri, 26 Jul 2024 10:52:26 +0800 Subject: [PATCH 083/106] fix bug --- .../api_accuracy_checker/run_ut/test_multi_run_ut.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py index 771e042380..27126cdddd 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py @@ -48,7 +48,7 @@ class TestMultiRunUT(unittest.TestCase): device_id=[0, 1], result_csv_path='result.csv', total_items=2, - real_data_path=None + config_path=None ) mock_file.side_effect = [ @@ -81,7 +81,7 @@ class TestMultiRunUT(unittest.TestCase): args.jit_compile = False args.device_id = [0, 1] args.result_csv_path = None - args.real_data_path = None + args.config_path = None config = prepare_config(args) -- Gitee From 6a57435a1de01683f50eb51a095f52fc30d64602 Mon Sep 17 00:00:00 2001 From: stby <295887736@qq.com> Date: Fri, 26 Jul 2024 11:04:05 +0800 Subject: [PATCH 084/106] add api and kernel cli switch --- profiler/cli/compare_cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index e794578da8..f9add948ea 100644 --- a/profiler/cli/compare_cli.py +++ 
b/profiler/cli/compare_cli.py @@ -32,6 +32,8 @@ from profiler.compare_tools.compare_backend.comparison_generator import Comparis @click.option('--enable_operator_compare', is_flag=True) @click.option('--enable_memory_compare', is_flag=True) @click.option('--enable_communication_compare', is_flag=True) +@click.option('--enable_api_compare', is_flag=True) +@click.option('--enable_kernel_compare', is_flag=True) @click.option('--disable_details', is_flag=True) @click.option('--output_path', '-o', 'output_path', type=click.Path()) @click.option('--max_kernel_num', 'max_kernel_num', type=int, help="The number of kernels per torch op is limited.") -- Gitee From cdb0c06fd4d3e0bba150ff078214e0514ad5a339 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 26 Jul 2024 11:38:31 +0800 Subject: [PATCH 085/106] bugfix --- .../msprobe/pytorch/free_benchmark/common/utils.py | 2 +- .../pytorch/free_benchmark/result_handlers/base_handler.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py index 1aa0998642..631beeb85c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py @@ -98,5 +98,5 @@ class TorchC: clone = torch._C._VariableFunctionsClass.clone clamp = torch._C._VariableFunctionsClass.clamp tensor_split = torch._C._VariableFunctionsClass.tensor_split - tensor = torch._C._VariableFunctionsClass.tensor + stack = torch._C._VariableFunctionsClass.stack reshape = torch._C._VariableFunctionsClass.reshape diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py index 945a9c43ae..e36f586735 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py @@ -99,7 +99,7 @@ class FuzzHandler(ABC): TorchC.div(TorchC.clamp(chunk_origin, min=abs_tol), TorchC.clamp(chunk_perturbed, min=abs_tol)), 1) ratio_tensor2 = TorchC.where(TorchC.abs(chunk_origin) > abs_tol, TorchC.div(TorchC.clamp(chunk_perturbed, min=abs_tol), TorchC.clamp(chunk_origin, min=abs_tol)), 1) - norm_values = TorchC.tensor([TorchC.max(ratio_tensor1), TorchC.max(ratio_tensor2)]) + norm_values = TorchC.stack([TorchC.max(ratio_tensor1), TorchC.max(ratio_tensor2)]) max_ratio1, max_ratio2 = norm_values.tolist() norm1 = max(norm1, self.convert_overflow_ratio_to_consistent(max_ratio1)) norm2 = max(norm2, self.convert_overflow_ratio_to_consistent(max_ratio2)) -- Gitee From 039408adbfb9c89bf2ef92dcaa8b9eb5085aec35 Mon Sep 17 00:00:00 2001 From: stby <295887736@qq.com> Date: Fri, 26 Jul 2024 11:52:48 +0800 Subject: [PATCH 086/106] add gpu adapt --- .../profiling_parser/base_profiling_parser.py | 9 +++++---- .../profiling_parser/gpu_profiling_parser.py | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 625eee7c60..9daaa55ef1 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -303,7 +303,7 @@ class BaseProfilingParser(ABC): task_index += 1 def _check_result_data(self): - if self._enable_operator_compare or self._enable_memory_compare: + if self._enable_operator_compare or self._enable_memory_compare or self._enable_api_compare: if not self._result_data.torch_op_data: print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") if self._enable_operator_compare and not self._result_data.kernel_dict: @@ -312,10 +312,11 @@ class 
BaseProfilingParser(ABC): print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") if self._enable_communication_compare and not self._result_data.communication_dict: print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") - if self._enable_api_compare and not self._result_data.torch_op_data: - print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") if self._enable_kernel_compare and not self._result_data.kernel_details: - print(f"[WARNING] Can't find any kernel details in the file: {self._profiling_path}") + if self._profiling_type == Constant.GPU: + print(f"[WARNING] kernel compare between GPU data and NPU data is not supported.") + else: + print(f"[WARNING] Can't find any kernel details in the file: {self._profiling_path}") def _read_trace_event(self): try: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index ea732a60e9..0aeeba83ef 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -174,6 +174,8 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_memory_event) if self._enable_profiling_compare: func_set.add(self._picking_flow_event) + if self._enable_api_compare: + func_set.add(self._picking_torch_op_event) return list(func_set) def _infer_compute_stream_id(self): -- Gitee From 9071f6d4a4ecf58da74853e2c8f9502a2d3c5318 Mon Sep 17 00:00:00 2001 From: gitee Date: Fri, 26 Jul 2024 15:45:36 +0800 Subject: [PATCH 087/106] Security issues fixed --- .../accuracy_tools/api_accuracy_checker/common/utils.py | 6 +++++- .../api_accuracy_checker/run_ut/multi_run_ut.py | 2 +- .../api_accuracy_checker/tensor_transport_layer/attl.py | 9 ++++++--- .../pytorch/api_accuracy_checker/run_ut/multi_run_ut.py | 2 +- 4 files 
changed, 13 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 76d117afb4..83b73e90f9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -634,7 +634,11 @@ def initialize_save_path(save_path, dir_name): def write_pt(file_path, tensor): if os.path.exists(file_path): raise ValueError(f"File {file_path} already exists") - torch.save(tensor, file_path) + try: + torch.save(tensor, file_path) + except Exception as e: + error_message = "An unexpected error occurred: %s when saving tensor to %s" % (str(e), file_path) + print_error_log(error_message) full_path = os.path.realpath(file_path) file_check_util.change_mode(full_path, FileCheckConst.DATA_FILE_AUTHORITY) return full_path diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index df6c99a567..f2fdec494b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -89,7 +89,7 @@ def run_parallel_ut(config): def update_progress_bar(progress_bar, result_csv_path): while any(process.poll() is None for process in processes): try: - with open(result_csv_path, 'r') as result_file: + with FileOpen(result_csv_path, 'r') as result_file: completed_items = len(result_file.readlines()) - 1 progress_bar.update(completed_items - progress_bar.n) except FileNotFoundError: diff --git a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py index 0b91d2bbc8..34a3cbd12f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py +++ b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py @@ -12,7 +12,7 @@ import torch from 
api_accuracy_checker.tensor_transport_layer.client import TCPClient from api_accuracy_checker.tensor_transport_layer.server import TCPServer -from api_accuracy_checker.common.utils import logger +from api_accuracy_checker.common.utils import logger, check_file_or_directory_path from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import remove_path @@ -138,8 +138,10 @@ class ATTL: file_path = os.path.join(self.session_config.nfs_path, buffer.name + ".pt") else: file_path = os.path.join(self.session_config.nfs_path, buffer + f"_{int(time.time())}") - - torch.save(buffer, file_path) + try: + torch.save(buffer, file_path) + except Exception as e: + self.logger.error("there is something error. please check it. %s", e) def download(self): for file_type in ("start*", "*.pt", "end*"): @@ -150,6 +152,7 @@ class ATTL: if cur_file is None: return None else: + check_file_or_directory_path(cur_file) buffer = torch.load(cur_file) remove_path(cur_file) return buffer diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index 9c96a52d8b..931ce56419 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -111,7 +111,7 @@ def run_parallel_ut(config): def update_progress_bar(progress_bar, result_csv_path): while any(process.poll() is None for process in processes): try: - with open(result_csv_path, 'r') as result_file: + with FileOpen(result_csv_path, 'r') as result_file: completed_items = len(result_file.readlines()) - 1 progress_bar.update(completed_items - progress_bar.n) except FileNotFoundError: -- Gitee From 50ac71e0aa55c1a390286ada80f4ccac03610d07 Mon Sep 17 00:00:00 2001 From: gitee Date: Fri, 26 Jul 2024 16:10:50 +0800 Subject: [PATCH 088/106] safe issue fix --- .../api_accuracy_checker/run_ut/data_generate.py | 8 +++++--- 
.../tensor_transport_layer/attl.py | 5 +++-- .../api_accuracy_checker/run_ut/data_generate.py | 10 ++++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 67dc5ad253..5781164839 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -20,9 +20,10 @@ import math import torch import numpy -from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, \ - print_error_log, get_full_data_path, CompareException +from api_accuracy_checker.common.utils import Const, check_object_type, print_warn_log, print_error_log, \ + get_full_data_path, CompareException from api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] @@ -83,7 +84,8 @@ def gen_real_tensor(data_path, convert_type): convert_type: convert ori_type to dist_type flag. """ data_path = os.path.realpath(data_path) - check_file_or_directory_path(data_path) + data_path_checker = FileChecker(data_path, FileCheckConst.FILE, ability=FileCheckConst.READ_ABLE) + data_path = data_path_checker.common_check() if not data_path.endswith('.pt') and not data_path.endswith('.npy'): error_info = f"The file: {data_path} is not a pt or numpy file." 
raise CompareException(CompareException.INVALID_FILE_ERROR, error_info) diff --git a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py index 34a3cbd12f..995d202886 100644 --- a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py +++ b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py @@ -12,7 +12,7 @@ import torch from api_accuracy_checker.tensor_transport_layer.client import TCPClient from api_accuracy_checker.tensor_transport_layer.server import TCPServer -from api_accuracy_checker.common.utils import logger, check_file_or_directory_path +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import remove_path @@ -152,7 +152,8 @@ class ATTL: if cur_file is None: return None else: - check_file_or_directory_path(cur_file) + cur_file_checker = FileChecker(cur_file, FileCheckConst.FILE, ability=FileCheckConst.READ_ABLE) + cur_file = cur_file_checker.common_check() buffer = torch.load(cur_file) remove_path(cur_file) return buffer diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py index f495cd673d..b103643c08 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py @@ -21,10 +21,11 @@ import torch import numpy from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api -from msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path, check_object_type, \ - get_full_data_path, CompareException +from msprobe.pytorch.api_accuracy_checker.common.utils import check_object_type, get_full_data_path, \ + CompareException +from 
msprobe.core.common.file_check import FileChecker from msprobe.pytorch.common.log import logger -from msprobe.core.common.const import Const +from msprobe.core.common.const import Const, FileCheckConst TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] @@ -87,7 +88,8 @@ def gen_real_tensor(data_path, convert_type): convert_type: convert ori_type to dist_type flag. """ data_path = os.path.realpath(data_path) - check_file_or_directory_path(data_path) + data_path_checker = FileChecker(data_path, FileCheckConst.FILE, ability=FileCheckConst.READ_ABLE) + data_path = data_path_checker.common_check() if not data_path.endswith('.pt') and not data_path.endswith('.npy'): error_info = f"The file: {data_path} is not a pt or numpy file." raise CompareException(CompareException.INVALID_FILE_ERROR, error_info) -- Gitee From 4703e86edadfe694b3ca6909c38043a80114d462 Mon Sep 17 00:00:00 2001 From: gitee Date: Fri, 26 Jul 2024 16:54:09 +0800 Subject: [PATCH 089/106] bugfix --- .../api_accuracy_checker/run_ut/multi_run_ut.py | 11 +++-------- .../tensor_transport_layer/attl.py | 1 + .../api_accuracy_checker/run_ut/multi_run_ut.py | 11 +++-------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index f2fdec494b..0ab8073937 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -88,14 +88,9 @@ def run_parallel_ut(config): def update_progress_bar(progress_bar, result_csv_path): while any(process.poll() is None for process in processes): - try: - with FileOpen(result_csv_path, 'r') as result_file: - completed_items = len(result_file.readlines()) - 1 - progress_bar.update(completed_items - progress_bar.n) - except FileNotFoundError: - print_warn_log(f"Result CSV file not found: 
{result_csv_path}.") - except Exception as e: - print_error_log(f"An unexpected error occurred while reading result CSV: {e}") + with FileOpen(result_csv_path, 'r') as result_file: + completed_items = len(result_file.readlines()) - 1 + progress_bar.update(completed_items - progress_bar.n) time.sleep(1) for fwd, bwd in zip(config.forward_files, config.backward_files): diff --git a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py index 995d202886..5fb63779fb 100644 --- a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py +++ b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py @@ -12,6 +12,7 @@ import torch from api_accuracy_checker.tensor_transport_layer.client import TCPClient from api_accuracy_checker.tensor_transport_layer.server import TCPServer +from api_accuracy_checker.common.utils import logger from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import remove_path diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index 931ce56419..879f116309 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -110,14 +110,9 @@ def run_parallel_ut(config): def update_progress_bar(progress_bar, result_csv_path): while any(process.poll() is None for process in processes): - try: - with FileOpen(result_csv_path, 'r') as result_file: - completed_items = len(result_file.readlines()) - 1 - progress_bar.update(completed_items - progress_bar.n) - except FileNotFoundError: - logger.warning(f"Result CSV file not found: {result_csv_path}.") - except Exception as e: - logger.error(f"An 
unexpected error occurred while reading result CSV: {e}") + with FileOpen(result_csv_path, 'r') as result_file: + completed_items = len(result_file.readlines()) - 1 + progress_bar.update(completed_items - progress_bar.n) time.sleep(1) for api_info in config.api_files: -- Gitee From e959f71dec4a9bab60c3fc95aac8183733c1bb8b Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Sat, 27 Jul 2024 10:25:14 +0800 Subject: [PATCH 090/106] align pt and mindspore defalut setting --- debug/accuracy_tools/grad_tool/README.md | 34 +++++++------------ .../grad_tool/common/constant.py | 4 +-- .../accuracy_tools/grad_tool/common/utils.py | 6 +++- .../grad_tool/grad_ms/global_context.py | 30 ++++++++-------- .../grad_tool/grad_ms/grad_analyzer.py | 13 ++++--- 5 files changed, 41 insertions(+), 46 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/README.md b/debug/accuracy_tools/grad_tool/README.md index a7929ca818..ed84abd46d 100644 --- a/debug/accuracy_tools/grad_tool/README.md +++ b/debug/accuracy_tools/grad_tool/README.md @@ -28,7 +28,7 @@ ### 梯度数据导出 -1. 创建配置文件config.yaml,PyTorch框架样例代码如下: +1. 
创建配置文件config.yaml,样例如下: ```python level: L1 @@ -38,40 +38,30 @@ bounds: output_path: your_output_dir ``` - > 在MindSpore框架下,当前不支持rank和step配置,默认所有rank和所有step都进行采集, - > MindSpore中step指的是优化器被调用的次数(并非模型跑的step,某些step,例如loss为nan时,不会调用优化器) + > step指的是优化器被调用的次数(并非模型跑的step,某些step,例如loss为nan时,不会调用优化器) **参数说明** - | 参数 | 说明 | 是否必选 | - |--------------------------------|----------------------------------------------------|----------| - | level | Level级别,PyTorch可取值:L0、L1、L2,MindSpore可取值:L0, L1, L2, L3。决定导出数据的详细程度,级别越大导出数据越详细。数据类型:str。 | PyTorch是(MindSpore否,默认为L0) | - | param_list | 填写需要监控的权重名称。不指定或列表为空就表示监控所有权重。数据类型:List[str]。 | 否 | - | rank | 在多卡场景下,填写需要导出梯度数据的卡的Rank ID,不指定或列表为空就表示导出所有Rank的数据。单卡场景无需关注该参数。数据类型:List[int]。(MindSpore当前不支持指定rank) | 否 | - | step | 指定需要导出数据的step。对于PyTorch不指定或列表为空就表示导出所有step的数据,对于MindSpore不指定表示导出所有step,指定时要求传入range列表,例如[1, 2],否则无效。数据类型:List[int]。(MindSpore当前不支持指定step) | 否 | - | bounds | 用来划分区间以统计值分布。需要保证由数据小到大排列。不传则使用默认值[-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10](mindspore为[-0.1, 0., 1.0]),数据类型:List。 | 否 | - | output_path | 输出目录。如果不存在就会创建一个新目录。数据类型:str。 | PyTorch是(MindSpore否,默认为./grad_stat | + | 参数 | 说明 | 输入类型 | 是否必选 | + |--------------------------------|-----------------------------------|-----------------|----------| + | level | 输出级别。决定导出数据的详细程度,级别越大导出数据越详细。可取值:L0, L1, L2|str | 是 | + | param_list | 权重名称列表,表示需要监控的权重。不指定或列表为空就表示监控所有权重。 | List[str] | 否 | + | rank | rank id列表,在多卡场景下,表示需要导出梯度数据的进程的rank id。不指定或列表为空就表示导出所有rank的数据。单卡场景无需关注该参数。 (MindSpore Pynative模式下,当前暂不支持指定rank功能) | List[int] | 否 | + | step | step列表,表示需要导出数据的step列表。不指定或列表为空就表示导出所有step的数据。(MindSpore Pynative模式下,当前暂不支持指定step功能) | List[int] | 否 | + | bounds | 区间列表,用来划分区间以统计数值的分布。需要保证由数据小到大排列。不指定则使用默认值[-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10] | List[float] | 否 | + | output_path | 输出目录。如果不存在就会创建一个新目录。 | str | 是 | **不同级别的level的导出数据** -- PyTorch/MindSpore动态图不同level数据 | 级别 | 特征数据表头 | 是否有方向数据 | | ---- | ------------------------------------------------------------ | 
-------------- | | L0 | ("param_name", "MD5", "max", "min", "norm", "shape") | 否 | | L1 | ("param_name", "max", "min", "norm", "shape") | 是 | | L2 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | - -- MindSpore静态图不同level数据 - - | 级别 | 特征数据表头 | 是否有方向数据 | - | ---- | ------------------------------------------------------------ | -------------- | - | L0 | ("param_name", "max", "min", "norm", "shape") | 否 | - | L1 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 否 | - | L2 | ("param_name", "max", "min", "norm", "shape") | 是 | - | L3 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | intervals就是根据值分布bounds划分出的区间。 + MindSpore Pynative模式下,L0级别中暂不支持"MD5" **方向数据解释** @@ -98,7 +88,7 @@ gm = GradientMonitor("config_path", framework="MindSpore") gm.monitor(optimizer) ``` -3. 结束监控(MindSpore需要) +3. 结束监控(MindSpore Pynative模式下需要) 在训练结束之后,调用stop接口 diff --git a/debug/accuracy_tools/grad_tool/common/constant.py b/debug/accuracy_tools/grad_tool/common/constant.py index 7f2a708aba..f37389d12f 100644 --- a/debug/accuracy_tools/grad_tool/common/constant.py +++ b/debug/accuracy_tools/grad_tool/common/constant.py @@ -23,8 +23,8 @@ class GradConst: LEVEL0 = "L0" LEVEL1 = "L1" LEVEL2 = "L2" - LEVEL3 = "L3" - SUPPORTED_LEVEL = {"L0", "L1", "L2", "L3"} + # LEVEL3 = "L3" + SUPPORTED_LEVEL = {"L0", "L1", "L2"} # numpy coding STEP_IDX = 0 diff --git a/debug/accuracy_tools/grad_tool/common/utils.py b/debug/accuracy_tools/grad_tool/common/utils.py index a1e639558d..f40f8688c2 100644 --- a/debug/accuracy_tools/grad_tool/common/utils.py +++ b/debug/accuracy_tools/grad_tool/common/utils.py @@ -223,4 +223,8 @@ def change_mode(path, mode): def check_param(param_name): if not re.match(GradConst.PARAM_VALID_PATTERN, param_name): - raise RuntimeError("The parameter name contains special characters.") \ No newline at end of file + raise RuntimeError("The parameter name contains special characters.") + +def check_str(string, variable_name): + if not 
isinstance(string, str): + raise ValueError(f'The variable: "{variable_name}" is not a string.') \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index d44bea52c7..3bb1459ff6 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -4,7 +4,7 @@ from typing import Dict, List, Union from grad_tool.common.utils import print_warn_log from grad_tool.common.constant import GradConst -from grad_tool.common.utils import path_valid_check, create_directory +from grad_tool.common.utils import path_valid_check, create_directory, check_str class GlobalContext: @@ -12,13 +12,13 @@ class GlobalContext: _instance = None _instance_lock = threading.Lock() _setting = { - GradConst.LEVEL: GradConst.LEVEL0, + GradConst.LEVEL: None, GradConst.PARAM_LIST: None, GradConst.STEP: None, GradConst.RANK: None, GradConst.CURRENT_STEP: 0, - GradConst.BOUNDS: [-1., 0., 1.], - GradConst.OUTPUT_PATH: "./grad_stat" + GradConst.BOUNDS: [-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10], + GradConst.OUTPUT_PATH: None } def __new__(cls, *args, **kwargs): @@ -29,23 +29,25 @@ class GlobalContext: return cls._instance def init_context(self, config_dict: Dict): - if config_dict.get(GradConst.LEVEL, None) in GradConst.SUPPORTED_LEVEL: + level = config_dict.get(GradConst.LEVEL) + check_str(level, variable_name = "level in yaml") + if level in GradConst.SUPPORTED_LEVEL: self._setting[GradConst.LEVEL] = config_dict.get(GradConst.LEVEL) else: - print_warn_log("Invalid level set in config yaml file, use L0 instead.") + raise ValueError("Invalid level set in config yaml file, level option: L0, L1, L2") + self._set_input_list(config_dict, GradConst.PARAM_LIST, str) self._set_input_list(config_dict, GradConst.BOUNDS, float) self._set_input_list(config_dict, GradConst.STEP, int) self._set_input_list(config_dict, 
GradConst.RANK, int) + output_path = config_dict.get(GradConst.OUTPUT_PATH) - if output_path: - try: - path_valid_check(output_path) - except RuntimeError as err: - print_warn_log(f"Invalid output_path, use default output_path. The error message is {err}.") - output_path = None - if output_path: - self._setting[GradConst.OUTPUT_PATH] = output_path + check_str(output_path, variable_name = "output_path in yaml") + try: + path_valid_check(output_path) + except RuntimeError as err: + raise ValueError(f"Invalid output_path: {output_path}. The error message is {err}.") + self._setting[GradConst.OUTPUT_PATH] = output_path if not os.path.isdir(self._setting.get(GradConst.OUTPUT_PATH)): create_directory(self._setting.get(GradConst.OUTPUT_PATH)) else: diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py index eb6a28aa95..9a67f2b3d9 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py @@ -31,9 +31,8 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, ''' Dump gradient statistic data. 
level0: [step, max, min, norm, shape_dim, shape] - level1: [step, max, min, norm, shape_dim, shape, dist_dim, dist] - level2: [step, max, min, norm, shape_dim, shape] + grad_bool_data - level3: [step, max, min, norm, shape_dim, shape, dist_dim, dist] + grad_bool_data + level1: [step, max, min, norm, shape_dim, shape] + grad_bool_data + level2: [step, max, min, norm, shape_dim, shape, dist_dim, dist] + grad_bool_data ''' dump_path = os.path.join(dump_dir, g_name) dump_dir_path = dump_path + "_dir" @@ -51,7 +50,7 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, level0_stat = ms.ops.concat((extrem_stat, shape_stat), axis=0) level_stat = level0_stat - if level == "L1" or level == "L3": + if level == GradConst.LEVEL2: zero_grad = (grad == 0).sum() dist_dim = ms.Tensor([len(bounds) + 2]).float() bucket_result = ms.ops.bucketize(grad.float(), bounds) @@ -60,11 +59,11 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, dist_stat.append(zero_grad) dist_stat.append(ms.Tensor(1, dtype=ms.int64)) # make sure dist_stat is not empty dist_stat = ms.ops.stack(dist_stat, axis=0).float() - level1_stat = ms.ops.concat((level0_stat, dist_dim, dist_stat), axis=0) - level_stat = level1_stat + level2_stat = ms.ops.concat((level0_stat, dist_dim, dist_stat), axis=0) + level_stat = level2_stat save_op(dump_path, level_stat) - if level == "L2" or level == "L3": + if level == GradConst.LEVEL1 or level == GradConst.LEVEL2: grad_direction = grad > 0 save_op(dump_dir_path, grad_direction) -- Gitee From 81699a631eedc256d82c4fec76a3f3134107dc89 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Sat, 27 Jul 2024 11:25:25 +0800 Subject: [PATCH 091/106] add divide zero protection --- .../bench_functions/npu_fusion_attention.py | 56 +++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py 
b/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py index 4c230c17c0..d5a91ce3b5 100644 --- a/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py +++ b/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py @@ -8,7 +8,6 @@ from api_accuracy_checker.common.utils import logger gtype = torch.float64 # arm host必须选择float64,x86环境选择float32即可,64也行。arm计算很慢,s=8k的场景建议使用x86 softmax_build_mode = "QKV" # "MAX_SUM" - """ # 前向函数声明对比 标杆实现:fusion_attention_forward: q, k, v, drop_mask, atten_mask, pse, scale, keep_prob @@ -45,6 +44,9 @@ def softmax_grad(dp, softmax_res): def broadcast_kv(num_heads, num_kv_heads, kv_tensor, dtype): + if num_kv_heads == 0 or num_kv_heads < num_heads: + raise ValueError(f"num_kv_heads must be non-zero and less than num_heads.") + factor = num_heads // num_kv_heads kv_shape = kv_tensor.shape B = kv_shape[0] @@ -102,28 +104,34 @@ def parse_bsnd_args(query, key, head_num, input_layout): if not isinstance(input_layout, str) or input_layout not in supported_input_layout: raise ValueError(f"Invalid input_layout arg which must be one of {supported_input_layout}.") - if input_layout == "BSH": - B, S1, H1 = query.shape - _, S2, H2 = key.shape - D = H1 // N1 - N2 = H2 // D - elif input_layout == "SBH": - S1, B, H1 = query.shape - S2, _, H2 = key.shape - D = H1 // N1 - N2 = H2 // D - elif input_layout == "BSND": - B, S1, N1, D = query.shape - _, S2, N2, _ = key.shape - H1 = N1 * D - H2 = N2 * D - elif input_layout == "BNSD": - B, N1, S1, D = query.shape - _, N2, S2, _ = key.shape - H1 = N1 * D - H2 = N2 * D - elif input_layout == "TND": + if input_layout == "TND": raise ValueError(f"input_layout {input_layout} does not supported for now.") + try: + if input_layout == "BSH": + B, S1, H1 = query.shape + _, S2, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "SBH": + S1, B, H1 = query.shape + S2, _, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif 
input_layout == "BSND": + B, S1, N1, D = query.shape + _, S2, N2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + elif input_layout == "BNSD": + B, N1, S1, D = query.shape + _, N2, S2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + except Exception as e: + raise ValueError(f"query.shape: {query.shape}, key.shape: {key.shape}, parse_bsnd_args error: {e}") from e + + if D == 0: + raise ValueError(f"Value D must be non-zero.") DTYPE = query.dtype return B, S1, S2, N1, N2, D, H1, H2, DTYPE @@ -251,6 +259,8 @@ def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softma """ print(f"Using softmax_max and softmax_sum to rebuild original softmax") qk = calculate_qk(q, k, atten_mask, pse, scale) + if softmax_max.shape[-1] == 0: + raise ValueError(f"softmax_max.shape[-1] must be non-zero, softmax_max.shape: {softmax_max.shape}") repeat_dim = qk.shape[-1] // softmax_max.shape[-1] softmax_res = torch.exp(qk.sub(softmax_max.repeat(1, 1, 1, repeat_dim))).div( softmax_sum.repeat(1, 1, 1, repeat_dim)) @@ -394,6 +404,8 @@ def npu_fusion_attention_grad(*args, **kwargs): # N不等长适配by cdy if not (N1 == N2): + if N2 == 0: + raise ValueError("dims_kwargs.N2 must be non-zero.") G = int(N1 / N2) dk = torch.sum(dk.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) dv = torch.sum(dv.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) -- Gitee From a8e11073f6a4c42560e7ea1dbf498fb4c6f85d4d Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Sat, 27 Jul 2024 14:39:32 +0800 Subject: [PATCH 092/106] bug fix --- debug/accuracy_tools/grad_tool/README.md | 8 ++++---- debug/accuracy_tools/grad_tool/common/constant.py | 1 - debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py | 6 +++--- debug/accuracy_tools/grad_tool/grad_ms/utils.py | 3 --- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/debug/accuracy_tools/grad_tool/README.md b/debug/accuracy_tools/grad_tool/README.md index ed84abd46d..1d35f03e47 100644 --- 
a/debug/accuracy_tools/grad_tool/README.md +++ b/debug/accuracy_tools/grad_tool/README.md @@ -46,8 +46,8 @@ |--------------------------------|-----------------------------------|-----------------|----------| | level | 输出级别。决定导出数据的详细程度,级别越大导出数据越详细。可取值:L0, L1, L2|str | 是 | | param_list | 权重名称列表,表示需要监控的权重。不指定或列表为空就表示监控所有权重。 | List[str] | 否 | - | rank | rank id列表,在多卡场景下,表示需要导出梯度数据的进程的rank id。不指定或列表为空就表示导出所有rank的数据。单卡场景无需关注该参数。 (MindSpore Pynative模式下,当前暂不支持指定rank功能) | List[int] | 否 | - | step | step列表,表示需要导出数据的step列表。不指定或列表为空就表示导出所有step的数据。(MindSpore Pynative模式下,当前暂不支持指定step功能) | List[int] | 否 | + | rank | rank id列表,在多卡场景下,表示需要导出梯度数据的进程的rank id。不指定或列表为空就表示导出所有rank的数据。单卡场景无需关注该参数。 (MindSpore静态图模式下,当前暂不支持指定rank功能) | List[int] | 否 | + | step | step列表,表示需要导出数据的step列表。不指定或列表为空就表示导出所有step的数据。(MindSpore静态图模式下,当前暂不支持指定step功能) | List[int] | 否 | | bounds | 区间列表,用来划分区间以统计数值的分布。需要保证由数据小到大排列。不指定则使用默认值[-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10] | List[float] | 否 | | output_path | 输出目录。如果不存在就会创建一个新目录。 | str | 是 | @@ -61,7 +61,7 @@ | L2 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | intervals就是根据值分布bounds划分出的区间。 - MindSpore Pynative模式下,L0级别中暂不支持"MD5" + MindSpore静态图模式下,L0级别中暂不支持"MD5" **方向数据解释** @@ -88,7 +88,7 @@ gm = GradientMonitor("config_path", framework="MindSpore") gm.monitor(optimizer) ``` -3. 结束监控(MindSpore Pynative模式下需要) +3. 
结束监控(MindSpore静态图模式下需要) 在训练结束之后,调用stop接口 diff --git a/debug/accuracy_tools/grad_tool/common/constant.py b/debug/accuracy_tools/grad_tool/common/constant.py index f37389d12f..38d33e9886 100644 --- a/debug/accuracy_tools/grad_tool/common/constant.py +++ b/debug/accuracy_tools/grad_tool/common/constant.py @@ -23,7 +23,6 @@ class GradConst: LEVEL0 = "L0" LEVEL1 = "L1" LEVEL2 = "L2" - # LEVEL3 = "L3" SUPPORTED_LEVEL = {"L0", "L1", "L2"} # numpy coding diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py index 9a67f2b3d9..895b8f2ae6 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py @@ -154,7 +154,7 @@ class CSVGenerator(Process): level = grad_context.get_context(GradConst.LEVEL) try: shape_dim = int(stat_data[GradConst.SHAPE_DIM_IDX]) - if level in [GradConst.LEVEL1, GradConst.LEVEL3]: + if level == GradConst.LEVEL2: dist_dim = int(stat_data[shape_dim + GradConst.SHAPE_DIM_IDX + 1]) length = shape_dim + dist_dim + 7 else: @@ -186,7 +186,7 @@ class CSVGenerator(Process): if not param_name: raise RuntimeError("Invalid gradient statistic file name.") csv_line = [param_name] - if self.level == GradConst.LEVEL1 or self.level == GradConst.LEVEL3: + if self.level == GradConst.LEVEL2: csv_line.extend(self.get_dist_data(shape_dim, stat_data)) csv_line.extend(self.get_extrem_data(shape_dim, stat_data)) self.cache_list.append(csv_line) @@ -207,7 +207,7 @@ class CSVGenerator(Process): def create_csv_file(self): headers = ["Param_name"] - if self.level == GradConst.LEVEL1 or self.level == GradConst.LEVEL3: + if self.level == GradConst.LEVEL2: headers.extend(self.get_dist_header()) headers.extend(self.get_extrem_headers()) output_path = f"{self.save_dir}/grad_summary_{self.current_step}.csv" diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py index 64829bdeb5..c0efbdc7ba 
100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/utils.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -38,8 +38,5 @@ def save_grad_direction(param_name, grad, save_path): change_mode(save_filepath, 0o640) def get_adapted_level(level: str): - if level == GradConst.LEVEL3: - print_warn_log(f"In mindpsore pynative mode, only 'L0', 'L1' and 'L2' are supported, use L0 instead") - level = GradConst.LEVEL0 level_adapted = level_adp.get(level) return level_adapted \ No newline at end of file -- Gitee From 970003a983f89515ffca433887de4865c65037e5 Mon Sep 17 00:00:00 2001 From: l30036321 Date: Sat, 27 Jul 2024 14:57:52 +0800 Subject: [PATCH 093/106] fix slice bug --- .../core/data_dump/data_processor/base.py | 11 ----------- .../data_processor/mindspore_processor.py | 18 +++++++++++++++++- .../data_processor/pytorch_processor.py | 18 +++++++++++++++++- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 5d90129197..a6858e8cb0 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -108,17 +108,6 @@ class BaseDataProcessor: def _analyze_numpy(value, numpy_type): return {"type": numpy_type, "value": value} - @staticmethod - def _analyze_builtin(arg): - single_arg = {} - if isinstance(arg, slice): - single_arg.update({"type": "slice"}) - single_arg.update({"value": [arg.start, arg.stop, arg.step]}) - else: - single_arg.update({"type": type(arg).__name__}) - single_arg.update({"value": arg}) - return single_arg - @classmethod def get_special_types(cls): return cls.special_type diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 7533e2ee0d..a66cb94592 100644 --- 
a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -48,6 +48,22 @@ class MindsporeDataProcessor(BaseDataProcessor): def analyze_dtype_in_kwargs(element): return {"type": "mindspore.dtype", "value": str(element)} + @staticmethod + def _analyze_builtin(arg): + single_arg = {} + if isinstance(arg, slice): + single_arg.update({"type": "slice"}) + # slice参数中可能存在tensor类型,json序列化,需要转换为python数值类型 + values = [ + value if not isinstance(value, ms.Tensor) else value.item() + for value in [arg.start, arg.stop, arg.step] + ] + single_arg.update({"value": values}) + else: + single_arg.update({"type": type(arg).__name__}) + single_arg.update({"value": arg}) + return single_arg + @classmethod def get_special_types(cls): return super().get_special_types() + cls.mindspore_special_type @@ -90,7 +106,7 @@ class MindsporeDataProcessor(BaseDataProcessor): if isinstance(element, (bool, int, float, str, slice)): return self._analyze_builtin(element) - return None + return {} def analyze_element(self, element): return self.recursive_apply_transform(element, self.analyze_single_element) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 1c599573c0..9441aa79f9 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -78,6 +78,22 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item() return tensor_stat + @staticmethod + def _analyze_builtin(arg): + single_arg = {} + if isinstance(arg, slice): + single_arg.update({"type": "slice"}) + # slice参数中可能存在tensor类型,json序列化,需要转换为python数值类型 + values = [ + value if not isinstance(value, torch.Tensor) else 
value.item() + for value in [arg.start, arg.stop, arg.step] + ] + single_arg.update({"value": values}) + else: + single_arg.update({"type": type(arg).__name__}) + single_arg.update({"value": arg}) + return single_arg + @staticmethod def _analyze_torch_size(arg): return {"type": "torch.Size", "value": list(arg)} @@ -98,7 +114,7 @@ class PytorchDataProcessor(BaseDataProcessor): return self._analyze_tensor(element, Const.SEP.join(suffix_stack)) if isinstance(element, (bool, int, float, str, slice)): return self._analyze_builtin(element) - return None + return {} def analyze_element(self, element): return self.recursive_apply_transform(element, self.analyze_single_element) -- Gitee From 55a077363ab17b537481d689890a8e2150f1789e Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Sat, 27 Jul 2024 15:52:00 +0800 Subject: [PATCH 094/106] cleancode --- debug/accuracy_tools/grad_tool/grad_ms/global_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index 3bb1459ff6..424f16aedd 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -46,7 +46,7 @@ class GlobalContext: try: path_valid_check(output_path) except RuntimeError as err: - raise ValueError(f"Invalid output_path: {output_path}. The error message is {err}.") + raise ValueError(f"Invalid output_path: {output_path}. 
The error message is {err}.") from err self._setting[GradConst.OUTPUT_PATH] = output_path if not os.path.isdir(self._setting.get(GradConst.OUTPUT_PATH)): create_directory(self._setting.get(GradConst.OUTPUT_PATH)) -- Gitee From 5713546e107058283c5b25cbb9dbf9414b57d886 Mon Sep 17 00:00:00 2001 From: l30036321 Date: Sat, 27 Jul 2024 16:30:10 +0800 Subject: [PATCH 095/106] fix handle_tensor_extremum_nan_inf call position --- .../data_processor/pytorch_processor.py | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 9441aa79f9..4cdd3ea046 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -78,6 +78,22 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item() return tensor_stat + @staticmethod + def handle_tensor_extremum_nan_inf(tensor, operator): + data_clone = tensor.detach() + data_nan = torch._C._VariableFunctionsClass.isnan(data_clone) + if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel(): + return float('nan') + finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone) + if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0: + finite_values = data_clone[finite_mask] + return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(finite_values).item() + else: + data_no_nan = data_clone[~data_nan] + return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(data_no_nan).item() + @staticmethod def _analyze_builtin(arg): single_arg = {} @@ -130,9 +146,15 @@ class 
PytorchDataProcessor(BaseDataProcessor): tensor_json.update({"Mean": tensor_stat.mean}) tensor_json.update({"Norm": tensor_stat.norm}) tensor_json.update({"requires_grad": tensor.requires_grad}) - if self.config.summary_mode == "md5": + + if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max): + tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max") + if np.isinf(tensor_stat.min) or np.isnan(tensor_stat.min): + tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min") + + if self.config.summary_mode == Const.MD5: tensor_md5 = self.get_md5_for_tensor(tensor) - tensor_json.update({"md5": tensor_md5}) + tensor_json.update({Const.MD5: tensor_md5}) return tensor_json @@ -168,21 +190,6 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): overflow_mode = os.getenv(OverflowConst.OVERFLOW_DEBUG_MODE_ENABLE, Const.ENV_DISABLE) return overflow_mode == Const.ENV_ENABLE - @staticmethod - def handle_tensor_extremum_nan_inf(data_clone, operator): - data_nan = torch._C._VariableFunctionsClass.isnan(data_clone) - if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel(): - return float('nan') - finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone) - if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0: - finite_values = data_clone[finite_mask] - return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(finite_values).item() - else: - data_no_nan = data_clone[~data_nan] - return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(data_no_nan).item() - def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs): self.has_overflow = False api_info_struct = super().analyze_forward(name, module, module_input_output) @@ -228,16 +235,13 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): else: 
torch_npu._C._clear_overflow_npu() - def _analyze_maybe_overflow_tensor(self, tensor_json, tensor): - data_clone = tensor.detach() + def _analyze_maybe_overflow_tensor(self, tensor_json): if is_gpu or (hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan()): if tensor_json['Max'] is None: return if np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']): - tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(data_clone, "max") self.has_overflow = True if np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min']): - tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(data_clone, "min") self.has_overflow = True else: self.has_overflow = self.check_overflow_npu() -- Gitee From f18e6e63af3d206e59b7607859fbc6cda8e9cbab Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Sat, 27 Jul 2024 16:37:13 +0800 Subject: [PATCH 096/106] FileCheckerException spelling bugfix --- debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index cf7ad912e0..ea9323ae09 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -492,7 +492,7 @@ def compare_by_op(op_name, op_name_mapping_dict, input_parma): error_file = error.filename n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True - except FileCheckerException: + except FileCheckException: error_file = data_name n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True -- Gitee From f95a776d221cf886fdac343e57e78853884822b8 Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Wed, 24 Jul 2024 11:52:37 +0800 Subject: [PATCH 097/106] =?UTF-8?q?[profiler\compare=5Ftools]=E6=AF=94?= 
=?UTF-8?q?=E5=AF=B9=E7=BB=93=E6=9E=9C=E6=80=BB=E4=BD=93=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=96=B0=E5=A2=9Esheet=E9=A1=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/README.md | 1 + profiler/compare_tools/README.md | 54 +++++++++++++++++- profiler/compare_tools/img/OverallMetrics.png | Bin 0 -> 66941 bytes 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 profiler/compare_tools/img/OverallMetrics.png diff --git a/profiler/README.md b/profiler/README.md index 1669e3524e..549ffefc14 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -91,6 +91,7 @@ ascend pytorch profiler数据目录结构如下: | profiler版本 | 发布日期 | 下载链接 | 校验码 | | ------------ | ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | + | 1.2.0 | 2024-07-25 | [msprof_analyze-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.0/msprof_analyze-1.2.0-py3-none-any.whl) | 6a4366e3beca40b4a8305080e6e441d6ecafb5c05489e5905ac0265787555f37 | | 1.1.2 | 2024-07-12 | [msprof_analyze-1.1.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.2/msprof_analyze-1.1.2-py3-none-any.whl) | af62125b1f9348bf491364e03af712fc6d0282ccee3fb07458bc9bbef82dacc6 | | 1.1.1 | 2024-06-20 | [msprof_analyze-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.1/msprof_analyze-1.1.1-py3-none-any.whl) | 76aad967a3823151421153d368d4d2f8e5cfbcb356033575e0b8ec5acea8e5e4 | | 1.1.0 | 2024-05-28 | [msprof_analyze-1.1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.0/msprof_analyze-1.1.0-py3-none-any.whl) | b339f70e7d1e45e81f289332ca64990a744d0e7ce6fdd84a8d82e814fa400698 | diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index 78ea5d8971..2772ef9984 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ 
-174,9 +174,13 @@ python performance_compare.py [基准性能数据文件] [比对性能数据文 MindSpore场景仅支持**总体性能**和**通信性能**的对比。 +比对结果分为打屏和performance_comparison_result_{timestamp}.xlsx两种形式输出,其中打屏输出为概要信息,xlsx文件保存详细结果。 + ### 总体性能 -总体性能比对结果以打屏的形式呈现。 +#### 打屏结果 + +总体性能比对结果以打屏的形式呈现时,字段如下: | 字段 | 说明 | | --------------------------------------- | ------------------------------------------------------------ | @@ -196,6 +200,54 @@ MindSpore场景仅支持**总体性能**和**通信性能**的对比。 | E2E Time(Not minimal profiling) | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | | Other Time | AI CPU、DSA、TensorMove等其他算子耗时。 | +#### xlsx文件结果 + +总体性能比对结果在performance_comparison_result_*.xlsx中OverallMetrics的sheet页呈现时,示例如下: + +![OverallMetrics](./img/OverallMetrics.png) + +表头字段说明: + +| 字段 | 说明 | +| -------------- | --------------------------- | +| Index | 指标。 | +| Duration(ms) | 执行耗时,单位ms。 | +| Duration Ratio | 执行耗时占E2E总耗时的比例。 | +| Number | 计算算子的数量。 | + +Index列字段说明: + +| 字段 | | | 说明 | +| ---------------------------- | ------------------ | ----------------------------------- | ------------------------------------------------------------ | +| Computing Time | | | 计算流耗时,计算流所有event耗时总和。如果有多条并发计算,计算流耗时对重叠部分只会计算一次。 | +| | Flash Attention | | Flash Attention算子。 | +| | | Flash Attention (Forward) (Cube) | Flash Attention前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Forward) (Vector) | Flash Attention前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Flash Attention (Backward) (Cube) | Flash Attention反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Backward) (Vector) | Flash Attention反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Conv | | Conv算子。 | +| | | Conv (Forward) (Cube) | Conv前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Forward) (Vector) | Conv前向Vector算子。Conv前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Conv (Backward) (Cube) | Conv反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Backward) (Vector) | 
Conv反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Matmul | | Matmul算子。 | +| | | Matmul (Cube) | Matmul算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Matmul (Vector) | Matmul算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Paged Attention | | Paged Attention算子。 | +| | Vector | | Vector算子。 | +| | | Vector (Trans) | 转换类Vector算子,主要包含Cast、TransPose、TransData算子。(仅针对NPU数据) | +| | | Vector ( No Trans) | 非转换类Vector算子。 | +| | Cube | | 未识别出Flash Attention、Conv和Matmul的Cube算子。 | +| | SDMA (Tensor Move) | | 拷贝类任务。 | +| | Other | | AI CPU、DSA等其他算子。 | +| Uncovered Communication Time | | | 通信未掩盖耗时,包含卡间等待时间。 | +| | Wait | | 卡间同步等待耗时。(仅针对NPU数据) | +| | Transmit | | 通信传输耗时。 | +| Free Time | | | 调度耗时 = E2E耗时 - 算子耗时 - 通信不可掩盖耗时。Free的定义为Device侧既不在通信又不在计算的时间,因此包含拷贝时间(SDMA Time)。 | +| | SDMA | | NPU为除Tensor Move外的拷贝类任务,GPU为所有拷贝类任务。 | +| | Free | | 排除SDMA的空闲耗时。 | +| E2E Time | | | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | + 可以采取最简性能数据采集的方式来减少E2E耗时的性能膨胀,示例代码如下: ```python diff --git a/profiler/compare_tools/img/OverallMetrics.png b/profiler/compare_tools/img/OverallMetrics.png new file mode 100644 index 0000000000000000000000000000000000000000..b130d3607344c983a9304440e38a45fe96a4bb56 GIT binary patch literal 66941 zcmdqIXH=72w>GK|3i^maK`9z~Rhk6pT~NT#M5P8I9So6<5C}-ofb`x$q$tv)geFxW z^j-p?_YyjU651D@x9s=X-`?Yl^W*$EW3aeqa>KgUTC=QcUTfZgS{lkX|Gf9-rAwD? 
zs;VeyU%GUq;?kwd``5@x-#GP%tdlO6owSvoTq@`Wu8=-lu~JZ1xOAx~?8b@7Rnq6{ z_A2^Lmo5PsFWTi6yYJ?gE}dtoDk;2jGhVBa30+X>=e#GCbCt5!^fDuiCttMp32U@w zzKmza<6AuNaJL6FH=S>!*D?l2TXPchn|QWQ#3xjooOb0fr>vVYC~*hXbD{T%NeNN3 zV#3B}J?BSqY18Ms=O=R8Q5>o})aGieR;o!CZ6!*Q?eF5sz(8Ne!RC})9rAepsO}7V z&ZZj4@$a9u0f$p^I)Q7xECL_n;^H)!q}{fr9hTUeW^YB1{-{mLF|hfHP$;vUSEle+ za!qA5``cF>?mlX0_M@O;q7aXtCiPY=l`zd_*>FXd`LDaej~ZMe=h$z-2(bk)iq*ai ztEyN<;@nH-LZ4xBB}_h50yxO}rIZ#op#l|K@P)axgy=@4RQbO3uwT>pPko)bs%|Fj zZ}^%cmHXaFv624a|qXoBi4P?G&)N)okBgNVW?W8jjnvmfID#}7M#9BR+Ap<>t*aGog0?+ zKo)Uu7K?!%bEZ5qGCW zGOfV56GNE2nd#XTAu_HC;v3%}mVl(o_m2?%s8Ba}AS%v~F^)-2#x37*D-iX}-M(}w zPF6Z)Hl1o=zuqsslU0TTa2vkx6YJ=9z~iLoA%O}tw?%$YJGkrqr6x^>vHQj1&?uQ1 z;y_!9g{q15fThfAN$`NzqF=|hi3w2)^0wh6eakLcj~+&8VPf_{P$NO7CUHlB(>>{R zY56|CR_uDs(Y2?t0Ei*7-N!K}N)Z=8nViwd03@mr1Z?mO(9IK zaPaQdM%Hh(hP#+>bFk34bDcJMO*)+a_8r2Xqi@elHJ|J$AlGQ-F`^Y3ONpR@(zqi_ zf=%c)Q~wc8v+HHIFqcfg$y#ml(KU4Fum@lXB-a-dHyD)^(&!r|)Sau5ZdN2}j`*+O?W$!1S2 zORCbo_4yBgJ!yx07Y=2_BO^K39i)m>=ZkKk&}9hocM69Qk0%}x4_rWU9)(pJ2T=ri zR!odU=q)(m1b!Zy7g3Z4$L%}9$J$sA3a|=$*G^R!0u-Ge-Z|P2uPsBunTX*MffGZR zaK#>703?GMFbHN`yn;}+OEzRCGCd2s(8Z%zueL}!Mb&F6e`FKBo~xA#Ht+oYr7K6| zl*t9xYI*e4(l=IPIi5ZY61~;F29}PzRh81qb>*u@bf|x1ll&8V^+)b%#PI#zb(~o6 z94A9@WrD}&^`|ZA8inaKx<+vXFka|MFs~EP)H-*GxkoW!StZRdG}r<@Q05KBzP_(P z8u{gI*6s((%G@^T%6&On^M)Qzq&la{n0K-sa~DZfa!X;Ji$J&0opCSHRzZfKwb0$G zPXrV?KU^=6C53})vzGPGYMEvl*o@~`RRQktP22#&rPrlsmwpNg^NvOuS%-DT?=5~o zKv#`^<}{X7Sh0(g))aUhf@uV1*1Zq=9)$oJe3D%|aab+iv>%T7H#NdBj&H%z+f%Q?ClN$ zr5#xxayJ9CFA)uEY)M(P=6FG#Djc!Z0bdy0V+TiovR=MoeEOkFFw2;AdY&)48@>> z2Sxik<+RJuOT1Ru*~wCmc5%Ij;v)Hi@X8nj=xvd^Tr}a--wg69_xUf@AKg zDzO82LYHvsUGJaKN3HZ3f$ZCi$NXsD=ie%ZVI{=kM5qba2luVm+^FA>z~l=Nn^dXm zHC}ZH)%1{z$nUM z>DyR`rxI$CXzi{OTN6uW!9tw1CPX>0Vtz~eS842-ECtFE4K{A1A~{m zBoYl`1>!0LJ@?QD*@0(l(8WI(;;CdU-honqlhQC$svJPdN}WNsVBwaL$K2|%VhvE6 zt4Z2^`sIf5BG+jqKOlnF1w_9Bxd)D%xQH-AZQ!{>^(|cOtFqwZ8YvTZ;Iwam~b}y@#pzocyh?N_ZWQGvP;IsGV&>hODh&tAE}w>zp6kOHA46)7tFPqJ`pAL@I(0JQx@%pNuCn8}aB{ 
zUkGX$|UU(nd9c1hjuoG;Rmv_XIaNaOl%LMuOA~jViUk72%Ow>V7sDi1 zU@hUtB|Fu}jaG>i#+Yil#~o{&H!WtIkVjz0^&6!eH>&BX(fP4{c=>9j!YQwCuXG5v zBpX3oRU?`amv=s;^~Py~C_RYYd(#4i=0!mar+~UR)N!!j0MYpEQb;AP2!Us?nZJHR zh8r^vUN*zNJWx(a;a&92^chF(L628FD6yaeaq@X7m;GAvcc6k!C=Tqc&JSKJExVD1 z_G0%x4fN~;Q;ThWh~JIgCgXg-VMccv#)OWta(2bQK#|U-SEbo7%u;HFs9whdyqf?&UrwH-X8h*0DKA>JP ze835ENm>7fY}g+lvIa0bY8axRfm~4-r*sgGW4ArozDIS%vGr3JD5r&tvVFni>VkN` zU(@qJkA){Pp+I9PlIuJb07URR)u)e519zKQ4}5!Yl|w$`>?Cu5lmmDRb+{-njnaEQ zM;#UYx*Mth4`y-mjb$^u`G$5O6|#re>qO)Jr8G_wx$How%%@>sS29EfXz=BG5l*tG z3Sg%Sty`3xo`u>kf~}E9 zU&cdOcf(~TWv1&T+O^$1+y3M;Zl|mvirkJ|G=<{kv!w8_RggrwY^@sV_^GL)@jZ)9 zwrx}Mwi^YHxT>DGJ8jsgZ9aXCvqVx(9IIfv92TU zNUu31$P*YU{%-ZxWYnX*wzKP&JRQTsgKgtj1ZQrU?CAJ!t7K&s+e=HXll|`)b<>h0 z8Jq&r;i8|Gn|fEplDt=G3VBa;8H{E2(iZIqxtpB1;Dzm%h0w?}q#rM#<(`teWlaeI zyfiBLDH!Q!tplxWw*l+bIgsv-pDRe7>YtH`3+-jCIF+BjLoE zsyOtk?%=4`^y)!&8pQ8EQ8eqjMMr;%4{jk8e;~h5Qc?O{sBSm=p#1m3@ASay zhFQ&q0ETuJV;=Pqi@u6X##{PQ<);Lh(8)U4JZYm(+iO0qRr~;(G^R(`<04comBc(Q zH|jW|=bnYM-8*rmvgN1D2#6Fa!pk^?gcoR>VqA)lgp;S=Ky(sHbi|)W4eXE<}Gz2%ZMZEEX`yd1cM979;T?I`%OAF$AfTbX&LwKA94_Ft( zS(F1D8R*s>LKHV{z-?33x#u#*_oHB*v%N?|NBe2WMnf54@kh(0eCY{gC^xe0M!eSw zq~lIdT-jD-?c?m){Va8J3nbi-vn4bgdd~q)qk@*0TktxS`|+EkdQu!16%q;Bz6nP< zL2Q^8-9xVE;$?O2xmR_r-f5!OH++O{K`Nzmoa(I}t5{T=&oaH7K*+6>v_DRj5_{BU zjIGacT6)ymlm9XppUbd#?_4!)fTZdEoMBX{!j!GL`dGzAuWDk(BV0gm&SKrFev0yE z#`_ZbDNKT-C{Gi-yGP_U?N*6ju`^ydQt<_^KD9ESas=pCK^Ey4h)?g|B~REkm_Y46 zZEY}EuKc3D_;RJzyvv}LXRf6zJ8hWiKXGC}QYFx2E)46&60Ruf8J$J(QI89*kc@W~ zuP!SCEbPlmFloF+<2RK_qBMfZ!TuccVHOG0u-1<}jmOnjcn zSq9J-D4Utjg^BP$6C2aZi-fs~#~3q3fd$Yb#C>LbzfmHP|?=T8DfZkTPL4Vkpn{a;8u&B1?DWP zNa$EPKWHdsnd8^NperL)llY2p{u$fr6jF43ucl)vYt6g!+bZ~aI%JKP9e0&sRJz7J z6I3ZTA-nou#bijoN?BiQ!g#Dwpaq%d3Mcv@@`GTVR7=@uOt4GGbb3sUZxDn#ZJ8z^_#45Jiw*pC? 
zPRH0fzQDAgLZ$p7yI=scH?#7prw@;UAzN!qq!|YG8!l`hp?o+*_fz9!iEJi>=xbd_ zWn_gVTXK{u17O*15GtrDxq;sm?*n<_oFsZRdaf436IvrBb9obfk#Il)s#86#L$3;u zh5+|-*ZO*md_n8*+_K2t{ItAb!nf9^HKaiAjz$34Ue2pZ z04n!aS!fot!(hO#R1Dw(8Kdd%Z4In&1z6XvC{D3t@u&p49c~9s`ftJ4e(jQkJ3Ju(Hh^G>M|Y4ZC{63WTwaO zcn9P3uBuqaRK1aB;fzb^?K%_Lq_yf(b=}NfmkfJco(I7^ciA~lT9Gh6NIy>3#OqZS zCoUb||F#iGV*u`l4T`j(d*Sb66q$zQgJD$ey<~l~s7}rDIFw{>_iJ;1RLxp=ksb1l zm!qH~v!kXXGikSvz9D1ePO+hngE8uV)hLf~EV?{KxA0YWB}f9sICsy*QH7ZwI)%a- z&(DQ>At)cvNc1ZMTAM@yBt29-_OOz4M%SL!M#J>k6r+?+uMqZ{d{uFBRIkPeX|i(*RHI;8kcYOkTjTYD!{7< z2U9qegu#+3UH|(OtB2U)VfTlVxa`M5vVU_UDjI1F72zieh2hJozhLD&!Va}v%(CMQ zH=AQl?zxy=^7yC@h~RKn6DW#j!Tc9Z*MzeB2irHrUi&J7Lf zdqmPGj#B6bPK|l+HtF&oq8f8*=wF1v!#7%7mI_I%q_*NG$@X_~rE%c)1x$oorEG8F zvlhN+NG;|6CL-M=O)ogO+z$!~@mJxWNG+RcHQ?`JHr2oROCFk%FI~4tZBK#m_TNQP zKnb{@jtc%Dw=ktGWgxW#+3)Os7ZYTq8HFyA+t>A9)H;1d6-jM?!bR!t;sEL!`(NqK zpY+jz`n{F`q(=M_dU5#=k+|jDbcIggmhNRg(BD_+|3`~e!sY=XLZQY_M+diRlnboIPcYLMkMJijNZfX zFB&=w52S3Dwxq&QhK>4(4#>?mGOLD0i7IwSiVkoe@G=chz_Y!ZMxsc3AgO!E(i^SC zi+Y^2 z*2^{NYmu((ut!y12eQ8**toneDBVsY;blB~bRk@zHBCX+PC;{9a%G+06X#mu`71m* z#^xQf@dOQZm0lrplOpt`3(74&C(WV1Cf@_u59Pp(z$6fkf~3G0ACzn+ov^F>kI}l8DfU#Bm+72Cix{+q z(eK|>!$#_tZ(~YGEH){8U?rUS!l!fmIDL1}vsdTYn2dJ9LuUF>Lmk4*26bU?gh`w= z+i{jx>vu+M^4KU9oQlY*o(gTUQ{IzCv#)CYa@V)fMYatWCSP`~(T zRQOJC;ptKTZZo83=c^}NHnA(ud1HaSc{ZY{5ykyipKmk#&Ac`f4Mbi`WKBvKl1*bZzQ(JrhriQ;51ul>Q_ zEjO6#JY`w{ij(&!>`_}bsqwm$q(AyA&K|j4DRbRn3h&at`y*E?ma`&q1CkUWiH?1p zrUTCsFdV}qy17>F|Dv9gp1(HN3|oF0>MXbYGOa`A+B#bH*}`*V|0!Y`(t(mLje%(q zS6#)ibJOxr8$V45Sfc$=*&zPtIU^t=q8JR@9(_RXy5)u~{BYP-(I`aC%j2?au6S8I?s5Jd*$Vh% zX$&2TM5 zd5=+gg+Q^Ynot>0ffe^riB*Y_LyILCo*>Rf=dZ?K2pBs^pjuV{{9Z zzUH%E{*!@p7NXwSP;kWgmka0ukmtSZ-(5JWXik?2#V!Ut;s%a;6;+G?UW%nuBjep` zW(9)!CdT_$nIX{e0na4CkJfPo9}iudiRyv!?nQ;1S3Y0)kQ$eeaqxs!^{N!qFfAl% z9<#5osL5bxZ6PO=jw0alP+N~hGZJuZ@ew9~P0KDp$#Ra}zQAK>;m~P|xfzRd)hf?Q zw~^q>Z-ijH8n!I|Ub!9(`!~xtEC&V}_NdS;#C?^$t7Agy3s_eY}=b6}fPOTwZV zfv!55^at9a_HFE~r4_cm-~vKrR_%ib&gU}?el2srEx1JWm+TIXMU|RRVR8F{Isd(w 
zOML5`v?O$xstSp870=91$w>9v^P>d1)*?v1fR(hX9!15eM<1Bxf8tF#@+S8XGxe3j zC3`h7UCWJSjg{}-hg8aR_#DS5;`R49sLZ}cf$VB*Q-*~B`7lEi88)1Io$skYw_S~_ zNhj&(p|j3yd)Q)SGP5=*PX(E7#c@6J)cJ{gxro!CQ@Gboq(Sxc4iTOG6&7n|e=d+;akB!!*Xv5&4AVJ&vix&yta(=c^agktSGlp+pCGCCExN;@g23^_n56mo-5qo;2r{s=0OH))$NbhG8wbU3PqF7S_7$jH_PJ-i+-D6@ToSo ztZXurono~}!L4|n_)m+jqO78>amRloTRJFZK#sVwK{3BDJM*?b1yrkVe`epyCP zSb1~dJu+Qn7fmppWsOB4jy{FPppE}!#R5T9&k<62UDk> z_yBx&nauy18@Uk$TQB zmVsH=bpb=vu0cwTs*t43(u3rSZBmAVcKYY`r50K+&@ zn3i5x8W-1Y@I)S?V6^Q`n?QQ>K}_6??Zywg^^b=M7cp69$>&=!gZcmpdW75~d}!G<|B589a=(?<3gW^91r@F)U z_bnc`s~{pIJDX2$SlNW`n7)m=YaD5^Y(bArhfLL-fjx#YTWP2fdW8dsfNnNTOJQF1 zU#*Wnb#Bi(c~H;RVqwZXk`V#;A6*Og$v7V&FRi{sU1E}(JN%@O0lJJ?hn#<7K4Sd3RC?$ZzCN_7gT5z~aw*JL)fW=NluHFn8l!Kob5u#f5eDsRp?IL++^$pe4h!pu}Cz+uaY1Ut<)`du(bX~5MJR_7; zQNgCVBdOH8-L6{v?4P3>yiFS9KL$z?JKhM-P>jAAj3Ax-a;O@ku1Ok8s1B?Ak9A25 zwp=GH_k$%t*4!0mONMKuJKoiC@QvnkvCaj~&Yo271i41!50A0~@4h0H9iQuIZSAFY zHRRBK7p9lT=Q-_P&jW)$Gw*xu)%_mHrDW$0a^KrcqCv#%nx;@UGd;7tY@2`Zyk*!6 zyk3Iru~D9k(_=@bj6Q|uOX5e0^KT#3&^yCSQ}su!`QDYnh(jMvz(->4j88U+#9Tb% zQ4yhhlZL;}luwqT=ozsJovnCW{FUxy66ju;tEQ=}apfH`FB5L9X93ooQ*I$ML$ol~Lo+Yi_bN7h&_x1IgKb`D+COa+!-T3Y^7#C| zNlv%Ok6~$3=JXA%eRdMwbvH26m@lI1{Y3ZxSr8;+vR#kn)y=x0-QLnOz){^tA(#6* zRiz{}G)ig9ninR$0ai749Z-s4C?++-sm9?g`Wvjkk6me+p?eVRE(&lPWQKQs6JC=Y=0q$RdHQ`vCa=s zYv)V0HpW}~IXx)sI*$xCqklyW!937O{BnDY-`A-{=~wT0U6T!2(Sv-A#MW-NLk3aW{aIJS6cp2b@tNvyi3ApaJD`SOIwY;r)SqQe5-N6hu-65 z@ElE<;$$e!a>?C@e{ckRV#HS?e%*%~96WoVB9*|}GnJ>!ZPg;S%qH(QTEBWj05+O-cp&PS6f z=^{0W23QZrYV*7Eh`>VQlvd?PFf}B9Kh7LDR}n`%)n`T5 zXX5il)iR5saENl^EBKAl9hxMeWx$~1y^!WE*PC_sVhStA>e_N|!^(}JZ)&n840N^` z4Zj@=(K09Lc^67+7rt*yee2I?E`(+^Yaf26);YjiPO5x)bl*GaWcFV8fr(zyR%4dB z{fxo+uKaO6f0qGcJKAWOLoZhYgYldP!ZXhjgXic6X&@6_WZfoyA>yIZO3zbjT$62G z_=bj^+$xzQ1Pj)a*Q+V>^S;{d%feG4)FZ=`4$;ZVbuYX^@oSfrZc`Ns-KO;JFKTG| zv$VtWVc+gpp2mTL>4(mtD0SmFoTUzKB&VZD!$-t$HUIt4@E;2ep!Qg|d`vV{C;WhO z_&0By$Pj+eU^AGkY@v;=%(PdAl)-H0(*+r>n;JEfe53Hh*E9{Fani5jiUpIYq_(L_7AEQ6B0NnQm 
zT$vieqX;zwe1#k|R6@#EAB9<$_{5W8V(kLc^F${(>0QjHxhqQ9hv-ujCCnCq!!Op* z08x$U221j9WoFCx2&Y1UUxlHHhWbxO*O2dm>V67|s-~v=bdV7AEZnJ&K>NvEWhia{ z*BthEUu8%=M|h2WZA$uD08-vT42rVzG3k)a-OEJ>PGJ}!PBnF zqI33$nimD0zlp5@P#cEe=9Ua}Lb`}L1Y7buBK`-w*aCK7-c|TnB9!>rRNea@;T})t zRZ)6ffFr{8cZV@H^0ml@s(qWZbIn;9HjBZknyxVYXBovJL$43=nMm%5cvaN3ymidm4! z?K~Q{YiKg_o}L}ezuM_GPq2Y!2jm1gguHeSUg%CNAC}5Mi##1kXj&&ElQjD_DbV$W zeoJr&;oa!)_Y)n;DvATltii8T=*YGECM~e^Ci>IhKDcLHN;$_P+?>jwz^bQq*CKbDgJqqH2KN!{v$h|7oqRy z0_r);PUw^ri^E2P& zV#X$wgsMK3e!kXxiSebu_9s$+d$OsY5R(3R^ehwCnRl_%%eS_#sg3@WmX0EM`IGJv zv<@HBRu{rBDTlNYp1hlrO~8`kK0SyHh3M(qXSM4|dWK{>qW+TvLi4Z`dceV@sgNb8 z=y0UTpw-<@9k%}}D8=~byZUhI-A#qg7T))pzh-r(1J3pfF1z!6B3!>%2)Y2J$h=&R zjIGLi!ZTGj8;P=fjpyi9|8UMZUl6mZvyor8U7v>igunf?l!Oj{cqGS>3)`~W0+V+J zbEJ>3sAA-FmhEDM*)2062{6XgF*djv*KL6j7XwsuaB4|?pTYWc4h3fa+1*`7fvyb$?O8$TC?HNVMILhB-d0t<&<(7(TY{O3c35YLnL+ z;T52uE){@@ty6Mj>|ej5VLP%kvOD$ww}G^8eVs_#?osDQD8%O2^_`tigm6;LUPt)~oSCEG;i(SW8=kczPrWM90V6Rp8lz)Xs?pVj>r`GH;Fz`B{i)KD z!bvY+5A$mI+=WatH~c%D2X_MV)SH(KTq5%6)z2kw;E&7A1b=74MTS$04q3&Sf(&8D zBly(g*StE3RkoUM39v^|MS036`&g*Eggf7F%im$r2^j(YksK+HYm{O%tVaL%jgRP$*XGJGw)P9+dhH)%mifEo>%ZAI^SxeUqiwdbMrKxN* z1GMuVQ;w6VrvBww%V%6|`5k6Y^6qa1Y1c|Wo+;TkeKwht)kT?i-_**IrrnYp&mmC0 z7AehT=#cG}tpyBIShC8_h}CSu$M%FA&+T_e^4~X8J$izfH3O9}G-umAhUXUT2RBnS zBzExuOFvSC-L6O@RNdt@tQ`z4a?wHlxijeT(4EgTq%rIvX*Q>>cf}K@iwD;YK$#k~ z4GUc}1|;M*zIi@P%?*>J1sbe6+dnP^Jp|2+>;KV~q&SQX%~>3{SMorU(6?i_qq5`g zpGLRPS8$sQ2rU?H(9|Ap74<~jWk}i-Gwy8tHGu2vt-4!B+3Vm@#JEDoycG?fws>2@ z8`zUXVg;_c>s2{$ITj_D-hAKu#Yq_ZVvO5BR%|{@6$N7~XqGce8RnmtjxAvRSPNYl zSuHX#O!>?4P-YGtv`o^^%q2e`7zD=TFD2^8h2! 
z|Ct1Ngzj4}sU2IadO;E29?!lMA^Kf+K4Sj*b?@zVx*uItujx+Z(T&-1^K{QJRY!sj z^++N1&F)UiXIOq1#l5l5qNPon(6f{G1QUJZ$?~|v*pNu&bY`oct~GJ9!$jF(9382W zPN`!_U?J?2+oYMaER#{)y`#9Re%rKp`7&nP5|wha<#YAy%>io4`FfS*@A!B1Z0L}& zn@P%?q#(%?(DZ`e#$Q(QHm8!u;0#S*j*f3fR2bgC7fb`s%y0dUuDL>jVSq!;JFi(_ zgn|3{@Fjv`!e~79OU{@TQYfwcFkc9@kn?6BQaO^_yevWri1=dTCG|}`qUeOy{rZQ* z8VZ4({idqcqr<9%3S4jsI=(i>&NavD@{3aLuqPu*I<(k2Cd0=l)9{z?MQJ75D@MIK zmaOCQxBCZsGCT57)Rns3Jx z{@!Z)Lv=Cw9XO?F=#Z`UOpd0Kao*+ri-Kdpr8%mQ`P2AOwP3otQlt0%hKqMW2f0FN z^xKsNz%JvjKe@h+J?RN=TuNpy4sO=VP1SsC3$rIh%bt$YBE8YyeiyNFNRMIj*FI!m z^_zHAVKdqkD0^XvE$upvH}W_mdzIU_yB_lDjzor?MM~$k^-ket#VTGIInO>GrQ3}| zO!vZ4VyzbF=7#>r6hEd`p-mt{aINtVA3bh)1^QfR?T$X{7Ew(N{^@{BIVu}9H*|4` z+FuP0du)*h@f<)dvu$eRrRW;x8S)n3C}iMrlZq_ncd2~99_|$!VPLqhJLL$EYl#$j zN^-@wyu2lvhPTuRZeh9_*>3MXXe&OM~?I@c;F~(qLjp%w0E+ae7zuTGp%st(_`qAcXm~IPuQR{UADQWXKhWj zm+A7|s`IQRr|a$juiCnYr>~tocur0{y^%+rer%7Hum@V&EeLPlu<{cosZ$Zz$^Hsq z`uMqw%Ltd5QTsBc;o2E9cKqPm@{taIRZ0K=2fEAblKz$m(!{P@~`>PuzmAuQ)H}~ z1%1zOu+g#!kpLlnjpXSc;&0#YGS&wN-;$o3oZOiY-0;R9tzDk)P;5rGRMGEHFkd-( zORX^lt#0}hK4s=vanEyPGG_Wu>+bE#Ggp)0C9jEMqI6iR*Y=g&hAGc}8I@ValR@(U zfhI{p^!utc`ws=aoUbNX3v&dE|KXUVAIYr`)I(03fF5E}mQ-BBj$eblmqD{um8nh) z8QaMfVgl4bF4ZA%caKf@yskwTRnSu0E8Cj>>4olq8uWq^dxo3zZ^1D$)Iq!RG~KW^ zjuh)ij@SaaGV^O$b?;E-_uKoJm$Z%VFI});(q6v2#)4wZbc*i{#9+Pq3bQ}@?XEHn z->O$+f%^6De5h!<``OdT4nql=3=&d5o}IK>h-vk;W}W&fV%)2LqTU1(F9!_;Dc$~% zB4N2>4is_yF}88t<*a&F{g3tPxO;?YxXYV@jOy|(SJ6@Vr*gLm60&Sx$Zjt4pM6?w zT*k5ul=meVCEd+;?a4Y2HcRVL0cym(t_P?ApLYjQ)GcJaeMReV$6S=G4tiiJpI>QU zr%)>q;A2@qRn)vSbIhIJ%_m_c3Gdtd7gH zt2N6Die#TLX$7wj;_+7(&we1Zeo8*V(zh5TU6N_BMbex|$Z}PGpfMr9v<{z^8DlK3 z+w9jeIHxP0F$0X9iI3Lik8n@ePu>vy~@216q@UJNrPNv3JgwCE>jsCpMhgM>ox3 z)gQkt|HuNs337}|DRG`}+b(h1Tt(~%F-s_W=hSh{iIa!zdfNvMl!;E!KgtxKs%6Lv z{;YN-h;L@Lixm%Rd9`a3G8+Aj*o5b(WGAHhZWy!Y8Jk{Ia%`^mXwJrh>>MmOXhX*)T(!d+<_(C>M(J7j(Yl}e^oqv zNTAIKONrGtoaKoxV_+G!N!l_EwiDZ$pr89H6Hqtn^uU`IDkDttjbRN>8QETsZ8lu% 
zZr)}ERAf(V-paQNRa_x`uP5-A*|_B$QzMJQ^${ww?V}esHBNw9b7aet>}3Uo*zQ+S)P8sWOY)73;Bym>(!au555)3IAcPuOL4wQQZ_@<#83ZeKMbPS`@Z(j zQ1JD)zPDlz{VJsP$J7A#?|&AbmiKhpeSvFZ&>B%Ijp012t@RccysD==DAV&>^LXdVW#R4G-a*3!r{4Lz7#h5x6 zHg!qN-F#E9efy8=9SSlF5?Z@02+|Q!JU|?`*S-fwxmSzqG{6$(m}m?geY+(Fp z-GuEc?p?!{+oe^%KGI8!l-S(Kafxh#!~9({nEqw2Y0(3^pPd=eHKU5PZ6o_d`IP31 z`a_z&axE?XboOj(I9fzM5lS79aI^AH+wzXR(Cx0OTP^iJyR7f*lNt8Hj|G)~(!r%V z+>cG7#K|3#98BNS=B9C|qe2Mz$mOf}UQ;6gA?vy&uUll|~m|8n|>HjOT! zt8}*Bn$KNcua5Ymj!!;ij`<@cv5(<4Jo`yf<`ctHetwSVOeRvuk2H@d9k6ZdRjTAD zd`u`W+mpb78io{Mej+a0mmf}4pLTd$;s`fiiz2%2`@{(=68H1Oa(mxO!`U0CriTB3d zwycnh|7FHcQXpK${r3CX+sEYXfb7Q3l&l^_d_dp0Ajjf33j9UWOGgb-oBACQAb-*U zxVg#4}n;fJk101Bo54~Q!u;3YSdoLcXF zT^)rATet6hiT|Z!Hpp7oJSr9X&m!HBaoYEPkYt@rua(fU|<{PYvD zzZIuSgS<};BO<0Q-fVs-Ur%~*?Qg}q|DV5HUWwQ6kPL0-#f@)JoghO`*s4tf!#QF;ydMRUPVsV_lgf8?u3?G12(!#4YEn@0*`2E zO5X5sSvKg6Qe`RExm?51GVa0oRvZmf*Ssa+ti392T07BCV%fARgWhc}vxZE2ZFtA&wWKi~Sn&fG`Q3!Zl%gDuw>|KEdUX z@`>*Kn^|sY?~Kfl!O~yo$8xaP8=a>gM=b#;hrxT6PR1Y8!y@?);LajH?)%71wJ6Zq zN2k)oqv=XWv(uA`k%K1K2IS4m4uLNv1UL4o?_q8UEu^36>s zI;sZrYC^`e;#)?`kz9pRU{OKrmj+W6BMXPiycujx;K3^_=`(*vD=-UN5%+sMSn~U0 zubri&Cf}~5cl}!a7Sjh`R>VGy&wMFg#`K4W_vNYy*MbesuOe~&dE?!hZMl2zmyYE> ztmpM!ue+AJ>96za1w)e!$p9Ok0nH$`ev*d|WaD;}_j{BZ1d`lA|J^t`Eg!G_THsup zzX94{Xp~`;X+Cd2Ka|lPYA(|*pv7Ha2x`86Q#McR)aJ@CK#4G|wv1Tmo3Ni~*ZNMk z8C&6vt;%8W5&3&$p0t(mGFILGE`Z^ZevumYm?X^1;u1om`NW_~f9uutf+win@@3=n zp?9yy9Sp_?ESs3|s&=yhBNoD%!FG@c(ED!FMI z{v)o&a?utyZr--xn~=o^9lgTXg-GyeGH8KQ$=eCYLEKXk<8X_x&D2g1D<8IVyu1w6 zPkWNecz=hS8nS=J)+^T9zMAKGb?N`G_ntvfb?eqB8IdR`IVn*xG&C6$5hMx*az>J5 zu#qMQB?vS@B-lhz0f|k{Ip-XjoDpb{oZ+ta4QKCj&)MfxeYXmi@D268&&m)BToZZu*M`91Wov0N|CPLQnA@?{7O0 znEA&Jpqs6F#!c#c+)G;MHD%6GfiBCp?dm5%iSu$Uava+Cn6ckSh?^kBzqYL>#K*;( zj+jspMdGK)P$!oJ_c_&W^d_52-+vLFXMt-6`)az)e>Xq%bTfX`lTfz!)#skx9%p4y z%quS@d}xF2n}o>#&;2~N455(XMI&&a%Z#PC;?}&%=pdrsGKKJu<$`{~<3=6To_pmq z?-ytG$ldlJG@Rr1ar>KXPEQp{ub|-(nFb(Vo`;%@P)V zP5r7D6OIv3J}CdSg(XA`(qzHx56L?B^n15>h>+9?d6*k)y1(Wq9C%bW*ri=VcJ~gO;$p-p< 
zoOPZ+M4o}{uZ#~V#XiJSj-5=O9!}rUNhMN)Ph!k{0uzqXioR-iY7Vt@<#Hek#vki{ zY;gR!Ky<8Pawlm(g3H^&G@DRSF~b>HNYT|JGfS0z)R%;SIP zYHIdc7EdGv|0&7zM}_@Ki}!2z<`T5N;Jr3B7zWS>iPkLmsmO)x`@Kl7)`xNoXb{ol zNq8yKAk&GwNZ;jk9sbRGnCCd$1_?D;UL769aVWgG2F|0T%*I$>MYVX&y8<}__LC_L zDsKAA=yMR~1-gf--5r1Ag+VuM_m-^!>$9sA&7Pk*d1Ip5r_F;RF8&>=&1n(q--D)G z0)Ncdb|j|hS{w8|l&ncZIU6nV{bICRq-_gk^qn(UtTqQF`&hltVZq#<3=RGUJ8CtX z;@cnXnrOK+wYRjMG4q=7;>f>5W+VDRl(IO}Vj%M8AkWv*q$x~l^*@+5n8R9^Bz|;&h{m%6lq_P|&M2~YJG~+ssRd+5+mLt2Ew_(okL4jSR47Nw33kwd=?AIOlNJ4uo zu?p*)cTNQa^~jd|XXxBtBfJdfW9(<;JI)SmqT)0S@ebav@wM^(Vt@U8$u z7>fy&M>3t;9!3X5*knmC#ckH&hs5iSNS9LsFFbj;4cSh6%&(^MZx5#MgjM3|dR`~;HF=3bo*0_fyoua&$9yl7zXlt{?>(2i3ktK0wN0XT*()c|!o=UzkNsWzPby6|& zuRE!u`RC?~T&w?POvb@jKNkm{X%pVS)KYlNjwKhTy1N>6SZe;`w4}|h>$&$LBxWuq z)8B<*#7r_nzmWEX_PEfDV5s2kgMu@xkqIV8)7hObj|esppB~yu<&d8H!T4VH;%MP} z@Y)ud!+(<}qqL7F#I;x5Sz2nfwAG~+3kCllmj=Bp2m6UV4rj~^fRqf%Y42Jp|m_e1RN&uK6` zEq7go`FPAa>~#!3GP+kRvFai6B8R=^I# zpi|Ejeb=Y_G_~l_V&N})_J*)Q+ob!)C3ni^up34-cVli33=LY-+n07PV;>+g02*ax z^E*iVN&=mw z%TlbGFE=x7EW$_bHog~Jrm)C^`g}?;mTr{QBvKxIm_GItUvH{!GgycsVZR|7`425F z=5h4r;?4BQ4g7FvqXeu#)Yn~Ox5wV)O?pY$%8Nq7F3^&$cPzi4+r^Cz8YEAwlw?`u z>;djNbV5ANY0X{kHYUdgr*R~%0*79FE4GHcJ=W_zt5;-iTPu1?qEF|d!h7+`>-4AZ z!3cL6+2z`!-A`$!^s3Guawwz{S3D)0sH{Ey4pUgNE;bYcbOX4B+jK_-QTgL0{x2BeD8z#e$k3mg`rg z3mfO>cSA@m*`x&gJ3Zpm-=_m{4xcFmF!qn(-aZ>B!Wx@TXJ$OPhx}KfmY+u-X0R;Lsd@5XS;Zb2`NnN0ji7WCd z&Q<1DGG=10bxUc%Y2{>&@aN0E&}WL4Om3#=%~TRqbV+RAd!y=}sa{ub+=bm4|Uu^LWylIfddaAgg|Cumsn1iNJ|rh1oH5v4wt{ zN{{%&ZP#f%N}w6J=HQv%UN5{?sY&yjlBo_d#p-5Q2hIZKtttW`qxECS`23^x`HVsbh8 zeba{bKU`lt>xAu%)qly_=Ko@b4N&kY#1*m+%Yr`S&rIO2*)e!YZgdrNGLzuuMy6GM zZ#&*En>sr}@nIBwh%fq%3VK6*1XL-=< z#^ny;;NnTc*>YCylnIZ~z{DCRfIzV~mzyH8^QcD27kIzj(g0i3lsN6z+@3KUVAQ^YweR0Nc{iK5FFgEZvM^3#4SfXKnsPAF!oxRYY}FX+T<_rpExZj$G9`blGM2M*716oqTC{(WXqVD z24Pg9i=}9qpzi}f6YN3ldU6WhC#Y9TkRDC;xc+CVU>W?;Z-TnZxahcBWujA^<}Lbk z%P8A;r{KAkpuq6{Yv#5mG2x<}3P>OB!igu-Km$`Lfb)rNF`VZAh3*=t$E)OIn2+zr zTR;Dpv&Q26=DPKvkx`(}rwkqd-s#W{+Su2)M~2aB*Sm%5Snb$9Ob>)eV@h9tsAJix 
z^|a@{%sF1{KPGCBb`UrwA0MeM|4kz3IzNYN((!{gEFOUoFfpnBdKiIDp~o#`bh2CM zldxM?g)33H1Qw%IPE_ zyf{t4)@{&~FfbW=Ie>0Ti19kS2K`XK5IVL2xTOD%RWPcrMo)d02IM#d{o>aEO%7X| zL~`yx!#}VzO*qWD|2<1XGZT=gX)C;K+8QI^;r+6c;X%aZF6HVapM)^MccMHXMNxI= zKC?c^fApcBLwPdyr2^5krV0Vf6H)FrLi|k|LVuAVNXx=oj=RK=Uk|R8$Uoti^9Dm`JssCVXLeqaUHV)N7n#&uvy+KP&1bIq$1o(c*A_tNtB|1%~pqCnPyu$Kd5pT|O z`B$IQ)QhYZN?APMMe|_Cnl|NZA7~}=B5r+Exn~7B!qwdNTrcNzuqpTXNSHv=zhSPF zo9B(ab#g1pe9Yx1-75=-R-GJbw%BEgY5dcnveTL;`7&nvw5OVJ?;oUz54gB=$}Cy= z0Yz}@f6bKM{!m)?F4;hSOkXocKoc^GN6iFMQ4S{D;uY(ve!+9Cq> zy<7Za2U!ajCn?Ylkezd7CKU_wDT25;=Kkiyw5Gc)HS(u;*0{EB{r69K+y?9#;}&TK z<+^#Q)g1%s=OMFgoeraiwI%ICnuB)82#QJ+3JqAWhL!@^AAP&1;<5JbY0 zim+aw?L*rP299Q9x6B2jHIS-urYsF4l2AWtlE2EPX;dUZ@=HFa2Moe%o>=aHW<>8_ z2aDWW-0F8~455xmJl`h2FY1@4mvS%kr<4wm=jF zYiCXro%lZ*y#B%KrSg|>7MQQKgU8LvA^<@F@|GmBTHQ5>ofiaaN0hAtO`KKD~ks}o7{>=e2V?cr=cRGvRYT+;HZ5TG6)dtQATf(C(~5^ zz$SS2YmEq4=9}swVBgQ0ga_V$Jz|V@ zNFp#u5g+MBKu-#$hL@bBC?n4dnhQP)CP@(%P=s;2HR@cD(MN*1CBk^6C|ntcWrkr} zhBH>Sd$}!DK}_7yPd3=XDfK%(>a4R|vTA;#(3DG#ig9~3PD|GbDp90MQmIV77NKqz zsL%}PmLkgQ5q&g?AvoBhG`xBYzX;+GIMReKviH*kvv%rJ`1~k{pBuotzTlqA1yRc6 znFZ^&_g+LNOoi(j??;J_^g$o$G3CBwV5E+Z=i0t&l@S4o7Sy`fJ+(dZGSw-~Y+rKc zVLYgeGk!dyZC3I6DI=UJvX!^m1klgG+7s6dr&Ja5pSt^4SYJ4Qz+Wj#H7IY31J%4Uc1iTvnTdcP(4fVa0zbfpj(;x;m6wc%U({Xb)2(8 z3gIu^r%uwtdZg`(_l<`RE`MZopG7IIwwmvnFqrQ`#jkpTOO1_`-;Wy0OHl*tk&p11 zs{UbwT>ml)8K70qmB&&(SKp+2=?NIR-0~2B$T!xtB>K!&2xjh7jidcGBOqf6BMU5h zP8OHHupV2vtfg`{Z#-A-OQ2NiTGbs$7 zC@9G($2R07xhq=a{Emrn|Y zQj8Td7EIgdtjaXk7;B8*_iCyIb7jdrVb$_VqsPuN0`;uGIPP{Zl9x0(FP>E?+>Rio z%Y8Xrr#w+Pz!un;cbs?_4=ZzLcXK$$Ll>%uq1Hxhl%z;pK-4NuZ$*EvQ8#ilvbm|C z#E?{W7<>`g-$JSgY%-eP0H;l*nv)XEveFl2j+{4ey@2i_F!(Ht-s?*)+gc9bAr%|A zZhQ|$3ch68ESZET+bzs7TG4R>-^Q{wqZ{}~Md9WY?s6jVB~McPa2vLvD)1!jhlisc z!s~^_u}gb-=5FdTx(SEjs4hOQdtpNN}8&=IE`ww%+?`xJ_f&kdUrkz zS?OFW6Y{#fT-`Q*1t1H_RAQDF<^H-B`p-3`%Pxg?@`&fatVyLF{>p*+j3a zRJVl$t@n3^nd%Sj>M~epDQ#TO;9Q~HP>0Hc$(GyXYB45qzTDa&;JrNVDTh~fmwCM2 zzYogv#13)uAHw^pRau2xb;#7Y54)7|hO5S>FP`i@dB^0rV(2^O+|(Ar6Ev{K6!EP$ 
zOZwqdKxv|K^$9uK%7@7?aX?%esUis-ycemG`%&A7wF1l4(>>)xb5wId(PVC|%4hV$ z!t(u)((dj$$}kpBV2AU1ZI)c)!rv!A$uX8<6VVrRu(F;gC5u=Ewg)8%0$U}P9k_N? zs`?9Pr$E6>$3)b(^AMgtg``uzg`|)EevEg-2`eMPx&8Oie52rr7jdFnFfn?d)#YT~Vn^gb6e~eH^61FR98!7G`Rw=% zTA|3es4`&>VnNZ*y#jOuz;Dj)BR^dFuyJ~0?bsr&l{e0!0p+B|8iR>!l|yM}Lu$5a z*17jrp~yTpkyC55^#v(z^qG2iv4kMMH9eWwh4X>ets7%PeKuHc8E=Q-#0AyWyPt{} z?kGFq3a>q(XM{G~N9}wwza2IsURKPmUl}S(do-zM9ye~!6UY9ZYjn!RT(<7}`D9{zCaqu{!KVRwN>iZtm@9|gQ!rwT6~ zl2Q?1c6@Gxw=b@>%G4`|2u{jS4L@aM00xQ@AyR;d6OBCq)=JX!wa(1 zPlx4TMJXINU=C+Yl3VWb=j+byFXuxA%>N zfM0y4_wpHFZAlVs96dAuq zo9?t_eP|Wm7gb84nYobsT2IY!uu`cZx3@gHeefg+OSj=Z`8PL|sGV&e&UmqDd~XR7 z@`{LWjOAqcZHR=-0CB=%)?(1@0 zwV+S~X#VPYT|@Y0Lg90)J)HPDDgnoCoN=iCp@HUvrUgqsGBLuS8A-oOc2d=g@na?{ z93xajzbCt#4HgJIr(JkL;d{kqhx(xvcQBc^41zBe7d=u4@;#sKF(=# zbq??zXX%Tu29fMu*?m-~$Y)E_$oyy+m2+L0UO1Z1W(;^$cxCkM`5*EhN&P~Xc8Rnf zGT;fz@%seteL*R>DL8adxw%9Xt~Wv+{*;bwr+c!mQ{Rc3RzGd*7pK`#Y##q`P>pf& zJj_L+s>~d|DK$|xmi10jaq)HsHcWT3$%x9NcCde_bQp#rTf-jnXin%Ynd64|t58gDF8Z%Eb`oI(Kz7xoZ3=|4T;M1b(qREkoT|LA1&m#Z@tg+U~4 zF61QomJ-V{ww-v_o1vSiWm*@7nCIwK^LdD*MZAkS5NNgVkfh>ciIw^=Hy_es95jC> zzPG9u5XvN_AbN(|?257I_@w+uC{jeDa$g-@$5KG8-+l8}U|MCpUtRv7mFLM{)ccQA z9A8)qO%#3-8zOPi?X4+9Xry7EY4HG%ALY533;jNVLk(D9p@35N12uvJPa4nd^pj5= z#v(|*?yaK(CGvAETuhXYDeMmI`7x#Mp}6@jjR0d^H`-n>Z93HOnt@41t?Sd~H=yH{~b4(X5#|A#t?jm65qTjD>~wm1|wx z>Fj1U&n)$%CPc80=jkyd^Py=tW*E1jX9YGD`#M;rgJ7)vy)_j3CyA=DeGhNn{#T07 zO=HaxkI}Q;jSF$|cKk8nU_AiQzGM=jt5g`@2?3ea_W~luk_{|oZtgi`cOZwe0a};V z76Rjy;#6iJK6_cVQ-0*owg`NJ@a)oICvT7+;$(!=o;|&6_Gt*Z0HuZ%A2+pCb33cc z!Cb!0{OaePNKj7E?`4!cUq$sn?Zj)s z2edlRd_11S;_Ut}&iQ|vJ|vwtmb)KdZEhbQV<@#);r~y1h(Gg7|0hA?KLlz1&R+cw ze*6#n3QPU(SW-Vf;JSr>%|!gWB2F`0IVxmk7uE7=7K~@~ue+G9kf|w;7I&|j7W?~; z$K(6UNt0k#cUmL!BHL!`ov0y)M>egH#`-LGXa8Yz|^_G-A5L9on2xlK4(5+`Z@oF;_qyUe`rk8zr+Z*5I*PAj|SkTSghzykrxnH*eS%JY_Jvkcs()OJJ= zP*AwQC&|VoMx5_&O6Uo!Tdf*)m2JvAqR5o91of6Gy1Zb^G1LE8qugP~I#o|6L_(hl zXk=^xx_KHtNQv>FewnP+Pt*kC>TNt@Ds?AhE}-WCg;$*9;jfI*0~*XGuV`uSKfna? 
z#4I_nJhtEur${$sslU3S{)al(F=hJZz^twQPvcrD-?|n-f5?o<7(WtGo>JFOU;-aZ zk$HU?infQ8OuYnjja}|fbikWsm$%vtZ6mseY z4Wbc4v4)_)l=CWptZ|5=au;v&Mm5RhuraGE1(WE0*7a410Pu@o+Qn2+xq(6|S&s|UvQmMp!JRChGq zvzeR_Uo6SY*k`Y(y4m_=9bc&U$o9h~e+*-i{z467YQy;07FIkO=`ZRTVANuKP+bgvB9-)|oKaDAh<8FQ}U%`ZL zZ(GQ7^Y~uXNi7G^#8>kk*d-hLg6n;&8gUsR_@H%b2uBzb`uTqp_}8`Eciz>Rjs{Lm zJ`*}M(#4=gwnB+60Xn$3L6C127@>M>Q~QYA%Fh-wi?q`5>WhG9?`%X?^lhnYbcwxA zI(qY_4@Vm!czY3R17B!mxlG--posfnBVL*r{rg1mcg4LLO>27mD|A}eVX6?oQ^9|* z$zJgRgAPOYy*fB>Hfy_ApGi8?%!hY|$0T6Wz)vlg(+=xs=_DT8w*1x|y1RW5=qboI z1(6$C7S5&ORfu><}0JLF$`#=u>pEy8`4! zU>h%_TqFSCIVB!={e0H&#(jx=-c)I$2l$Y!kY0jqR~I>eO_o7|&BsKMs*bgR$))55 z8A2qtGo=}lg&Wx#L2tb{b+5s<#%%>g>upLNHXEpz9AWPoV9=&u`+11u_ppc>NV=gl zmG(q#AZ21#UxI}0sVV*MA2(_rv%FokUlscX#{gEYW;A|ME4$Ft5hbXY1>;g#KWp-a zv%^+V;-DKb{2?Hb5*LNbI6qkiXAFF}^^{Wf==4)S4u40wuM`(h^xI$A?P{ljC{r;< zb&|&Fs4P=3Aq<#-K3Y;7+}G6J=q1ntWPvap{}hz%p3NNO_q^Wj7Sl^Qy4V1YWH)%; zRAhge{DQ4fHo}cxPHMXi@s7UPY%Rk{+J{-V)OOxI)#AsocVNu!yoSK({_@+cFr~&? zDKU5qe#WsM?ac9pV(M(44Mvb_OHw+E)b91L0vqi=8dnG@HZSOXF=aDihqt1esupKA z-{K7Ti{!W!q;xMPcC-l`#9zHw1UY+bl_zTPIE>;xpfumL(L{#(5%4`UR16T5wUy=| zl?HZH%rVW3vO4Igf~;cNR_<$UI?$Z7NmAPc*+A1bGqfe`boMa@=pnuerFIi%w9p6=?3o5mnyl(+$m-> z53?{<(qLe2e9sMU7#ffWi8t1dG$hFPTcr=JzDFwd&A#>pWsL=)C+eLTKDx$NA1-LL za^7h+(YB?3s82g-GcP?+u_=8kkde3&7UegQ!vBSF+S^0v#j7*DsTH9w{N6tkgT-ip zWk%=L4zTysk*)nZ1;;V2NYvW zt#=NGW5FIgyx8(ZD*4VZ8ET5PAv~ew3+`|iBK6PnLZsTCo5#FtG$9~JM!~Q#BmO&Ew>Xk&3w^hIRC+QLUc&}z9spWp0mPvG#Dw1ql3~q4e z@iE%^&`POxozX8v3!Esr%G0BynyTy6Ui`G})si*Xe5a*C5^`jXt(++OG&^1*+`mq& zUFwb87I*j_G5QSKqrp;PSx*onBPiw$v3$fEKq^o;XmzRD(1|p!F~Jy`Obo6IwqG*t zL!=G8&9M!q>K_FpkFY-^k0!6Ab5nXC(>ERmJ&&I$PmGdPT%7H|A{i4djz%hk@3H9+ zl$Y9%-ipM?{rN!?o@irbuCc)(*cQg$f`|r6mw7GKh}po>-EmEyxxrOXEvxF!P3W6b z9-t05OeAl#>v$OvwJ&nC=Oe3mYLSbx>^`Qp|DvDDE_xQ&I# zlWgeAz^j;f4{ze2w03QC-jFM!7DBPd(=(*X3*c6#*TMd!%amP%RBL%SR2yFzLJ%c(2agiNv!}N2zS05Q zU$`7hH*qA=kW>~ge{&P@R@MGgOBmhQscI7)i}}|*4h-FLjMQ_ob44DtKZF0F$6v=5 zM_JLy@)UpDoAWQ5XYkD{?)@@VCk6_goICF$!X4Yj*hTapEvJRumX^pw_sqM_ufjN% 
zz{=C4zejr|M z`B@n81}(WtLb(h!mv0|gol<`KKK``4IRT7`_xiLHN85{Cz~XZZ>u_fgQcCe>YYP6A zY{h^A{>|#aA5~itY-PV3+iFex%c^R~E9?U-y$Qxq+}`8j?;idsI{eZw;&91MB>+kp zjUJoBlJvn@4wu@ph63}N%)xaawMDN|;mW!VlbTO}@Yo@MG|&|Mb34mI!{Jph4P4FM zIYErC1|J?`R(sr@R?IT2xqA<%HhiKoQ?_kRu@&fX70%cT0SS-q(fnqGD$iIQ0%P2Z z)Z-7cYN5LP!lm!i07CPFvvfuj51SHhn-vxWb?cCtF1Cz1j36I}RmAQ)9Z7!R=leHOMRzZ7r=o^P`(0g!%RyX+*VA7ISH?;!hC8=VD4p8nxT~=^8AVz43Slz=9S5EvR+%_v=0h&)36o8u z@xaG%8lm>1^yG)LOrp?i*Xw8L2OkC13WxTKU0t!vAX1*Gx4N;FydBCeu|%^+bV;QP zPWt&jZE-qYa(~b|PJ!VZijyDXj&TR;DcaMu@qx}oHfMgY^pWP;!UW;jWy4NO#u#12 zGH@QCVFC*qvLv5AWGn<0VtR&MftK^ghHClqv#zCIZHtM1V!VYZ;(TObwr(7NN>-#$ zoo`y_B8l^jFm>2ef88$%8B$rUFTF29E&|o$^L9lGfG2wPLGv`|1yn4ZD#~xN5PWm7{ z11rNPZeE6v-=}!}l5r=dg$=;S=H80l4r{YMe={3A59&BIcUdqOa{Mp>+~X8#VNAd! z5GTn|;dF;W=aUjAgFEW8xBjmK@sJsFa_xtVPRdgr+47H0oIx7-{?OE9vUWgcL}R@+ z8UDevuu{Y=&3+gol%)~fMZqzmAE`TA@jjE9SkboRFGE#1e-;x#Lmmjae==rix-~16?M2G_;)!tZ$a%J@{r9@MQ;_=T+R-)&A2ys~3>=Lt7++{FH=5eu zuN@oP%{F84Lb4c(8{3(;>7}o60`kC>6eFWKnYn}7$BD0r?3YV- z7cY5Tz}{KAC~8FC>{Ngp{>$2zW6Vvn)#GJD^&b3JUZGRiwq=dEY9*SEd0wd$4clMx zyhhF_tl9u(raT83j*LY#^1~rhS0PJx%M%fYl&{VwumP%r<`rt4BNFx()4^O!lk0e7 zY8S`PLMh?bQqAgl^}`7fci}p9|DSmsVY)>z6p})bL~CCCE2*x`gPG|JU4A*M%262 z+5T%be{0{`Sz=hT2})sL_}(n~pgB*^ouB!uEl-?!M{egGf4%vT2~p9Yjvt!Y&`D^n z4!J@>0aqn<7D9HmskSDJQ8nQHBfwY;~z1)|^fL;OAaf&N%*jk53Cx)#GI zT>9js@WQfavhkPk+IH2JvMt$&Zp-g>+~qWOL;5vw?t%T^k`T)$9d-&CG%Rto+qz?K zm4*+k^(3 zac93nBh6eSibSYFZ)m{4U`9*zn*#9|EB$;zL_e zZVrEr=MPTggATp0_ElDM9m<9AHI;9B+q6qQ!xZiyx(RgOnqBONQR- z(%JSbrL~t&eoq(nHIspL2Gtm?Kf#Sni1L^DIT0&`Sd)nEF69X#tR=3cfjA%#FS5>z zTg8cuZh)ummEBQUtLb6|;48aVa|p9gmx4MOh){BrY=rDD>>=EZ@PNPwjfmD;mY1ow zpG(k&_CAI)XT)LLRS&&KIX?EbUlfC4_1LXLMjcHrd?nyJ5w6?*EGZM4OGPzqTl9#( zhWN&+N3`KN@lILREeDKe3M()+JQH$dTYY=hHWM=iH#oefG&VY|y%+*u{*qk36jNIh zK1WB##MvLV8r5$>?F%BRzt&Wyu>l+Em}IfOW4aI>=0JhL8uE-{t&I;(V?SOHi7c=)(Ez%aGji;rm#I3s7%cIUVb_c}-6 zZ+!!hwM%+Hns0N++3!6q?or;iqnE{TiIBP3te*=?&d@wH^c=iW4_&En1&;v_LX zp#5&$_9>!O<_O4iSUvIJVf--MV&f0#bq>{;4jl#(y%bDAXAHgiI~)M$_+44zxM_@- 
z7GH1z=es&xBY9MJ{PYwlb*5lhYbAxju^gfzl7JdIszaI;xat)OZ~pHQ1NsIw^nGin zmS&FUf9M@QvNH7J{5x({%ZT5ZtM~OeKjLMt81Va#76_1VAKCMNTUseD#mVKI?y0O# z`&V>cDVHnFIcW)eLF82oYh}?y2OA0N)mi)xm5crU`hvsHwo|G>yK`E}s|`}rOxurg z6}67WnKmWv*dl+ZY)wAbt?$%5%4|TVmG-|l0s>iOQ|mAD%s|VAAtGH}cw1O@d6#W2 zwe4pNVF^}RZb}Ry<`(V7T>I;%yijksIbB`bIr?c&_|Zrdz+#e^MJ#)D1?wLTXGwbU zY;R;OxDFpPT1=TJdCOOHw@6*X5N5BufU&4J#nH{pNpanAAJFGGW(fQb!v95 zb}-a^g}S|OX{aTEQdHIH@^U|Z?r`;Br{DORvP%IgW;4Nf`S(!CF|DdeuJT0(Zc|jz zaT)oI1tSdRP?*dG!}JO5XG5(gPPUd{bF()F&rjKua`IbyQsSsGH?ZHn4hGj> zoTh%Gzf^3y>(~aQdgbQVnr#&<-V__!v99?HlRyj&&vnr-fqhhrQWJZHk9!Ae%PpQp z)FvZNgnR7U3OAa0&uu0i&;K+>SO7_x{c5wGZ0c4*)aIlrKyIC;lkPAe{FjeWH@G{= zHV(+-)Yff3YNx3`NjHj|`_Zh!*(@d&?=rKh2jqE83p5_KuNBm<^_^|*_`V_)Q>nK2 zGQa4q@=L#KfRbj7dF`q5M9D%PNiMiS*nv;ha7rSEm0$T}(S!I}m^9;(7&$p#OiX>) zl^P~7sy&g2-?!vgbY{_?d-5u=jq1eV09$0X=XBi#EDtPWWOk07lZE4Jv z-5omX*j&`N+l-MC`mMb@{eC+}?TF66NMGs6!t?UH!LniVl8sk(td*>d@5Pk^ zP@lui>86Dn4@N7WB=Q6KL@S(P!j&DFFfEfMWrP&c{nI>6O$$l)bJ^`-u}(ncer2V1z`viDZeuSk@8O|uvpx3En**(n2Fp6zgWqol%_QJ2H5;xYRSgJpD~nU|s={H?t#wsA zb1HFjULTUvWw%U=>&q@<$`?y|a}N(o6)$wg-iF@V?0%FvrwJuySV8US=#HkguS(I^ zvpLF*Ti%NonOU5hpPeere9=0o-5 ztmOqv3J;i1AlfwMHIS&3qQO5drYNUuy!q-A+0tl?PM$;$%b5)`GL6LIL_RU48}0Zi zS95A4Sh{Y$lSC8Bx9Q}+sX&>yQ3P~aP^C9-Yt_}cwXrMhxx6EdU+HdEKK8$D++PN_ zKhRy-w0?eaOfKnjx<8jpTn{2`3$7k0r!n2UTlkV{ee&a@E!o-bikd=4{_W)54!CoD z@3O%-V_iM6YtL+9@?D`NNO7?h^0nJOH)&)}!HO{uSga|}&yPoDs|EMPYmeLIAMkQfGQZvvrNA`j`=-8#c^*xt@hihU5dTEn@v-C)ded^2SBE80|hl zKG9E9%tN7b^_`Y*sFDQ422y=_`aX+KFSg_aZdBzm0GE&AN zlkZcrE2By$Ek)hR&`-r@!zFvp(nFCoQV2`k$zNA96Q0E5hm5mRG+~iFKpNbdx6Wk$ z2*{X57S?f}K}`lEI(4VN7h?VRh9;yadii!&sj9R>j{0+? 
zznCITo5woNc#Qv3^{?G|`>nQ_GpYgIVq1IjFKHrKJHv#gIx^}bWV~K5>OfQ*AmMC_ z&9DN`>(s_|bmeFQnmDB8&q?dOXcd9y>(0n`xm)R^MR7;ljs~4@3fuGo zMbww-qv-D9U>K?SCG2s}OJm~~ioH5E2cm5C2+}BRjxqVZeF(B5f3sm$ze~_|3OV(# z1Gw%*7(ofuZ4I{X$(oGJ0~G}=4mXBWu3FT=JY=+MkM*S7x<#4x$h(#WL(mcM_n@o< zd`e=v8(%5=zD={Y9h5rYS3aA~!TztqPN$b-eb!c$`zDE$0>VMw&lHz=Xu>*mh58PZ z>hD?oryx`MxsL1o)2Y-xSsUh57;wvvRj1f62RghRk&wi;q?&rb?GpEww*qK@#jqC0 zIV>;p*nrUge*f`*_z{?(-#7C#b|3G|$<}+HQ&JdMKlrDdE5+4FCRrg%bk0>k#ceXD z8)kU_bx-V%^p5|fTIGKkzXkC8XQx-EALu;JpWqV8%sPE^Ooe6#PWYzH5J?F6*46ve z)m^J3fSvIvR$=(@K8x9alheZ02LRtXM|!yC}W2jRo?J@;jK-rHIxPWu|S9sg&+uJ;us?_Vc&Jt%1` z4lpnd1az(~1o`9rrnZ^@RGP z+p}PoXw26%>%p!ijg<~8YvX40&GWzj%lpgAy_&r}J-e}*h8!xLot&3jv*U$IS_Ac> z_W2q`?9*%uU()>`5SuK`d)|Sq!x$@?h2J(NhN)>(p`AF#PFHxG=`Z06!*RB0z|$5Z z%@~dFe%z~#@m!RPsV>%ju~bmcoMIdh(ojZF>ldi(&KR~UBo}c=o$z`-P@#0@-zueP z2|j`EkWPI^JgwTz7&>||fzqbcm2{n}>KoPUceI1Mt{+At9IqX6A;WKhfKyCByO^gl zRFb)LZ+!3_gVouTYeAc+eia+Zdm!cX33EEpT=-CDmQgZm+A?h=)+KQ+Z7Kc1ppb|~ z3N^O36Uixyx!0#kV_q>xz$S~GS5f1YmoKPs(AhG(8w(dpRilrclTXzLbx3GEnkV3Y z0}i2lL0{5F5CZq_aN1zKYX!RUyVj5+iw~x90w0Jz@AtvYtHQl=`YYaFt(HHv0#6TI z+|O88*S*Ry{aIuqHu;I_H#K?+opRcsv7RZ%W@LSPJfB8;u#}PfY3=0&#z_$jtH>=+ zOoG~6*D34|g8QKc;!suR44m(G0 zW|^&3GQ2m@a!sJNvRci8MAQZ_(>exX;}jj?@ZD19<2zDP2M_d!7XI7;q$-(}!S& z3y4n{@Yj zmy&zsS;YoJ_qv)$6etXwYlkaYQ}qCq`gRv+3#ZgX_#+Koo_7ZZMD-Dk%g)72d#@(c~$iKH9VR* zA1f+b`*(LkHUX_9tNXeA>b0_hUC#1dE%)=Q#$Of;kEx|c-ryXV`nSAPIDFgJReh3G3)=t z01&pwwRqVC`@{?sK|Wiv*zZoh_9XD|f$ZT>U4X@w&!>t8v-esTUZxE|bgQ{JDl}wn zhX^=)cC=0MlfwAnqge*R$3+h?{f5{etQ0kdNmnpwQ|cgh6*OdpUdivvm25_su8w?4tU}4FHD)bhn!k-cd;rpmc;R7;$)KU#sT4QK7#HwDaUuQfP7JO2WpMd@{B>f)nL1f-HUb{A=*Z3jEj!-q+QFzYV+a;s(#(dyRY z;H2^DhyttDo(=1>bMb@KSEzCfC9?6ylulogVMwD*|I%loJafBwxfD!sJ3yq69%*3) zY58NxT*nXbc@nsnD*mV%Ax39cAZ~}aZ#EhAF^eh@krxxZt2zg`l7SjD&plPFz^Ld7 zwFn>0YMyA<~V_vT8>J-QFz#_FG zH;|xt&-zl0ugg}6C%F(yFvx*$!_oeyEAQG+&jr`Q1wlR%T0=wvcer4~i6}pWt2?>J zX5>M9JW)9Qtow(T(j%wlwiQa>sKbY+7cchaUodbm|Be|YHb^MwI7CZzTfSk^-F{V< 
zMSc0@*Z)J^dxk~1WbMBa1Q8Sg1(7IGi2~XL2_gz2K~W?J$wGG%1<42~L6U$Zn;@c~ z7l zx`tZ5M0q+383-kUeh$xpRtof%Vx({I$eq$|kem8xU{DaSPY6vGvZCW1T!SYB#iYWP zonTY1tMzUPjNXn5g)wF|^xef~olNK4>tCl!m|%m6P~Q0$*s{^v!41=-n7iX_$l}fN zttO<{bFm|}8ffez0p9g$aU^ZA!(3^p7^|Ceb^ zfczk~yJXAGZ7%yi5biU;t6HHWu#>adH5}b>u2VUGFx%gETkR4g23a6Gh>r}n^f2@alJ1;E})vG=3 z3;u+()|avB1IklCVOv)-Y!pY(2Td!GOwH7;XS7r(6wKXpTJWz&zIFuH^I8(=_&i6c zr*SJ%kLQ3=2;(8MV!;EoGlvzdo0^<=3UxbEf@hglfXn4<$PLL46@GcqDdgpM)}9rZ zeine^&hX?+S$6U=)Y2`PY{wD{oe>;?A>h_U&c*to8U=QSnJZ&o4Wgeg=9=>{h>Fgw zgYwZqvRHOT**E^O7fp}t=(mc{a(yfz(>I%7=!HN>1lc!q+-r>?vT!^#U2UeJ&stJJ zduG^Oh<)iDV4a#{`E~dGfY3sQSPwxq(-1^)4iRW?WhXxWN)Paf=YNrI+J8TMU(EZt zT}%(g^+@=t_7@t>2V%{)aI>b=dk^yzxj#PEV0bEa8`5!=ZqZHv?m8YU+#)1q=QF>! zzQ{k!2T-lXyK|Mup~4G6hU_;U(BsCu1)5x9PDMhnw?@7|tBHdaaG2|+lx{C#PKV$0 z^dqMLxAWUcGaIiRsF-VD(cHvaC-CK9afm#)kVm$8C<$nV-ugLW(F2IibBE>k; z?YKS_CW77c9>a^-7k!3oB*qIbBo%IN#|4TJrb9%EtvzIn`8;oafQW1>Yh=4Hdl`Gs zCeg+>YoHI)9hXg9k>6%#mG|86?LN8D0n}_YU#jlET{koAh*bL4ef;wAc7~l3?C@N6 zxJ_XDkM#^t(O;oYpso?o*Az7%exn|Piaft;+;hK_ zXWuLRS!kJ93gEzR;(7UtLIxTBEPAg*=O?`t2bRq{O(>t%uL1$D>Q9Bf%d<6o_Z3JC zQg2NClsCR69b=Df9Ip!zxt_Yy92LzJ($}{O>O7H0!WT^x#$8~V(lHkr@ATqXPSXmL zN9{nctc8ek8eJLhF`GG*PLH2kh zESvNDTOonCkvx+W{es>*!9=^)Xc#c?Z9Gh1kW7HMedpNrH0k|`mxT4Dmh!0}#Y33m zWbKv}Kiq=FcJxs-5Sg69p@pTQ&uZ}Y78{&`hUS=RPN9kE>{P1#^tIgh7Rri71u7%@ zt+|K_huvb?LIc{&1+3}r==;^z@e6FB*+XAj#RuAAehfr{+c!-ew(Cn(FuG4IN$&PC zm_%Z~Z5v7z8T*9}&}kC?ZF!ie(-ifJvtsFm34Ga9$gM9&XIuI+9bG6(g0fd<0Jt}T z!3m*13hQA(TCVhp1sYQ?CY)HnK%aBJGe5D)v7Q3Dw{W8zG0aGvM4>J~too*+dUS+3wqtZ_@eRYSu- z12!$63dsCH1?y&Q_3LXTZ^TH|i9X?PQ&z%NK;HbQVXj*Gg zK$CPSv>J?;@eIf{<%(JO%)_oh7Wau{!zd|0_;H%i=Y{t_-ZG5QK-r=!Jgs=Aip~6Q zHaXUB5-)=*~*CPMwMf4Y2B4AWR#-#m+U*RWUx%hV{#eev- zoBO3HusOn39qsNaU1p^DaWL6=yt9~7>*aN{x3=9meDt+ttQ=nrlp1bTY6V_}G}552 zfHd;>crhu-%iQrJ9+uoBvW9`I1Ln!vvHQ_hQ4A=l;c(%Nlb#3{<|S;GbQN1?r|tj+ zxD1sHn*56HTn>0gW{ z@qU#h?Hp_nzqJ^?Y5cF@@$8JHxy2?vyqY~fxy+6Uv1<KB?UK3@HrsDZ63bu+lkSi{}V{|BfQ~|S`Vpg&G_)? 
z!yAi1uuoPQ^J~f}Y!lzxj1$$B#&4qe@;=Ch_q%ggLI7y}>31*%BIGT(>uuQ~oxfi~ zG@`eq(eQY0nrzPUq_X=s7=5}1%e5z!xLaR;l$*flz&;{zwn*^SW zw;Rkiuti={K%Ses;I>-wA!nb&MQbT9j+^~29-;W>XoJajPmv24aT~t3>%`NDULA11 zoI@V;(b2j&%i`eDRaJO^vS z(&Z5%-0%&o>O!(df_E8gs`;PauLz>69yf4Mqq#V7x6o<+@_8n0Mo=PT$T{e)(MsC~ z@&;~tb-t#E2SFyYcEG>_=k~q%MN1t@IQAoOUK3jX1atpnHT}ng;z3rr+y1bet6?qw zdQDxU#e?!oGJ|y+Y-gIrE|Ur9vgC-~zay(eazk&N*FH-kvhD7ctx7yu*ZE^*wJ_+^ zjXPrk8SetkpA};lNF$!q_o{WROO@KGX+p_bP@RHnFNip zLGCpGOg8_!dlnlq3RaNOQJ}H9dhB!41Es(@27NW5?@QZWB&VUA-%IbhXi?bcscqzG zwaRD%dL%l0K>5LS`UWZJABJonayQGBcVDaCD{^n($Q*E-=y#z*cxPt1Lj@5xUG&Zm zw9zgy) z{>A;kwz=r@8|?l&2E~XNc3}R^pvkuQ&aNoow~L05aksP=F7k8gc8V zd6V`}^Sv*j`2YOn&%HOHe6b^JifcTkkGcMho0}qHmPR7)5BQX9+&`GW{uNl2`UtEH z60+>c99E#6xPFY4k@4CckdZm=F=M}16(b{kJlYF<<1W+6?&ioC_t*MSiVne$w1~p9Kh;86c0}kurZ{lqpn(DGxQ;0UyfV2EW7T8+nc}-I{}+c1!)>08 zG!@2oCG8aT-kYQ4TYrpdvLKX=d&$CrFZ3mLytvezb_x>Nx0z7gM(d8=q_8*jK!72o zjRv=?C)G)IpaU*D6c#GHW62KXEAzy>Lw*x+)jqV7&MrN2emo@2lhxjnWcf_H+Y_A> zBr+gj%ZEPH8JtSG#`BfW%r_PU zq1?;Nblv8cTIreKj1?>ePd$bBs!+_0y_5Mr&|`MPv-J-$&8Rx@TGh1C;`ZR2;rqt* z-edmz!6{-B7Fm}WJes7j0omN+>C<5b2;Z44wK#yllB-)4)Bg_b$9S6vfulX;*thlC z9dXu++}hQWG{Ms5k#a2K%B9CRl4+!V-F3x?avoiV-5DE?Jg4i{2ed@aXUBR!rHeZ1 z3LMg5jO%9@n_H0u^!>NnJU?x{|Eh0Rfq+DA2hL}0hrJYVqgWh`<%7%T8_}Ui%cJ3o zS-P!Dbwd(Yijyi0DQ0T28@(NO`7&|A`n(O5{OV&a8LIt+CaXwsYmt>{J1=kjaG6Vt18lbiz2pw4m zKI94YhVXrq4pYwy0%9B+#hrZ3%G^ril=|Yk*pX%};}+%j_p1dU%S~Xh#Tc85^t&Sb zGq38NfuVxj-pu*>SG6QwrF0vRV}Xxf+Zb2oJ`Xs?k}MlA^G7Fx!Kf{JzN6!64|P{5 zV(a?NGrgn0_TI)BlkVA38GsJWEDaqad~ zz7Jhmr*Z^%iw>jgb%Ov=syFVmng0;!$NDV&m1T{`q0%y3PxEhu;YXBQNDup1TsR1M za%nhYwbDqWuBv?yjawkt@ArV%3v%Gc4bqs^4^mj##pD9F3B{b}XyZv;% z(%oMyBOv+CnkDMTp%W9vcA^AjDZ@lBSBUEWNo9=xit325V>hdgjufUi9=EwBRM^I5 zBKe}V3#^W(@GB2)F6(kL?uTUz{L9K}+b}7l|E}l5@-oLL@d!3>>AvG?;t;@C+ZefOE%3k(Vu=L!PzQJdR)CwD) zThrDGX~d?NUw!Xd-Nf(=cfML3*=>qykbiqs9Jo5%e8yL_3Vy8`R5KVHK6eW z(Z_va6z#4}A_~dT7`VT8l(^l0*>~g3!B*b{iVPe*4RRV^@=jOO&>^ex6kZE`=XC6F zmD5 
zeoj2KLz_RKF8|OjY5!LT`p%r00OYQCv(_L8foe|RkfY9zk2u`dZn>It#CEdPQx5pf+1wA-90SCP>lQ>zBRyIz%+5#i76C4rV zOO;kHUF8FZ3-*=3K{{K)9-&x$#c$2kn6oJE1^K*VPDfuPO=IYZZ4zKkRZK+bVR3zX z3MNaMl<4$yYlz`{@gL#l6M3IpScQN?Rb6i+%Jg}OgkU_ zh(qr7nY*a28iocZiu~|rCMY{-LGfE+fjai!LKB>%_1&w;TtAjeZUj9za?JL3jiCpO zw^{M5;Cl$QjR;M4_{+nTE;RGVmt)cxJDp1$7Y-(hWQ67`xaij~(Qz9G{KQQtdn!smI2-C>MDO>}A%N?pO z)aBCr;#48x7T7HVr58+8^;zMk!Ysc{3uzIvYzyX+uyZtJDo-Zq7ij1KvM z-*D|EOA%_pqGH8;i4zC3tKC$mq#CA4%5RKOjyoUybou`Ahe)E0o_M_x@;2?fy(Gi@ zv^MBqmZ!2H?h?%sRBOVOe3ZU0X1+p&~}b4 zN-rEdc}m_T3lZJ2MS{o2Yj4DO@Z9nmx1E99S&B}I7f@|!g*Vdf-(6ok9}=b5d$z8t z<14HVx~v_p0>>-^#?Z~E%U$WM&ll!DeVe;xTt3~Xa`3HG`e3Wp;l*oN2uo*BS3I zG6QyrOci17+fDjtdc#P06y(NXX$|K25`u>Zf?SpNM)TwodL+m9jYTAmd4%60-RvTEjLp-)Ztx8J?x zbAn#)6QLfLD9i3%7#P0YX6$HwGqO!MGZsp~kt?u8KbG@p$IU|>v<|T_wCf~Lk3LQr> zwZbdp-_Vvo`+|&f4Q?0unVk!)Q@VlP&o2pe#jT*>o750Jh}9(%EN^aNG5uP4@I^Bk z=v_TxjQJc?tQ11DXyu8(G4a!acBKwLx&Huas@H?%vPtd_pR!R!CN$l2I4j{sxz0EDx;Iw#HeFl3h>XvT*Zj{$`}6%X z_3O?joSL^Gl+bv21B}?%7S4sv=Ins~C7hT3j1T9hs$df|g>r)waqTb1PIZj`I__Fq z{5|SS!*jDWo0OeL(8)U)0#`##4iS}3Ho#^Exz9s_b1AIWa=)_hPUH^1MyXI!{Iqi9@0|uKekvB7=%wBaI-9a**zE*NtXR7bxg%|+u z%DTmyoj}4F<^e7=N2No(YJQtgMfFJ zI5nL=Dvp0C*1u3w5X<$X{bQaw+kI8X>0XU4vz4mxT-oo85P?{teS0uhw1oJtbXF`9_W3XqmF2)} zc-k}GvcFqOtvCF>ip7;@htFQp4p7$jE*W5SX~rU&V5wz(AZzra16h-T9#d8zOH;AE z-=J`MzTp{pp>S{3;<*h%+Mw^Ix5+DNNr<|g&K0DXxU8Ua+AMEXH?=q~p}OK~3$-uQ zUw*FSSgpyhRe2ZX8)7cK0FnjO!CAG22mVQ5yfHv?+V9FPU1;!`2ij{zXOcMQE4Ebs z^v>8YEvY4gf;WqWgdJ;kMHNOL(f{<7g}RBI66Kvt!$fj}VVM?-4`cTqBtIiH-4i}q z{KT9V2nC<4{9vCbswIn09-f?MKQJueC|7u%2wZ@>DyParnL$cX zfCw%UAii4DX*=*hVNAQu{GFJ^f@K2)V@`QJl@1=LO9Tlw;y7QhC4>4!r9A*LrT3HB zYkC;l4^LOKSaqHTYR<)B&dWhIZQ z8N|nh@D~wJ3B74PH0d2)acC!$$+q^SO`aCM4uyr+qz@)rkJ9FfE+C8*^^gUFosA4U|?69NB(*^44RoN~3q^Szho44Hwd*Dz&^A zOo&)@!}8<1A)!8P=JMlrV4B&Rzdu{SbkE)%IqUJs!x2xY^pEuQ+!^aj!=T+#<%Wq! 
z?`7x>Oh=zj?Qhzq8Zb#q>C{0c@d$3sgo`~bN^|>k6l^lG)Bi5|NJoMpG$W{Eup+aJ ztNv?vzcTck0@ge|p{h^;4Aw9~qUIUj6XJ_}^!wqZ4dF!%z3`q|2S*)A07lQ`2wG(<5Szx&9sqf+bR`aMHGH#B3r;t>ul&m*#bTd- zcM-4FbKM@5`og38gTa%Ej=>IMz3^l@Qxp)rBb40w2Lpj-@9GOQ!8s^MLo{Pp`gC8( zJeL-5b^UQP`U}c0uQ7L4iu~cRIbWrqADruMzej7L)uOO7Quqq9*-YZ-59xu_p|=4u5a z`8yIuB&gq=_F}A#+c_oM1~3)3+{?vfKR8RY^-;*4iph&p2$lVdi|pzpd_sX6STL`= ztS82vUVR_*~?s{Og)^;Ue($#C&-{97O&b}2Sg*gtqDDPsc{0riaOzm}ark?MG}v(wN3)UXva zJpWQ`=BwERt%8&^O>}HLDc>hTKuLX~B;a+v{)t|9Rwl zI2j{zod02*PN*QG+ZOLue9{$jk{()$1l=xX)*^xPbbc=lCaInBo}v>>#9kWNr4Z8z zdwLxoEE*Q-U2ipKYj*^_rw#UFUl``lEfYV)%yQs39zw^k znRnIX(b^Wz+BeWs*?Avd$i`QZLG}q>+>IM(i=cFoRh<+p!^Gr9CI~Mv)+MtC0Rsx# z@d_I&Z8OAt@h7&r?~yR0L^y}ynn9b7usbA(;LHcrkrOAc#NTyew=>&e>vni!p?@BY zxyVN?H-zYJdcs0f^4t>D$v@9o>?%q&`I1@>NJ<7!`W$+^;=nuNt@dMqh{32496mOb zBp0#o>&NB`I`ziTIpEi}Tixo1E4QJz%`YZC%$cIV$cl_@2GwO@n_9bKy6I5rs+FCy(Sq%&Ny!@(P6e@Xp`R}GdVXb)6 zp!0asAm#EC)1Z&|aFY>l8Wg96Q#LkYANkWX2y|YCbL`%{SH0Szm?w{;0QFhrq#mk& zEYsp^v!_A3qN2fDik5%Z(gm8l{%gTXx%=_a;rc+4jZn?MWd=t~JPUX|9u;XC@<03d z{MWr^fT@We0m%6NV;%T!CE;50TFSf{kdt8GDn2^gg?_BB1%=zIj>mh`(}LPVwT8z} z4!^c9AOC70JYKzxnT{2G z`{!X~8ppvk!KXr>EDisrq{**;tb3GHENHq<2}Bh|annZMQ|fnMYA$}Zb1Q=vH%s%@ z4@R3~rsMAI^$P`}np;T@d31;2i%J99*XBEcg3uXjz`fQx^T#C?Ev8(sk6ULKy)DTi;`X9X6z130~>+xqodu{W!H6b z_z`UjY`6++pqtw=(q6mkGP-eGrv2w|33ak!4$E|Gn6z}62u4=I4yOc}Z}P{YWCNb~ z&s5t5nA`22GRVd_(``}WdR>Y^1}3N^l-g)IdC)^By{G2Uy(0Zm;T63!&)wKWxK33t z$8w#AXOv7)qZ7*X@`79y5fJqycH5y-3WHLm5=5ev=f6Dxv_i>x+*=mjzb9iswh&IO z*JYPE&NFR4|Fq=SDLI6pS0|G&S6*IaUqBaElOd6js^|^%>EdewB%_2aYE@cxIKjE<1|kWANC}~%7Kx_D?^>WS(&TGQ`4!8Qd6tmXqjT09=LEz|V()N{s(YPzR-=p~ zPP<477Lm-uv}XieuQZxoqV1<%AFUz2EQo3#vv&ErU2RGZG7UdP$89PJ4~+;rFIU27 z&V+al?kjCpQHCqAaKI3FM(q){*ZO;1Qhfo+ zh}xxfStU;n?DV(6Nlo9ycVZcPfW{M3HWqB$DiEEgQ7)#y*Z(!;^ZD%-tpmRLJ>cyG z(qZ3@zq8TkCGSAUvhQ)||GZ}`ug&;<&Y4G?9lkL_n3@QHutTm{w)ngE-2N-V7E;#5?n_N+9RH?{tPs;d7( z0t7|Ww5kt=88h@;B^d+k;e*@Kulm~}g}TG1nj93IvK%cvU8aznGFxe3Oi}v>R)Glv zg2uhb5Zjt{m>J?OaG>3%lxL+0v@&kvwWk-~=CxQ$iZ>?ie}>)DPW 
zZp+P~+nFso+cBmEWkvMqLi}ywQL7s+xW0^ksjIT1S|74*+V=7b znTKQ2?fBCArK0i-;amatWC`I{`_J^&C*$mQGm^*FvFYRZ?CUh#0nssCZ_=zxIM&0?n$UD~ zLluFNL=b`G2#I$gyh~Gv9H)_rE6^2I*Wdi5o5zWK>-AGBK}nITxGCqxa8pRdv&9O0 zI^9L%SEqTQ(?DXf*VQ}BQFw7%?EC}d*Cc|WMc41|%o02&hFs{Ygb#s-&*Jn$^jN;V zX1wCU!0DO((Gp`a{0Rpvl8|{(<*tuReRwYpPci77d58&o91Y>ax~Q-WktnjK2~rEc zO`&bRMZ@~G1GxX{Di&Vy$bX~ZBE|Q#yCr8Pi|6)*Qg4Epb6s~^u?Vy zYq8pezVS}VW_;3uuNvZ-KI-3GnEir$gYQ~~aUE#&D;XqHc*KsBD*BM$`=rB#e0(~8 z;)U)|ZQE*rL+SJ_QbAF9QM(kg>>h{0^=I(*TN5_hT@l(b;cD344tCx=Y@rj;_rYNu z%{WTM^B0_=Vy?S&BOF>oReqxM6%rHvlJsSc!Joo>jvusk)XD@ZpTD!#?YwvQY2x;9 zQ%vSK)qF(EIrP|$l2)2NhiRXM%6)wQH@^}>u>kUo`0 zUn^$C#7wZ}tyg-0w6g6GUyUPcZP)I|0EZ6?%eAJQf2}UHfK9PfX-0@ss_tDnnJifd zsJpGilV#uGvb|zT_`dv9N%;RwZOT{_hgW<~I~;Y}{louPaMI|l^nakydyV1bbOr@o zaYf7n#8n{3nvIlT`>QVNpE}YfKT-jy0c&|4CW; zkb9L}h`fL&-!vL7xhZ-D4sRy1EG+~3SnM5_v% z|679kTZ4R#H)k|gwfRuwJ<>)=(*`?jTaW8MPMHC19pWo+aMX>KFy`YLsQ(ByRYM|~ zo?TrD8&vb6$#uD*6nrOvIa&71>$r6R)k7e(($0<>VQcnh`+er3oma}XEk6GtBUFKt z>?`9USL{(s_q`5f%Ot2A5M=7}AERW1ws$WjcJ;Jcx(f_skBYGQx=a367@mYLC(C}8 zGiRa{gT}~Q`F$twagZR(nE7%|lOCp*wq`gfD&bwS{&7p^*YMq{3pZkGxSIE=k4p3r zmvi?jd&vLx1oFV%r^eppdo2;S*SI^^6ojTJX3PiFEW5_H*zU`?y!;oxE03E2h1GH| zyougOyfv+%Sq{pi(P3SevDfg}FKvha@l2yKMDbTrzx?^(HsezeCyS@ncu(R#BbkSX zqTeRefOd%03!!fxV|b}&(N>_!RPRB84DG9aAU}R8k{@>mC>_cU(mz@IJAhI&7v|=H z78vuL!$+6mM$R^fD%n1Rq~AJK+m%nO$)5oyI+e`};)QYsDqGAvTeE(D)5#FiCbPgx zrbO!jTFE}Cbj;dj4Bv!NTt9^uViEkXnVu(o$)21$=BLNEZH3>G*fD8*d_^u$zW??30zI2SOS?LMn~xyGdf=J{6&R`iJ>#YdO4#Ab_SP|n@2%;T6>dB zNdHkF#o{~#Y=DAnX)PnOGU@q9(Jkhnzl4SOZ$Se?t@prsw6u3hjULp_-yi#C*cs?) 
zntB(qC`P<8DGb-G%G$H~<3^Lz4qGhrmpkIxplE}o?zI+7Pf1rrlpxFRz21SmEJEv-BcW>+I2TsLbJ|P7PaD5V5t1t{p&o zg_v(v_RbcaNVT`~=s(OuTE(}nRjM;3;LLZPFFV8-q71&h^U@)TbKi31_~$k8w=Eb1 zThyVkT!d2{rDCN=yzfxNF&vGw;#S=z!2Od_ZpXqP^+3a>iT}6*vSh&I;`FP*{7?54 z7d>l;uf>^4Vzi6DT79Up8h>MGkN z6a2N=Qjs}M#`B;l*28yx@^~L`#mXtDUY6TOWQPAsp}+7yxS(RM`uq<9SQ4;;nn=Lc zf1-hvxcB4VM}V!*|G;&4CynfR4aD6Sdb+1lOw;rE4yPIU7KXOd59ea^>SEkm( zZ85YXCp%`zs|)pL4=aHw^}KxB&E8n5Vr^;9b7E@*wQ5?Q*OS9pb#mcgy1Q)P@~VQ! zf6N80k{~r&)We-$)A398SE5!*CIxik%7R>slz3L?5$}w}CRy$jU5J(>TiyCV@Sb*c zX8-$Ppm^9=igFXOy-B-_q6ekt3(yZyLbP;zU={Q`u+xUsw`lMQ90;pT36~qb*EIU! zvNO6SwbK5ff}}+UxV(V|%D1t6+>w4Q&|t;VEknVzgKr z<`a?ovza}<71g7ZM@IH7{2mRpRzwdq{S*JA2U?&Rv*lJt7qbUwzqDGf;uyb7vQMK& zUHp7U(Rt}q-^LeJ8>iC2CY}A-xUXT|o{L1>J(E#|h2Va0J_zEPeV0Zref*2dvF5c% zR~Q;ke7KEcOv@uohtlDDJJ%Rcy4%%=NrN$V7YKtsmGfu3DX= zM;7gx#)s+&sIZR|$lIVU{~<(lfKxn6VhzHGqTi|V2#_1l$Rx;;Qj8m-DR7s(fxaAR ztcH|%|LQA>z7vv|5Em-H*jHcG_DVu{tA3AGs){p5XJazma<*Q9w41Y1XcDfqhUOnUps_ z1FH?*eR3(QB)fH?p)buk%`A8;e=;{5Wtb6KPPW~rYznV97Zm$?$H((}#{>7CEe@f# zs}r#W5Z}!_xBYyPZYp<9J*exR6}pFvzCtT(^PTXOBcOjj_T#4ENP}3lEi$C z7Bp!UEq3(~Y!(Zxp`+0=r%CV4dXq9^Kl#O$Cm}XqI{nv{Ao@i?dq>SfR$1ZHGD{t8 zS3NTICl)!c3KO~_osWJ=RJORGe< z*98d>S>U5NZw`5?J!~<}Ty@1oV5#9@XUrD}`AyIuJ!k~USymB|r+RgHAWy2=5-j-}bVW4_w-d0yf3M4lw z-Gd973jef?c#A8)rMvxDtg-@AWAZtR>8W8DX0+VVX6+P}_Uv+kLVC$S>F2Cu&7`RI znF1psHbgm7vR6*I8O^tO_{7f+6ry11_|l%4)8~P2_L5=&@fu7n45|alYrCU#jAoDC zm*B39J$yf2S#Y{VZf5U>4pM1ts|OO9b-g5SO0zb3Iytl*YwCUr^J=~+BX>>{r4@MV z?4(F@sy2cEuqKqi$B}l(vqFu`o7Q|D{vhMWI({YK5BYna&yaD%vp0XC(R&gmy#wP! 
zZpaA2sNk6tA5hd95pTMi#U6Eh1>wO6cd^cSHndH`eM3zqGJD4oX3`F(j@9Kinxy%Q zmx6?@z=KM4Si9D{dKRhWv(6q`0P)uB(BM7uINTg+ct zP2c|&mCKt`Zb@T)4hna3dY1ndV^b^dsU&X@BLbmmEf@QhEEzv~n(UB2i0-DD(!6g% zh}^_bz^zD!og2rqPZYD*!(_rq(tXdg0SrSf^FtDV)7D z@UYM$>WBAz&YDYSh&p^|AmNXr;V*9aE5REGUM4uP?%%!Mky+u^;D9^K;C7$B5O2n)qD5^C^25 z%R@N&BZIL3+i?!FR6}ycEyUAasvB~FKm~mu=(wEacv+u@|8{rC4Bv-aX5ZH^?vk#X z+WlV=t+pS41D`0T9LjEWj(k{YV?lmDYsq53{0I*F0y;#KfR@-A`?!%$r9R7i0QWIf z%VL|jiHDgvlCOd#Vw)YqmN-!1tyeO44LL<~Uk_eCMpc!!rn8p6R(%zm7xFVhyhX^h zpUC1|s%LmwqcQ)OxQSgCS^Ce}~n$KYazze7iO-Up#+ z_eodYpLeI<*GB&{5(X|cNlB|RZ^8GPHkRA|-jofV`UsyS#1qsC;!fg+|0kETzp>N` zIf|dK|5btpVuZXS`>%Rp9V3B*&p_++fI{ghV9RIv zE-$mXrgOU!8&lNT_b`cu*iP&N!Swh#|LM;$*zL}w?*+egvUuU~a~qykFJKP*@i0(~ z?;)aQ53n&r#4>?$E-5Kukvi#xs{o*?>&( zPS|7UZIKH#vEAhf0&cIzAq++PJtw6vFVPjB@Hzc0qZyjhdMrJ>r?jLP+BZAmF=nOslrJa zcQV&{dt>-@Ky%>*iv|@qJ|-WvcKI&sMrG$b0imgm86%7fWjurl%AVWzUp5uIgD%qO zX(7_1-zZ`?9yGWABn1Iddqf@xik7}cU$HH7&}EL`8wYr%T)$T8CH`kt%W9KDJSC)xPAAStbBd8qAST^YU~b@xNE z0ot~rP=8NJ-(ARrLWaW+o=UP!UttXN@eHp&Ut6qmx7(!NO%OSqS|vkv;aJk9=4|efUO98F*i}_x3B}(+U^T8l>Rz zTgh;l&mIW`aKzQu$_l*Chv(j=`z+9f<5=EHZY9>ZO4Cj9bvorNv$&Hwts1i#&7paOB}FEGF!2k};0 zzgv|M|B{`*c*aNU;@WG_?R}*xZj;6HkCpmI8|*LF%N(JEaFGFWq2%!0XY+5tSn2{eg$%e29c&ws9!*?>DV!@WzzUQ7D=)ec#42hzgsInx+7K($~ zO({L9F?bBrxN$zcB+UwEUtKDDY?*82lMju#<-cY`Dj$eD@_Q9@zR)2t*)j6oq`C=D z)!HY{DL0oPM2|fsnlE^bEcR1TV5;H)e6OTST1o$qT;Rhypo-M|do($Cm3fMtIbb3G zA7mx{CnWiV=!u}@|7EhK^?N|p1e^9DLvwJ>a`M*m9l+V#Tie<->Ll}tC3GLWkIey~ zOIM-VNUBzj%9|5K_9(Af>e+6d#ypsd~6|bBmV=qcv zSqaqOTSCu{is9Bq8?dzWdY!sNu9Px*m88Z)u((Wdz!v`%Bl9G#Q3vqFm7mV7UN?K; zFZ=^B{xwjD-+KR?z%u7#lh}Ye+1Q;G!j8iJP^m$5`+#&!sc>|=`24i=ihX`?+lShk8Y+usgB-6eS>^B;v~ zM`1f6LBadV4chI|y{P9+uddkTY12K98eTJcsxZh1?CaPbeU4OP?U-bE79H_A0$P9? zULn+7T$$Y@FiV`_CCk>>SNgf0VZu>dTff%+MEu)QB+k65Mw{imJ$IYP4<#qp3RU|x zsFa_B>V^7?=?pHwpk8$CrwNGDMJ9z*O@+Q_C=3E8D6Y?izr@!(2;S3uH4E!23~oTTo&oz?l0{Cp_}|nZx5`C~LG|w@Ul1N2Tf0@(^q? 
zj(c!;i@s4v?14|l_E^Lmd|;sz^}PihClCZ3Z$qt~@ww~=cu+c?QJh)c?+odLe+Uo{ zs9z9Wvy#-pp-&}*0J2m76d1eKJ2OMny^wVEM_SlHuIJnXqJ)rrf)(4!eu4$Uaht6! z#;{^~dj!($HUXgO^m~x?mG{n$ksdk^thE!+Ujpke%m_h0lz$prUC6=JCDZW!!SP~3 zyW@B$oF~Y{EVSGFS-cGvYq{P$Fk;mk zxwUa?O@gTv9o=)O+3~0R9(RbnK*cmW$D*!kSq)0%g&MRZYo0j``jSmI1=R?N6xO1)I35lqYv^j^gY5C$E>Jjw-0-_)WqzMQ?V?aU=3Q8|ZCqOs?0qG@xgeoGS^iF6ZT?j>b3n0=7M5&=k4TL7W zNKxL#?|$ET-@W(UH^v*|<&Qlw##(Fdx!0a+{^tCxwdPtF=WLKbi6S8Oe~oT$-R$>@ zKIxV?LKX`^N;GPkdAqXs#7rDkJiqgnSyY1%dn32d-+ig>skw1GF2s{(A4I%B4^+h7 zWxe- zi4!_CksadED3g+M?F(`gP4D2}`Y4lKWO9Vm@ic(N4`R^++b~NkjATQ=Nn5jeN$rkJ z@hur)Zkr}=q2(&DuozjJCH3rK>coV&pEJsi?L0Bn*)Fan|cz)R-{2e6unIngA9 z+xHf0_a17NPeq%w{QW)Pu3n$a2pUtzD#%zT|F=Ci!TmP=GVhYOX7%e4Ngix`#L4Lq zOmk}&@yZL|&TPj&7fC`}#8(kX!!6C4xykzBHKreJa~`!gDx%AW+MOmpKMm{XnNAI7 z@$=f4g&QK(4~%<5164*Xh(j*oAl1$NLUr-v>mt{>f}$t?Dg$WWmx14+D#vUPznWDNT>WRZcyQ>ycf&nejzAvEW*j`4JN$F@( zQTWpAZrdy4F+t`xsz%n(i~GR(zGmV|)W}h%*=*y+Q2ROkHb*O$^5s$gQhZ=ml<)o~ zzD(F^u1z*X#&To!h8M>j9kf|{C;BZlrM z0LkZ<>MA|A5Q!L&o~O2Y#X*JS3w!0GgmDL_GoK=xMN)69e^E=m$09;?7CLYN5@gs3 z#s*@S0$5svpq5Xxk;u~K^Hjn2;L(1*4fuQEEowbfLJT548)n!*{42vZNpKHn&lPiO z(fRpnAY&@0i$d;yF`H3aa#BduK1Z=K!C@D1yIuvsqT@M3QW zb_u)_9Y}Zg^;hWUxW0cY`MY{jqa26f$h6!=b{;iWpv~VU|LI0;Q`3j*sHG>ksdP+$ z{?q5%928`x-{+$@>+_?vm$_VRlrQIzn$ou>G!C`F`VuGn_&uUgoRbH2;#O|`n z6hDuqv^g|hw=QAZGmAyT9F5@N#4J^FJ^`8AOQprVEgqZuQQ@rknz%%LMAokl+g9#H zadDLy#P9|2y6`eAblWyHku*0WF=nTvyK-J8L^Bv(*5l~ZX8OeUSsInCORaLBpvcx* zN?Pp0OXoJF*z#aG0oAXKBdb^V*TNaVd@8%q7lSl zSR3kMnSR;FqSh`zRnq81uB*eCL79|)kuZ3@aa3Zd^DEMZry+6Gz(f}9`1-4xZc1mU zF>Ogsae~!wrp-;UF$UA9E?JGq%&+YYz1I(7HkC^TYV7BQ+tW(R(GR$Wf*fRfuk|oR z^0SL@vl5d1j&(-iDmL?~5SCXpSYy0EaSp$+c(u}(z5x&Ea6I^ti-a5iy9o+BMClTKkHpkw%6Rjm`%9gI7 z;yc#!I9`6tcCPYJvVEl@IU-4ScA!rTWhW2voQ#Lr{#N&Hv!Dt0_}%EfC|S=*{6g0T zqRUo3KXGocs-V4RxpWv5OB!xBXnY$lb8g72yY^QhoPEu{b^~Nwr}onzy zOB}VR?(lrTxBgpiD66%f4ZsDyTX|@F)?ko_5~-(8z`~sGu*u`a*Y$f24d=_nf*%!a zKeXLar?(tGcf)xL&@64TWJnn%UWbs#m0nC%w9`&YteQwQ*4_1V)$eUr6C9sa7^c|R 
zg-+Q003(mFpF@xOed1s=k18FT4g3l*=pI64(Ig_DvR=fdSe+P=q(1vrcz8ClN+^Fe zg$F5JMlBwz8Ql2M*g620aIr{Oanh$8opCM?R>SvO+oiP=y6UVuaR_>uEc!=A4b$gk zL=-7eQqlZ$9-eNOm)gQ?yh1@zP4Bf;I>Lmyi5qSQ<@pKv$i_Fn+VesKojV*Af3khk zD~IyLTwt#y3d$VQww*^Fe~c;BDA_bKt&3E!`03o4qI(&$T-n#{?CLd&Wkn1WyHNC* zI3_(_%&l6$*$-P>0_lo5zcIQy4+xfTjS-2X#PW{*nPrcKQynS5cG@=<3%Y187fG1L zC9!GRRC0~ciYrN+=mQH)^uI2V0+^FhW2*r0FRhI7%vA^puqCGgYgBhsq5o+>%FuM% zOaX=SHbp&$k8$BWG=h^sYY@OxvvEX_oCla**hkp3zm7q&b&9rS{#BI7=j4j)5^p29 z%e;iy`&f_{`p(1c$NMl+%aldqn9WV%c&WR&(yfE*W1Tl!B~KN2WDKy^#WpKhA$2Pw zDovM5;&0gB(na6+_E^%=({`*+0^w!5P@7-K$YZs$Mu4le?@55Rvez0V(~xQ~a| z%}|q$NNA69TYSti!6M*vh^eOGXeOmdj&rC>BSg?zQ&!M2%m7emA)USk#Ld_)ihKV zfB&g|ulsvCagNDb*E)n#A+nuH;-TW#t_vGF220+x&@ePrO|x%|k{tT1Ec;JcE(6%+(5CYq&gq$v!Up{Emyi=31l+itlX7q^$iSAne*TPogl!td zYS;RKIEz?WnJsEMJHpO4x`t|>TZAo*z}V&I*utGYZDMBBOcVEzLR)bruT?{fz_0vF zXbOye)Y{rFsV~$~DNgg@k`dnz6u0KKqBI@-gVK*P88w{ir@h0sRWL>^Ehq+H_z$C( zEc*h7N<0$HlB~2I&K!^uUn&{ zth?(UwrkR|$#%k(os<>oJwo&=r>IlomuE{P75|8%K(H8|xQ5p7GPiOe-TUZ3HF7I) z?wNTIrs>wosRYzp1>@AxLVyIiK`gR&f#;4ZN??$s8q{YXYjg8M_9vbARoWo2QYE@K zt{;vzVq$cV#ct%L(ySai89h?>10ku1GjJKI8c@#6o43UW>uO_3U3w$i^)ToC@t51P z>RQpuY>W))j_Si1y>~Ro21D_eydxEBHP%WymSBmWth>w=idg^R`7O+EWeJ}Dhq3@4 z1|KQChyAk68%+tnutHvc%q-$j8k(qI{cXCx^+IH&JdPexFiURJcf2t(H@kX7{ z+PNj{Ah9J>Lr#}s^6N5*aHvS$t!#Ur??v@Vy^A&v`$RQ1&e()@P?6T%i1zl9t*8B` z+UQN!{9Z=Jx4gOuLv$#4ri00LxK-!uhBuQ0*#fF~1hqAg)LfZ1F!BKZPFYki^ffSU3UyJTPu8ewj6P z23;uz*^;;UD(@P_?Nh zHMRINMR|!|D$i1Ri>b669D8C^4o-IB^G8a@WxLkU(I9+UbfPc>#QnJ?6 zd`)WgU`1k0JXcbcJCPky(M>fi6qjpEAgL1W=f2xxLv_<=ky4IlCM}?I5h*M*;iV-= zrYEswYO@53JF!xwn@U${KsQ5Z>^Qn+@B7vr6{FkwH@k$sT5@C{8cy=cy0=;6|)X=n`o37=zvOEt~RFK$n!-Ru|cML0sVV*PNm8js=1^K(~! z!a*JJ{-us>32eETK#BRyPIF6EVDDSUUSAOT0m^=u5a@{kJLU+@&vLgfry5@op;_`* z{Do_0f&AJV0s2n008`mVOpg;Dwn8INH;hVL9BaVeu}{rMwnlCYe=%})2acybT=ew| z0K2{UKEXXh7LTt6I!(~G$S){XV)f@Or(!I7g9fE2&Y?ygBXx<+7ti#--FkLJ? 
zhqy9;~ih7X!t8Bn9MD< z`I|~Sg$6^;m`B$0kqqdP%7}Pr)=>+m<=r*97d!-9_~i9J&ou(aZyp>x0$FWYmqcpR zHR~GDR-X17If%)n+9o+M!J3SRT6>dcI?ZyomHRbpSLH`7Ku71Qg|_f@PJAKQXS&E~ zNYuG&>2Yj}vbUCupVCskp9L9HEMS&@V7^AL6jlXu0Ima%d4oSoy@ij(b4n!YORQ24 z;S8i`Fr{8>;tFeU*s?*SXJO_B8d5CXB%7$o;&W9Ta0dGgjkJ>5v~aXk!~oIM)rSgO zljL1qloWwqqYE@^(O?pn;*F%X;tov`7yHnqbv&7-h%lFBmURhl^u@@06n|NtWzGk( z9ob;^i^iJ6@&rQ%`@(U0lg##=4T84vHDEM!uJlJ4Q4#;cD}ZG`e?iy3H@#gFwm^R1 z7deb}VOdjT7DISOVt zxart8EcBbGSn`1ZVf;ZQo%l6j_+9tHZ`ZHjgDvnsq{=&Jk;Ylc`sX!%=?s_F=BO5#;DOapO*7m> z0Y3cS81%Pae(2%TO3gyWb~cw5@Xh~)LI3XZ`!+N*_%t*&w$t6sfB7$Z?)BHC-ObHt zfg@^C$%zjaz<1orol8^vz-LB%O2>u5J{B85nsl$X@v1U`Wh!a1 zOe6&#-=BvaIF=pF=R8&0h08pzZ>q!)4#l^F6dXu+UX`Pu-K*OC^&0sHaD_3&VCBPa zQtQME>nq~tyHcUXxo*!uC!Q23=irFwFnM>4cK?Z9ot?gdVM&}WpUuIdli}AW&2i5k zJ2NqjyTp2Df0isx*5Fk(q;cn7UwV|PK$VfN)GP)j<`?W~G_C^76d%LDwhfA|Z{(VS zIf~!Cn(VF3PJC~LcHmGvsr$ld!mm7L9UsV2+Tg0O+$|Bk4==t`o0n7CJoKkolAX@0 zw!;}ejzC}J;MXYKH78F436+_jT86K)G^C9FTuM-q-)*8?uchqKDSpZ7T->Ea#- zikBQ7JWAT#S5E`pZSzJ1EB<;>84xQa_WNA*QJ>cmm3`&;jq-l@8Ne(h@EsGr?*!Z0VEnLK|4P=?^=CD90fZE3 zt*zbSU9w4RS}d?lZD`#I|BKt1d;E&0%w(}Cld#QJ1Lx~vp01`H#Fj+LRbIuk2|JAH z_ZvkX=}#0PZ$e#;;5&~ZOeb_DEguQ*3B&z9V^QYj?Sd%~1PZ}Ym0_;K?B%>>Ncm9l z;8AsKIO`XA-@2gf0__Cy;Uc11cX;PTPE*>2uTwM-g>rzjDf7t|P3F+!dB;Bff&7@@ zI4`41>RIIb6UyXkr!o~6v3yQ_(Kbsz?d@`_%9NLY+TDRaTc_J7jHDa>HOOvh!%bpjWb_^S0Bl*z?vH5ao$f01!^El#Lcei~G+K5Pt4xFTS zQBQ8i%E*Ysou(J?6#7?WrVcy1xb^DOH}V@$Ks;}k)Z$xL|5g7^6B-(!&nyyiL^L>Y z4PO{~ZWtG`FQo5@QpRb%K(`IH&znXgMD!)&_E6t|b`A~& z)KN$Yn&S1h`*`-iZ1QOpZdiQNz=-_&JBWSC_j%f2oF_km`sWq4bo9te`e@J`*nK%ri*5jhzF+SJZzBMZ~X zJm775UeS~$AFR^eBW2pd=n0H5oZ3paz1<003sH0N$r6?^eI}hh{ss}(T$Yf&kB3*K zxa4HmG<>}Y_rY(N-3Q}qN)Sk1`%>JsL+93Qn_4dWz2VQBi1kT5K90W4Kj*Ia*7Spn z%aX2ke7$Xd&oIYPf2bQl8tRV8Hp$Oh=Snf6OikPH%B>?bCq+jkq?AX^V^<}ULfe?- z7WuA3DwUYSacuGU9q-KfJKQn{swQI9Z5%fC*Gr>`L!Z%#yq_<=Vp zqDZ(1T){#RP?-r#{Lk6QBEwDoDPqmm()Nm8#`iD?G!0h zX`QjH9D}06raMQsnJ@7Gqy5&;wY2?(i!ype&V#7OQY;nGx9m|A-Jd(}1A*c5!dvEJ z)Jgu>VP|JAP9;gE_yB_|tampaZ|atQ`Aen0dJ-dMd3U2PGmU0UDW+;;_DJ(* 
zvtNP4_zx6ikXv-*1wed%#}V~eK~_}+8W8mcyR?Gvd98WE7OnpqqeXmPPjNr}uP9C9 zGT)rCq yhx@@o^{J8};0)IP|J}p+zmS^RdwnF*$(aZJv9lZL_bF7sPx-ONqhh(gUi~k Date: Mon, 29 Jul 2024 14:28:03 +0800 Subject: [PATCH 098/106] add api --- .../msprobe/pytorch/hook_module/support_wrap_ops.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml index d64c577ff3..f68708e945 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml @@ -1873,4 +1873,5 @@ distributed: - reduce_scatter - _reduce_scatter_base - _all_gather_base - - all_to_all_single \ No newline at end of file + - all_to_all_single + - all_to_all \ No newline at end of file -- Gitee From e16dfdd9b06226a1babfce3c6432fbf5b055fb49 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Mon, 29 Jul 2024 14:40:57 +0800 Subject: [PATCH 099/106] relocation bench_function and fix hook_module --- .../api_accuracy_checker/run_ut/run_ut.py | 8 + .../pytorch/bench_functions/__init__.py | 15 + .../pytorch/bench_functions/apply_adam_w.py | 30 ++ .../bench_functions/confusion_transpose.py | 25 ++ .../pytorch/bench_functions/fast_gelu.py | 58 +++ .../bench_functions/layer_norm_eval.py | 8 + .../msprobe/pytorch/bench_functions/linear.py | 15 + .../bench_functions/matmul_backward.py | 51 +++ .../bench_functions/npu_fusion_attention.py | 424 ++++++++++++++++++ .../pytorch/bench_functions/rms_norm.py | 18 + .../pytorch/bench_functions/rotary_mul.py | 55 +++ .../bench_functions/scaled_mask_softmax.py | 29 ++ .../msprobe/pytorch/bench_functions/swiglu.py | 58 +++ .../msprobe/pytorch/function_factory.py | 47 ++ .../msprobe/pytorch/hook_module/wrap_aten.py | 21 +- .../pytorch/hook_module/wrap_npu_custom.py | 18 +- 16 files changed, 871 insertions(+), 9 deletions(-) create mode 100644 
debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/function_factory.py diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 30994f7094..bca9711161 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -27,6 +27,8 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareC from msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate +from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate +from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate from msprobe.pytorch.api_accuracy_checker.common.config import 
msCheckerConfig from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward from msprobe.core.common.file_check import FileOpen, FileChecker, \ @@ -78,6 +80,12 @@ def exec_api(api_type, api_name, args, kwargs): if api_type == "Torch": torch_api = TorchOPTemplate(api_name, str, False) out = torch_api.forward(*args, **kwargs) + if api_type == "Aten": + torch_api = AtenOPTemplate(api_name, None, False) + out = torch_api.forward(*args, **kwargs) + if api_type == "NPU": + torch_api = NpuOPTemplate(api_name, None, False) + out = torch_api.forward(*args, **kwargs) return out diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py new file mode 100644 index 0000000000..eb06867371 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py @@ -0,0 +1,15 @@ +import os +from pkgutil import iter_modules +from importlib import import_module + +""" +gpu and cpu not implement benchmark function, supplementary benchmarking function implementation +""" + +package_path = os.path.dirname(os.path.realpath(__file__)) +for _, module_name, _ in iter_modules([package_path]): + module = import_module(f"{__name__}.{module_name}") + for attr_name in dir(module): + attr = getattr(module, attr_name) + if callable(attr) and "npu_custom" not in attr_name: + globals()[attr_name] = attr diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py new file mode 100644 index 0000000000..3cebd30507 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py @@ -0,0 +1,30 @@ +import torch + +from msprobe.pytorch.function_factory import npu_custom_functions + + +@npu_custom_functions +def npu_apply_adam_w(beta1_power, beta2_power, lr, weight_decay, + beta1, beta2, eps, grad, max_grad_norm, amsgrad, maximize, out): + var, m, v = out + if amsgrad: + 
max_grad_norm = (torch.rand(var.shape) * 10.0 - 5.0).to(var.dtype) + gt = -grad if maximize else grad + m_out = m * beta1 - (beta1 + (-1)) * gt + v_out = v * beta2 - (beta2 + (-1)) * gt * gt + var_t = var * (1 + (-lr * weight_decay)) + beta1_power_out = beta1_power * beta1 + beta2_power_out = beta2_power * beta2 + if amsgrad: + max_grad_norm_out = torch.max(max_grad_norm, v_out) + if (1 - beta2_power_out) == 0: + beta2_power_out -= eps + denom = torch.sqrt(torch.div(max_grad_norm_out, (1 - beta2_power_out))) + eps + else: + vraintain = torch.div(v_out, (1 - beta2_power_out)) + denom = torch.sqrt(vraintain) + eps + + if (1 - beta1_power_out) == 0: + beta1_power_out -= eps + var_out = var_t + torch.div(-lr * m_out, (1 - beta1_power_out)).div(denom) + return var_out.cpu(), m_out.cpu(), v_out.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py new file mode 100644 index 0000000000..dd30bb18a6 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py @@ -0,0 +1,25 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def npu_confusion_transpose(data, perm, shape, transpose_first): + if transpose_first: + output = data.permute(*perm).contiguous().view(shape) + else: + output = data.view(shape).permute(*perm) + return output.cpu() + + +@npu_custom_grad_functions +def npu_confusion_transpose_backward(grad, perm, shape, transpose_first): + shape_cal = shape if transpose_first else [shape[perm_dim] for perm_dim in perm] + perm_cal = [0] * len(perm) + for i, perm_dim in enumerate(perm): + perm_cal[perm_dim] = i + + if transpose_first: + result = grad.permute(*perm_cal).reshape(shape_cal) + else: + result = grad.reshape(shape_cal).permute(*perm_cal) + return result.cpu() diff --git 
a/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py new file mode 100644 index 0000000000..5442eff734 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py @@ -0,0 +1,58 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def fast_gelu(input0): + attr = 1.702 + const_0 = 0 - attr + const_1 = 1 + const_2 = attr / 2 + + abs_x = torch.abs(input0) + mul_abs_x = abs_x * const_0 + exp_abs_x = torch.exp(mul_abs_x) + div_down = exp_abs_x + const_1 + + pn_x = input0 - abs_x + mul_pn_x = pn_x * const_2 + exp_pn_x = torch.exp(mul_pn_x) + div_up = input0 * exp_pn_x + div_down_rec = torch.reciprocal(div_down) + result = div_up * div_down_rec + + return result.cpu() + + +@npu_custom_grad_functions +def npu_fast_gelu_backward(grad, input_x): + const_2 = 1.702 + const_3 = 1.0 + const_1 = 0.0 - const_2 + + # e^(-1.702x) + abs_x = torch.abs(input_x) + mul_abs_x = abs_x * const_1 + exp_x = torch.exp(mul_abs_x) + + # 1.702xe^(-1.702x) + add_2 = input_x * exp_x + add_2 = add_2 * const_2 + + # e^(1.702(x-|x|)) + pn_x = input_x - abs_x + mul_pn_x = pn_x * const_2 + exp_pn_x = torch.exp(mul_pn_x) + + # e^(-1.702x) + 1.702xe^(-1.702x) + e^(1.702(x-|x|)) + div_up = exp_x + add_2 + div_up = div_up + exp_pn_x + + # (e^(-1.702x)+1)^2 + div_down_i = exp_x + const_3 + div_down = div_down_i * div_down_i + div_down_rec = torch.reciprocal(div_down) + result_temp = div_up * div_down_rec + result = grad * result_temp + + return result.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py new file mode 100644 index 0000000000..885b5c460e --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py @@ -0,0 +1,8 @@ +import torch +from 
msprobe.pytorch.function_factory import npu_custom_functions + + +@npu_custom_functions +def npu_layer_norm_eval(data, normalized_shape): + result = torch.nn.functional.layer_norm(data, normalized_shape) + return result.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py new file mode 100644 index 0000000000..33b18d759d --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py @@ -0,0 +1,15 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def npu_linear(x, weight, bias): + output = torch.nn.functional.linear(x, weight, bias) + return output.cpu() + + +@npu_custom_grad_functions +def npu_linear_backward(grad, input_data, weight): + input_grad = torch.matmul(grad, weight) + weight_grad = torch.matmul(grad.t(), input_data) + return input_grad.cpu(), weight_grad.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py new file mode 100644 index 0000000000..dae2745527 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py @@ -0,0 +1,51 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_grad_functions + + +@npu_custom_grad_functions +def matmul_backward(grad, self, other, mask): + grad_self, grad_other = None, None + dim_self = self.dim() + dim_other = other.dim() + + size_grad = list(grad.size()) + size_self = list(self.size()) + size_other = list(other.size()) + if dim_self == 1 and dim_other == 1: + grad_self = other.mul(grad) if mask[0] else grad_self + grad_other = self.mul(grad) if mask[1] else grad_other + elif dim_self == 2 and dim_other == 1: + grad_self = grad.unsqueeze(1).mm(other.unsqueeze(0)) if mask[0] else grad_self + grad_other = self.transpose(-1, 
-2).mm(grad.unsqueeze(1)).squeeze_(1) if mask[1] else grad_other + elif dim_self == 1 and dim_other == 2: + grad_self = grad.unsqueeze(0).mm(other.transpose(-1, -2)).squeeze_(0) if mask[0] else grad_self + grad_other = self.unsqueeze(1).mm(grad.unsqueeze(0)) if mask[1] else grad_other + elif dim_self >= 3 and (dim_other == 1 or dim_other == 2): + view_size = 1 if dim_other == 1 else size_grad[-1] + unfolded_grad = (grad.unsqueeze(-1) if dim_other == 1 else grad).contiguous().view(-1, view_size) + if mask[0]: + grad_self = unfolded_grad.mm(other.unsqueeze(0) if dim_other == 1 else other.transpose(-1, -2)) \ + .view(size_self) + print(f'size_self: {size_self}') + if mask[1]: + unfolded_self = self.contiguous().view([-1, size_self[-1]]) + grad_other = unfolded_self.transpose(-1, -2).mm(unfolded_grad).view(size_other) + elif (dim_self == 1 or dim_self == 2) and dim_other >= 3: + view_size = 1 if dim_self == 1 else size_grad[-2] + unfolded_grad_T = grad.view([-1, view_size]) \ + if dim_self == 1 else grad.transpose(-1, -2).contiguous().view([-1, view_size]) + if mask[0]: + # create a 2D-matrix from other + unfolded_other_T = \ + other.transpose(-1, -2).contiguous().view([-1, size_other[-2]]).transpose(-1, -2) + grad_self = unfolded_other_T.mm(unfolded_grad_T).transpose(-1, -2).view(size_self) + if mask[1]: + size_other_T = size_other[:-2] + size_other_T.extend(size_other[::-1][:2]) + grad_other = \ + unfolded_grad_T.mm(self.unsqueeze(0) if dim_self == 1 else self).view(size_other_T).transpose(-1, -2) + else: + grad_self = torch.matmul(grad, other.transpose(-1, -2)) if mask[0] else grad_self + grad_other = torch.matmul(self.transpose(-1, -2), grad) if mask[1] else grad_other + + return grad_self.cpu(), grad_other.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py new file mode 100644 index 0000000000..2a46d02004 --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py @@ -0,0 +1,424 @@ +import torch +import numpy as np +from einops import rearrange + +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions +from api_accuracy_checker.common.utils import logger + +gtype = torch.float64 # arm host必须选择float64,x86环境选择float32即可,64也行。arm计算很慢,s=8k的场景建议使用x86 +softmax_build_mode = "QKV" # "MAX_SUM" + +""" +# 前向函数声明对比 +标杆实现:fusion_attention_forward: q, k, v, drop_mask, atten_mask, pse, scale, keep_prob +融合算子:npu_fusion_attention_forward: query, key, value, head_num, input_layout, *, pse=None, padding_mask=None, + atten_mask=None, scale=1.0, keep_prob=1.0, pre_tockens=2147483647, + next_tockens=2147483647, inner_precise=0, prefix=None, sparse_mode=0, + gen_mask_parallel=True, sync=False + +# 反向函数声明对比 +标杆实现:fusion_attention_backward: dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob +融合算子:npu_fusion_attention_backward: query, key, value, dy, head_num, input_layout, *, pse=None, padding_mask=None, + atten_mask=None, softmax_max=None, softmax_sum=None, softmax_in=None, + attention_in=None, scale_value=1.0, keep_prob=1.0, pre_tockens=2147483647, + next_tockens=2147483647, inner_precise=0, seed=0, offset=0, + numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False +""" + + +def softmax_forward(x): + x_max = torch.max(x, dim=-1, keepdims=True)[0] + x_sub = x.sub(x_max) + y = torch.exp(x_sub) + x_sum = y.sum(dim=-1, keepdims=True) + res = y.div(x_sum) + return res, x_max, x_sum + + +def softmax_grad(dp, softmax_res): + muls = dp * softmax_res + muls_r = muls.sum(dim=-1, keepdims=True) + sub_r = dp - muls_r + res = sub_r * softmax_res + return res + + +def broadcast_kv(num_heads, num_kv_heads, kv_tensor, dtype): + if num_kv_heads == 0 or num_kv_heads < num_heads: + raise ValueError(f"num_kv_heads must be non-zero and less than num_heads.") + + factor = num_heads // num_kv_heads + kv_shape = kv_tensor.shape + 
B = kv_shape[0] + S = kv_shape[2] + D = kv_shape[3] + kv_res = torch.zeros([B, num_heads, S, D]).to(dtype) + for i in range(num_heads): + j = i // factor + kv_res[:, i:i + 1, :, :] = kv_tensor[:, j:j + 1, :, :] + return kv_res + + +def calculate_qk(q, k, atten_mask, pse, scale): + if pse is None or len(pse.shape) == 0: + qk = torch.matmul(q, k.permute(0, 1, 3, 2)).mul(scale) + else: + qk = (torch.matmul(q, k.permute(0, 1, 3, 2)) + pse).mul(scale) + if atten_mask is None or len(atten_mask.shape) == 0: + return qk + else: + qk = qk + atten_mask.bool() * (-40000.0) # -10000 + return qk + + +def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_prob): + qk = calculate_qk(q, k, atten_mask, pse, scale) + softmax_res, softmax_max, softmax_sum = softmax_forward(qk) + if drop_mask is None or len(drop_mask.shape) == 0: + drop_res = softmax_res + else: + drop_res = softmax_res * drop_mask * (1.0 / keep_prob) + y = torch.matmul(drop_res, v) + return y, softmax_max, softmax_sum + + +def fusion_attention_backward(dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob): + dp = torch.matmul(dx, v.permute(0, 1, 3, 2)) + if drop_mask is None or len(drop_mask.shape) == 0: + drop_res = softmax_res.permute(0, 1, 3, 2) + dp_drop = dp + else: + drop_res = softmax_res.mul(drop_mask).mul(1.0 / keep_prob).permute(0, 1, 3, 2) + dp_drop = dp * drop_mask * (1.0 / keep_prob) + dv = torch.matmul(drop_res, dx) + softmax_grad_res = (softmax_grad(dp_drop, softmax_res) * scale) + dq = torch.matmul(softmax_grad_res, k) + dk = torch.matmul(softmax_grad_res.permute(0, 1, 3, 2), q) + return dq, dk, dv + + +def parse_bsnd_args(query, key, head_num, input_layout): + supported_input_layout = ["BSH", "SBH", "BSND", "BNSD", "TND"] + B, S1, S2, N1, N2, D, H1, H2 = None, None, None, head_num, None, None, None, None + + if not isinstance(input_layout, str) or input_layout not in supported_input_layout: + raise ValueError(f"Invalid input_layout arg which must be one of 
{supported_input_layout}.") + + if input_layout == "TND": + raise ValueError(f"input_layout {input_layout} does not supported for now.") + try: + if input_layout == "BSH": + B, S1, H1 = query.shape + _, S2, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "SBH": + S1, B, H1 = query.shape + S2, _, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "BSND": + B, S1, N1, D = query.shape + _, S2, N2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + elif input_layout == "BNSD": + B, N1, S1, D = query.shape + _, N2, S2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + except Exception as e: + raise ValueError(f"query.shape: {query.shape}, key.shape: {key.shape}, parse_bsnd_args error: {e}") from e + + if D == 0: + raise ValueError(f"Value D must be non-zero.") + DTYPE = query.dtype + return B, S1, S2, N1, N2, D, H1, H2, DTYPE + + +def convert_from_bnsd(_input, input_layout): + if input_layout == "BSH": + # (B,N,S,D)=>(B,S,N*D) + out = rearrange(_input, 'b n s d -> b s (n d)').contiguous() + elif input_layout == "SBH": + # (B,N,S,D)=>(S,B,N*D) + out = rearrange(_input, 'b n s d -> s b (n d)').contiguous() + elif input_layout == "BSND": + # (B,N,S,D)=>(B,S,N,D) + out = rearrange(_input, 'b n s d -> b s n d').contiguous() + elif input_layout == "TND": + raise ValueError(f"input_layout {input_layout} does not supported for now.") + else: + out = _input + return out + + +def convert_to_bnsd(_input, n, input_layout): + # 默认"BNSD"无需处理 + if input_layout == "BSH": + # (B,S,N*D)=>(B,N,S,D) + out = rearrange(_input, 'b s (n d) -> b n s d', n=n) + elif input_layout == "SBH": + # (S,B,N*D)=>(B,N,S,D) + out = rearrange(_input, 's b (n d) -> b n s d', n=n) + elif input_layout == "BSND": + # (B,S,N,D)=>(B,N,S,D) + out = rearrange(_input, 'b s n d -> b n s d', n=n) + elif input_layout == "TND": + raise ValueError(f"input_layout {input_layout} does not supported for now.") + else: + out = _input + if out.dim() != 4: + raise ValueError(f"convert qkv format failed 
with input_layout {input_layout}.") + return out.to(gtype) + + +def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next_tocken, dtype): + """ + # 当sparse_mode=2、3、4时小算子到融合算子会走这个优化,反过来看就要拆解回原来的基本实现 + ===> atten_mask = torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(dtype) + """ + shape = [S1, S2] + + if atten_mask is not None: + # 当FA的输入已经包含atten_mask时,可以认为已经是转换之后的mask矩阵了,有三种特殊场景,即稀疏矩阵场景,需要进行逆向还原 + if sparse_mode == 2 or sparse_mode == 3 or sparse_mode == 4: + print(S1, S2, atten_mask.shape, atten_mask.dtype) + + if atten_mask.dim() == 2 and atten_mask.shape[0] == 2048 and atten_mask.shape[1] == 2048: + if atten_mask.equal(torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(atten_mask.dtype)): + if sparse_mode == 2: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=1)) + elif sparse_mode == 3: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=S2 - S1 + 1)) + elif sparse_mode == 4: + atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1)) + atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1)) + atten_mask = atten_mask_u + atten_mask_l + logger.debug(f"反向转换atten_mask {atten_mask.shape}") + return atten_mask.to(dtype) + + return atten_mask.to(dtype) + + if atten_mask is not None: + if atten_mask.dim() == 2: + if atten_mask.shape[0] != S1 or atten_mask.shape[1] != S2: + raise ValueError(f"Invalid atten_mask shape `SS` {atten_mask.shape}") + shape = [S1, S2] + elif atten_mask.dim() == 4: + if atten_mask.shape[1] == 1: + shape = [B, 1, S1, S2] if B != 1 else [1, 1, S1, S2] + else: + shape = [B, N1, S1, S2] if B != 1 else [1, N1, S1, S2] + + if sparse_mode == 0: + atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1)) + atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1)) + atten_mask = atten_mask_u + atten_mask_l + elif sparse_mode == 1: # no sparse + atten_mask = torch.from_numpy(np.zeros(shape)) + elif sparse_mode == 2: + 
atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=1)) + elif sparse_mode == 3: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=S2 - S1 + 1)) + elif sparse_mode == 4: + atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1)) + atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1)) + atten_mask = atten_mask_u + atten_mask_l + # 注:不会出现sparse_mode=5的情况,该情况要求必须要传入atten_mask,且atten_mask矩阵数据格式须为BNSS或B1SS, + # 因此可以认为FA的输入已经是正确的atten_mask了 + return atten_mask.to(dtype) + + +def generate_kv(key, value, N1, N2): + # N不等长适配by cdy + if not (N1 == N2): + k_new = broadcast_kv(N1, N2, key, key.dtype) + v_new = broadcast_kv(N1, N2, value, value.dtype) + else: + k_new = key + v_new = value + return k_new, v_new + + +def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale): + """ + attention = softmax(QK^T/sqrt(d))V + softmax(x_i) = e^(x_i - x_max) / sum(e^(x_i - x_max)) + """ + print(f"Using QKV to rebuild original softmax") + qk = calculate_qk(q, k, atten_mask, pse, scale) + softmax_res, x_max, x_sum = softmax_forward(qk) + return softmax_res + + +def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softmax_sum): + """ + attention = softmax(QK^T/sqrt(d))V + softmax(x_i) = e^(x_i - x_max_i) / x_sum_i) + """ + print(f"Using softmax_max and softmax_sum to rebuild original softmax") + qk = calculate_qk(q, k, atten_mask, pse, scale) + if softmax_max.shape[-1] == 0: + raise ValueError(f"softmax_max.shape[-1] must be non-zero, softmax_max.shape: {softmax_max.shape}") + repeat_dim = qk.shape[-1] // softmax_max.shape[-1] + softmax_res = torch.exp(qk.sub(softmax_max.repeat(1, 1, 1, repeat_dim))).div( + softmax_sum.repeat(1, 1, 1, repeat_dim)) + return softmax_res + + +def npu_fusion_attention_forward_patch(*args, **kwargs): + # query, key, value, head_num, input_layout + if len(args) != 5: + raise ValueError(f"Unsupported npu_fusion_attention args {args}.") + + B, S1, S2, N1, N2, D, H1, H2, DTYPE = 
parse_bsnd_args(args[0], args[1], args[3], args[4]) + if N1 == N2 and S1 == S2: + logger.debug(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + else: + logger.debug(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + if not (N1 % N2 == 0 and N1 >= N2): + raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.") + + dims_kwargs = {"B": B, "S1": S1, "S2": S2, "N1": N1, "N2": N2, + "D": D, "H1": H1, "H2": H2, "DTYPE": DTYPE} + + new_kwargs = {"keep_prob": 1, + "scale": kwargs.get("scale", 1 / (D ** 0.5)), + "sparse_mode": kwargs.get("sparse_mode", 0), + "prefix": kwargs.get("prefix"), + "pre_tockens": kwargs.get("pre_tockens", 2147483647), + "next_tockens": kwargs.get("next_tockens", 2147483647), + "pse": kwargs.get("pse"), + "padding_mask": kwargs.get("padding_mask"), + "atten_mask": kwargs.get("atten_mask")} + + return args, dims_kwargs, new_kwargs + + +def npu_fusion_attention_backward_patch(*args, **kwargs): + if len(args) != 6: + raise ValueError(f"Unsupported npu_fusion_attention_grad args {args}.") + + B, S1, S2, N1, N2, D, H1, H2, DTYPE = parse_bsnd_args(args[0], args[1], args[4], args[5]) + if N1 == N2 and S1 == S2: + print(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + else: + print(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + if not (N1 % N2 == 0 and N1 >= N2): + raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.") + + dims_kwargs = {"B": B, "S1": S1, "S2": S2, "N1": N1, "N2": N2, + "D": D, "H1": H1, "H2": H2, "DTYPE": DTYPE} + + new_kwargs = {"keep_prob": 1, + "scale_value": kwargs.get("scale_value", 1 / (D ** 0.5)), + "sparse_mode": kwargs.get("sparse_mode", 0), + "prefix": kwargs.get("prefix"), + "pre_tockens": kwargs.get("pre_tockens", 2147483647), + "next_tockens": kwargs.get("next_tockens", 2147483647), + "pse": kwargs.get("pse"), + "padding_mask": 
kwargs.get("padding_mask"), + "softmax_max": kwargs.get("softmax_max"), + "softmax_sum": kwargs.get("softmax_sum"), + "softmax_in": kwargs.get("softmax_in"), + "attention_in": kwargs.get("attention_in"), + "seed": kwargs.get("seed", 0), + "offset": kwargs.get("offset", 0), + "numels": kwargs.get("numels", 0), + "atten_mask": kwargs.get("atten_mask")} + + return args, dims_kwargs, new_kwargs + + +@npu_custom_functions +def npu_fusion_attention(*args, **kwargs): + new_args, dims_kwargs, new_kwargs = npu_fusion_attention_forward_patch(*args, **kwargs) + query, key, value, input_layout = new_args[0], new_args[1], new_args[2], new_args[4] + N1 = dims_kwargs.get("N1") + N2 = dims_kwargs.get("N2") + S1 = dims_kwargs.get("S1") + S2 = dims_kwargs.get("S2") + B = dims_kwargs.get("B") + DTYPE = dims_kwargs.get("DTYPE") + atten_mask = new_kwargs.get("atten_mask") + keep_prob = new_kwargs.get("keep_prob") + sparse_mode = new_kwargs.get("sparse_mode") + pre_tockens = new_kwargs.get("pre_tockens") + next_tockens = new_kwargs.get("next_tockens") + pse = new_kwargs.get("pse") + scale = new_kwargs.get("scale") + + atten_mask = generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tockens, next_tockens, DTYPE) + query = convert_to_bnsd(query, N1, input_layout) + key = convert_to_bnsd(key, N2, input_layout) + value = convert_to_bnsd(value, N2, input_layout) + k_new, v_new = generate_kv(key, value, N1, N2) + out_golden, softmax_max, softmax_sum = fusion_attention_forward(q=query, k=k_new, v=v_new, + drop_mask=None, atten_mask=atten_mask, + pse=pse, scale=scale, + keep_prob=keep_prob) + if out_golden.dim() == 5: + out_golden = out_golden.reshape(out_golden.size(0), out_golden.size(1) * out_golden.size(2), out_golden.size(3), + out_golden.size(4)) + out_golden = convert_from_bnsd(out_golden, input_layout) + + return out_golden.cpu(), softmax_max.repeat(1, 1, 1, 8).cpu(), softmax_sum.repeat(1, 1, 1, 8).cpu() + + +@npu_custom_grad_functions +def npu_fusion_attention_grad(*args, 
**kwargs): + # dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob + new_args, dims_kwargs, new_kwargs = npu_fusion_attention_backward_patch(*args, **kwargs) + query, key, value, dx, input_layout = new_args[0], new_args[1], new_args[2], new_args[3], new_args[5] + N1 = dims_kwargs.get("N1") + N2 = dims_kwargs.get("N2") + S1 = dims_kwargs.get("S1") + S2 = dims_kwargs.get("S2") + B = dims_kwargs.get("B") + D = dims_kwargs.get("D") + DTYPE = dims_kwargs.get("DTYPE") + atten_mask = new_kwargs.get("atten_mask") + keep_prob = new_kwargs.get("keep_prob") + sparse_mode = new_kwargs.get("sparse_mode") + pre_tockens = new_kwargs.get("pre_tockens") + next_tockens = new_kwargs.get("next_tockens") + pse = new_kwargs.get("pse") + softmax_max = new_kwargs.get("softmax_max") + softmax_sum = new_kwargs.get("softmax_sum") + scale_value = new_kwargs.get("scale_value") + + atten_mask = generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tockens, next_tockens, DTYPE) + query = convert_to_bnsd(query, N1, input_layout) + dx = convert_to_bnsd(dx, N1, input_layout) + key = convert_to_bnsd(key, N2, input_layout) + value = convert_to_bnsd(value, N2, input_layout) + k_new, v_new = generate_kv(key, value, N1, N2) + + if softmax_build_mode == "QKV": + softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value) + else: + softmax_res = rebuild_softmax_by_max_sum(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum) + + dq, dk, dv = fusion_attention_backward(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob) + + # N不等长适配by cdy + if not (N1 == N2): + if N2 == 0: + raise ValueError("dims_kwargs.N2 must be non-zero.") + G = int(N1 / N2) + dk = torch.sum(dk.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) + dv = torch.sum(dv.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) + + if dq.dim() == 5: + dq = dq.reshape(dq.size(0), dq.size(1) * dq.size(2), dq.size(3), dq.size(4)) + if dk.dim() 
== 5: + dk = dk.reshape(dk.size(0), dk.size(1) * dk.size(2), dk.size(3), dk.size(4)) + if dv.dim() == 5: + dv = dv.reshape(dv.size(0), dv.size(1) * dv.size(2), dv.size(3), dv.size(4)) + + dq = convert_from_bnsd(dq, input_layout) + dk = convert_from_bnsd(dk, input_layout) + dv = convert_from_bnsd(dv, input_layout) + + return dq.cpu(), dk.cpu(), dv.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py new file mode 100644 index 0000000000..0fe6c834a4 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py @@ -0,0 +1,18 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def npu_rms_norm(x, gamma, epsilon=1e-5): + rstd = torch.rsqrt(torch.mean(torch.pow(x, 2), axis=-1, keepdim=True) + epsilon) + res = x * rstd * gamma + return res.cpu(), rstd.float().cpu() + + +@npu_custom_grad_functions +def npu_rms_norm_backward(grad, x, gamma, rstd): + mean_gy = (grad * x * gamma * rstd).mean(dim=-1, keepdim=True) + grad_x = (grad * gamma - x * rstd * mean_gy) * rstd + grad_gamma = x * grad * rstd + return grad_x.cpu(), grad_gamma.cpu() + diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py new file mode 100644 index 0000000000..76b3828da3 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py @@ -0,0 +1,55 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def npu_rotary_mul(x, r1, r2): + x1, x2 = torch.chunk(x, 2, -1) + x_new = torch.cat((-x2, x1), dim=-1) + output = r1 * x + r2 * x_new + return output.cpu() + + +@npu_custom_grad_functions +def npu_rotary_mul_backward(dy_tensor, x, r1, r2): + x.requires_grad = True + r1.requires_grad = True + r2.requires_grad 
= True + # golden + x1, x2 = torch.chunk(x, 2, -1) + x_new = torch.cat((-x2, x1), dim=-1) + golden_tensor = r1 * x + r2 * x_new + golden_tensor.backward(dy_tensor) + r1_shape = r1.shape + r1_grad = torch.zeros(r1_shape).type(torch.float32) + r2_grad = torch.zeros(r1_shape).type(torch.float32) + x1, x2 = torch.chunk(x.float(), 2, -1) + x_new2 = torch.cat((-x2, x1), dim=-1) + x_shape = x.shape + h = x.float() + grad = dy_tensor.float() + condition_1 = (((r1_shape[0] == 1 and x_shape[0] != 1) or (r1_shape[0] == 1 and x_shape[0] == 1)) and + ((r1_shape[2] == 1 and x_shape[2] != 1) or (r1_shape[2] == 1 and x_shape[2] == 1)) and + (r1_shape[1] == x_shape[1]) and (r1_shape[3] == x_shape[3])) + condition_2 = (((r1_shape[0] == 1 and x_shape[0] != 1) or (r1_shape[0] == 1 and x_shape[0] == 1)) and + ((r1_shape[1] == 1 and x_shape[1] != 1) or (r1_shape[1] == 1 and x_shape[1] == 1)) and + (r1_shape[2] == x_shape[2]) and (r1_shape[3] == x_shape[3])) + condition_3 = (((r1_shape[2] == 1 and x_shape[2] != 1) or (r1_shape[2] == 1 and x_shape[2] == 1)) and + ((r1_shape[1] == 1 and x_shape[1] != 1) or (r1_shape[1] == 1 and x_shape[1] == 1)) and + (r1_shape[0] == x_shape[0]) and (r1_shape[3] == x_shape[3])) + if condition_1: + for i in range(x_shape[0]): + for j in range(x_shape[2]): + r2_grad[0, :, 0, :] += (x_new2[i, :, j, :] * grad[i, :, j, :]) + r1_grad[0, :, 0, :] += (h[i, :, j, :] * grad[i, :, j, :]) + elif condition_2: + for i in range(x_shape[0]): + for j in range(x_shape[1]): + r2_grad[0, 0, :, :] += (x_new2[i, j, :, :] * grad[i, j, :, :]) + r1_grad[0, 0, :, :] += (h[i, j, :, :] * grad[i, j, :, :]) + elif condition_3: + for i in range(x_shape[1]): + for j in range(x_shape[2]): + r2_grad[:, 0, 0, :] += (x_new2[:, i, j, :] * grad[:, i, j, :]) + r1_grad[:, 0, 0, :] += (h[:, i, j, :] * grad[:, i, j, :]) + return x.grad.cpu(), r1_grad.cpu(), r2_grad.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py 
b/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py new file mode 100644 index 0000000000..bcc523ee40 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py @@ -0,0 +1,29 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def npu_scaled_masked_softmax(x, mask, scale, fixed_triu_mask): + if fixed_triu_mask: + mask = (torch.triu(torch.ones(mask.shape), k=1)).bool().to(mask.device) + dtype = x.dtype + x = (x * scale).masked_fill(mask, value=-10000) + x = x - torch.max(x, dim=-1, keepdims=True)[0] + x = torch.exp(x.float()) + y = torch.div(x, torch.sum(x, dim=-1, keepdims=True)) + return y.to(dtype).cpu() + + +@npu_custom_grad_functions +def npu_scaled_masked_softmax_backward(y_grad, y, mask, scale, fixed_triu_mask): + if fixed_triu_mask: + mask = (torch.triu(torch.ones(mask.shape), k=1)).bool().to(mask.device) + dtype = y_grad.dtype + y_grad = y_grad.float() + y = y.float() + x_grad = y_grad * y + x_grad = y_grad - torch.sum(x_grad, dim=-1, keepdims=True) + x_grad = x_grad * y + x_grad = x_grad * scale + x_grad = x_grad.masked_fill(mask, value=0) + return x_grad.to(dtype).cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py new file mode 100644 index 0000000000..973be454d3 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py @@ -0,0 +1,58 @@ +import torch +from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions + + +@npu_custom_functions +def npu_swiglu(x, dim=-1): + tensor_dtype = x.dtype + + inTensors = torch.chunk(x, 2, dim=dim) + if tensor_dtype == torch.float32: + tensor_scalar = torch.sigmoid(torch.mul(inTensors[0], 1.0)) + output_data = torch.mul(torch.mul(tensor_scalar, inTensors[0]), inTensors[1]) + else: + tensor_self_float = 
inTensors[0].type(torch.float) + tensor_other_float = inTensors[1].type(torch.float) + tensor_out_float = torch.nn.functional.silu(tensor_self_float).type(tensor_dtype).type( + torch.float32) * tensor_other_float + output_data = tensor_out_float.type(tensor_dtype) + return output_data.cpu() + + +@npu_custom_grad_functions +def npu_swiglu_backward(grad, x, dim=-1): + tensor_dtype = grad.dtype + in_tensors = torch.chunk(x, 2, dim=dim) + tensor_grad_out = grad + + if tensor_dtype == torch.float16: + tensor_out1 = torch.mul( + torch.mul(in_tensors[1].type(torch.float32), swish_grad(1, in_tensors[0].type(torch.float32))), + tensor_grad_out.type(torch.float32)).type(torch.float16) + tensor_out2 = torch.mul(tensor_grad_out.type(torch.float32), + swish(1, in_tensors[0].type(torch.float32))).type(torch.float16) + output = torch.cat((tensor_out1, tensor_out2), dim) + elif tensor_dtype == torch.bfloat16: + tensor_self_float = in_tensors[0].type(torch.float) + tensor_other_float = in_tensors[1].type(torch.float) + tensor_gradout_float = tensor_grad_out.type(torch.float) + + tensor_out1 = torch.mul(tensor_gradout_float, swish_grad(1.0, tensor_self_float)).type(torch.bfloat16).type( + torch.float32) * tensor_other_float + tensor_out2 = swish(1.0, tensor_self_float).type(torch.bfloat16).type(torch.float32) * tensor_gradout_float + tensor_out_float = torch.cat((tensor_out1, tensor_out2), dim=dim) + output = tensor_out_float.type(torch.bfloat16) + else: + tensor_out1 = torch.mul(torch.mul(in_tensors[1], swish_grad(1.0, in_tensors[0])), tensor_grad_out) + tensor_out2 = torch.mul(tensor_grad_out, swish(1.0, in_tensors[0])) + output = torch.cat((tensor_out1, tensor_out2), dim) + return output.cpu() + + +def swish_grad(beta, x): + return torch.sigmoid(beta * x) + x * (1 - torch.sigmoid(beta * x)) * torch.sigmoid(beta * x) * beta + + +def swish(beta, x): + return x * torch.sigmoid(beta * x) + diff --git a/debug/accuracy_tools/msprobe/pytorch/function_factory.py 
b/debug/accuracy_tools/msprobe/pytorch/function_factory.py new file mode 100644 index 0000000000..6934cc0690 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/function_factory.py @@ -0,0 +1,47 @@ +class Register(dict): + def __init__(self, *args, **kwargs): + super(Register, self).__init__(*args, **kwargs) + self._dict = {} + + def register(self, target): + + def add_register_item(key, value): + if key in self._dict: + print(f"warning: {value.__name__} has been registered before, so we will overriden it.") + self[key] = value + return value + + if callable(target): + return add_register_item(target.__name__, target) + else: + raise Exception(f"The func {target} is not callable.") + + def __call__(self, target): + return self.register(target) + + def __setitem__(self, key, value): + self._dict[key] = value + + def __getitem__(self, key): + return self._dict[key] + + def __contains__(self, key): + return key in self._dict + + def __str__(self): + return str(self._dict) + + def keys(self): + return self._dict.keys() + + def values(self): + return self._dict.values() + + def items(self): + return self._dict.items() + + +npu_custom_functions = Register() +npu_custom_grad_functions = Register() + +from msprobe.pytorch.bench_functions import * diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py index 4617e4854f..2c1805ab8d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py @@ -24,12 +24,14 @@ from msprobe.pytorch.hook_module.hook_module import HOOKModule from msprobe.pytorch.common.utils import torch_device_guard from msprobe.core.common.const import Const from msprobe.core.common.file_check import FileOpen - +from msprobe.pytorch.function_factory import npu_custom_grad_functions cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, 
"support_wrap_ops.yaml") with FileOpen(yaml_path, 'r') as f: - WrapAtenOps = yaml.safe_load(f).get('aten') + Ops = yaml.safe_load(f) + WrapAtenOps = Ops.get('aten') + WhiteAtenOps = Ops.get('white_aten_ops', []) aten_func = {} @@ -48,7 +50,7 @@ class HOOKAtenOP(object): class AtenOPTemplate(HOOKModule): - def __init__(self, op, hook): + def __init__(self, op, hook, need_hook=True): if isinstance(op, torch._ops.OpOverloadPacket): op_name_ = op._qualified_op_name.split("::")[-1] else: @@ -58,11 +60,20 @@ class AtenOPTemplate(HOOKModule): op_name_ = op_name_ + '.' + overload_name self.op = op self.prefix_op_name_ = "Aten" + Const.SEP + str(op_name_) + Const.SEP - super().__init__(hook) + self.need_hook = need_hook + if self.need_hook: + super().__init__(hook) @torch_device_guard def forward(self, *args, **kwargs): - return self.op(*args, **kwargs) + if self.op in npu_custom_grad_functions: + return npu_custom_grad_functions[self.op](*args, **kwargs) + if self.op in WhiteAtenOps: + return eval(f"torch.ops.aten.{self.op}")(*args, **kwargs) + if self.op not in aten_func: + raise Exception(f"Skip op[{self.op}] accuracy check, because the op is not " + f"in dir(torch.ops.aten) and support yaml.") + return aten_func[self.op](*args, **kwargs) class AtenOPPacketTemplate(): diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py index 992713bce5..db9f996839 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py @@ -21,9 +21,11 @@ import torch_npu import yaml from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig from msprobe.pytorch.common.utils import torch_device_guard, torch_without_guard_version from msprobe.core.common.const import Const from msprobe.core.common.file_check import FileOpen +from 
msprobe.pytorch.function_factory import npu_custom_functions cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") @@ -37,7 +39,10 @@ def get_npu_ops(): _npu_ops = dir(torch.ops.npu) else: _npu_ops = dir(torch_npu._C._VariableFunctionsClass) - return set(WrapNpuOps) & set(_npu_ops) + if msCheckerConfig.white_list: + return set(WrapNpuOps) & set(_npu_ops) & set(msCheckerConfig.white_list) + else: + return set(WrapNpuOps) & set(_npu_ops) class HOOKNpuOP(object): @@ -46,13 +51,19 @@ class HOOKNpuOP(object): class NpuOPTemplate(HOOKModule): - def __init__(self, op_name, hook): + def __init__(self, op_name, hook, need_hook=True): self.op_name_ = op_name self.prefix_op_name_ = "NPU" + Const.SEP + str(op_name) + Const.SEP - super().__init__(hook) + self.need_hook = need_hook + if need_hook: + super().__init__(hook) @torch_device_guard def forward(self, *args, **kwargs): + if not self.need_hook: + if self.op_name_ not in npu_custom_functions: + raise Exception(f'There is not bench function {self.op_name_}') + return npu_custom_functions[self.op_name_](*args, **kwargs) if torch_without_guard_version: return getattr(torch.ops.npu, str(self.op_name_))(*args, **kwargs) else: @@ -60,7 +71,6 @@ class NpuOPTemplate(HOOKModule): def wrap_npu_op(op_name, hook): - def npu_op_template(*args, **kwargs): return NpuOPTemplate(op_name, hook)(*args, **kwargs) -- Gitee From 4e26436f8fef859d698b637bb380c1ab58fae723 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Mon, 29 Jul 2024 16:16:46 +0800 Subject: [PATCH 100/106] add codecheck --- .../pytorch/bench_functions/apply_adam_w.py | 7 ++-- .../bench_functions/matmul_backward.py | 1 - .../bench_functions/npu_fusion_attention.py | 10 ++--- .../msprobe/pytorch/common/utils.py | 41 +++++++++++++++++++ .../msprobe/pytorch/function_factory.py | 29 +++++++------ 5 files changed, 66 insertions(+), 22 deletions(-) diff --git 
a/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py index 3cebd30507..dc0954911c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py @@ -9,12 +9,13 @@ def npu_apply_adam_w(beta1_power, beta2_power, lr, weight_decay, var, m, v = out if amsgrad: max_grad_norm = (torch.rand(var.shape) * 10.0 - 5.0).to(var.dtype) + beta1_power_out = beta1_power * beta1 + beta2_power_out = beta2_power * beta2 + var_t = var * (1 + (-lr * weight_decay)) gt = -grad if maximize else grad m_out = m * beta1 - (beta1 + (-1)) * gt v_out = v * beta2 - (beta2 + (-1)) * gt * gt - var_t = var * (1 + (-lr * weight_decay)) - beta1_power_out = beta1_power * beta1 - beta2_power_out = beta2_power * beta2 + if amsgrad: max_grad_norm_out = torch.max(max_grad_norm, v_out) if (1 - beta2_power_out) == 0: diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py index dae2745527..3c4f7dc040 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py @@ -26,7 +26,6 @@ def matmul_backward(grad, self, other, mask): if mask[0]: grad_self = unfolded_grad.mm(other.unsqueeze(0) if dim_other == 1 else other.transpose(-1, -2)) \ .view(size_self) - print(f'size_self: {size_self}') if mask[1]: unfolded_self = self.contiguous().view([-1, size_self[-1]]) grad_other = unfolded_self.transpose(-1, -2).mm(unfolded_grad).view(size_other) diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py index 2a46d02004..6a49ce740a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py @@ -183,7 +183,7 @@ def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next if atten_mask is not None: # 当FA的输入已经包含atten_mask时,可以认为已经是转换之后的mask矩阵了,有三种特殊场景,即稀疏矩阵场景,需要进行逆向还原 if sparse_mode == 2 or sparse_mode == 3 or sparse_mode == 4: - print(S1, S2, atten_mask.shape, atten_mask.dtype) + logger.info(f"S1: {S1}, S2:{S2}, atten_mask.shape:{atten_mask.shape}, atten_mask.dtype:{atten_mask.dtype}") if atten_mask.dim() == 2 and atten_mask.shape[0] == 2048 and atten_mask.shape[1] == 2048: if atten_mask.equal(torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(atten_mask.dtype)): @@ -246,7 +246,7 @@ def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale): attention = softmax(QK^T/sqrt(d))V softmax(x_i) = e^(x_i - x_max) / sum(e^(x_i - x_max)) """ - print(f"Using QKV to rebuild original softmax") + logger.info("Using QKV to rebuild original softmax") qk = calculate_qk(q, k, atten_mask, pse, scale) softmax_res, x_max, x_sum = softmax_forward(qk) return softmax_res @@ -257,7 +257,7 @@ def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softma attention = softmax(QK^T/sqrt(d))V softmax(x_i) = e^(x_i - x_max_i) / x_sum_i) """ - print(f"Using softmax_max and softmax_sum to rebuild original softmax") + logger.info("Using softmax_max and softmax_sum to rebuild original softmax") qk = calculate_qk(q, k, atten_mask, pse, scale) if softmax_max.shape[-1] == 0: raise ValueError(f"softmax_max.shape[-1] must be non-zero, softmax_max.shape: {softmax_max.shape}") @@ -302,9 +302,9 @@ def npu_fusion_attention_backward_patch(*args, **kwargs): B, S1, S2, N1, N2, D, H1, H2, DTYPE = parse_bsnd_args(args[0], args[1], args[4], args[5]) if N1 == N2 and S1 == S2: - print(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + logger.info(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}") else: - 
print(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + logger.info(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}") if not (N1 % N2 == 0 and N1 >= N2): raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.") diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index acc1de1051..9028d79181 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -14,10 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. """ +import logging import os import random import stat import torch +import torch.distributed as dist import numpy as np from functools import wraps from msprobe.core.common.exceptions import DistributedNotInitializedError @@ -221,3 +223,42 @@ class Const: CONVERT_API = { "int32_to_int64": ["cross_entropy"] } + + +def get_tensor_rank(in_feat, out_feat): + if dist.is_initialized(): + return dist.get_rank() + + def get_tensor_rank_single(x): + if isinstance(x, (list, tuple)): + if len(x) > 0: + return get_tensor_rank_single(x[0]) + return None + elif isinstance(x, torch.Tensor): + device = x.device + if device.type == 'cpu': + return None + else: + return device.index + return None + + in_rank = get_tensor_rank_single(in_feat) + if in_rank is not None: + return in_rank + out_rank = get_tensor_rank_single(out_feat) + if out_rank is not None: + return out_rank + return None + + +def _create_logger(level=logging.INFO): + logger_ = logging.getLogger() + logger_.setLevel(level) + ch = logging.StreamHandler() + ch.setLevel(level) + logger_.addHandler(ch) + return logger_ + + +log_level = logging.DEBUG if os.environ.get("API_ACCURACY_CHECK_LOG_LEVEL") == "1" else logging.INFO +logger = _create_logger(log_level) diff --git a/debug/accuracy_tools/msprobe/pytorch/function_factory.py 
b/debug/accuracy_tools/msprobe/pytorch/function_factory.py index 6934cc0690..4e725de4f0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/function_factory.py +++ b/debug/accuracy_tools/msprobe/pytorch/function_factory.py @@ -1,21 +1,11 @@ +from msprobe.pytorch.common.utils import logger + + class Register(dict): def __init__(self, *args, **kwargs): super(Register, self).__init__(*args, **kwargs) self._dict = {} - def register(self, target): - - def add_register_item(key, value): - if key in self._dict: - print(f"warning: {value.__name__} has been registered before, so we will overriden it.") - self[key] = value - return value - - if callable(target): - return add_register_item(target.__name__, target) - else: - raise Exception(f"The func {target} is not callable.") - def __call__(self, target): return self.register(target) @@ -40,6 +30,19 @@ class Register(dict): def items(self): return self._dict.items() + def register(self, target): + + def add_register_item(key, value): + if key in self._dict: + logger.warning(f"{value.__name__} has been registered before, so we will overriden it.") + self[key] = value + return value + + if callable(target): + return add_register_item(target.__name__, target) + else: + raise Exception(f"The func {target} is not callable.") + npu_custom_functions = Register() npu_custom_grad_functions = Register() -- Gitee From 93d34b067504fddc86b868e98360e0e4e20ea5f2 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 30 Jul 2024 09:06:48 +0800 Subject: [PATCH 101/106] fix --- debug/accuracy_tools/msprobe/core/common/const.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 85d5c65e51..b59536aa5d 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -84,7 +84,7 @@ class Const: INPLACE_LIST = [ "broadcast", "all_reduce", "reduce", "all_gather", "gather", 
"scatter", "reduce_scatter", - "_reduce_scatter_base", "_all_gather_base", "send", "recv", "irecv", "isend", "all_to_all_single" + "_reduce_scatter_base", "_all_gather_base", "send", "recv", "irecv", "isend", "all_to_all_single", "all_to_all" ] CONVERT = { -- Gitee From ad47e040b9643a6eb205b1b87c6f92f3d0b95aa0 Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Tue, 30 Jul 2024 09:58:12 +0800 Subject: [PATCH 102/106] fix overflow_nums bug --- debug/accuracy_tools/msprobe/core/common_config.py | 4 ++-- .../core/data_dump/data_processor/pytorch_processor.py | 2 +- .../msprobe/pytorch/debugger/debugger_config.py | 2 +- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 4 ++-- .../accuracy_tools/msprobe/test/core_ut/test_common_config.py | 2 +- .../accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py index ed38eba008..b4bf5cf28e 100644 --- a/debug/accuracy_tools/msprobe/core/common_config.py +++ b/debug/accuracy_tools/msprobe/core/common_config.py @@ -44,8 +44,8 @@ class BaseConfig: self.data_mode = json_config.get('data_mode') self.backward_input = json_config.get("backward_input") self.file_format = json_config.get("file_format") - self.summary_mode = json_config.get("summary_mode") - self.overflow_num = json_config.get("overflow_num") + self.summary_mode = json_config.get("summary_mode") + self.overflow_nums = json_config.get("overflow_nums") self.check_mode = json_config.get("check_mode") def check_config(self): diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 4cdd3ea046..00cab5e54f 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ 
-182,7 +182,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): super().__init__(config, data_writer) self.cached_tensors_and_file_paths = {} self.real_overflow_dump_times = 0 - self.overflow_nums = config.overflow_num + self.overflow_nums = config.overflow_nums self.bits_for_overflow = 8 @staticmethod diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py index cfc588e1e9..f1289e9b01 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py @@ -21,7 +21,7 @@ class DebuggerConfig: self.acl_config = common_config.acl_config if common_config.acl_config else "" self.is_forward_acl_dump = True self.summary_mode = task_config.summary_mode if task_config.summary_mode else Const.STATISTICS - self.overflow_num = task_config.overflow_num if task_config.overflow_num else 1 + self.overflow_nums = task_config.overflow_nums if task_config.overflow_nums else 1 self.framework = Const.PT_FRAMEWORK if self.task == Const.FREE_BENCHMARK: diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index a3d765f3a4..ceec92a633 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -32,12 +32,12 @@ class StatisticsConfig(BaseConfig): class OverflowCheckConfig(BaseConfig): def __init__(self, json_config): super().__init__(json_config) - self.overflow_num = json_config.get("overflow_nums") + self.overflow_nums = json_config.get("overflow_nums") self.check_mode = json_config.get("check_mode") self.check_overflow_config() def check_overflow_config(self): - if self.overflow_num is not None and not isinstance(self.overflow_num, int): + if self.overflow_nums is not None and not isinstance(self.overflow_nums, int): raise Exception("overflow_num is invalid") if self.check_mode is not None 
and self.check_mode not in ["all", "aicore", "atomic"]: raise Exception("check_mode is invalid") diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py index 06c7378ed3..8b2138a485 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py @@ -121,7 +121,7 @@ class TestCommonConfig(TestCase): self.assertIsNone(base_config.backward_input) self.assertIsNone(base_config.file_format) self.assertIsNone(base_config.summary_mode) - self.assertIsNone(base_config.overflow_num) + self.assertIsNone(base_config.overflow_nums) self.assertIsNone(base_config.check_mode) json_config.update({"scope": "Tensor_Add"}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py index c344f0b66b..470390d77b 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py @@ -45,7 +45,7 @@ class TestPtConfig(TestCase): } } result = parse_task_config(Const.OVERFLOW_CHECK, overflow_check_config) - self.assertEqual(result.overflow_num, 1) + self.assertEqual(result.overflow_nums, 1) self.assertEqual(result.check_mode, "all") free_benchmark_config = { -- Gitee From 718154b7a9fdede0544dc93b4cbeacea6faa6b63 Mon Sep 17 00:00:00 2001 From: l30036321 Date: Tue, 30 Jul 2024 10:49:24 +0800 Subject: [PATCH 103/106] fix overflowcheck bug --- .../msprobe/core/data_dump/data_processor/pytorch_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 4cdd3ea046..8dac54fd2e 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ 
b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -255,7 +255,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): else: logger.warning(f'The file path {file_path} length exceeds limit.') single_arg = super()._analyze_tensor(tensor, suffix) - self._analyze_maybe_overflow_tensor(single_arg, tensor) + self._analyze_maybe_overflow_tensor(single_arg) single_arg.update({"data_name": dump_data_name}) return single_arg -- Gitee From c52ed966597c2e333594b6bd2b0fad226877843c Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Mon, 29 Jul 2024 21:01:32 +0800 Subject: [PATCH 104/106] fix aten --- .../msprobe/pytorch/common/utils.py | 12 +++--------- .../msprobe/pytorch/function_factory.py | 18 +++++++++++++++++- .../msprobe/pytorch/hook_module/wrap_aten.py | 18 ++++++++++-------- .../pytorch/hook_module/wrap_npu_custom.py | 14 ++++++++------ 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index 9028d79181..181491488f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -233,22 +233,16 @@ def get_tensor_rank(in_feat, out_feat): if isinstance(x, (list, tuple)): if len(x) > 0: return get_tensor_rank_single(x[0]) - return None elif isinstance(x, torch.Tensor): device = x.device - if device.type == 'cpu': - return None - else: + if device.type != 'cpu': return device.index return None in_rank = get_tensor_rank_single(in_feat) - if in_rank is not None: - return in_rank out_rank = get_tensor_rank_single(out_feat) - if out_rank is not None: - return out_rank - return None + tensor_rank = in_rank if in_rank else out_rank + return tensor_rank def _create_logger(level=logging.INFO): diff --git a/debug/accuracy_tools/msprobe/pytorch/function_factory.py b/debug/accuracy_tools/msprobe/pytorch/function_factory.py index 4e725de4f0..6d840e561e 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/function_factory.py +++ b/debug/accuracy_tools/msprobe/pytorch/function_factory.py @@ -47,4 +47,20 @@ class Register(dict): npu_custom_functions = Register() npu_custom_grad_functions = Register() -from msprobe.pytorch.bench_functions import * +from msprobe.pytorch.bench_functions.apply_adam_w import npu_apply_adam_w +from msprobe.pytorch.bench_functions.confusion_transpose import npu_confusion_transpose, \ + npu_confusion_transpose_backward +from msprobe.pytorch.bench_functions.fast_gelu import fast_gelu, npu_fast_gelu_backward +from msprobe.pytorch.bench_functions.layer_norm_eval import npu_layer_norm_eval +from msprobe.pytorch.bench_functions.linear import npu_linear, npu_linear_backward +from msprobe.pytorch.bench_functions.matmul_backward import matmul_backward +from msprobe.pytorch.bench_functions.npu_fusion_attention import softmax_forward, softmax_grad, broadcast_kv, \ + calculate_qk, fusion_attention_forward, fusion_attention_backward, parse_bsnd_args, convert_from_bnsd, \ + convert_to_bnsd, generate_atten_mask, generate_kv, rebuid_softmax_by_qkv, rebuild_softmax_by_max_sum, \ + npu_fusion_attention_forward_patch, npu_fusion_attention_backward_patch, npu_fusion_attention, \ + npu_fusion_attention_grad +from msprobe.pytorch.bench_functions.rms_norm import npu_rms_norm, npu_rms_norm_backward +from msprobe.pytorch.bench_functions.rotary_mul import npu_rotary_mul, npu_rotary_mul_backward +from msprobe.pytorch.bench_functions.scaled_mask_softmax import npu_scaled_masked_softmax, \ + npu_scaled_masked_softmax_backward +from msprobe.pytorch.bench_functions.swiglu import npu_swiglu, npu_swiglu_backward, swish_grad, swish diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py index 2c1805ab8d..a02abbe5f4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py 
@@ -66,14 +66,16 @@ class AtenOPTemplate(HOOKModule): @torch_device_guard def forward(self, *args, **kwargs): - if self.op in npu_custom_grad_functions: - return npu_custom_grad_functions[self.op](*args, **kwargs) - if self.op in WhiteAtenOps: - return eval(f"torch.ops.aten.{self.op}")(*args, **kwargs) - if self.op not in aten_func: - raise Exception(f"Skip op[{self.op}] accuracy check, because the op is not " - f"in dir(torch.ops.aten) and support yaml.") - return aten_func[self.op](*args, **kwargs) + if isinstance(self.op, str): + if self.op in npu_custom_grad_functions: + return npu_custom_grad_functions[self.op](*args, **kwargs) + if self.op in WhiteAtenOps: + return eval(f"torch.ops.aten.{self.op}")(*args, **kwargs) + if self.op not in aten_func: + raise Exception(f"Skip op[{self.op}] accuracy check, because the op is not " + f"in dir(torch.ops.aten) and support yaml.") + return aten_func[self.op](*args, **kwargs) + return self.op(*args, **kwargs) class AtenOPPacketTemplate(): diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py index db9f996839..8a67ed9429 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py @@ -17,11 +17,9 @@ import os import torch -import torch_npu import yaml from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig from msprobe.pytorch.common.utils import torch_device_guard, torch_without_guard_version from msprobe.core.common.const import Const from msprobe.core.common.file_check import FileOpen @@ -32,6 +30,13 @@ yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") with FileOpen(yaml_path, 'r') as f: WrapNpuOps = yaml.safe_load(f).get('torch_npu') +try: + import torch_npu +except ImportError: + is_gpu = True +else: + is_gpu = False + def 
get_npu_ops(): global WrapNpuOps @@ -39,10 +44,7 @@ def get_npu_ops(): _npu_ops = dir(torch.ops.npu) else: _npu_ops = dir(torch_npu._C._VariableFunctionsClass) - if msCheckerConfig.white_list: - return set(WrapNpuOps) & set(_npu_ops) & set(msCheckerConfig.white_list) - else: - return set(WrapNpuOps) & set(_npu_ops) + return set(WrapNpuOps) & set(_npu_ops) class HOOKNpuOP(object): -- Gitee From 8401c942e6b64b6215d3ca4846927df775c41ea6 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Tue, 30 Jul 2024 16:13:13 +0800 Subject: [PATCH 105/106] resolve class registry loop import issue --- .../pytorch/bench_functions/apply_adam_w.py | 3 -- .../bench_functions/confusion_transpose.py | 6 --- .../pytorch/bench_functions/fast_gelu.py | 3 -- .../bench_functions/layer_norm_eval.py | 2 - .../msprobe/pytorch/bench_functions/linear.py | 3 -- .../bench_functions/matmul_backward.py | 2 - .../bench_functions/npu_fusion_attention.py | 3 -- .../pytorch/bench_functions/rms_norm.py | 3 -- .../pytorch/bench_functions/rotary_mul.py | 3 -- .../bench_functions/scaled_mask_softmax.py | 3 -- .../msprobe/pytorch/bench_functions/swiglu.py | 3 -- .../msprobe/pytorch/function_factory.py | 49 +++++++++++-------- 12 files changed, 29 insertions(+), 54 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py index dc0954911c..caf21a604c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py @@ -1,9 +1,6 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions - -@npu_custom_functions def npu_apply_adam_w(beta1_power, beta2_power, lr, weight_decay, beta1, beta2, eps, grad, max_grad_norm, amsgrad, maximize, out): var, m, v = out diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py 
b/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py index dd30bb18a6..627bf11b64 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py @@ -1,8 +1,3 @@ -import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions - - -@npu_custom_functions def npu_confusion_transpose(data, perm, shape, transpose_first): if transpose_first: output = data.permute(*perm).contiguous().view(shape) @@ -11,7 +6,6 @@ def npu_confusion_transpose(data, perm, shape, transpose_first): return output.cpu() -@npu_custom_grad_functions def npu_confusion_transpose_backward(grad, perm, shape, transpose_first): shape_cal = shape if transpose_first else [shape[perm_dim] for perm_dim in perm] perm_cal = [0] * len(perm) diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py index 5442eff734..a1a9ca0808 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py @@ -1,8 +1,6 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions -@npu_custom_functions def fast_gelu(input0): attr = 1.702 const_0 = 0 - attr @@ -24,7 +22,6 @@ def fast_gelu(input0): return result.cpu() -@npu_custom_grad_functions def npu_fast_gelu_backward(grad, input_x): const_2 = 1.702 const_3 = 1.0 diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py index 885b5c460e..f6949c079e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py @@ -1,8 +1,6 @@ import torch -from msprobe.pytorch.function_factory import 
npu_custom_functions -@npu_custom_functions def npu_layer_norm_eval(data, normalized_shape): result = torch.nn.functional.layer_norm(data, normalized_shape) return result.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py index 33b18d759d..95db875edf 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py @@ -1,14 +1,11 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions -@npu_custom_functions def npu_linear(x, weight, bias): output = torch.nn.functional.linear(x, weight, bias) return output.cpu() -@npu_custom_grad_functions def npu_linear_backward(grad, input_data, weight): input_grad = torch.matmul(grad, weight) weight_grad = torch.matmul(grad.t(), input_data) diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py index 3c4f7dc040..ed1c746ec1 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py @@ -1,8 +1,6 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_grad_functions -@npu_custom_grad_functions def matmul_backward(grad, self, other, mask): grad_self, grad_other = None, None dim_self = self.dim() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py index 6a49ce740a..f4b639e2fe 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py @@ -2,7 +2,6 @@ import torch import numpy as np from einops import rearrange -from msprobe.pytorch.function_factory import 
npu_custom_functions, npu_custom_grad_functions from api_accuracy_checker.common.utils import logger gtype = torch.float64 # arm host必须选择float64,x86环境选择float32即可,64也行。arm计算很慢,s=8k的场景建议使用x86 @@ -331,7 +330,6 @@ def npu_fusion_attention_backward_patch(*args, **kwargs): return args, dims_kwargs, new_kwargs -@npu_custom_functions def npu_fusion_attention(*args, **kwargs): new_args, dims_kwargs, new_kwargs = npu_fusion_attention_forward_patch(*args, **kwargs) query, key, value, input_layout = new_args[0], new_args[1], new_args[2], new_args[4] @@ -366,7 +364,6 @@ def npu_fusion_attention(*args, **kwargs): return out_golden.cpu(), softmax_max.repeat(1, 1, 1, 8).cpu(), softmax_sum.repeat(1, 1, 1, 8).cpu() -@npu_custom_grad_functions def npu_fusion_attention_grad(*args, **kwargs): # dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob new_args, dims_kwargs, new_kwargs = npu_fusion_attention_backward_patch(*args, **kwargs) diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py index 0fe6c834a4..e647312fdb 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py @@ -1,15 +1,12 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions -@npu_custom_functions def npu_rms_norm(x, gamma, epsilon=1e-5): rstd = torch.rsqrt(torch.mean(torch.pow(x, 2), axis=-1, keepdim=True) + epsilon) res = x * rstd * gamma return res.cpu(), rstd.float().cpu() -@npu_custom_grad_functions def npu_rms_norm_backward(grad, x, gamma, rstd): mean_gy = (grad * x * gamma * rstd).mean(dim=-1, keepdim=True) grad_x = (grad * gamma - x * rstd * mean_gy) * rstd diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py index 76b3828da3..0e0fda5f73 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py @@ -1,8 +1,6 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions -@npu_custom_functions def npu_rotary_mul(x, r1, r2): x1, x2 = torch.chunk(x, 2, -1) x_new = torch.cat((-x2, x1), dim=-1) @@ -10,7 +8,6 @@ def npu_rotary_mul(x, r1, r2): return output.cpu() -@npu_custom_grad_functions def npu_rotary_mul_backward(dy_tensor, x, r1, r2): x.requires_grad = True r1.requires_grad = True diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py index bcc523ee40..8717aebaf9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py @@ -1,8 +1,6 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions -@npu_custom_functions def npu_scaled_masked_softmax(x, mask, scale, fixed_triu_mask): if fixed_triu_mask: mask = (torch.triu(torch.ones(mask.shape), k=1)).bool().to(mask.device) @@ -14,7 +12,6 @@ def npu_scaled_masked_softmax(x, mask, scale, fixed_triu_mask): return y.to(dtype).cpu() -@npu_custom_grad_functions def npu_scaled_masked_softmax_backward(y_grad, y, mask, scale, fixed_triu_mask): if fixed_triu_mask: mask = (torch.triu(torch.ones(mask.shape), k=1)).bool().to(mask.device) diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py index 973be454d3..e03c975a50 100644 --- a/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py @@ -1,8 +1,6 @@ import torch -from msprobe.pytorch.function_factory import npu_custom_functions, npu_custom_grad_functions 
-@npu_custom_functions def npu_swiglu(x, dim=-1): tensor_dtype = x.dtype @@ -19,7 +17,6 @@ def npu_swiglu(x, dim=-1): return output_data.cpu() -@npu_custom_grad_functions def npu_swiglu_backward(grad, x, dim=-1): tensor_dtype = grad.dtype in_tensors = torch.chunk(x, 2, dim=dim) diff --git a/debug/accuracy_tools/msprobe/pytorch/function_factory.py b/debug/accuracy_tools/msprobe/pytorch/function_factory.py index 6d840e561e..c2fd8bfd0c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/function_factory.py +++ b/debug/accuracy_tools/msprobe/pytorch/function_factory.py @@ -1,4 +1,17 @@ from msprobe.pytorch.common.utils import logger +from msprobe.pytorch.bench_functions.apply_adam_w import npu_apply_adam_w +from msprobe.pytorch.bench_functions.confusion_transpose import npu_confusion_transpose, \ + npu_confusion_transpose_backward +from msprobe.pytorch.bench_functions.fast_gelu import fast_gelu, npu_fast_gelu_backward +from msprobe.pytorch.bench_functions.layer_norm_eval import npu_layer_norm_eval +from msprobe.pytorch.bench_functions.linear import npu_linear, npu_linear_backward +from msprobe.pytorch.bench_functions.matmul_backward import matmul_backward +from msprobe.pytorch.bench_functions.npu_fusion_attention import npu_fusion_attention, npu_fusion_attention_grad +from msprobe.pytorch.bench_functions.rms_norm import npu_rms_norm, npu_rms_norm_backward +from msprobe.pytorch.bench_functions.rotary_mul import npu_rotary_mul, npu_rotary_mul_backward +from msprobe.pytorch.bench_functions.scaled_mask_softmax import npu_scaled_masked_softmax, \ + npu_scaled_masked_softmax_backward +from msprobe.pytorch.bench_functions.swiglu import npu_swiglu, npu_swiglu_backward, swish_grad, swish class Register(dict): @@ -6,8 +19,10 @@ class Register(dict): super(Register, self).__init__(*args, **kwargs) self._dict = {} - def __call__(self, target): - return self.register(target) + def __call__(self, target_func_list): + for target in target_func_list: + self.register(target) + return def 
__setitem__(self, key, value): self._dict[key] = value @@ -44,23 +59,17 @@ class Register(dict): raise Exception(f"The func {target} is not callable.") +# register for npu custom bench functions npu_custom_functions = Register() -npu_custom_grad_functions = Register() +npu_custom_functions([ + npu_apply_adam_w, npu_confusion_transpose, fast_gelu, npu_layer_norm_eval, npu_linear, npu_fusion_attention, + npu_rms_norm, npu_rotary_mul, npu_scaled_masked_softmax, npu_swiglu +]) -from msprobe.pytorch.bench_functions.apply_adam_w import npu_apply_adam_w -from msprobe.pytorch.bench_functions.confusion_transpose import npu_confusion_transpose, \ - npu_confusion_transpose_backward -from msprobe.pytorch.bench_functions.fast_gelu import fast_gelu, npu_fast_gelu_backward -from msprobe.pytorch.bench_functions.layer_norm_eval import npu_layer_norm_eval -from msprobe.pytorch.bench_functions.linear import npu_linear, npu_linear_backward -from msprobe.pytorch.bench_functions.matmul_backward import matmul_backward -from msprobe.pytorch.bench_functions.npu_fusion_attention import softmax_forward, softmax_grad, broadcast_kv, \ - calculate_qk, fusion_attention_forward, fusion_attention_backward, parse_bsnd_args, convert_from_bnsd, \ - convert_to_bnsd, generate_atten_mask, generate_kv, rebuid_softmax_by_qkv, rebuild_softmax_by_max_sum, \ - npu_fusion_attention_forward_patch, npu_fusion_attention_backward_patch, npu_fusion_attention, \ - npu_fusion_attention_grad -from msprobe.pytorch.bench_functions.rms_norm import npu_rms_norm, npu_rms_norm_backward -from msprobe.pytorch.bench_functions.rotary_mul import npu_rotary_mul, npu_rotary_mul_backward -from msprobe.pytorch.bench_functions.scaled_mask_softmax import npu_scaled_masked_softmax, \ - npu_scaled_masked_softmax_backward -from msprobe.pytorch.bench_functions.swiglu import npu_swiglu, npu_swiglu_backward, swish_grad, swish +# register for npu custom backward bench functions +npu_custom_grad_functions = Register() 
+npu_custom_grad_functions([ + npu_confusion_transpose_backward, npu_fast_gelu_backward, npu_linear_backward, matmul_backward, + npu_fusion_attention_grad, npu_rms_norm_backward, npu_rotary_mul_backward, npu_scaled_masked_softmax_backward, + npu_swiglu_backward +]) -- Gitee From 38054fd7fbd75c5381acaa194e5707757d74d37a Mon Sep 17 00:00:00 2001 From: wangyunkai <14808924+wyk675680243@user.noreply.gitee.com> Date: Thu, 7 Nov 2024 10:46:39 +0800 Subject: [PATCH 106/106] =?UTF-8?q?[=E5=88=9D=E5=A7=8B=E5=8C=96=E6=8F=92?= =?UTF-8?q?=E4=BB=B6=E6=A1=86=E6=9E=B6]=201.=E5=BB=BA=E7=AB=8Bmindstudio-b?= =?UTF-8?q?oard=E7=9B=AE=E5=BD=95=E7=BB=93=E6=9E=84=202.cmakeLists.txt?= =?UTF-8?q?=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/mindstudio-board/server/.gitignore | 4 + .../mindstudio-board/server/CMakeLists.txt | 49 ++ .../server/cmake/mind_expression.cmake | 32 + .../server/cmake/options.cmake | 69 +++ .../mindstudio-board/server/cmake/utils.cmake | 584 ++++++++++++++++++ .../server/core/CMakeLists.txt | 16 + .../server/plugins/CMakeLists.txt | 3 + .../server/src/CMakeLists.txt | 16 + 8 files changed, 773 insertions(+) create mode 100644 plugins/mindstudio-board/server/.gitignore create mode 100644 plugins/mindstudio-board/server/CMakeLists.txt create mode 100644 plugins/mindstudio-board/server/cmake/mind_expression.cmake create mode 100644 plugins/mindstudio-board/server/cmake/options.cmake create mode 100644 plugins/mindstudio-board/server/cmake/utils.cmake create mode 100644 plugins/mindstudio-board/server/core/CMakeLists.txt create mode 100644 plugins/mindstudio-board/server/plugins/CMakeLists.txt create mode 100644 plugins/mindstudio-board/server/src/CMakeLists.txt diff --git a/plugins/mindstudio-board/server/.gitignore b/plugins/mindstudio-board/server/.gitignore new file mode 100644 index 0000000000..43bdbcd1e0 --- /dev/null +++ b/plugins/mindstudio-board/server/.gitignore @@ -0,0 +1,4 @@ 
+cmake-build-* +output +*.pb.cc +*.pb.h \ No newline at end of file diff --git a/plugins/mindstudio-board/server/CMakeLists.txt b/plugins/mindstudio-board/server/CMakeLists.txt new file mode 100644 index 0000000000..9bd5937886 --- /dev/null +++ b/plugins/mindstudio-board/server/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.20) +project(BOARD) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0) + message(FATAL_ERROR "GCC version must be 7.3.0 and above, but found ${CMAKE_CXX_COMPILER_VERSION}") + elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.4.0) + message(WARNING "GCC version ${CMAKE_CXX_COMPILER_VERSION} is greater than 11.4.0, may cause unknown problems.") + endif () +endif () + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_C_STANDARD 11) + +set(HOME_DIR ${PROJECT_SOURCE_DIR}) + +set(EXECUTABLE_OUTPUT_PATH ${HOME_DIR}/output/bin) +set(LIBRARY_OUTPUT_PATH ${HOME_DIR}/output/bin) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2 -ftrapv -fstack-protector-strong -fPIE -fPIC") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2 -ftrapv -fstack-protector-strong -fPIE -fPIC") +message(WARNING "Build Info: OS=${CMAKE_SYSTEM_NAME}, build type=${CMAKE_BUILD_TYPE}") +if (${CMAKE_BUILD_TYPE} MATCHES "Debug") + message(STATUS "Enable debug symbol table, change optimization level to 0") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0") +endif () + +set(CMAKE_SKIP_RPATH true) +if (CMAKE_SYSTEM_NAME MATCHES "windows") + if ((NOT CMAKE_BUILD_TYPE MATCHES "Debug") AND (NOT CMAKE_BUILD_TYPE MATCHES "PROFILE")) + message(STATUS "Build type = ${CMAKE_BUILD_TYPE}, static = enable.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--nxcompat -Wl,--dynamicbase -s -pie -Wincompatible-pointer-types") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,--nxcompat -Wl,--dynamicbase -s -pie -Wincompatible-pointer-types") 
+ add_link_options(-static) + endif () +elseif () + if ((NOT CMAKE_BUILD_TYPE MATCHES "Debug") AND (NOT CMAKE_BUILD_TYPE MATCHES "PROFILE")) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s -pie -Wl,-z,now") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s -pie -Wl,-z,now") + endif () +endif () +include_directories(${HOME_DIR}/third_party/rapidjson) + +include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake) + +add_subdirectory(core) +add_subdirectory(plugins) +add_subdirectory(src) \ No newline at end of file diff --git a/plugins/mindstudio-board/server/cmake/mind_expression.cmake b/plugins/mindstudio-board/server/cmake/mind_expression.cmake new file mode 100644 index 0000000000..19672b677f --- /dev/null +++ b/plugins/mindstudio-board/server/cmake/mind_expression.cmake @@ -0,0 +1,32 @@ +set(SECURE_CXX_FLAGS "") +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + if (WIN32) + set(SECURE_CXX_FLAGS "-fstack-protector-all") + else () + set(SECURE_CXX_FLAGS "-fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") + endif () +endif () +set(_ms_tmp_CMAKE_CXX_FLAGS_F ${CMAKE_CXX_FLAGS}) + +if (NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") +endif () + +set(TOP_DIR ${CMAKE_SOURCE_DIR}) +set(ENABLE_GITEE ON) + +include(cmake/options.cmake) +include(cmake/utils.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake) +#include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/external_libs/libuv.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/external_libs/uSockets.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/external_libs/uWebSockets.cmake) + +include_directories(${CMAKE_SOURCE_DIR}/third_party) + +if (ENABLE_TESTCASES OR ENABLE_CPP_ST) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/gtest.cmake) +endif () + +set(CMAKE_CXX_FLAGS ${_ms_tmp_CMAKE_CXX_FLAGS_F}) diff --git a/plugins/mindstudio-board/server/cmake/options.cmake b/plugins/mindstudio-board/server/cmake/options.cmake new file mode 100644 index 
0000000000..7c1cebead7 --- /dev/null +++ b/plugins/mindstudio-board/server/cmake/options.cmake @@ -0,0 +1,69 @@ +option(ENABLE_TESTCASES "Run testcases switch, default off" ON) +option(ENABLE_CPP_ST "Run cpp st testcases switch, default off" ON) +option(DEBUG_MODE "Debug mode, default off" OFF) +option(ENABLE_COVERAGE "Enable code coverage report" OFF) +option(ENABLE_ASAN "Enable Google Sanitizer to find memory bugs") +option(ENABLE_DEBUGGER "enable debugger" OFF) +option(ENABLE_GLIBCXX "enable_glibcxx" ON) +option(BUILD_DEV_MODE "MindBoard build nightly dev mode" OFF) +option(USE_LLVM "Use llvm" OFF) + +if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux") + set(ENABLE_GLIBCXX ON) +endif () + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (WIN32) + set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -fstack-protector-all") + else () + set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") + endif () +endif () + +if (CMAKE_SYSTEM_NAME MATCHES "Darwin") + set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -Wsign-compare") +endif () + +if (ENABLE_COVERAGE) + set(COVERAGE_COMPILER_FLAGS "-g --coverage -fprofile-arcs -ftest-coverage") + set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}") +endif () + +if (ENABLE_ASAN) + set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -fsanitize=address -fsanitize-recover=address -fno-omit-frame-pointer") + if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -static-libsan") + endif () + add_compile_definitions(ENABLE_ASAN) +endif () + +if (DEBUG_MODE) + set(CMAKE_BUILD_TYPE "Debug") + add_compile_definitions(MEM_REUSE_DEBUG) +else () + set(CMAKE_BUILD_TYPE "Release") +endif () + +if ((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") OR (CMAKE_BUILD_TYPE STREQUAL Release)) + set(PYBIND11_LTO_CXX_FLAGS FALSE) +endif () + +if (NOT BUILD_PATH) + set(BUILD_PATH "${CMAKE_SOURCE_DIR}/build") +endif () + +if (ENABLE_TESTCASES) + add_compile_definitions(ENABLE_TEST) +endif () + +if 
(ENABLE_DEBUGGER) + add_compile_definitions(ENABLE_DEBUGGER) +endif () + +if (ENABLE_DEBUGGER OR ENABLE_TESTCASES) + set(MS_BUILD_GRPC ON) +endif () + +if (USE_LLVM) + add_compile_definitions(USE_LLVM) +endif () diff --git a/plugins/mindstudio-board/server/cmake/utils.cmake b/plugins/mindstudio-board/server/cmake/utils.cmake new file mode 100644 index 0000000000..f68842eb9b --- /dev/null +++ b/plugins/mindstudio-board/server/cmake/utils.cmake @@ -0,0 +1,584 @@ +include(FetchContent) +set(FETCHCONTENT_QUIET OFF) + +if (CMAKE_SYSTEM_NAME MATCHES "Windows" AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.17.0) + set(CMAKE_FIND_LIBRARY_SUFFIXES .dll ${CMAKE_FIND_LIBRARY_SUFFIXES}) +endif () + +function(mindspore_add_submodule_obj des_submodule_objs sub_dir submodule_name_obj) + + add_subdirectory(${sub_dir}) + + if (NOT TARGET ${submodule_name_obj}) + message(FATAL_ERROR "Can not find submodule '${submodule_name_obj}'. in ${CMAKE_CURRENT_LIST_FILE}") + endif () + if ("$" IN_LIST ${des_submodule_objs}) + message(FATAL_ERROR "submodule '${submodule_name_obj}' added more than once. 
in ${CMAKE_CURRENT_LIST_FILE}") + endif () + + set(${des_submodule_objs} ${${des_submodule_objs}} $ PARENT_SCOPE) + +endfunction() + +if (DEFINED ENV{MSLIBS_CACHE_PATH}) + set(_MS_LIB_CACHE $ENV{MSLIBS_CACHE_PATH}) +else () + set(_MS_LIB_CACHE ${CMAKE_BINARY_DIR}/.mslib) +endif () +message("MS LIBS CACHE PATH: ${_MS_LIB_CACHE}") + +if (NOT EXISTS ${_MS_LIB_CACHE}) + file(MAKE_DIRECTORY ${_MS_LIB_CACHE}) +endif () + +if (DEFINED ENV{MSLIBS_SERVER} AND NOT ENABLE_GITEE) + set(LOCAL_LIBS_SERVER $ENV{MSLIBS_SERVER}) + message("LOCAL_LIBS_SERVER: ${LOCAL_LIBS_SERVER}") +endif () + +include(ProcessorCount) +ProcessorCount(N) +if (JOBS) + set(THNUM ${JOBS}) +else () + set(JOBS 8) + if (${JOBS} GREATER ${N}) + set(THNUM ${N}) + else () + set(THNUM ${JOBS}) + endif () +endif () +message("set make thread num: ${THNUM}") + +if (LOCAL_LIBS_SERVER) + if (NOT ENV{no_proxy}) + set(ENV{no_proxy} "${LOCAL_LIBS_SERVER}") + else () + string(FIND $ENV{no_proxy} ${LOCAL_LIBS_SERVER} IP_POS) + if (${IP_POS} EQUAL -1) + set(ENV{no_proxy} "$ENV{no_proxy},${LOCAL_LIBS_SERVER}") + endif () + endif () +endif () + +function(__download_pkg pkg_name pkg_url pkg_sha256) + + if (LOCAL_LIBS_SERVER) + set(REGEX_IP_ADDRESS "^([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)$") + get_filename_component(_URL_FILE_NAME ${pkg_url} NAME) + if (${LOCAL_LIBS_SERVER} MATCHES ${REGEX_IP_ADDRESS}) + set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url}) + else () + set(pkg_url "https://${LOCAL_LIBS_SERVER}/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url}) + endif () + endif () + + FetchContent_Declare( + ${pkg_name} + URL ${pkg_url} + URL_HASH SHA256=${pkg_sha256} + ) + FetchContent_GetProperties(${pkg_name}) + message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}") + FetchContent_MakeAvailable(${pkg_name}) + set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) + +endfunction() + +function(__download_pkg_with_git pkg_name pkg_url pkg_git_commit 
pkg_sha256) + + if (LOCAL_LIBS_SERVER) + set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${pkg_git_commit}") + FetchContent_Declare( + ${pkg_name} + URL ${pkg_url} + URL_HASH SHA256=${pkg_sha256} + ) + else () + FetchContent_Declare( + ${pkg_name} + GIT_REPOSITORY ${pkg_url} + GIT_TAG ${pkg_git_commit}) + endif () + FetchContent_GetProperties(${pkg_name}) + message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}") + FetchContent_MakeAvailable(${pkg_name}) + set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) + +endfunction() + + +function(__find_pkg_then_add_target pkg_name pkg_exe lib_path) + set(options) + set(oneValueArgs PATH) + set(multiValueArgs SUFFIXES_PATH NAMES) + cmake_parse_arguments(LIB "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + unset(${pkg_name}_LIBS) + + message("_FIND:${${pkg_name}_BASE_DIR}") + + if (pkg_exe) + unset(${pkg_exe}_EXE CACHE) + find_program(${pkg_exe}_EXE ${pkg_exe} PATHS ${${pkg_name}_BASE_DIR}/bin NO_DEFAULT_PATH) + if (NOT ${pkg_exe}_EXE) + return() + endif () + add_executable(${pkg_name}::${pkg_exe} IMPORTED GLOBAL) + set_target_properties(${pkg_name}::${pkg_exe} PROPERTIES + IMPORTED_LOCATION ${${pkg_exe}_EXE} + ) + message("found ${${pkg_exe}_EXE}") + endif () + + foreach (_LIB_NAME ${LIB_NAMES}) + set(_LIB_SEARCH_NAME ${_LIB_NAME}) + if (MSVC AND ${pkg_name}_Debug) + set(_LIB_SEARCH_NAME ${_LIB_SEARCH_NAME}d) + endif () + set(_LIB_TYPE SHARED) + if (${pkg_name}_USE_STATIC_LIBS) + set(_LIB_SEARCH_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${_LIB_SEARCH_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(_LIB_TYPE STATIC) + endif () + set(${_LIB_NAME}_LIB ${_LIB_NAME}_LIB-NOTFOUND) + if (APPLE) + find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/${lib_path} + PATH_SUFFIXES ${LIB_SUFFIXES_PATH} NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + else () + find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/${lib_path} + 
PATH_SUFFIXES ${LIB_SUFFIXES_PATH} NO_DEFAULT_PATH) + endif () + if (NOT ${_LIB_NAME}_LIB) + message("not find ${_LIB_SEARCH_NAME} in path: ${${pkg_name}_BASE_DIR}/${lib_path}") + return() + endif () + + add_library(${pkg_name}::${_LIB_NAME} ${_LIB_TYPE} IMPORTED GLOBAL) + if (WIN32 AND ${_LIB_TYPE} STREQUAL "SHARED") + if (DEBUG_MODE) + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_IMPLIB_DEBUG ${${_LIB_NAME}_LIB}) + else () + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_IMPLIB_RELEASE ${${_LIB_NAME}_LIB}) + endif () + else () + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_LOCATION ${${_LIB_NAME}_LIB}) + endif () + + if (EXISTS ${${pkg_name}_BASE_DIR}/include) + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${${pkg_name}_BASE_DIR}/include") + endif () + + list(APPEND ${pkg_name}_LIBS ${pkg_name}::${_LIB_NAME}) + message("found ${${_LIB_NAME}_LIB}") + STRING(REGEX REPLACE "(.+)/(.+)" "\\1" LIBPATH ${${_LIB_NAME}_LIB}) + set(${pkg_name}_LIBPATH ${LIBPATH} CACHE STRING INTERNAL) + endforeach () + + set(${pkg_name}_LIBS ${${pkg_name}_LIBS} PARENT_SCOPE) +endfunction() + +function(__exec_cmd) + set(options) + set(oneValueArgs WORKING_DIRECTORY) + set(multiValueArgs COMMAND) + + cmake_parse_arguments(EXEC "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + execute_process(COMMAND ${EXEC_COMMAND} + WORKING_DIRECTORY ${EXEC_WORKING_DIRECTORY} + RESULT_VARIABLE RESULT) + if (NOT RESULT EQUAL "0") + message(FATAL_ERROR "error! 
when ${EXEC_COMMAND} in ${EXEC_WORKING_DIRECTORY}") + endif () +endfunction() + +function(__check_patches pkg_patches) + # check patches + if (PKG_PATCHES) + file(TOUCH ${_MS_LIB_CACHE}/${pkg_name}_patch.sha256) + file(READ ${_MS_LIB_CACHE}/${pkg_name}_patch.sha256 ${pkg_name}_PATCHES_SHA256) + + message("patches sha256:${${pkg_name}_PATCHES_SHA256}") + + set(${pkg_name}_PATCHES_NEW_SHA256) + foreach (_PATCH ${PKG_PATCHES}) + file(SHA256 ${_PATCH} _PF_SHA256) + set(${pkg_name}_PATCHES_NEW_SHA256 "${${pkg_name}_PATCHES_NEW_SHA256},${_PF_SHA256}") + endforeach () + + if (NOT ${pkg_name}_PATCHES_SHA256 STREQUAL ${pkg_name}_PATCHES_NEW_SHA256) + set(${pkg_name}_PATCHES ${PKG_PATCHES}) + file(REMOVE_RECURSE "${_MS_LIB_CACHE}/${pkg_name}-subbuild") + file(WRITE ${_MS_LIB_CACHE}/${pkg_name}_patch.sha256 ${${pkg_name}_PATCHES_NEW_SHA256}) + message("patches changed : ${${pkg_name}_PATCHES_NEW_SHA256}") + endif () + endif () +endfunction() + +set(MS_FIND_NO_DEFAULT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_SYSTEM_ENVIRONMENT_PATH + NO_CMAKE_BUILDS_PATH NO_CMAKE_PACKAGE_REGISTRY NO_CMAKE_SYSTEM_PATH + NO_CMAKE_SYSTEM_PACKAGE_REGISTRY) +function(mindboard_add_pkg pkg_name) + + set(options) + set(oneValueArgs URL SHA256 GIT_REPOSITORY GIT_TAG VER EXE DIR DOWNLOAD_ONLY HEAD_ONLY CMAKE_PATH RELEASE + LIB_PATH CUSTOM_CMAKE) + set(multiValueArgs + CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS + INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES ONLY_MAKE ONLY_MAKE_INCS ONLY_MAKE_LIBS + LIB_SUFFIXES_PATH) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if (NOT PKG_LIB_PATH) + set(PKG_LIB_PATH lib) + endif () + + if (NOT PKG_EXE) + set(PKG_EXE 0) + endif () + + set(__FIND_PKG_NAME ${pkg_name}) + string(TOLOWER ${pkg_name} pkg_name) + message("pkg name:${__FIND_PKG_NAME},${pkg_name}") + + set(${pkg_name}_PATCHES_HASH) + foreach (_PATCH ${PKG_PATCHES}) + file(SHA256 ${_PATCH} _PF_SHA256) + 
set(${pkg_name}_PATCHES_HASH "${${pkg_name}_PATCHES_HASH},${_PF_SHA256}") + endforeach () + + # strip directory variables to ensure third party packages are installed in consistent locations + string(REPLACE ${TOP_DIR} "" ARGN_STRIPPED ${ARGN}) + string(REPLACE ${_MS_LIB_CACHE} "" ARGN_STRIPPED ${ARGN_STRIPPED}) + # check options + set(${pkg_name}_CONFIG_TXT + "${CMAKE_CXX_COMPILER_VERSION}-${CMAKE_C_COMPILER_VERSION} + ${ARGN_STRIPPED}-${${pkg_name}_USE_STATIC_LIBS}-${${pkg_name}_PATCHES_HASH} + ${${pkg_name}_CXXFLAGS}-${${pkg_name}_CFLAGS}-${${pkg_name}_LDFLAGS}") + if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(${pkg_name}_CONFIG_TXT "${${pkg_name}_CONFIG_TXT}--${CMAKE_OSX_DEPLOYMENT_TARGET}") + endif () + string(REPLACE ";" "-" ${pkg_name}_CONFIG_TXT ${${pkg_name}_CONFIG_TXT}) + string(SHA256 ${pkg_name}_CONFIG_HASH ${${pkg_name}_CONFIG_TXT}) + + message("${pkg_name} config hash: ${${pkg_name}_CONFIG_HASH}") + + set(${pkg_name}_BASE_DIR ${_MS_LIB_CACHE}/${pkg_name}_${PKG_VER}_${${pkg_name}_CONFIG_HASH}) + set(${pkg_name}_DIRPATH ${${pkg_name}_BASE_DIR} CACHE STRING INTERNAL) + + if (EXISTS ${${pkg_name}_BASE_DIR}/options.txt AND PKG_HEAD_ONLY) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE) + add_library(${pkg_name} INTERFACE) + target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC}) + if (${PKG_RELEASE}) + __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} + SUFFIXES_PATH ${PKG_LIB_SUFFIXES_PATH} + NAMES ${PKG_LIBS}) + endif () + return() + endif () + + set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR}) + set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR} PARENT_SCOPE) + + if (PKG_LIBS) + __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} + SUFFIXES_PATH ${PKG_LIB_SUFFIXES_PATH} + NAMES ${PKG_LIBS}) + if (${pkg_name}_LIBS) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + message("Found libs: ${${pkg_name}_LIBS}") + return() + endif () + elseif (NOT PKG_HEAD_ONLY) 
+ find_package(${__FIND_PKG_NAME} ${PKG_VER} PATHS ${${pkg_name}_BASE_DIR} ${MS_FIND_NO_DEFAULT_PATH}) + if (${__FIND_PKG_NAME}_FOUND) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + message("Found pkg: ${__FIND_PKG_NAME}") + return() + endif () + endif () + + if (NOT PKG_DIR) + if (PKG_GIT_REPOSITORY) + __download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_SHA256}) + else () + __download_pkg(${pkg_name} ${PKG_URL} ${PKG_SHA256}) + endif () + foreach (_SUBMODULE_FILE ${PKG_SUBMODULES}) + STRING(REGEX REPLACE "(.+)_(.+)" "\\1" _SUBMODEPATH ${_SUBMODULE_FILE}) + STRING(REGEX REPLACE "(.+)/(.+)" "\\2" _SUBMODENAME ${_SUBMODEPATH}) + file(GLOB ${pkg_name}_INSTALL_SUBMODULE ${_SUBMODULE_FILE}/*) + file(COPY ${${pkg_name}_INSTALL_SUBMODULE} DESTINATION ${${pkg_name}_SOURCE_DIR}/3rdparty/${_SUBMODENAME}) + endforeach () + else () + set(${pkg_name}_SOURCE_DIR ${PKG_DIR}) + endif () + file(WRITE ${${pkg_name}_BASE_DIR}/options.txt ${${pkg_name}_CONFIG_TXT}) + message("${pkg_name}_SOURCE_DIR : ${${pkg_name}_SOURCE_DIR}") + + foreach (_PATCH_FILE ${PKG_PATCHES}) + get_filename_component(_PATCH_FILE_NAME ${_PATCH_FILE} NAME) + + # convert line-endings of patch file to UNIX LF + set(_LF_PATCH_FILE ${CMAKE_BINARY_DIR}/_ms_patch/${_PATCH_FILE_NAME}) + configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF @ONLY) + + # convert line-endings of source file to be patched to UNIX LF + file(READ ${_LF_PATCH_FILE} _LF_PATCH_CONTENT) + string(REGEX MATCHALL "diff --git a/[/A-Za-z0-9\.\-_]*" _PATCH_SOURCE_LIST "${_LF_PATCH_CONTENT}") + list(TRANSFORM _PATCH_SOURCE_LIST REPLACE "diff --git a/" "") # strip prefix of file path + + foreach (_PATCH_SOURCE ${_PATCH_SOURCE_LIST}) + if (EXISTS ${${pkg_name}_SOURCE_DIR}/${_PATCH_SOURCE}) + execute_process(COMMAND bash -c "sed -i \'s@\\r@@g\' ${${pkg_name}_SOURCE_DIR}/${_PATCH_SOURCE}" + COMMAND_ECHO STDOUT) + endif () + endforeach () + + # apply patch + message("patching 
${${pkg_name}_SOURCE_DIR} -p1 < ${_LF_PATCH_FILE}") + execute_process(COMMAND ${Patch_EXECUTABLE} -p1 INPUT_FILE ${_LF_PATCH_FILE} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR} + RESULT_VARIABLE Result) + if (NOT Result EQUAL "0") + message(FATAL_ERROR "Failed patch: ${_LF_PATCH_FILE}") + endif () + endforeach () + foreach (_SOURCE_DIR ${PKG_SOURCEMODULES}) + file(GLOB ${pkg_name}_INSTALL_SOURCE ${${pkg_name}_SOURCE_DIR}/${_SOURCE_DIR}/*) + file(COPY ${${pkg_name}_INSTALL_SOURCE} DESTINATION ${${pkg_name}_BASE_DIR}/${_SOURCE_DIR}/) + endforeach () + file(LOCK ${${pkg_name}_BASE_DIR} DIRECTORY GUARD FUNCTION RESULT_VARIABLE ${pkg_name}_LOCK_RET TIMEOUT 600) + if (NOT ${pkg_name}_LOCK_RET EQUAL "0") + message(FATAL_ERROR "error! when try lock ${${pkg_name}_BASE_DIR} : ${${pkg_name}_LOCK_RET}") + endif () + + if (PKG_CUSTOM_CMAKE) + file(GLOB ${pkg_name}_cmake ${PKG_CUSTOM_CMAKE}/CMakeLists.txt) + file(COPY ${${pkg_name}_cmake} DESTINATION ${${pkg_name}_SOURCE_DIR}) + endif () + + if (${pkg_name}_SOURCE_DIR) + if (PKG_DOWNLOAD_ONLY) + file(GLOB ${pkg_name}_SOURCE_SUBDIRS ${${pkg_name}_SOURCE_DIR}/*) + file(COPY ${${pkg_name}_SOURCE_SUBDIRS} DESTINATION ${${pkg_name}_BASE_DIR}) + set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) + elseif (PKG_HEAD_ONLY) + file(GLOB ${pkg_name}_SOURCE_SUBDIRS ${${pkg_name}_SOURCE_DIR}/*) + file(COPY ${${pkg_name}_SOURCE_SUBDIRS} DESTINATION ${${pkg_name}_BASE_DIR}) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE) + if (NOT PKG_RELEASE) + add_library(${pkg_name} INTERFACE) + target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC}) + endif () + + elseif (PKG_ONLY_MAKE) + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_CXXFLAGS} -j${THNUM} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + set(PKG_INSTALL_INCS ${PKG_ONLY_MAKE_INCS}) + set(PKG_INSTALL_LIBS ${PKG_ONLY_MAKE_LIBS}) + file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS}) + file(GLOB 
${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS}) + file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include) + file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib) + + elseif (PKG_CMAKE_OPTION) + # in cmake + file(MAKE_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + if (${pkg_name}_CFLAGS) + set(${pkg_name}_CMAKE_CFLAGS "-DCMAKE_C_FLAGS=${${pkg_name}_CFLAGS}") + endif () + if (${pkg_name}_CXXFLAGS) + set(${pkg_name}_CMAKE_CXXFLAGS "-DCMAKE_CXX_FLAGS=${${pkg_name}_CXXFLAGS}") + endif () + + if (${pkg_name}_LDFLAGS) + if (${pkg_name}_USE_STATIC_LIBS) + #set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_STATIC_LINKER_FLAGS=${${pkg_name}_LDFLAGS}") + else () + set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_SHARED_LINKER_FLAGS=${${pkg_name}_LDFLAGS}") + endif () + endif () + if (APPLE) + __exec_cmd(COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_COMPILER_ARG1=${CMAKE_CXX_COMPILER_ARG1} + -DCMAKE_C_COMPILER_ARG1=${CMAKE_C_COMPILER_ARG1} ${PKG_CMAKE_OPTION} + ${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS} + -DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${${pkg_name}_SOURCE_DIR}/${PKG_CMAKE_PATH} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + else () + __exec_cmd(COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_COMPILER_ARG1=${CMAKE_CXX_COMPILER_ARG1} + -DCMAKE_C_COMPILER_ARG1=${CMAKE_C_COMPILER_ARG1} ${PKG_CMAKE_OPTION} -G ${CMAKE_GENERATOR} + ${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS} + -DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${${pkg_name}_SOURCE_DIR}/${PKG_CMAKE_PATH} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + if (MSVC) + set(CONFIG_TYPE Release) + if (DEBUG_MODE) + set(CONFIG_TYPE Debug) + endif () + __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . 
--config ${CONFIG_TYPE} --target install -- + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + else () + __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- -j${THNUM} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + endif () + endif () + else () + if (${pkg_name}_CFLAGS) + set(${pkg_name}_MAKE_CFLAGS "CFLAGS=${${pkg_name}_CFLAGS}") + endif () + if (${pkg_name}_CXXFLAGS) + set(${pkg_name}_MAKE_CXXFLAGS "CXXFLAGS=${${pkg_name}_CXXFLAGS}") + endif () + if (${pkg_name}_LDFLAGS) + set(${pkg_name}_MAKE_LDFLAGS "LDFLAGS=${${pkg_name}_LDFLAGS}") + endif () + # in configure && make + if (PKG_PRE_CONFIGURE_COMMAND) + __exec_cmd(COMMAND ${PKG_PRE_CONFIGURE_COMMAND} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif () + + if (PKG_CONFIGURE_COMMAND) + __exec_cmd(COMMAND ${PKG_CONFIGURE_COMMAND} + ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS} + --prefix=${${pkg_name}_BASE_DIR} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif () + set(${pkg_name}_BUILD_OPTION ${PKG_BUILD_OPTION}) + if (NOT PKG_CONFIGURE_COMMAND) + set(${pkg_name}_BUILD_OPTION ${${pkg_name}_BUILD_OPTION} + ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS}) + endif () + # build + if (APPLE) + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + else () + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} -j${THNUM} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif () + + if (PKG_INSTALL_INCS OR PKG_INSTALL_LIBS) + file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS}) + file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS}) + file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include) + file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib) + else () + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} install WORKING_DIRECTORY 
${${pkg_name}_SOURCE_DIR}) + endif () + endif () + endif () + + if (PKG_LIBS) + __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} + SUFFIXES_PATH ${PKG_LIB_SUFFIXES_PATH} + NAMES ${PKG_LIBS}) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + if (NOT ${pkg_name}_LIBS) + message(FATAL_ERROR "Can not find pkg: ${pkg_name}") + endif () + else () + find_package(${__FIND_PKG_NAME} ${PKG_VER} QUIET ${MS_FIND_NO_DEFAULT_PATH}) + if (${__FIND_PKG_NAME}_FOUND) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + message("Found pkg: ${${__FIND_PKG_NAME}_LIBRARIES}") + return() + endif () + endif () +endfunction() + +function(src_separate_compile) + set(options) + set(oneValueArgs OBJECT_NAME OBJECT_SIZE) + set(multiValueArgs SRC_LIST) + cmake_parse_arguments(STUDENT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + list(LENGTH STUDENT_SRC_LIST SRC_LIST_SIZE) + + set(SEPARATE_SIZE 100) + set(SEPARATE_INDEX 0) + set(OBJECT_COUNT 0) + math(EXPR SRC_LIST_MAX_INDEX "${SRC_LIST_SIZE} - 1") + while (${SRC_LIST_SIZE} GREATER ${SEPARATE_INDEX}) + math(EXPR SEPARATE_END "${SEPARATE_INDEX} + ${SEPARATE_SIZE} - 1") + if (${SEPARATE_END} GREATER ${SRC_LIST_MAX_INDEX}) + math(EXPR SEPARATE_SIZE "${SRC_LIST_SIZE} - ${SEPARATE_INDEX}") + endif () + list(SUBLIST STUDENT_SRC_LIST ${SEPARATE_INDEX} ${SEPARATE_SIZE} new_sub_list) + math(EXPR OBJECT_COUNT "${OBJECT_COUNT} + 1") + math(EXPR SEPARATE_INDEX "${SEPARATE_INDEX} + ${SEPARATE_SIZE}") + add_library(${STUDENT_OBJECT_NAME}_${OBJECT_COUNT} OBJECT ${new_sub_list}) + endwhile () + set(${STUDENT_OBJECT_SIZE} "${OBJECT_COUNT}" PARENT_SCOPE) + message("${STUDENT_OBJECT_SIZE} object count is ${OBJECT_COUNT}") +endfunction() + +function(enable_target_when_only_build_plugins target) + if (ONLY_BUILD_DEVICE_PLUGINS) + get_target_property(target_type ${target} TYPE) + if (target_type STREQUAL "INTERFACE_LIBRARY") + return() + endif () + set_target_properties(${target} PROPERTIES 
EXCLUDE_FROM_ALL FALSE) + endif () +endfunction() + +function(disable_target_when_only_build_plugins target) + if (ONLY_BUILD_DEVICE_PLUGINS) + get_target_property(target_type ${target} TYPE) + if (target_type STREQUAL "INTERFACE_LIBRARY") + return() + endif () + get_property(is_set TARGET ${target} PROPERTY EXCLUDE_FROM_ALL) + if (NOT DEFINED is_set) + set_target_properties(${target} PROPERTIES EXCLUDE_FROM_ALL TRUE) + endif () + endif () +endfunction() + +function(enable_directory_when_only_build_plugins dir) + get_property(targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + foreach (target ${targets}) + enable_target_when_only_build_plugins(${target}) + endforeach () + get_property(items DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (item ${items}) + enable_directory_when_only_build_plugins(${item}) + endforeach () +endfunction() + +function(disable_directory_when_only_build_plugins dir) + get_property(targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + foreach (target ${targets}) + disable_target_when_only_build_plugins(${target}) + endforeach () + get_property(items DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (item ${items}) + disable_directory_when_only_build_plugins(${item}) + endforeach () +endfunction() + +function(add_subdirectory_with_faster_option dir) + if (ONLY_BUILD_DEVICE_PLUGINS) + add_subdirectory(${dir}) + disable_directory_when_only_build_plugins(${dir}) + else () + add_subdirectory(${dir}) + endif () +endfunction() + +function(find_and_use_mold) + find_program(MOLD_LINKER mold) + if (MOLD_LINKER) + message(STATUS "using mold to speed linking libraries") + get_filename_component(MOLD_LINKER_PATH ${MOLD_LINKER} DIRECTORY) + file(GLOB MOLD_LINKER_PATH "${MOLD_LINKER_PATH}/../libexec/mold") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -B${MOLD_LINKER_PATH}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -B${MOLD_LINKER_PATH}") + endif () +endfunction() \ No newline at end of file diff --git 
a/plugins/mindstudio-board/server/core/CMakeLists.txt b/plugins/mindstudio-board/server/core/CMakeLists.txt new file mode 100644 index 0000000000..b246fb4429 --- /dev/null +++ b/plugins/mindstudio-board/server/core/CMakeLists.txt @@ -0,0 +1,16 @@ +project(msinsight) + +aux_source_directory(./src CORE_SRC) + +add_library(${PROJECT_NAME} SHARED ${CORE_SRC}) +target_include_directories(${PROJECT_NAME} PUBLIC ./include) +target_include_directories(${PROJECT_NAME} PRIVATE ./src) +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(${PROJECT_NAME} PRIVATE dl) + target_link_libraries(${PROJECT_NAME} PRIVATE stdc++fs) +endif () + +if (${CMAKE_BUILD_TYPE} MATCHES "Debug") + target_compile_options(${PROJECT_NAME} PRIVATE -g -O0) +endif () + diff --git a/plugins/mindstudio-board/server/plugins/CMakeLists.txt b/plugins/mindstudio-board/server/plugins/CMakeLists.txt new file mode 100644 index 0000000000..7e4cc31e59 --- /dev/null +++ b/plugins/mindstudio-board/server/plugins/CMakeLists.txt @@ -0,0 +1,3 @@ +project(plugins) + +add_subdirectory(ScalarVisually) \ No newline at end of file diff --git a/plugins/mindstudio-board/server/src/CMakeLists.txt b/plugins/mindstudio-board/server/src/CMakeLists.txt new file mode 100644 index 0000000000..057e3e64d4 --- /dev/null +++ b/plugins/mindstudio-board/server/src/CMakeLists.txt @@ -0,0 +1,16 @@ +project(MindStudioBoard) + +aux_source_directory(. MAIN_SRC) +aux_source_directory(httpServer MAIN_SRC) + +list(APPEND MAIN_SRC + ${MAIN_SRC} + ${U_SOCKETS_SRC}) +add_executable(${PROJECT_NAME} ${MAIN_SRC}) +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + target_compile_options(${PROJECT_NAME} PRIVATE -g -O0) +endif () +target_include_directories(${PROJECT_NAME} PRIVATE ${MAIN_INCLUDE}) + +target_link_libraries(${PROJECT_NAME} MindStudioBoardCore) +target_link_libraries(${PROJECT_NAME} uv_a) \ No newline at end of file -- Gitee