From 457a43d163e316e5dbad1011d9f5e9f1883f4b0a Mon Sep 17 00:00:00 2001 From: wangchao426 Date: Sat, 28 Jun 2025 09:54:41 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E5=88=86=E6=9E=90=E6=95=B4=E7=BD=91=E9=A6=96=E6=BA=A2=E5=87=BA?= =?UTF-8?q?=E8=8A=82=E7=82=B9=E6=97=B6=E5=88=86=E7=BB=84=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/nan_analyze/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/nan_analyze/analyzer.py b/debug/accuracy_tools/msprobe/nan_analyze/analyzer.py index e147f23b7c7..a32ad7c351c 100644 --- a/debug/accuracy_tools/msprobe/nan_analyze/analyzer.py +++ b/debug/accuracy_tools/msprobe/nan_analyze/analyzer.py @@ -221,7 +221,7 @@ class NanAnalyzer: node = get_next_node(nodes) if not node: continue - if not groups or node.node_id in all_ids_in_groups: + if not groups or node.node_id not in all_ids_in_groups: new_group = find_all_members(node) groups.append(new_group) all_ids_in_groups.update(new_group) -- Gitee From 8568cf4fdf89d767cd8b2414a55379b12db664db Mon Sep 17 00:00:00 2001 From: wangchao426 Date: Sat, 28 Jun 2025 11:04:47 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E6=97=A5=E5=BF=97=E8=A7=84=E8=8C=83?= =?UTF-8?q?=E6=80=A7=E4=BF=AE=E6=94=B9=EF=BC=9A=E5=A4=A7=E5=B0=8F=E5=86=99?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/visualization/graph_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/visualization/graph_service.py b/debug/accuracy_tools/msprobe/visualization/graph_service.py index 69718d9c26f..bb219b423af 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph_service.py +++ b/debug/accuracy_tools/msprobe/visualization/graph_service.py @@ -69,7 +69,7 @@ def _compare_graph_result(input_param, args): # 对两个数据进行构图 graph_n = _build_graph_info(input_param.get('npu_path'), args) graph_b = _build_graph_info(input_param.get('bench_path'), args) - logger.info('Model graphs built successfully, start Comparing graphs...') + logger.info('Model graphs built successfully, start comparing graphs...') # 基于graph、stack和data进行比较 graph_comparator = _compare_graph(graph_n, graph_b, input_param, args) # 增加micro step标记 -- Gitee From ec380b23695755005d74d0c003da54bbd63e6a07 Mon Sep 17 00:00:00 2001 From: wangchao426 Date: Mon, 30 Jun 2025 11:34:48 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=88=86=E6=9E=90?= =?UTF-8?q?=E9=A6=96=E5=BC=82=E5=B8=B8=E8=8A=82=E7=82=B9=E6=9E=84=E5=9B=BE?= =?UTF-8?q?=E6=97=B6layer=E9=94=99=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/nan_analyze/graph.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/nan_analyze/graph.py b/debug/accuracy_tools/msprobe/nan_analyze/graph.py index 5a4f8fb8729..3e6e7d71e3a 100644 --- a/debug/accuracy_tools/msprobe/nan_analyze/graph.py +++ b/debug/accuracy_tools/msprobe/nan_analyze/graph.py @@ -16,8 +16,8 @@ from dataclasses import dataclass from msprobe.core.common.const import Const from msprobe.core.common.log import logger -from msprobe.core.common.exceptions import MsprobeException from msprobe.nan_analyze.utils import FileCache, RankPath, is_ignore_op, check_item_anomaly, NanAnalyseConst +from msprobe.core.common.exceptions import MsprobeException @dataclass @@ -99,13 +99,13 @@ class CommunicationNode: self.link_nodes = kwargs.get('link_nodes', {}) self.dst_nodes = kwargs.get('dst_nodes', {}) self.src_nodes = kwargs.get('src_nodes', {}) - self.next_nodes = kwargs.get('next_nodes', {}) + self.next_node = kwargs.get('next_node') self.compute_ops = kwargs.get('compute_ops', []) self.type = self._resolve_type() self.connected = False def add_next(self, node): - self.next_nodes[node.node_id] = node + self.next_node = node node.pre_node = self node.layer = self.layer + 1 node.data.layer = node.layer @@ -113,7 +113,9 @@ class CommunicationNode: def add_link(self, node): self.link_nodes[node.node_id] = node node.link_nodes[self.node_id] = self - node.layer = self.layer + layer = max(node.layer, self.layer) + self.update_layer(layer) + node.update_layer(layer) node.data.layer = node.layer self.connected = True node.connected = True @@ -121,14 +123,16 @@ class CommunicationNode: def add_dst(self, node): self.dst_nodes[node.node_id] = node node.src_nodes[self.node_id] = self - node.layer = self.layer + layer = max(node.layer, self.layer) + self.update_layer(layer) + node.update_layer(layer) node.data.layer = node.layer self.connected = True node.connected = True def delete(self): - for node in self.next_nodes.values(): - node.pre_node = None + if self.next_node: + self.next_node.pre_node = None for node in self.dst_nodes.values(): node.src_nodes.pop(self.node_id) for node in self.src_nodes.values(): @@ -138,9 +142,19 @@ class CommunicationNode: if self.pre_node: self.pre_node.next_nodes.pop(self.node_id) + def update_layer(self, layer): + if layer == self.layer: + return + self.layer = layer + next_node = self.next_node + while next_node: + layer += 1 + next_node.layer = layer + next_node = next_node.next_node + def has_nan_inf(self): return self.input_has_nan_inf() or check_item_anomaly(self.data.outputs) - + def input_has_nan_inf(self): return check_item_anomaly(self.data.input_args) or check_item_anomaly(self.data.input_kwargs) -- Gitee