From f40ed9cfe095a3291704c7327437e6d54891329b Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Wed, 22 Jan 2025 21:04:56 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E9=9D=99=E6=80=81=E5=9B=BEcell=E7=BA=A7dum?= =?UTF-8?q?p?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 5 + .../msprobe/docs/02.config_introduction.md | 20 +- .../msprobe/docs/06.data_dump_MindSpore.md | 62 ++- .../msprobe/mindspore/common/const.py | 1 + .../mindspore/debugger/precision_debugger.py | 6 +- .../mindspore/dump/cell_dump_process.py | 450 ++++++++++++++++++ .../mindspore/dump/dump_tool_factory.py | 17 +- .../mindspore/dump/graph_mode_cell_dump.py | 69 +++ .../msprobe/mindspore/task_handler_factory.py | 7 +- .../debugger/test_graph_cell_dump.py | 309 ++++++++++++ 10 files changed, 922 insertions(+), 24 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py create mode 100644 debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index b49b4fffd5..6824fc8b42 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -206,12 +206,14 @@ class Const: TORCH_FLOAT32 = "torch.float32" TORCH_BFLOAT16 = "torch.bfloat16" + TYPE = 'type' DTYPE = 'dtype' SHAPE = 'shape' MAX = 'Max' MIN = 'Min' MEAN = 'Mean' NORM = 'Norm' + DATA_NAME = 'data_name' CODE_STACK = 'Code Stack' OP_NAME = 'Op Name' @@ -224,6 +226,9 @@ class Const: SCOPE_SEPARATOR = "/" REPLACEMENT_CHARACTER = "_" + FORWARD_PATTERN = SEP + FORWARD + SEP + BACKWARD_PATTERN = SEP + BACKWARD + SEP + OPTIMIZER = "optimizer" CLIP_GRAD = "clip_grad" END_PREFIX = "end_" diff --git 
a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md index f134bd4536..5b2e6d5027 100644 --- a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md +++ b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md @@ -10,19 +10,19 @@ ### 1.1 通用配置 -| 参数 | 解释 | 是否必选 | -| ----------------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- | -| task | dump 的任务类型,str 类型。可选参数:
"statistics":仅采集统计信息,默认值;
"tensor":采集统计信息和完全复刻整网的真实数据;
"run_ut":精度预检,仅 PyTorch 场景支持,采集数据时勿选;
"overflow_check":溢出检测;
"free_benchmark":无标杆比对;
"grad_probe":梯度监控;
"structure":仅采集模型结构以及调用栈信息,不采集具体数据。
根据 task 参数取值的不同,可以配置不同场景参数,详见:
[1.2 task 配置为 statistics](#12-task-配置为-statistics),
[1.3 task 配置为 tensor](#13-task-配置为-tensor),
[1.4 task 配置为 run_ut](#14-task-配置为-run_ut),
[1.5 task 配置为 overflow_check](#15-task-配置为-overflow_check),
[1.6 task 配置为 free_benchmark](#16-task-配置为-free_benchmark),
[1.7 task 配置为 grad_probe](#17-task-配置为-grad_probe)。
**配置示例**:"task": "tensor"。 | 否 | -| dump_path | 设置 dump 数据目录路径,str 类型。
**配置示例**:"dump_path": "./dump_path"。 | 是 | -| rank | 指定对某张卡上的数据进行采集,list[Union[int, str]] 类型,默认未配置(表示采集所有卡的数据),应配置元素为 ≥0 的整数或类似"4-6"的字符串,且须配置实际可用的 Rank ID。
PyTorch 场景: Rank ID 从 0 开始计数,最大取值为所有节点可用卡总数-1,若所配置的值大于实际训练所运行的卡的 Rank ID,则 dump 数据为空,比如当前环境 Rank ID 为 0 到 7,实际训练运行 0 到 3 卡,此时若配置 Rank ID 为 4 或不存在的 10 等其他值,dump 数据为空。
MindSpore 场景:所有节点的 Rank ID 均从 0 开始计数,最大取值为每个节点可用卡总数-1,config.json 配置一次 rank 参数对所有节点同时生效。
注意,单卡训练时,rank必须为[],即空列表,不能指定rank。
**配置示例**:"rank": [1, "4-6"]。 | 否 | -| step | 指定采集某个 step 的数据,list[Union[int, str]] 类型。默认未配置,表示采集所有 step 数据。采集特定 step 时,须指定为训练脚本中存在的 step,可逐个配置,也可以指定范围。
**配置示例**:"step": [0, 1 , 2, "4-6"]。 | 否 | -| level | dump 级别,str 类型,根据不同级别采集不同数据。可选参数:
"L0":dump 模块级精度数据,仅 PyTorch 与 MindSpore 动态图场景支持,使用背景详见 [1.1.1 模块级精度数据 dump 说明](#111-模块级精度数据-dump-说明);
"L1":dump API 级精度数据,默认值,仅 PyTorch 与 MindSpore 动态图场景支持;
"L2":dump kernel 级精度数据,PyTorch场景详细介绍见 [PyTorch 场景的 kernel dump 说明](./04.kernel_dump_PyTorch.md);MindSpore场景详细介绍见 [MindSpore 场景的 kernel dump 说明](./28.kernel_dump_MindSpore.md);
"mix":dump module 模块级和 API 级精度数据,即"L0"+"L1",仅 PyTorch 与 MindSpore 动态图场景支持。
"debug":单点保存功能,细节详见[单点保存工具 README](./28.debugger_save_instruction.md)
**配置示例**:"level": "L1"。 | 否 | -| enable_dataloader | 自动控制开关,bool 类型,仅 PyTorch 场景支持。可选参数 true(开启)或 false(关闭),默认为 false。配置为 true 后自动识别 step 参数指定的迭代,并在该迭代执行完成后退出训练,此时 start、stop 和 step 函数可不配置,开启该开关要求训练脚本是通过 torch.utils.data.dataloader 方式加载数据。仅支持 PyTorch 单卡训练使用,分布式训练场景下存在数据 dump 不全问题。 **这个特性下个版本将被废弃** | 否 | +| 参数 | 解释 | 是否必选 | +| ----------------- |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- | +| task | dump 的任务类型,str 类型。可选参数:
"statistics":仅采集统计信息,默认值;
"tensor":采集统计信息和完全复刻整网的真实数据;
"run_ut":精度预检,仅 PyTorch 场景支持,采集数据时勿选;
"overflow_check":溢出检测;
"free_benchmark":无标杆比对;
"grad_probe":梯度监控。
根据 task 参数取值的不同,可以配置不同场景参数,详见:
[1.2 task 配置为 statistics](#12-task-配置为-statistics),
[1.3 task 配置为 tensor](#13-task-配置为-tensor),
[1.4 task 配置为 run_ut](#14-task-配置为-run_ut),
[1.5 task 配置为 overflow_check](#15-task-配置为-overflow_check),
[1.6 task 配置为 free_benchmark](#16-task-配置为-free_benchmark),
[1.7 task 配置为 grad_probe](#17-task-配置为-grad_probe)。
**配置示例**:"task": "tensor"。 | 否 | +| dump_path | 设置 dump 数据目录路径,str 类型。
**配置示例**:"dump_path": "./dump_path"。 | 是 | +| rank | 指定对某张卡上的数据进行采集,list[Union[int, str]] 类型,默认未配置(表示采集所有卡的数据),应配置元素为 ≥0 的整数或类似"4-6"的字符串,且须配置实际可用的 Rank ID。
PyTorch 场景: Rank ID 从 0 开始计数,最大取值为所有节点可用卡总数-1,若所配置的值大于实际训练所运行的卡的 Rank ID,则 dump 数据为空,比如当前环境 Rank ID 为 0 到 7,实际训练运行 0 到 3 卡,此时若配置 Rank ID 为 4 或不存在的 10 等其他值,dump 数据为空。
MindSpore 场景:所有节点的 Rank ID 均从 0 开始计数,最大取值为每个节点可用卡总数-1,config.json 配置一次 rank 参数对所有节点同时生效。静态图 L0 级别 dump 暂不支持指定rank。
注意,单卡训练时,rank必须为[],即空列表,不能指定rank。
**配置示例**:"rank": [1, "4-6"]。 | 否 | +| step | 指定采集某个 step 的数据,list[Union[int, str]] 类型。默认未配置,表示采集所有 step 数据。采集特定 step 时,须指定为训练脚本中存在的 step,可逐个配置,也可以指定范围。
**配置示例**:"step": [0, 1 , 2, "4-6"]。 | 否 | +| level | dump 级别,str 类型,根据不同级别采集不同数据。可选参数:
"L0":dump 模块级精度数据,PyTorch 与 MindSpore 均支持,使用背景详见 [1.1.1 模块级精度数据 dump 说明](#111-模块级精度数据-dump-说明);
"L1":dump API 级精度数据,默认值,仅 PyTorch 与 MindSpore 动态图场景支持;
"L2":dump kernel 级精度数据,PyTorch场景详细介绍见 [PyTorch 场景的 kernel dump 说明](./04.kernel_dump_PyTorch.md);
"mix":dump module 模块级和 API 级精度数据,即"L0"+"L1",仅 PyTorch 与 MindSpore 动态图场景支持。
**配置示例**:"level": "L1"。 | 否 | +| enable_dataloader | 自动控制开关,bool 类型,仅 PyTorch 场景支持。可选参数 true(开启)或 false(关闭),默认为 false。配置为 true 后自动识别 step 参数指定的迭代,并在该迭代执行完成后退出训练,此时 start、stop 和 step 函数可不配置,开启该开关要求训练脚本是通过 torch.utils.data.dataloader 方式加载数据。仅支持 PyTorch 单卡训练使用,分布式训练场景下存在数据 dump 不全问题。 **这个特性下个版本将被废弃** | 否 | | async_dump | 异步 dump 开关,bool 类型。可选参数 true(开启)或 false(关闭),默认为 false。配置为 true 后开启异步 dump,即采集的精度数据会在当前 step 训练结束后统一落盘,训练过程中工具不触发同步操作。由于使用该模式有**显存溢出**的风险,当 task 配置为 tensor 时,即真实数据的异步dump模式,必须配置 [list](#13-task-配置为-tensor) 参数,指定需要 dump 的 tensor 。该模式暂不支持复数类型 tensor
的统计量计算。 | 否 | #### 1.1.1 模块级精度数据 dump 说明 -仅 PyTorch 与 MindSpore 动态图场景支持。 +PyTorch 与 MindSpore 均支持。 大模型场景下,通常不是简单的利用自动迁移能力实现从 GPU 到 NPU 的训练脚本迁移,而是会对 NPU 网络进行一系列针对性的适配,因此,常常会造成迁移后的 NPU 模型存在部分子结构不能与 GPU 原始模型完全对应。模型结构不一致导致 API 调用类型及数量不一致,若直接按照 API 粒度进行精度数据 dump 和比对,则无法完全比对所有的 API。 @@ -46,7 +46,7 @@ MindSpore 静态图场景配置 kernel_name,可以是算子的名称列表,也可以指定算子类型("level": "L2"时不支持),还可以配置算子名称的正则表达式(当字符串符合“name-regex(xxx)”格式时,后台则会将其作为正则表达式。
配置示例:list: ["name-regex(Default/.+)"]
可匹配算子名称以“Default/”开头的所有算子。 data_modedump 数据过滤,str 类型。否 PyTorch 与 MindSpore 动态图场景:支持"all"、"forward"、"backward"、"input"和"output",除"all"外,其余参数可以自由组合。默认为["all"],即保存所有 dump 的数据。
配置示例:"data_mode": ["backward"] (仅保存反向数据)或 "data_mode": ["forward", "input"](仅保存前向的输入数据)。 - MindSpore 静态图场景:仅支持"all"、"input"和"output"参数,且各参数只能单独配置,不支持自由组合。
配置示例:"data_mode": ["all"]。 + MindSpore 静态图场景:L0 级别 dump 仅支持"all"、"forward"和"backward"参数;L2 级别 dump 仅支持"all"、"input"和"output"参数。且各参数只能单独配置,不支持自由组合。
配置示例:"data_mode": ["all"]。 summary_mode控制 dump 文件输出的模式,str 类型,仅 PyTorch 与 MindSpore 动态图场景支持,可选参数:
md5:dump 输出包含 CRC-32 值以及 API 统计信息的 dump.json 文件,用于验证数据的完整性;
statistics:dump 仅输出包含 API 统计信息的 dump.json 文件,默认值。
配置示例:"summary_mode": "md5"。否MindSpore静态图jit_level=O2场景L2级dump,支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm,可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。
配置示例:"summary_mode": ["max", "min"]。 diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index f7507facd2..0ee33b44a8 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -30,8 +30,10 @@ dump 的"tensor"模式采集数据量大小,可以参考[数据量基线](data ## 5. 场景介绍 -### 5.1 静态图场景 -在静态图场景下,msprobe 仅支持 **L2 Level** 的数据采集。 +### 5.1 静态图场景 +在静态图场景下,msprobe 支持 **L0 Level** 和 **L2 Level** 的数据采集。 +- **L0 Level(Cell 级)** :采集 `Cell` 对象的数据,适用于需要分析特定网络模块的情况。 + - **L2 Level(Kernel 级)** :采集底层算子的输入输出数据,适用于深入分析算子级别的精度问题。 采集方式请参见[示例代码 > 静态图场景](#71-静态图场景)。详细介绍请参见[《config.json 配置文件介绍》](./02.config_introduction.md#11-通用配置)中的“level 参数”和[《config.json 配置示例》](./03.config_examples.md#2-mindspore-静态图场景) 中的“MindSpore 静态图场景”。 @@ -110,7 +112,7 @@ stop() **功能说明**:结束一个 step 的数据采集,完成所有数据落盘并更新 dump 参数。在一个 step 结束的位置添加,且必须在 **stop** 函数之后的位置调用。 该函数需要配合 **start** 和 **stop** 函数使用,尽量添加在反向计算代码之后,否则可能会导致反向数据丢失。 -**仅未使用 Model 高阶 API 的动态图场景支持。** +**仅未使用 Model 高阶 API 的动态图和静态图场景支持。** **原型**: @@ -152,7 +154,7 @@ save(variable, name, save_backward=True) ### 6.2 msprobe.mindspore.common.utils.MsprobeStep -**功能说明**:MindSpore Callback类,自动在每个step开始时调用start()接口,在每个step结束时调用stop()、step()接口。实现使用 Model 高阶 API 的动态图场景下 L0、L1、mix 级别的精度数据采集控制,控制粒度为单个 **Step** ,而 PrecisionDebugger.start, PrecisionDebugger.stop 接口的控制粒度任意训练代码段。 +**功能说明**:MindSpore Callback类,自动在每个step开始时调用start()接口,在每个step结束时调用stop()、step()接口。实现使用 Model 高阶 API 的动态图场景下 L0、L1、mix 级别,和静态图场景下 L0级别的精度数据采集控制,控制粒度为单个 **Step** ,而 PrecisionDebugger.start, PrecisionDebugger.stop 接口的控制粒度任意训练代码段。 **原型**: @@ -188,6 +190,54 @@ seed_all(seed=1234, mode=False, rm_dropout=True) ### 7.1 静态图场景 +#### 7.1.1 L0 级别 + +##### 7.1.1.1 未使用 Model 高阶 API + + +```python +import mindspore as ms +ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend") + +from msprobe.mindspore import PrecisionDebugger +debugger = 
PrecisionDebugger(config_path="./config.json") + +# 模型、损失函数的定义以及初始化等操作 +# ... +model = Network() +# 数据集迭代的地方往往是模型开始训练的地方 +for data, label in data_loader: + debugger.start(model) # 进行 L0 级别下Cell 对象的数据采集时调用 + # 如下是模型每个 step 执行的逻辑 + grad_net = ms.grad(model)(data) + # ... + debugger.step() # 更新迭代数 +``` + +##### 7.1.1.2 使用 Model 高阶 API + + +```python +import mindspore as ms +from mindspore.train import Model +ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend") + +from msprobe.mindspore import PrecisionDebugger +from msprobe.mindspore.common.utils import MsprobeStep +debugger = PrecisionDebugger(config_path="./config.json") + +# 模型、损失函数的定义以及初始化等操作 +# ... + +model = Network() +# 进行 L0 级别下 Cell 对象的数据采集时调用 +debugger.start(model) +trainer = Model(model, loss_fn=loss_fn, optimizer=optimizer, metrics={'accuracy'}) +trainer.train(1, train_dataset, callbacks=[MsprobeStep(debugger)]) +``` + +#### 7.1.2 L2 级别 + ```python import mindspore as ms ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend") @@ -301,7 +351,9 @@ trainer.train(1, train_dataset) ### 8.1 静态图场景 -训练结束后,数据将保存在 `dump_path` 指定的目录下。 +训练结束后,数据将保存在 `dump_path` 指定的目录下。
+L0 级别 dump 的目录结构与动态图场景下目录结构一致。
+L2 级别 dump 的目录结构如下所示: 若jit_level=O2,且使用mindstudio-probe发布包或源码编包时添加了`--include-mod=adump`选项,目录结构示例如下: ``` diff --git a/debug/accuracy_tools/msprobe/mindspore/common/const.py b/debug/accuracy_tools/msprobe/mindspore/common/const.py index 067e783842..b41dc5ce01 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/const.py +++ b/debug/accuracy_tools/msprobe/mindspore/common/const.py @@ -61,6 +61,7 @@ class Const: DROPOUT_API_NAME_PREFIX = "dropout" GRAPH_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.INPUT, CoreConst.OUTPUT] + GRAPH_CELL_DUMP_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.FORWARD, CoreConst.BACKWARD] HOOK_MS_PREFIX_DICT = { OPS_DATA_PREFIX: OPS_PREFIX, diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 7694d71dd9..a7082d3e56 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -34,6 +34,7 @@ from msprobe.mindspore.ms_config import parse_json_config from msprobe.mindspore.runtime import Runtime from msprobe.mindspore.service import Service from msprobe.mindspore.task_handler_factory import TaskHandlerFactory +from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump try: from msprobe.lib import _msprobe_c @@ -164,7 +165,7 @@ class PrecisionDebugger: else: if not instance.first_start: api_register.api_set_ori_func() - handler = TaskHandlerFactory.create(instance.config) + handler = TaskHandlerFactory.create(instance.config, model) handler.handle() instance.first_start = True @@ -199,6 +200,9 @@ class PrecisionDebugger: _msprobe_c._PrecisionDebugger().step() if instance.task in PrecisionDebugger.task_not_need_service: return + if instance.config.execution_mode != MsConst.PYNATIVE_MODE and instance.config.level == MsConst.CELL: + GraphModeCellDump.step() + return if instance.service: instance.service.step() HOOKCell.cell_count 
= defaultdict(int) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py new file mode 100644 index 0000000000..a21c4590b8 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py @@ -0,0 +1,450 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import time +import re +import json +import atexit +from multiprocessing import Pool + +import numpy as np +import mindspore as ms +from mindspore import nn, ops + +from msprobe.mindspore.common.log import logger +from msprobe.core.common.const import Const as CoreConst +from msprobe.core.common.file_utils import load_npy, save_json, remove_path +from msprobe.core.common.const import FileCheckConst + + +CONSTRUCT_FILE_NAME = "construct.json" +DEFAULT_RANK_DIR = "rank0" +KEY_LAYERS = "layers" +construct = {} +cell_list = [] +KEY_SIDE_EFFECT = "side_effect_io" +td = ops.TensorDump() +td_in = ops.TensorDump("in") +td.add_prim_attr(KEY_SIDE_EFFECT, False) +td_in.add_prim_attr(KEY_SIDE_EFFECT, False) +np_ms_dtype_dict = { + "bool": ms.bool_, + "int8": ms.int8, + "byte": ms.byte, + "int16": ms.int16, + "short": ms.short, + "int32": ms.int32, + "intc": ms.intc, + "int64": ms.int64, + "intp": ms.intp, + "uint8": ms.uint8, + "ubyte": ms.ubyte, + "uint16": ms.uint16, + "ushort": ms.ushort, + "uint32": ms.uint32, + "uintc": 
ms.uintc, + "uint64": ms.uint64, + "uintp": ms.uintp, + "float16": ms.float16, + "half": ms.half, + "float32": ms.float32, + "single": ms.single, + "float64": ms.float64, + "double": ms.double, + "bfloat16": ms.bfloat16, + "complex64": ms.complex64, + "complex128": ms.complex128 +} + + +def generate_file_path(dump_path, cell_prefix, suffix, io_type, index): + step_path = os.path.join(dump_path, "{step}") + rank_path = os.path.join(step_path, "{rank}") + data_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) + file_name = CoreConst.SEP.join([cell_prefix, suffix, io_type, str(index)]) + return os.path.join(data_path, file_name) + + +def partial_func(func, dump_path, cell_prefix, index, io_type): + def newfunc(*args, **kwargs): + return func(dump_path, cell_prefix, index, io_type, *args, **kwargs) + return newfunc + + +def clip_gradient(dump_path, cell_prefix, index, io_type, dx): + if io_type == CoreConst.OUTPUT: + temp = td(generate_file_path(dump_path, cell_prefix, CoreConst.BACKWARD, io_type, index), dx) + dx = ops.depend(dx, temp) + if io_type == CoreConst.INPUT: + temp = td_in(generate_file_path(dump_path, cell_prefix, CoreConst.BACKWARD, io_type, index), dx) + dx = ops.depend(dx, temp) + return dx + + +def cell_construct_wrapper(func, self): + def new_construct(self, *args, **kwargs): + new_args = [] + out_list = [] + + index = 0 + item = None + # The inputs of the cell. + for index, item in enumerate(args): + if self.data_mode == "backward" or self.data_mode == "all": + if ops.is_tensor(item): + item = self.output_clips[index](item) + if self.data_mode == "forward" or self.data_mode == "all": + if ops.is_tensor(item): + temp = td_in(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.INPUT, index), item) + item = ops.depend(item, temp) + new_args.append(item) + + out = func(*new_args, **kwargs) + + # The outputs of the cell. 
+ if isinstance(out, tuple): + for index, item in enumerate(out): + if self.data_mode == "backward" or self.data_mode == "all": + if ops.is_tensor(item): + item = self.input_clips[index](item) + if self.data_mode == "forward" or self.data_mode == "all": + if ops.is_tensor(item): + temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), item) + item = ops.depend(item, temp) + out_list.append(item) + else: + out_list.append(item) + out_list = tuple(out_list) + return out_list + else: + if self.data_mode == "backward" or self.data_mode == "all": + out = self.input_clips[0](out) + if self.data_mode == "forward" or self.data_mode == "all": + if ops.is_tensor(out): + temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), out) + out = ops.depend(out, temp) + return out + + return new_construct.__get__(self, type(self)) + + +# 获取目录下所有文件名并根据TensorDump落盘自增id从小到大排序 +def sort_filenames(path): + filenames = os.listdir(path) + id_pattern = re.compile(rf'{CoreConst.REPLACEMENT_CHARACTER}(\d+){CoreConst.NUMPY_SUFFIX}$') + filenames.sort(key=lambda x: int(id_pattern.findall(x)[0])) + return filenames + + +# 删除重复dump的文件:自定义文件名相同,并且数据相同 +def del_same_file(path, filenames): + result_list = [] + seen_prefixes = {} + for current_filename in filenames: + parts = current_filename.rsplit(CoreConst.REPLACEMENT_CHARACTER, 1) + prefix = parts[0] + if prefix not in seen_prefixes: + result_list.append(current_filename) + seen_prefixes[prefix] = current_filename + else: + current_file_path = os.path.join(path, current_filename) + current_file = load_npy(current_file_path) + prev_filename = seen_prefixes[prefix] + prev_file_path = os.path.join(path, prev_filename) + prev_file = load_npy(prev_file_path) + if np.array_equal(current_file, prev_file): + remove_path(current_file_path) + logger.warning(f"{current_file_path} is deleted!") + else: + result_list.append(current_filename) + return 
result_list + + +def rename_filename(path): + filenames = sort_filenames(path) + filenames = del_same_file(path, filenames) + + filename_dict = {} + for filename in filenames: + name_field = filename.rsplit(CoreConst.REPLACEMENT_CHARACTER, 1)[0] + + if name_field in filename_dict: + filename_dict[name_field] += 1 + else: + filename_dict[name_field] = 0 + + cell_index = filename_dict[name_field] + + # 修改文件名,增加重复调用Cell的序号 + if CoreConst.FORWARD_PATTERN in filename: + #Format: Cell.{cell_name}.{class_name}.{forward/backward}.{number}.{input/output}.{index}_{dtype}_{id}.npy + newFileName = filename.replace(CoreConst.FORWARD_PATTERN, CoreConst.FORWARD_PATTERN + str(cell_index) + CoreConst.SEP) + if CoreConst.BACKWARD_PATTERN in filename: + newFileName = filename.replace(CoreConst.BACKWARD_PATTERN, CoreConst.BACKWARD_PATTERN + str(cell_index) + CoreConst.SEP) + os.rename(os.path.join(path, filename), os.path.join(path, newFileName)) + logger.info(f"==========The rename_filename phase is Finished!==========") + + +# Extract the field between the first "." and the third to last ".", i.e. {cell_name} +def get_cell_name(str): + parts = str.split(CoreConst.SEP) + if len(parts) < 4: + return None + start_index = 1 + end_index = len(parts) - 3 + return CoreConst.SEP.join(parts[start_index:end_index]) + + +# Extract the field between the last "." and the second to last ".", i.e. 
{data_made} +def get_data_mode(str): + last_dot_index = str.rfind(CoreConst.SEP) + second_last_dot_index = str.rfind(CoreConst.SEP, 0, last_dot_index) + data_mode = str[second_last_dot_index + 1:last_dot_index] + return data_mode + + +# 判断二者之间是否存在父子关系 +def check_relation(cell_name, parent_cell_name): + layers_pattern = rf"{CoreConst.SEP}{KEY_LAYERS}{CoreConst.SEP}\d+$" + last_dot_index = cell_name.rfind(CoreConst.SEP) + if last_dot_index != -1: + # 如果cell_name最后一个'.'之前的字段等于parent_cell_name,则判定存在父子关系 + sub_cell_name = cell_name[:last_dot_index] + if sub_cell_name == parent_cell_name: + return True + elif re.search(layers_pattern, cell_name): + # 如果cell_name以".layer.{layer_id}"结尾,且去掉该字段后等于parent_cell_name,则判定存在父子关系 + sub_cell_name = re.sub(layers_pattern, '', cell_name) + if sub_cell_name == parent_cell_name: + return True + return False + + +def get_construct(cell_list_input): + for cell in cell_list_input: + cell_name = get_cell_name(cell) + cell_data_mode = get_data_mode(cell) + found_flag = False + for parent_cell in cell_list_input: + parent_cell_name = get_cell_name(parent_cell) + parent_data_mode = get_data_mode(parent_cell) + has_relation = check_relation(cell_name, parent_cell_name) + if has_relation and parent_data_mode == cell_data_mode: + construct.update({cell: parent_cell}) + found_flag = True + break + if not found_flag: + construct.update({cell: None}) + + +def generate_construct(path): + global construct + filenames = sort_filenames(path) + + # 提取文件名中Cell.{cell_name}.{class_name}.{data_mode}.{重复调用此cell的序号}字段,并存入cell_list + for filename in filenames: + point_position = 3 + mid_field = filename.rsplit(CoreConst.SEP, point_position)[0] + if CoreConst.INPUT in filename: + if mid_field in cell_list: + cell_list.remove(mid_field) + cell_list.append(mid_field) + else: + if mid_field not in cell_list: + index = filenames.index(filename) + output_field = mid_field + CoreConst.OUTPUT + find_flag = False + for filename_other in cell_list[index + 1:]: + if 
output_field in filename_other: + find_flag = True + if find_flag is False: + cell_list.append(mid_field) + + get_construct(cell_list) + + # 生成JSON文件 + rank_dir = os.path.dirname(path) + json_path = os.path.join(rank_dir, CONSTRUCT_FILE_NAME) + save_json(json_path, construct, indent=1) + + # 清空'construct'继续处理下一个路径下的数据 + construct = {} + logger.info(f"Construct data saved to {json_path}") + + +def process_file(file_path): + try: + # 读取.npy文件内容 + npy_content = load_npy(file_path) + logger.info(f"Loaded {file_path}: shape is {npy_content.shape}, dtype is {npy_content.dtype}") + + # 文件名举例:Cell.network._backbone.loss.CrossEntropyLoss.forward.0.input.0_float32_165.npy + parts = os.path.basename(file_path).split(CoreConst.SEP) + data_dtype = "" + # 获取0_float32_165或者0_in_float32_165中的float32 + data_dtype_list = parts[-2].split('_') + if len(data_dtype_list) > 1: + data_dtype = data_dtype_list[-2] + # op_name是Cell.network._backbone.loss.CrossEntropyLoss.forward.0 + op_name = CoreConst.SEP.join(parts[:-3]) + ms_dtype = np_ms_dtype_dict.get(data_dtype) + if ms_dtype is None: + logger.warning(f"Get dtype None from file {file_path}") + tensor_json = { + CoreConst.TYPE: 'mindspore.Tensor', + CoreConst.DTYPE: str(ms_dtype), + CoreConst.SHAPE: list(npy_content.shape), + CoreConst.MAX: npy_content.max().item(), + CoreConst.MIN: npy_content.min().item(), + CoreConst.MEAN: npy_content.mean().item(), + CoreConst.NORM: np.linalg.norm(npy_content).item(), + CoreConst.DATA_NAME: os.path.basename(file_path) + } + + # 根据文件名的最后一个部分(输入或输出)确定是添加到input_args还是output + if parts[-3] == CoreConst.INPUT: + return op_name, CoreConst.INPUT_ARGS, tensor_json + elif parts[-3] == CoreConst.OUTPUT: + return op_name, CoreConst.OUTPUT, tensor_json + else: + return None, None, None + + except Exception as e: + logger.error(f"Error reading {file_path}: {e}") + return None, None, None + + +def custom_sort(item, key_to_index): + key = item[0] + return key_to_index.get(key, float('inf')) + + +def 
generate_dump_info(path): + if not os.path.exists(path): + logger.error("The provided path does not exist.") + return + + dump_data = {"task": "tensor", "level": "L0", "dump_data_dir": path, "data": {}} + + with Pool(processes=10) as pool: + file_paths = [] + for root, _, files in os.walk(path): + for file in files: + if file.endswith(FileCheckConst.NUMPY_SUFFIX): + file_paths.append((os.path.join(root, file),)) + file_paths.sort() + results = pool.starmap(process_file, file_paths) + + # 收集结果 + for op_name, key, tensor_json in results: + if op_name: + if op_name not in dump_data.get(CoreConst.DATA, {}): + dump_data.get(CoreConst.DATA, {})[op_name] = {CoreConst.INPUT_ARGS: [], + CoreConst.INPUT_KWARGS: {}, + CoreConst.OUTPUT: []} + if key not in dump_data.get(CoreConst.DATA, {}).get(op_name, {}): + dump_data.get(CoreConst.DATA, {}).get(op_name, {})[key] = [] + dump_data.get(CoreConst.DATA, {}).get(op_name, {}).get(key, []).append(tensor_json) + + # 根据cell_list排序 + data_dict = dump_data.get(CoreConst.DATA, {}) + key_to_index = {key: index for index, key in enumerate(cell_list)} + sorted_data_dict = dict(sorted(data_dict.items(), key=lambda item: custom_sort(item, key_to_index))) + dump_data[CoreConst.DATA] = sorted_data_dict + + # 将数据写入dump.json + json_path = os.path.join(os.path.dirname(path), 'dump.json') + save_json(json_path, dump_data, indent=1) + + logger.info(f"Dump data saved to {json_path}") + + +def generate_stack_info(path): + if not os.path.exists(path): + logger.error("The provided path does not exist.") + return + + stack_data = {} + file_paths = [] + # 传入的path为工具生成的./dump_tensor_data,内容为npy文件 + for root, _, files in os.walk(path): + for file in files: + if file.endswith(FileCheckConst.NUMPY_SUFFIX): + file_paths.append(os.path.join(root, file)) + file_paths.sort() + for file_path in file_paths: + # 文件名举例:Cell.network._backbone.loss.CrossEntropyLoss.forward.0.input.0_float32_165.npy + parts = os.path.basename(file_path).split(CoreConst.SEP) + # 
op_name是Cell.network._backbone.loss.CrossEntropyLoss.forward.0 + op_name = CoreConst.SEP.join(parts[:-3]) + stack_data.update({op_name: []}) + + # 将数据写入stack.json + json_path = os.path.join(os.path.dirname(path), 'stack.json') + save_json(json_path, stack_data, indent=1) + + logger.info(f"Stack data saved to {json_path}") + + +def process(dump_path): + logger.info(f"==========Start processing data that has already been stored on the disk!==========") + rank_id = os.environ.get('RANK_ID') + rank_dir = DEFAULT_RANK_DIR + if rank_id is not None: + rank_dir = CoreConst.RANK + str(rank_id) + + step_dir_list = os.listdir(dump_path) + for step_dir in step_dir_list: + step_path = os.path.join(dump_path, step_dir) + rank_path = os.path.join(step_path, rank_dir) + npy_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) + rename_filename(npy_path) + generate_construct(npy_path) + generate_dump_info(npy_path) + generate_stack_info(npy_path) + + +def start(net=None, dump_path="./", data_mode=CoreConst.ALL): + if net is None: + return + + black_list = ["grad_reducer", ""] + for name, cell in net.cells_and_names(): + class_name = cell.__class__.__name__ + # 跳过黑名单cell + if name in black_list: + logger.info(f"Cell {name}.{class_name} is skipped!") + continue + # 跳过框架内部的cell + if class_name.startswith(CoreConst.REPLACEMENT_CHARACTER): + logger.info(f"Cell {name}.{class_name} is skipped!") + continue + else: + #Format: Cell.{cell_name}.{class_name} + cell.cell_prefix = CoreConst.SEP.join([CoreConst.CELL, name, cell.__class__.__name__]) + + cell.construct = cell_construct_wrapper(cell.construct, cell) + logger.info(f"Cell {name}: construct function is wrapped!") + cell.dump_path = dump_path + cell.data_mode = data_mode + cell.input_clips = [] + cell.output_clips = [] + # It is assumed that each cell has a maximum of 50 outputs and 50 inputs. 
+ for i in range(50): + cell.input_clips.append(ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, CoreConst.INPUT))) + cell.output_clips.append(ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, CoreConst.OUTPUT))) + + logger.info(f"==========The cell_dump_process_start phase is Finished!==========") + atexit.register(process, dump_path=dump_path) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py index 0ca63b4a84..c0933d20aa 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py @@ -17,13 +17,14 @@ from msprobe.mindspore.common.const import Const from msprobe.mindspore.debugger.debugger_config import DebuggerConfig from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump from msprobe.mindspore.dump.kernel_kbyk_dump import KernelKbykDump +from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump class DumpToolFactory: tools = { Const.CELL: { - Const.GRAPH_KBYK_MODE: None, - Const.GRAPH_GE_MODE: None, + Const.GRAPH_KBYK_MODE: GraphModeCellDump, + Const.GRAPH_GE_MODE: GraphModeCellDump, Const.PYNATIVE_MODE: None }, Const.API: { @@ -39,9 +40,13 @@ class DumpToolFactory: } @staticmethod - def create(config: DebuggerConfig): - if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_DATA_MODE_LIST: - raise Exception("data_mode must be one of all, input, output.") + def create(config: DebuggerConfig, model): + if config.level == Const.CELL: + if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_CELL_DUMP_DATA_MODE_LIST: + raise Exception("data_mode must be one of all, forward, backward.") + else: + if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_DATA_MODE_LIST: + raise Exception("data_mode must be one of all, input, output.") tool = 
DumpToolFactory.tools.get(config.level) if not tool: raise Exception("Valid level is needed.") @@ -49,4 +54,4 @@ class DumpToolFactory: if not tool: raise Exception(f"Data dump is not supported in {config.execution_mode} mode " f"when dump level is {config.level}.") - return tool(config) + return tool(config, model) if tool == GraphModeCellDump else tool(config) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py new file mode 100644 index 0000000000..e32866868f --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py @@ -0,0 +1,69 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +from msprobe.mindspore.common.log import logger +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +import mindspore as ms +from mindspore._c_expression import _tensordump_set_step +from mindspore.ops.primitive import _run_op +from mindspore import hal, ops +import msprobe.mindspore.dump.cell_dump_process as cellDumper +from msprobe.mindspore.common.const import Const + + +class GraphModeCellDump: + def __init__(self, config: DebuggerConfig, model): + self.net = model + self.white_list = [] + self.black_list = [] + self.dump_path = config.dump_path if config.dump_path else "./" + self.rank = config.rank + self.step = config.step + self.scope = config.scope + self.list = config.list + self.data_mode = config.data_mode + self.file_format = config.file_format + self.check_config() + self.set_step() + + @staticmethod + def step(): + hal.synchronize() + temp_tensor = ms.Tensor([1], dtype=ms.float32) + step_flag = "" + _run_op(ops.TensorDump(), "TensorDump", (step_flag, temp_tensor)) + ops.tensordump(step_flag, temp_tensor) + + def check_config(self): + if self.rank != []: + raise Exception("In graph mode, cell dump does not currently support specifying rank.") + if self.scope != []: + raise Exception("In graph mode, cell dump does not currently support specifying scope.") + if self.list != []: + raise Exception("In graph mode, cell dump does not currently support specifying list.") + if len(self.data_mode) != 1 or self.data_mode[0] not in Const.GRAPH_CELL_DUMP_DATA_MODE_LIST: + raise Exception("In graph mode and cell dump, data_mode must be one of all, forward, backward.") + if self.file_format != []: + logger.warning("In graph mode, cell dump does not currently support specifying file_format. 
The file will be stored in npy format.") + if not self.net: + raise Exception("The model is empty and cell dump is not enabled.") + return True + + def set_step(self): + _tensordump_set_step(self.step) + + def handle(self): + os.environ['MS_JIT_MODULES'] = 'msprobe' + cellDumper.start(net=self.net, dump_path=self.dump_path, data_mode=self.data_mode[0]) diff --git a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py index a9cb5e6dd4..5cfbbaeb4a 100644 --- a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py @@ -29,11 +29,14 @@ class TaskHandlerFactory: } @staticmethod - def create(config: DebuggerConfig): + def create(config: DebuggerConfig, model): task = TaskHandlerFactory.tasks.get(config.task) if not task: raise Exception("Valid task is needed.") - handler = task.create(config) + if task == DumpToolFactory: + handler = task.create(config, model) + else: + handler = task.create(config) if not handler: raise Exception("Can not find task handler") return handler diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py new file mode 100644 index 0000000000..b111e64437 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py @@ -0,0 +1,309 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +import unittest +from unittest.mock import MagicMock, patch + +import mindspore as ms +from mindspore import ops + +from msprobe.core.common.const import Const as CoreConst +from msprobe.mindspore.dump.cell_dump_process import generate_file_path +from msprobe.mindspore.dump.cell_dump_process import partial_func, clip_gradient +from msprobe.mindspore.dump.cell_dump_process import cell_construct_wrapper +from msprobe.mindspore.dump.cell_dump_process import rename_filename, sort_filenames, del_same_file +from msprobe.mindspore.dump.cell_dump_process import check_relation + + +class TestGenerateFilePath(unittest.TestCase): + def setUp(self): + self.dump_path = "/path" + self.cell_prefix = "Cell.network._backbone.LlamaForCausalLM" + self.suffix = "forward" + self.io_type = "input" + self.index = 0 + + def test_generate_file_path(self): + expected_path = os.path.join( + self.dump_path, + "{step}", + "{rank}", + CoreConst.DUMP_TENSOR_DATA, + CoreConst.SEP.join([self.cell_prefix, self.suffix, self.io_type, str(self.index)]) + ) + result = generate_file_path(self.dump_path, self.cell_prefix, self.suffix, self.io_type, self.index) + self.assertEqual(result, expected_path) + + +class TestPartialFunc(unittest.TestCase): + + @patch('msprobe.mindspore.dump.cell_dump_process.CoreConst') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.ops.depend') + def 
test_clip_gradient_output(self, mock_depend, mock_generate_file_path, mock_td_in, mock_td, mock_CoreConst): + mock_CoreConst.OUTPUT = "output" + mock_CoreConst.BACKWARD = "backward" + mock_generate_file_path.return_value = "mock_path" + mock_td.return_value = "temp_tensor" + mock_depend.return_value = "dependent_tensor" + + result = clip_gradient("dump_path", "cell_prefix", 0, "output", "dx") + + mock_generate_file_path.assert_called_with("dump_path", "cell_prefix", "backward", "output", 0) + mock_td.assert_called_with("mock_path", "dx") + mock_depend.assert_called_with("dx", "temp_tensor") + self.assertEqual(result, "dependent_tensor") + + @patch('msprobe.mindspore.dump.cell_dump_process.CoreConst') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.ops.depend') + def test_clip_gradient_input(self, mock_depend, mock_generate_file_path, mock_td_in, mock_td, mock_CoreConst): + mock_CoreConst.INPUT = "input" + mock_CoreConst.BACKWARD = "backward" + mock_generate_file_path.return_value = "mock_path" + mock_td_in.return_value = "temp_tensor" + mock_depend.return_value = "dependent_tensor" + + result = clip_gradient("dump_path", "cell_prefix", 0, "input", "dx") + + mock_generate_file_path.assert_called_with("dump_path", "cell_prefix", "backward", "input", 0) + mock_td_in.assert_called_with("mock_path", "dx") + mock_depend.assert_called_with("dx", "temp_tensor") + self.assertEqual(result, "dependent_tensor") + + def test_partial_func(self): + def mock_func(dump_path, cell_prefix, index, io_type, *args, **kwargs): + return dump_path, cell_prefix, index, io_type, args, kwargs + + new_func = partial_func(mock_func, "dump_path", "cell_prefix", 0, "io_type") + result = new_func("arg1", "arg2", kwarg1="value1") + + self.assertEqual(result, ("dump_path", "cell_prefix", 0, "io_type", ("arg1", 
"arg2"), {'kwarg1': 'value1'})) + + +class TestCellWrapperProcess(unittest.TestCase): + + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + def test_cell_construct_wrapper(self, mock_td_in, mock_td, mock_generate_file_path): + # Mock the generate_file_path function + mock_generate_file_path.return_value = "mock_path" + + # Mock the TensorDump operations + mock_td.return_value = MagicMock() + mock_td_in.return_value = MagicMock() + + # Create a mock cell with necessary attributes + mock_cell = MagicMock() + mock_cell.data_mode = "all" + mock_cell.dump_path = "mock_dump_path" + mock_cell.cell_prefix = "mock_cell_prefix" + mock_cell.input_clips = [MagicMock() for _ in range(50)] + mock_cell.output_clips = [MagicMock() for _ in range(50)] + + # Define a mock function to wrap + def mock_func(*args, **kwargs): + return args + + # Wrap the mock function using cell_construct_wrapper + wrapped_func = cell_construct_wrapper(mock_func, mock_cell) + + # Create mock inputs + mock_input = ms.Tensor([1, 2, 3]) + mock_args = (mock_input,) + + # Call the wrapped function + result = wrapped_func(mock_cell, *mock_args) + + # Check if the result is as expected + self.assertEqual(result, mock_args) + + # Verify that the TensorDump operations were called + mock_td_in.assert_called() + mock_td.assert_called() + + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + def test_cell_construct_wrapper_with_tuple_output(self, mock_td_in, mock_td, mock_generate_file_path): + # Mock the generate_file_path function + mock_generate_file_path.return_value = "mock_path" + + # Mock the TensorDump operations + mock_td.return_value = MagicMock() + mock_td_in.return_value = MagicMock() + + # Create a mock cell with necessary attributes + 
mock_cell = MagicMock() + mock_cell.data_mode = "all" + mock_cell.dump_path = "mock_dump_path" + mock_cell.cell_prefix = "mock_cell_prefix" + mock_cell.input_clips = [MagicMock() for _ in range(50)] + mock_cell.output_clips = [MagicMock() for _ in range(50)] + + # Define a mock function to wrap + def mock_func(*args, **kwargs): + return (args[0], args[0]) + + # Wrap the mock function using cell_construct_wrapper + wrapped_func = cell_construct_wrapper(mock_func, mock_cell) + + # Create mock inputs + mock_input = ms.Tensor([1, 2, 3]) + mock_args = (mock_input,) + + # Call the wrapped function + result = wrapped_func(mock_cell, *mock_args) + + # Check if the result is as expected + self.assertEqual(result, (mock_input, mock_input)) + + # Verify that the TensorDump operations were called + mock_td_in.assert_called() + mock_td.assert_called() + + +class TestSortFilenames(unittest.TestCase): + + @patch('os.listdir') + def test_sort_filenames(self, mock_listdir): + # Mock the list of filenames returned by os.listdir + mock_listdir.return_value = [ + 'Cell.network._backbone.model.LlamaModel.backward.0.input.0_float16_177.npy', + 'Cell.network._backbone.model.LlamaModel.forward.0.input.0_in_int32_1.npy', + 'Cell.network._backbone.model.LlamaModel.forward.0.output.10_float16_165.npy', + 'Cell.network._backbone.model.norm_out.LlamaRMSNorm.backward.0.input.0_float16_178.npy' + ] + + # Mock the CoreConst values + CoreConst.REPLACEMENT_CHARACTER = '_' + CoreConst.NUMPY_SUFFIX = '.npy' + + # Expected sorted filenames + expected_sorted_filenames = [ + 'Cell.network._backbone.model.LlamaModel.forward.0.input.0_in_int32_1.npy', + 'Cell.network._backbone.model.LlamaModel.forward.0.output.10_float16_165.npy', + 'Cell.network._backbone.model.LlamaModel.backward.0.input.0_float16_177.npy', + 'Cell.network._backbone.model.norm_out.LlamaRMSNorm.backward.0.input.0_float16_178.npy' + ] + + # Call the function + sorted_filenames = sort_filenames('/mock/path') + + # Assert the filenames are 
sorted correctly + self.assertEqual(sorted_filenames, expected_sorted_filenames) + + +class TestRenameFilename(unittest.TestCase): + + @patch('msprobe.mindspore.dump.cell_dump_process.sort_filenames') + @patch('msprobe.mindspore.dump.cell_dump_process.del_same_file') + @patch('msprobe.mindspore.dump.cell_dump_process.os.rename') + def test_rename_filename(self, mock_rename, mock_del_same_file, mock_sort_filenames): + # Mock the constants + CoreConst.REPLACEMENT_CHARACTER = '_' + CoreConst.FORWARD_PATTERN = '.forward.' + CoreConst.BACKWARD_PATTERN = '.backward.' + CoreConst.SEP = '.' + + # Mock the filenames + mock_sort_filenames.return_value = [ + "Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_101.npy", + "Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_102.npy", + "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.0_float32_103.npy", + "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.1_bool_104.npy", + "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.output.1_bool_105.npy", + "Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_111.npy", + "Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_112.npy", + ] + mock_del_same_file.return_value = [mock_sort_filenames.return_value] + + # Call the function + rename_filename('/mock/path') + + # Check if os.rename was called with the correct arguments + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_101.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.0.input_0_int32_101.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_102.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.0.output_0_float32_102.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.0_float32_103.npy', + 
'/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.input_0_float32_103.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.1_bool_104.npy', + '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.input_1_bool_104.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.output.1_bool_105.npy', + '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.output_1_bool_105.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_111.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.1.input_0_int32_111.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_112.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.1.output_0_float32_112.npy' + ) + + # Mock the filenames + mock_sort_filenames.return_value = [] + mock_del_same_file.return_value = [] + + # Call the function + rename_filename('/mock/path') + + # Check if os.rename was not called + mock_rename.assert_not_called() + + +class TestCheckRelation(unittest.TestCase): + + def setUp(self): + CoreConst.SEP = '.' 
+ global KEY_LAYERS + KEY_LAYERS = "layers" + + def test_direct_parent_child_relation(self): + self.assertTrue(check_relation("network._backbone", "network")) + self.assertTrue(check_relation("network._backbone.model", "network._backbone")) + + def test_no_relation(self): + self.assertFalse(check_relation("network._backbone", "network.loss")) + self.assertFalse(check_relation("network._backbone.model", "network.loss")) + + def test_layer_pattern_relation(self): + self.assertTrue(check_relation("network.model.layers.0", "network.model")) + self.assertTrue(check_relation("network._backbone.model.layers.1", "network._backbone.model")) + + def test_no_layer_pattern_relation(self): + self.assertFalse(check_relation("network.model.layers.0", "network.loss")) + self.assertFalse(check_relation("network._backbone.model.layers.1", "network._backbone.model.layers")) + + def test_edge_cases(self): + self.assertFalse(check_relation("", "network")) + self.assertFalse(check_relation("network.layer1", "")) + self.assertFalse(check_relation("", "")) -- Gitee From 3ac2ba51aefc8800092cb5f4b064f5a31389ba73 Mon Sep 17 00:00:00 2001 From: fuchao <1501312275@qq.com> Date: Mon, 17 Feb 2025 18:18:33 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=9D=99=E6=80=81?= =?UTF-8?q?=E5=9B=BEcell=E7=BA=A7dump=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mindspore/dump/cell_dump_process.py | 50 +++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py index a21c4590b8..a9121e1435 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py @@ -133,7 +133,7 @@ def cell_construct_wrapper(func, self): out = self.input_clips[0](out) if self.data_mode == "forward" 
or self.data_mode == "all": if ops.is_tensor(out): - temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), out) + temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, 0), out) out = ops.depend(out, temp) return out @@ -302,6 +302,21 @@ def process_file(file_path): ms_dtype = np_ms_dtype_dict.get(data_dtype) if ms_dtype is None: logger.warning(f"Get dtype None from file {file_path}") + + #修改落盘文件名字,去掉TensorDump自带的数据类型和自增id字段 + data_file_name = os.path.basename(file_path) + data_file_dir = os.path.dirname(file_path) + parts = data_file_name.split(CoreConst.SEP) + if len(parts) >= 2: + param_index = parts[-2].split(CoreConst.REPLACEMENT_CHARACTER)[0] + pre_parts = CoreConst.SEP.join(parts[:-2]) + new_file_name = pre_parts + CoreConst.SEP + param_index + CoreConst.NUMPY_SUFFIX + os.rename(os.path.join(data_file_dir, data_file_name), os.path.join(data_file_dir, new_file_name)) + logger.info(f"{data_file_name} is renamed to {new_file_name}") + else: + logger.warning(f"Failed to rename {data_file_name}.") + new_file_name = data_file_name + tensor_json = { CoreConst.TYPE: 'mindspore.Tensor', CoreConst.DTYPE: str(ms_dtype), @@ -310,7 +325,7 @@ def process_file(file_path): CoreConst.MIN: npy_content.min().item(), CoreConst.MEAN: npy_content.mean().item(), CoreConst.NORM: np.linalg.norm(npy_content).item(), - CoreConst.DATA_NAME: os.path.basename(file_path) + CoreConst.DATA_NAME: new_file_name } # 根据文件名的最后一个部分(输入或输出)确定是添加到input_args还是output @@ -398,8 +413,28 @@ def generate_stack_info(path): logger.info(f"Stack data saved to {json_path}") +def is_download_finished(directory, interval=3): + """ + 判断指定目录在一段时间后是否有数据被下载完成 + :param directory: 指定目录的路径 + :param interval: 检查的时间间隔(秒),默认为 3 秒 + :return: 如有数据被下载完成返回 True,否则返回 False + """ + # 检查目录是否存在 + if not os.path.exists(directory): + logger.warning(f"The specified directory {directory} does not exist.") + return False + 
initial_modification_time = os.path.getmtime(directory) + time.sleep(interval) + current_modification_time = os.path.getmtime(directory) + # 比较初始和当前修改时间 + if current_modification_time > initial_modification_time: + return False + else: + return True + + def process(dump_path): - logger.info(f"==========Start processing data that has already been stored on the disk!==========") rank_id = os.environ.get('RANK_ID') rank_dir = DEFAULT_RANK_DIR if rank_id is not None: @@ -410,10 +445,19 @@ def process(dump_path): step_path = os.path.join(dump_path, step_dir) rank_path = os.path.join(step_path, rank_dir) npy_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) + while True: + is_finished = is_download_finished(npy_path) + if not is_finished: + logger.info(f"There is data being downloaded in the specified directory, continue checking...") + else: + logger.info(f"There is no data being downloaded in the specified directory, Stop checking.") + break + logger.info(f"==========Start processing data that has already been stored on the disk!==========") rename_filename(npy_path) generate_construct(npy_path) generate_dump_info(npy_path) generate_stack_info(npy_path) + logger.info(f"==========JSON file generation completed!==========") def start(net=None, dump_path="./", data_mode=CoreConst.ALL): -- Gitee