diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py
index 6f7616be986310579da0191e20799bfe63515f4f..962d9921fea5f7fa772267ec5990f0117bdcf5aa 100644
--- a/debug/accuracy_tools/msprobe/core/common/const.py
+++ b/debug/accuracy_tools/msprobe/core/common/const.py
@@ -112,9 +112,12 @@ class Const:
     RUN_UT = "run_ut"
     GRAD_PROBE = "grad_probe"
     STRUCTURE = "structure"
+    DUMP_PRECISION_HIGH = "high"
+    DUMP_PRECISION_LOW = "low"
     TASK_LIST = [TENSOR, STATISTICS, OVERFLOW_CHECK, FREE_BENCHMARK, RUN_UT, GRAD_PROBE, STRUCTURE]
     DUMP_DATA_COLLECTION_LIST = [STATISTICS, TENSOR, STRUCTURE]
     DUMP_DATA_MODE_LIST = [ALL, INPUT, OUTPUT, FORWARD, BACKWARD]
+    DUMP_PRECISION_LIST = [DUMP_PRECISION_LOW, DUMP_PRECISION_HIGH]
     LEVEL_L0 = "L0"
     LEVEL_L1 = "L1"
     LEVEL_L2 = "L2"
diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py
index 836a7b89d3008c8e2fc34053eddd186e875279d6..0dd252433387852a15fa2040ee045376bb883ab8 100644
--- a/debug/accuracy_tools/msprobe/core/common_config.py
+++ b/debug/accuracy_tools/msprobe/core/common_config.py
@@ -28,6 +28,7 @@ class CommonConfig:
         self.level = json_config.get('level')
         self.enable_dataloader = json_config.get('enable_dataloader', False)
         self.async_dump = json_config.get("async_dump", False)
+        self.precision = json_config.get("precision", Const.DUMP_PRECISION_HIGH)
         self._check_config()
 
     def _check_config(self):
@@ -49,6 +50,10 @@ class CommonConfig:
         elif self.async_dump:
             logger.warning("async_dump is True, it may cause OOM when dumping large tensor.")
 
+        if self.precision not in Const.DUMP_PRECISION_LIST:
+            logger.error_log_with_exp("precision is invalid, it should be one of {}".format(Const.DUMP_PRECISION_LIST),
+                                      MsprobeException(MsprobeException.INVALID_PARAM_ERROR))
+
 
 class BaseConfig:
     def __init__(self, json_config):
diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py
index 8c53718ca33849a4da8604ef0922caf2c43d490d..1e8cb322f9f4dc518a10d690168e0b80b84fa18e 100644
--- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py
+++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py
@@ -65,51 +65,6 @@ class MindsporeDataProcessor(BaseDataProcessor):
     def analyze_dtype_in_kwargs(element):
         return {"type": "mindspore.dtype", "value": str(element)}
 
-    @staticmethod
-    def get_stat_info_sync(data):
-        tensor_stat = TensorStatInfo()
-        if data.dtype == ms.bool_:
-            tensor_stat.max = mint.any(data)
-            tensor_stat.min = mint.all(data)
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.copy()
-        elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
-            data_abs = np.abs(data.asnumpy())
-            tensor_stat.max = np.max(data_abs).item()
-            tensor_stat.min = np.min(data_abs).item()
-            tensor_stat.mean = np.mean(data_abs).item()
-            tensor_stat.norm = np.linalg.norm(data_abs).item()
-        else:
-            if not ops.is_floating_point(data) or data.dtype == ms.float64:
-                data = data.to(ms.float32)
-            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
-            tensor_stat.max = mint.max(data)
-            tensor_stat.min = mint.min(data)
-            tensor_stat.mean = mint.mean(data)
-            tensor_stat.norm = get_norm_value(data)
-        return tensor_stat
-
-    @staticmethod
-    def get_stat_info_async(data):
-        tensor_stat = TensorStatInfo()
-        if data.dtype == ms.bool_:
-            tensor_stat.max = mint.any(data)
-            tensor_stat.min = mint.all(data)
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.copy()
-        elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
-            logger.warning("Async dump do not support complex data!")
-            return tensor_stat
-        else:
-            if not ops.is_floating_point(data) or data.dtype == ms.float64:
-                data = data.to(ms.float32)
-            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
-            tensor_stat.max = mint.max(data)
-            tensor_stat.min = mint.min(data)
-            tensor_stat.mean = mint.mean(data)
-            tensor_stat.norm = get_norm_value(data)
-        return tensor_stat
-
     @staticmethod
     def is_hookable_element(element):
         return hasattr(element, "register_hook") and callable(element.register_hook)
@@ -146,14 +101,37 @@ class MindsporeDataProcessor(BaseDataProcessor):
         self.api_register.restore_inner_used_api()
         tensor_stat = TensorStatInfo()
         if data.numel() == 0:
-            stat_info = tensor_stat
-        else:
+            pass
+        elif data.dtype == ms.bool_:
             if self.config.async_dump:
-                stat_info = MindsporeDataProcessor.get_stat_info_async(data)
+                tensor_stat.max = mint.any(data)
+                tensor_stat.min = mint.all(data)
             else:
-                stat_info = MindsporeDataProcessor.get_stat_info_sync(data)
+                data_np = data.asnumpy()
+                tensor_stat.max = np.max(data_np).item()
+                tensor_stat.min = np.min(data_np).item()
+        elif not data.shape:
+            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.copy()
+        elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
+            if self.config.async_dump:
+                logger.warning("Async dump do not support complex data!")
+            else:
+                data_abs = np.abs(data.asnumpy())
+                tensor_stat.max = np.max(data_abs).item()
+                tensor_stat.min = np.min(data_abs).item()
+                tensor_stat.mean = np.mean(data_abs).item()
+                tensor_stat.norm = np.linalg.norm(data_abs).item()
+        else:
+            if self.config.precision == Const.DUMP_PRECISION_HIGH or not ops.is_floating_point(
+                    data) or data.dtype == ms.float64:
+                data = data.to(ms.float32)
+            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
+            tensor_stat.max = mint.max(data)
+            tensor_stat.min = mint.min(data)
+            tensor_stat.mean = mint.mean(data)
+            tensor_stat.norm = get_norm_value(data)
         self.api_register.register_inner_used_api()
-        return stat_info
+        return tensor_stat
 
     def analyze_single_element(self, element, suffix_stack):
         if suffix_stack and suffix_stack[-1] in self.mindspore_object_key:
@@ -318,11 +296,17 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
         if max_tensor is None or min_tensor is None:
             return
 
-        if mint.isinf(max_tensor) or mint.isnan(max_tensor):
+        def check_inf_nan(value):
+            # Tensor-like extrema expose .item(); anything else is used as-is.
+            scalar = value.item() if hasattr(value, "item") else value
+            # A value is inf or nan exactly when it is not finite.
+            return not np.isfinite(scalar)
+
+        if check_inf_nan(max_tensor):
             self.has_overflow = True
             return
 
-        if mint.isinf(min_tensor) or mint.isnan(min_tensor):
+        if check_inf_nan(min_tensor):
             self.has_overflow = True
 
     def _analyze_tensor(self, tensor, suffix):
diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py
index 33c2657cd9cd53668df6268caab8db0683ce30e9..9a4c5ee9645a30ac77396270bec972f58729889d 100644
--- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py
+++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py
@@ -96,29 +96,17 @@ class PytorchDataProcessor(BaseDataProcessor):
 
 
     @staticmethod
-    def get_stat_info_async(data):
+    def get_stat_info(data, async_dump=False, precision=Const.DUMP_PRECISION_HIGH):
         tensor_stat = TensorStatInfo()
-        if torch.is_complex(data):
-            logger.warning("Async dump do not support complex data!")
+        if data.is_meta:  # meta tensors: hand back the default (empty) stats
+            return tensor_stat
+        data = data.detach()  # rebind: every later op (incl. .cpu().numpy()) must use the detached tensor
+        if not data.numel() or not data.data_ptr():  # no elements or null data pointer: nothing to compute
             return tensor_stat
-        elif data.dtype == torch.bool:
-            tensor_stat.max = torch.any(data)
-            tensor_stat.min = torch.all(data)
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.clone()
-        else:
-            if data.dtype == torch.float64 or not data.is_floating_point():
-                data = data.float()
-            tensor_stat.max = torch.max(data)
-            tensor_stat.min = torch.min(data)
-            tensor_stat.mean = torch.mean(data)
-            tensor_stat.norm = torch.norm(data)
-        return tensor_stat
-
-    @staticmethod
-    def get_stat_info_sync(data):
-        tensor_stat = TensorStatInfo()
         if torch.is_complex(data):
+            if async_dump:
+                logger.warning("Async dump do not support complex data!")
+                return tensor_stat
             data_np = data.cpu().numpy()
             data_abs = np.abs(data_np)
             tensor_stat.max = np.max(data_abs).item()
@@ -130,7 +118,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         elif not data.shape:
             tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.clone()
         else:
-            if data.dtype == torch.float64 or not data.is_floating_point():
+            if precision == Const.DUMP_PRECISION_HIGH or data.dtype == torch.float64 or not data.is_floating_point():
                 data = data.float()
             tensor_stat.max = torch.max(data)
             tensor_stat.min = torch.min(data)
@@ -138,20 +126,6 @@ class PytorchDataProcessor(BaseDataProcessor):
             tensor_stat.norm = torch.norm(data)
         return tensor_stat
 
-    @staticmethod
-    def get_stat_info(data, async_dump=False):
-        tensor_stat = TensorStatInfo()
-        if data.is_meta:
-            return tensor_stat
-        data_clone = data.detach()
-        if not data_clone.numel() or not data_clone.data_ptr():
-            return tensor_stat
-        else:
-            if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump:
-                return PytorchDataProcessor.get_stat_info_sync(data_clone)
-            else:
-                return PytorchDataProcessor.get_stat_info_async(data_clone)
-
     @staticmethod
     def handle_tensor_extremum_nan_inf(tensor, operator):
         data_clone = tensor.detach()
@@ -257,7 +231,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         return p2pop_info
 
     def _analyze_tensor(self, tensor, suffix):
-        tensor_stat = self.get_stat_info(tensor, self.config.async_dump)
+        tensor_stat = self.get_stat_info(tensor, self.config.async_dump, self.config.precision)
         tensor_json = {}
         tensor_json.update({'type': 'torch.Tensor'})
         tensor_json.update({'dtype': str(tensor.dtype)})
diff --git a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md
index c0a6ea3543777e53328ca1ced034abf7ab2f84b7..e00b9c86bd6fa1ba7d2b54292859fb6fb583172d 100644
--- a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md
+++ b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md
@@ -10,20 +10,19 @@
 
 ### 1.1 通用配置
 
-| 参数    | 解释                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | 是否必选 |
-| ----------------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- |
-| task              | dump 的任务类型，str 类型。可选参数：<br/>  "statistics"：仅采集统计信息，默认值；<br/> "tensor"：采集统计信息和完全复刻整网的真实数据；<br/> "run_ut"：精度预检，仅 PyTorch 场景支持，采集数据时勿选；<br/> "overflow_check"：溢出检测；<br/>  "free_benchmark"：无标杆比对，不支持 MSAdapter 场景；<br/>  "grad_probe"：梯度监控， 不支持 MSAdapter 场景； <br/> "structure"：仅采集模型结构以及调用栈信息，不采集具体数据。 <br/> 根据 task 参数取值的不同，可以配置不同场景参数，详见：<br/>[1.2 task 配置为 statistics](#12-task-配置为-statistics)，<br/>[1.3 task 配置为 tensor](#13-task-配置为-tensor)，<br/>[1.4 task 配置为 run_ut](#14-task-配置为-run_ut)，<br/>[1.5 task 配置为 overflow_check](#15-task-配置为-overflow_check)，<br/>[1.6 task 配置为 free_benchmark](#16-task-配置为-free_benchmark)，<br/>[1.7 task 配置为 grad_probe](#17-task-配置为-grad_probe)。 <br/>[1.8 task 配置为 structure](#18-task-配置为-structure)。 <br/>  **配置示例**："task": "tensor"。 | 否       |
-| dump_path         | 设置 dump 数据目录路径，str 类型。<br/>  **配置示例**："dump_path": "./dump_path"。                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | 是       |
-| rank              | 指定对某张卡上的数据进行采集，list[Union[int, str]] 类型，默认未配置（表示采集所有卡的数据），应配置元素为 ≥0 的整数或类似"4-6"的字符串，且须配置实际可用的 Rank ID。<br/>  PyTorch 场景: Rank ID 从 0 开始计数，最大取值为所有节点可用卡总数-1，若所配置的值大于实际训练所运行的卡的 Rank ID，则 dump 数据为空，比如当前环境 Rank ID 为 0 到 7，实际训练运行 0 到 3 卡，此时若配置 Rank ID 为 4 或不存在的 10 等其他值，dump 数据为空。<br/>  MindSpore 场景：所有节点的 Rank ID 均从 0 开始计数，最大取值为每个节点可用卡总数-1，config.json 配置一次 rank 参数对所有节点同时生效。静态图 L0 级别 dump 暂不支持指定rank。<br/> 注意，单卡训练时，rank必须为[]，即空列表，不能指定rank。<br/>**配置示例**："rank": [1, "4-6"]。                                                                                                                                                                                                                                                                                                | 否       |
-| step              | 指定采集某个 step 的数据，list[Union[int, str]] 类型。默认未配置，表示采集所有 step 数据。采集特定 step 时，须指定为训练脚本中存在的 step，可逐个配置，也可以指定范围。<br/>  **配置示例**："step": [0, 1 , 2, "4-6"]。                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | 否       |
-| level             | dump 级别，str 类型，根据不同级别采集不同数据。可选参数：<br/>"L0"：dump 模块级精度数据，使用背景详见 [1.1.1 模块级精度数据 dump 说明](#111-模块级精度数据-dump-说明)；<br/>"L1"：dump API 级精度数据，默认值，仅 PyTorch、MSAdapter 以及 MindSpore 均支持；<br/>"L2"：dump kernel 级精度数据，PyTorch 场景详细介绍见 [PyTorch 场景的 kernel dump 说明](./04.kernel_dump_PyTorch.md)；MindSpore 动态图场景详细介绍见 [MindSpore 动态图场景的 kernel dump 说明](./28.kernel_dump_MindSpore.md)；MindSpore 静态图场景详细介绍见《MindSpore 场景的数据采集》中的 ["**8.1 静态图场景**"](./06.data_dump_MindSpore.md#81-静态图场景)小节；<br/>"mix"：dump module 模块级和 API 级精度数据，即"L0"+"L1"，仅 PyTorch、MSAdapter 以及 MindSpore 动态图场景支持。<br/>"debug"：单点保存功能，细节详见[单点保存工具 README](./28.debugger_save_instruction.md)<br/>  **配置示例**："level": "L1"。                                                                                                  | 否 |
-| enable_dataloader | 自动控制开关，bool 类型，仅 PyTorch 场景支持。可选参数 true（开启）或 false（关闭），默认为 false。配置为 true 后自动识别 step 参数指定的迭代，并在该迭代执行完成后退出训练，此时 start、stop 和 step 函数可不配置，开启该开关要求训练脚本是通过 torch.utils.data.dataloader 方式加载数据。仅支持 PyTorch 单卡训练使用，分布式训练场景下存在数据 dump 不全问题。 **这个特性下个版本将被废弃**                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | 否       |
-| async_dump        | 异步 dump 开关，bool 类型， 支持 task 为 tensor 或 statistic 模式， level 支持 L0、 L1、 mix、 debug 模式。可选参数 true（开启）或 false（关闭），默认为 false。配置为 true 后开启异步 dump，即采集的精度数据会在当前 step 训练结束后统一落盘，训练过程中工具不触发同步操作。由于使用该模式有**显存溢出**的风险，当 task 配置为 tensor 时，即真实数据的异步dump模式，必须配置 [list](#13-task-配置为-tensor) 参数，指定需要 dump 的 tensor 。该模式下，summary_mode 不支持 md5 值，也不支持复数类型 tensor 的统计量计算。<br/>                                                                                                                                                                                                                                                                                                                                                                                                                                              | 否       |
+| 参数                | 解释                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                | 是否必选 |
+|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- |
+| task              | dump 的任务类型，str 类型。可选参数：<br/>  "statistics"：仅采集统计信息，默认值；<br/> "tensor"：采集统计信息和完全复刻整网的真实数据；<br/> "run_ut"：精度预检，仅 PyTorch 场景支持，采集数据时勿选；<br/> "overflow_check"：溢出检测；<br/>  "free_benchmark"：无标杆比对，不支持 MSAdapter 场景；<br/>  "grad_probe"：梯度监控， 不支持 MSAdapter 场景； <br/> "structure"：仅采集模型结构以及调用栈信息，不采集具体数据。 <br/> 根据 task 参数取值的不同，可以配置不同场景参数，详见：<br/>[1.2 task 配置为 statistics](#12-task-配置为-statistics)，<br/>[1.3 task 配置为 tensor](#13-task-配置为-tensor)，<br/>[1.4 task 配置为 run_ut](#14-task-配置为-run_ut)，<br/>[1.5 task 配置为 overflow_check](#15-task-配置为-overflow_check)，<br/>[1.6 task 配置为 free_benchmark](#16-task-配置为-free_benchmark)，<br/>[1.7 task 配置为 grad_probe](#17-task-配置为-grad_probe)，<br/>[1.8 task 配置为 structure](#18-task-配置为-structure)。 <br/>  **配置示例**："task": "tensor"。 | 否       |
+| dump_path         | 设置 dump 数据目录路径，str 类型。<br/>  **配置示例**："dump_path": "./dump_path"。                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | 是       |
+| rank              | 指定对某张卡上的数据进行采集，list[Union[int, str]] 类型，默认未配置（表示采集所有卡的数据），应配置元素为 ≥0 的整数或类似"4-6"的字符串，且须配置实际可用的 Rank ID。<br/>  PyTorch 场景: Rank ID 从 0 开始计数，最大取值为所有节点可用卡总数-1，若所配置的值大于实际训练所运行的卡的 Rank ID，则 dump 数据为空，比如当前环境 Rank ID 为 0 到 7，实际训练运行 0 到 3 卡，此时若配置 Rank ID 为 4 或不存在的 10 等其他值，dump 数据为空。<br/>  MindSpore 场景：所有节点的 Rank ID 均从 0 开始计数，最大取值为每个节点可用卡总数-1，config.json 配置一次 rank 参数对所有节点同时生效。静态图 L0 级别 dump 暂不支持指定rank。<br/> 注意，单卡训练时，rank必须为[]，即空列表，不能指定rank。<br/>**配置示例**："rank": [1, "4-6"]。                                                                                                                                                                                                                                                                                               | 否       |
+| step              | 指定采集某个 step 的数据，list[Union[int, str]] 类型。默认未配置，表示采集所有 step 数据。采集特定 step 时，须指定为训练脚本中存在的 step，可逐个配置，也可以指定范围。<br/>  **配置示例**："step": [0, 1 , 2, "4-6"]。                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | 否       |
+| level             | dump 级别，str 类型，根据不同级别采集不同数据。可选参数：<br/>"L0"：dump 模块级精度数据，使用背景详见 [1.1.1 模块级精度数据 dump 说明](#111-模块级精度数据-dump-说明)。<br/>"L1"：dump API 级精度数据，默认值，仅 PyTorch、MSAdapter 以及 MindSpore 动态图场景支持。<br/>"L2"：dump kernel 级精度数据，PyTorch 场景详细介绍见 [PyTorch 场景的 kernel dump 说明](./04.kernel_dump_PyTorch.md)；MindSpore 动态图场景详细介绍见 [MindSpore 动态图场景的 kernel dump 说明](./28.kernel_dump_MindSpore.md)；MindSpore 静态图场景详细介绍见《MindSpore 场景的数据采集》中的 ["**8.1 静态图场景**"](./06.data_dump_MindSpore.md#81-静态图场景)小节。<br/>"mix"：dump module 模块级和 API 级精度数据，即"L0"+"L1"，仅 PyTorch、MSAdapter 以及 MindSpore 动态图场景支持。<br/>"debug"：单点保存功能，详见[单点保存工具](./28.debugger_save_instruction.md)。<br/>  **配置示例**："level": "L1"。                                                                                                     | 否 |
+| enable_dataloader | 自动控制开关，bool 类型，仅 PyTorch 场景支持。可选参数 true（开启）或 false（关闭），默认为 false。配置为 true 后自动识别 step 参数指定的迭代，并在该迭代执行完成后退出训练，此时 start、stop 和 step 函数可不配置，开启该开关要求训练脚本是通过 torch.utils.data.dataloader 方式加载数据。仅支持 PyTorch 单卡训练使用，分布式训练场景下存在数据 dump 不全问题。 **这个特性下个版本将被废弃**                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | 否       |
+| async_dump        | 异步 dump 开关，bool 类型， 支持 task 为 tensor 或 statistics 模式， level 支持 L0、 L1、 mix、 debug 模式。可选参数 true（开启）或 false（关闭），默认为 false。配置为 true 后开启异步 dump，即采集的精度数据会在当前 step 训练结束后统一落盘，训练过程中工具不触发同步操作。由于使用该模式有**显存溢出**的风险，当 task 配置为 tensor 时，即真实数据的异步 dump 模式，必须配置 [list](#13-task-配置为-tensor) 参数，指定需要 dump 的 tensor 。该模式下，summary_mode 不支持 md5 值，也不支持复数类型 tensor 的统计量计算。 <br/> | 否       |
+| precision         | 控制统计值计算所用精度，可选值["high", "low"]，默认值为"high"。选择"high"时，统计量使用 float32 进行计算，精度更高，但会增加 device 内存占用，数据量较大时可能会导致**显存溢出**；选择"low"时，使用与原始数据相同的类型进行计算，device 内存占用较少，但在处理较大数值时可能会导致统计量溢出。支持 PyTorch、MindSpore 动态图以及 MindSpore 静态图 O0/O1 场景。支持 task 配置为 statistics 或 tensor，level 配置为 L0、L1、mix、debug。 | 否       |
 
 #### 1.1.1 模块级精度数据 dump 说明
 
-PyTorch 与 MindSpore 均支持。
-
 大模型场景下，通常不是简单的利用自动迁移能力实现从 GPU 到 NPU 的训练脚本迁移，而是会对 NPU 网络进行一系列针对性的适配，因此，常常会造成迁移后的 NPU 模型存在部分子结构不能与 GPU 原始模型完全对应。模型结构不一致导致 API 调用类型及数量不一致，若直接按照 API 粒度进行精度数据 dump 和比对，则无法完全比对所有的 API。
 
 本小节介绍的功能是对模型中的大粒度模块进行数据 dump，使其比对时，对于无法以 API 粒度比对的模块可以直接以模块粒度进行比对。
@@ -48,12 +47,11 @@ PyTorch 与 MindSpore 均支持。
     <tr><td rowspan="2">tensor_list</td><td>自定义采集真实数据的算子列表，list[str] 类型，默认未配置。包含以下配置方法：</td><td rowspan="2">否</td></tr>
     <tr><td>PyTorch、MSAdapter 以及 MindSpore 动态图场景指定某一类 API 或模块，即会 dump 这一类 API 或模块输入输出的统计量信息和完整的 tensor 数据。<br/><b>配置示例</b>："tensor_list": ["relu"]。 <br/>    PyTorch、MSAdapter 以及 MindSpore 动态图场景目前只支持level配置为 L0, L1 和 mix 级别。 <br/>  MindSpore 静态图场景不支持。</td></tr>
     <tr><td>device</td><td>控制统计值计算所用的设备，可选值["device", "host"]，默认"host"。使用device计算会比host有性能加速，只支持min/max/avg/l2norm统计量。支持 MindSpore静态图 O0/O1 场景。</td><td>否</td></tr>
-    <tr><td>precision</td><td>控制统计值计算所用精度，可选值["high", "low"]，默认值为"high"。选择"high"时，avg/l2norm统计量使用float32进行计算，会增加device内存占用，精度更高；为"low"时使用与原始数据相同的类型进行计算，device内存占用较少，但在处理较大数值时可能会导致统计量溢出。支持 MindSpore静态图 O0/O1 场景。</td><td>否</td></tr>
-    <tr><td rowspan="3">data_mode</td><td>dump 数据过滤，str 类型。</td><td rowspan="3">否</td><tr><td>PyTorch、MSAdapter 以及 MindSpore 动态图场景：支持"all"、"forward"、"backward"、"input"和"output"，除"all"外，其余参数可以自由组合。默认为["all"]，即保存所有 dump 的数据。<br/> <b>配置示例</b>："data_mode": ["backward"] （仅保存反向数据）或 "data_mode": ["forward", "input"]（仅保存前向的输入数据）。</td></tr>
+    <tr><td rowspan="3">data_mode</td><td>dump 数据过滤，str 类型。</td><td rowspan="3">否</td></tr><tr><td>PyTorch、MSAdapter 以及 MindSpore 动态图场景：支持"all"、"forward"、"backward"、"input"和"output"，除"all"外，其余参数可以自由组合。默认为["all"]，即保存所有 dump 的数据。<br/> <b>配置示例</b>："data_mode": ["backward"] （仅保存反向数据）或 "data_mode": ["forward", "input"]（仅保存前向的输入数据）。</td></tr>
     <tr><td>MindSpore 静态图场景：L0 级别 dump 仅支持"all"、"forward"和"backward"参数；L2 级别 dump 仅支持"all"、"input"和"output"参数。且各参数只能单独配置，不支持自由组合。<br/><b>配置示例</b>："data_mode": ["all"]。</td></tr>
     <tr><td rowspan="3">summary_mode</td><td>控制 dump 文件输出的模式，str 类型，支持 PyTorch、MSAdapter、MindSpore 动态图以及 MindSpore 静态图 L2 级别 jit_level=O2 场景和 L0 级别 jit_level=O0/O1 场景。</td><td rowspan="3">否</td></tr>
     <tr><td>PyTorch、MSAdapter 以及 MindSpore 动态图场景：可选参数为<br/> md5：dump 输出包含 CRC-32 值以及 API 统计信息的 dump.json 文件，用于验证数据的完整性；<br/> statistics：dump 仅输出包含 API 统计信息的 dump.json 文件，默认值。<br/><b>配置示例</b>："summary_mode": "md5"。</td></tr>
-    <tr><td>MindSpore 静态图 jit_level=O2 场景：支持上述配置的同时额外支持配置统计项列表，可选统计项为max、min、mean、l2norm，可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。<br/><b>配置示例</b>："summary_mode": ["max", "min"]。</td></tr>
+    <tr><td>MindSpore 静态图 L2 级别 jit_level=O2 场景：支持上述配置的同时额外支持配置统计项列表，可选统计项为max、min、mean、l2norm，可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。<br/>MindSpore 静态图 L0 级别 jit_level=O0/O1场景：仅支持上述配置中"statistics"字段和max、min、mean、l2norm中任意组合搭配的统计项列表。<br/><b>配置示例</b>："summary_mode": ["max", "min"]。</td></tr>
 </table>
 
 **说明**：
diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py
index 04892755ec2d5a87ca4b63730325e9fbea016e64..393c66c0c888ef6128dc15fd428a21505f3afff8 100644
--- a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py
+++ b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py
@@ -55,6 +55,7 @@ class DebuggerConfig:
             self.td_config_path = "" if not task_config.td_config_path else task_config.td_config_path
         else:
             self.td_config_path = ""
+        self.precision = common_config.precision if common_config.precision else Const.DUMP_PRECISION_HIGH
         self.check()
         self._check_statistics_config(task_config)
         create_directory(self.dump_path)
diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py
index 8e3bfbcaf83e3a17a31b6a9484fbf983802b765e..2f43f240112a5f6d7024702cd9a474caa49cff0b 100644
--- a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py
+++ b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py
@@ -34,6 +34,7 @@ class DebuggerConfig:
         self.overflow_nums = task_config.overflow_nums if task_config.overflow_nums else 1
         self.framework = Const.PT_FRAMEWORK
         self.async_dump = common_config.async_dump if common_config.async_dump else False
+        self.precision = common_config.precision if common_config.precision else Const.DUMP_PRECISION_HIGH
 
         if self.task == Const.FREE_BENCHMARK:
             self.fuzz_device = task_config.fuzz_device
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py
index 23208269bd10303a1fec6417ae37df840fa36706..33071c0e01c800858a892061927d470c06bcff73 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py
@@ -1,19 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-# Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
 import hashlib
 import os
 import sys
@@ -99,12 +83,12 @@ class TestPytorchDataProcessor(unittest.TestCase):
     def test_get_stat_info_int(self):
         tensor = torch.tensor([1, 2, 3], dtype=torch.int32)
         result = self.processor.get_stat_info(tensor)
+
         self.assertEqual(result.max, 3)
         self.assertEqual(result.min, 1)
         self.assertEqual(result.mean, 2)
         self.assertEqual(result.norm, torch.norm(tensor.float()).item())
 
-
     def test_get_stat_info_empty(self):
         tensor = torch.tensor([])
         result = self.processor.get_stat_info(tensor)
@@ -242,6 +226,7 @@ class TestPytorchDataProcessor(unittest.TestCase):
         class TestReduceOp:
             def __str__(self):
                 raise Exception("failed to convert str type")
+
         arg = TestReduceOp()
         self.processor._analyze_reduce_op(arg)
         mock_logger_warning.assert_called_with(
@@ -298,9 +283,9 @@ class TestPytorchDataProcessor(unittest.TestCase):
         expected = {"type": 'int8', "value": 1}
         self.assertEqual(result, expected)
 
-        numpy_element = np.complex128(1+2j)
+        numpy_element = np.complex128(1 + 2j)
         result = self.processor.analyze_single_element(numpy_element, [])
-        expected = {"type": 'complex128', "value": (1+2j)}
+        expected = {"type": 'complex128', "value": (1 + 2j)}
         self.assertEqual(result, expected)
 
     def test_analyze_single_element_tensor(self):
@@ -320,6 +305,32 @@ class TestPytorchDataProcessor(unittest.TestCase):
         expected_result = self.processor._analyze_builtin(Ellipsis)
         self.assertEqual(result, expected_result)
 
+    @patch.object(PytorchDataProcessor, 'get_md5_for_tensor')
+    def test_analyze_tensor(self, get_md5_for_tensor):
+        get_md5_for_tensor.return_value = 'mocked_md5'  # stub the hash so the expected 'md5' value is fixed
+        tensor = torch.tensor([1.0, 2.0, 3.0])
+        self.config.summary_mode = 'md5'  # the expected dict below carries an 'md5' entry
+        self.config.async_dump = False
+        result = self.processor._analyze_tensor(tensor, 'suffix')
+        expected = {
+            'type': 'torch.Tensor',
+            'dtype': str(tensor.dtype),
+            'shape': tensor.shape,
+            'requires_grad': tensor.requires_grad,
+            'md5': 'mocked_md5'
+        }
+        result.pop('tensor_stat_index', None)  # drop the index key if present; it is not part of expected
+        self.assertDictEqual(expected, result)
+
+    def test_analyze_tensor_with_empty_tensor(self):
+        tensor = torch.tensor([])
+        result = self.processor._analyze_tensor(tensor, 'suffix')
+        # Only these four metadata fields are checked; any other keys in result are ignored.
+        self.assertEqual(result['type'], "torch.Tensor")
+        self.assertEqual(result['dtype'], 'torch.float32')
+        self.assertEqual(result['shape'], torch.Size([0]))
+        self.assertEqual(result['requires_grad'], False)
+
 
 class TestTensorDataProcessor(unittest.TestCase):
 
@@ -331,6 +342,24 @@ class TestTensorDataProcessor(unittest.TestCase):
         self.processor.current_api_or_module_name = "test_api"
         self.processor.api_data_category = "input"
 
+    @patch('torch.save')
+    def test_analyze_tensor(self, mock_save):  # torch.save is replaced by mock_save for this test
+        self.config.framework = "pytorch"
+        self.config.async_dump = False
+        tensor = torch.tensor([1.0, 2.0, 3.0])
+        suffix = 'suffix'
+        result = self.processor._analyze_tensor(tensor, suffix)
+        mock_save.assert_called_once()  # the analysis must trigger exactly one save call
+        expected = {
+            'type': 'torch.Tensor',
+            'dtype': 'torch.float32',
+            'shape': tensor.shape,
+            'requires_grad': False,
+            'data_name': 'test_api.input.suffix.pt'  # presumably <api name>.<category>.<suffix>.pt - confirm
+        }
+        result.pop('tensor_stat_index', None)  # drop the index key if present; it is not part of expected
+        self.assertEqual(expected, result)
+
 
 class TestOverflowCheckDataProcessor(unittest.TestCase):
 
@@ -346,6 +375,9 @@ class TestOverflowCheckDataProcessor(unittest.TestCase):
         sys.modules['torch_npu'] = Mock()
         sys.modules['torch_npu.npu'] = Mock()
         sys.modules['torch_npu.npu.utils'] = Mock()
+        self.tensor_json = {
+            'tensor_stat_index': 123  # 默认情况下 tensor_stat_index 存在
+        }
 
     def test_is_terminated(self):
         self.processor.overflow_nums = -1
@@ -360,7 +392,7 @@ class TestOverflowCheckDataProcessor(unittest.TestCase):
 
     def test_analyze_forward_input(self):
         with patch.object(BaseDataProcessor, "analyze_forward_input", return_value={"name": 1}):
-            api_info = self.processor.analyze_forward_input("name", "module","module_input_output")
+            api_info = self.processor.analyze_forward_input("name", "module", "module_input_output")
             self.assertEqual(self.processor.cached_api_info, {"name": 1})
             self.assertIsNone(api_info)
 
@@ -422,6 +454,57 @@ class TestOverflowCheckDataProcessor(unittest.TestCase):
         self.processor._is_support_inf_nan()
         self.assertTrue(self.processor.support_inf_nan)
 
+    def test_max_tensor_or_min_tensor_is_none(self):
+        # Make get_buffer_values_max and get_buffer_values_min return None
+        self.processor.data_writer.get_buffer_values_max.return_value = None
+        self.processor.data_writer.get_buffer_values_min.return_value = None
+
+        # In this case the call should return immediately without changing anything
+        self.processor._analyze_maybe_overflow_tensor(self.tensor_json)
+
+        # Make sure has_overflow has not been set
+        self.assertFalse(self.processor.has_overflow)
+
+    def test_tensor_is_inf_or_nan(self):
+        # Case 1: max_tensor is Inf, min_tensor is finite
+        self.processor.data_writer.get_buffer_values_max.return_value = torch.tensor(float('inf'))
+        self.processor.data_writer.get_buffer_values_min.return_value = torch.tensor(1.0)
+
+        # has_overflow should be set to True
+        self.processor._analyze_maybe_overflow_tensor(self.tensor_json)
+        self.assertTrue(self.processor.has_overflow)
+
+        # Case 2: min_tensor is NaN; clear the flag first, otherwise case 1 makes this check vacuous
+        self.processor.has_overflow = False
+        self.processor.data_writer.get_buffer_values_max.return_value = torch.tensor(1.0)
+        self.processor.data_writer.get_buffer_values_min.return_value = torch.tensor(float('nan'))
+
+        self.processor._analyze_maybe_overflow_tensor(self.tensor_json)
+        self.assertTrue(self.processor.has_overflow)
+
+    def test_normal_tensor(self):
+        # Simulate ordinary, finite max_tensor and min_tensor values
+        self.processor.data_writer.get_buffer_values_max.return_value = torch.tensor(1.0)
+        self.processor.data_writer.get_buffer_values_min.return_value = torch.tensor(-1.0)
+
+        # has_overflow should not be changed in the normal case
+        self.processor._analyze_maybe_overflow_tensor(self.tensor_json)
+        self.assertFalse(self.processor.has_overflow)
+
+    @patch('msprobe.core.common.file_utils.path_len_exceeds_limit', return_value=False)
+    @patch.object(BaseDataProcessor, 'get_save_file_path',
+                  return_value=['test_api_name', 'test_api_name.0.forward.input.pt'])
+    def test_analyze_tensor(self, mock_get_save_file_path, _mock_path_len_exceeds_limit):
+        # Stacked patches inject mocks bottom-up: get_save_file_path first, path_len_exceeds_limit second.
+        tensor, suffix = torch.tensor([1.0, 2.0, 3.0]), 'suffix'
+        expected = {'Max': 3.0, 'Min': 1.0, 'data_name': 'test_api_name'}
+        with patch.object(PytorchDataProcessor, '_analyze_tensor',
+                          return_value={'Max': 3.0, 'Min': 1.0}) as mock_super_analyze_tensor:
+            result = self.processor._analyze_tensor(tensor, suffix)
+            mock_super_analyze_tensor.assert_called_once_with(tensor, suffix)
+            mock_get_save_file_path.assert_called_once()
+            self.assertEqual(expected, result)
+
 
 class TestFreeBenchmarkDataProcessor(unittest.TestCase):