From 44c07cbf703946a1d8ec9846b783666aba0204e7 Mon Sep 17 00:00:00 2001 From: TAJh Date: Thu, 10 Jul 2025 10:55:49 +0800 Subject: [PATCH 1/2] add step --- .../msprobe/mindspore/__init__.py | 2 +- .../mindspore/compare/common_dir_compare.py | 26 ++++++++++++------- .../mindspore/dump/graph_tensor_dump.py | 9 +++++++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/__init__.py b/debug/accuracy_tools/msprobe/mindspore/__init__.py index 5005d6921e..c36ea84caa 100644 --- a/debug/accuracy_tools/msprobe/mindspore/__init__.py +++ b/debug/accuracy_tools/msprobe/mindspore/__init__.py @@ -25,4 +25,4 @@ except ImportError: from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger from msprobe.mindspore.common.utils import seed_all, MsprobeStep, MsprobeInitStep from msprobe.mindspore.monitor.module_hook import TrainerMon -from msprobe.mindspore.dump.graph_tensor_dump import save, save_grad \ No newline at end of file +from msprobe.mindspore.dump.graph_tensor_dump import save, save_grad, step \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py index ca7b383cd2..bbf5ae1483 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py @@ -152,21 +152,28 @@ def find_npy_files(directory): dirs.clear() for file in files: if file.endswith(".npy"): - # 分割文件名并去掉最后两个元素 - file_name = file.split('_') + file_name = file.strip('.npy') + key = None + if '_' in file_name: + # 分割文件名并去掉最后两个元素 + file_name = file_name.split('_') + elif '.' in file_name: + file_name = file_name.split('.') if len(file_name) < 2: - continue + continue key = '_'.join(file_name[:-2]) - # 文件的完整路径 - value = os.path.join(root, file) - # 添加到字典中 - if not npy_files_dict.get(key): - npy_files_dict[key] = [] - npy_files_dict[key].append(value) + if key: + # 文件的完整路径 + value = os.path.join(root, file) + # 添加到字典中 + if not npy_files_dict.get(key): + npy_files_dict[key] = [] + npy_files_dict[key].append(value) return npy_files_dict def generate_map_dict(npu_file_dict, bench_file_dict, name_map_dict=None): + result_dict = {} for k, npu_file_list in npu_file_dict.items(): bench_file_list = bench_file_dict.get(k) if not bench_file_list and k in name_map_dict: @@ -174,7 +181,6 @@ def generate_map_dict(npu_file_dict, bench_file_dict, name_map_dict=None): bench_length = len(bench_file_list) if not (bench_file_list and bench_length): continue - result_dict = {} for i, npu_file in enumerate(npu_file_list): if i >= bench_length: break diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py index 7b3f249e7e..58746c63eb 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py @@ -16,6 +16,8 @@ import os from collections import OrderedDict import mindspore as ms +from mindspore import hal, ops, Tensor +from mindspore.ops.primitive import _run_op def _iterate_items(data): @@ -121,3 +123,10 @@ def save_grad(save_dir, name, data): dump_dir = generate_dump_dir(save_dir) suffix_name = name + '_grad' return _SaveGradCell(dump_dir, suffix_name)(data) + + +def step(): + hal.synchronize() + temp_tensor = Tensor([1], dtype=ms.float32) + step_flag = "" + _run_op(ops.TensorDump(), "TensorDump", (step_flag, temp_tensor)) \ No newline at end of file -- Gitee From a4522773d7a58b7ad0affc8c3511d4f7692b964b Mon Sep 17 00:00:00 2001 From: TAJh <2559659915@qq.com> Date: Mon, 14 Jul 2025 11:57:16 +0800 Subject: [PATCH 2/2] bfx --- .../mindspore/compare/common_dir_compare.py | 25 ++++++++++--------- .../mindspore/dump/graph_tensor_dump.py | 4 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py index bbf5ae1483..ab10e007b1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/common_dir_compare.py @@ -152,21 +152,22 @@ def find_npy_files(directory): dirs.clear() for file in files: if file.endswith(".npy"): - file_name = file.strip('.npy') - key = None - if '_' in file_name: - # 分割文件名并去掉最后两个元素 - file_name = file_name.split('_') - elif '.' in file_name: - file_name = file_name.split('.') - if len(file_name) < 2: - continue - key = '_'.join(file_name[:-2]) + # 正确移除文件扩展名 + file_name = os.path.splitext(file)[0] + logger.info(f"Generating file info for file: {file}") + + # 使用一致的分割逻辑 + file_ele = file_name.split('_') + + if len(file_ele) < 2: + continue + + key = '_'.join(file_ele[:-2]) if key: - # 文件的完整路径 + # 文件的完整路径 value = os.path.join(root, file) # 添加到字典中 - if not npy_files_dict.get(key): + if key not in npy_files_dict: npy_files_dict[key] = [] npy_files_dict[key].append(value) return npy_files_dict diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py index 58746c63eb..6265114425 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/graph_tensor_dump.py @@ -129,4 +129,6 @@ def step(): hal.synchronize() temp_tensor = Tensor([1], dtype=ms.float32) step_flag = "" - _run_op(ops.TensorDump(), "TensorDump", (step_flag, temp_tensor)) \ No newline at end of file + _run_op(ops.TensorDump(), "TensorDump", (step_flag, temp_tensor)) + ops.tensordump(step_flag, temp_tensor) + hal.synchronize() \ No newline at end of file -- Gitee