From b89a4a057e4d51ec37e16e2c31e251fcc1d81fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=8B=E4=BD=B3=E7=90=AA?= Date: Fri, 15 Aug 2025 10:24:20 +0800 Subject: [PATCH] dump_hash --- .../msprobe/core/common/const.py | 1 + .../msprobe/docs/02.config_introduction.md | 4 +- .../mindspore/dump/kernel_kbyk_dump.py | 24 ++++++++++ .../msprobe/mindspore/ms_config.py | 7 +-- .../mindspore_ut/test_kernel_kbyk_dump.py | 45 ++++++++++++++++++- 5 files changed, 75 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 10ebcfe20b..353eab0c9a 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -73,6 +73,7 @@ class Const: ONLINE_DUMP_MODE = [ALL, LIST, AUTO, OFF] SUMMARY = "summary" MD5 = "md5" + HASH = "hash" VALUE = "value" SUMMARY_MODE = ["statistics", "md5"] diff --git a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md index 7b70cd859e..c93f953905 100644 --- a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md +++ b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md @@ -51,13 +51,13 @@ MindSpore 静态图场景:L0 级别 dump 仅支持"all"、"forward"和"backward"参数;L2 级别 dump 仅支持"all"、"input"和"output"参数。且各参数只能单独配置,不支持自由组合。
配置示例:"data_mode": ["all"]。 summary_mode控制 dump 文件输出的模式,str 类型,支持 PyTorch、MSAdapter、MindSpore 动态图以及 MindSpore 静态图 L2 级别 jit_level=O2 场景和 L0 级别 jit_level=O0/O1 场景。否 PyTorch、MSAdapter 以及 MindSpore 动态图场景:可选参数为
md5:dump 输出包含 CRC-32 值以及 API 统计信息的 dump.json 文件,用于验证数据的完整性;
statistics:dump 仅输出包含 API 统计信息的 dump.json 文件,默认值。
配置示例:"summary_mode": "md5"。 - MindSpore 静态图 L2 级别 jit_level=O2 场景:支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm,可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。
MindSpore 静态图 L0 级别 jit_level=O0/O1场景:仅支持上述配置中"statistics"字段和max、min、mean、l2norm中任意组合搭配的统计项列表。
配置示例:"summary_mode": ["max", "min"]。 + MindSpore 静态图 L2 级别 jit_level=O2 场景:支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm,可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。
MindSpore 静态图 L2 级别 jit_level=O0/O1 场景:支持上述配置的同时额外支持配置统计项列表,可选统计项为 max、min、mean、l2norm、count、negative zero count、zero count、positive zero count、nan count、negative inf count、positive inf count、hash、md5,可从中任意选取组合搭配。注意:hash 统计项在 MindSpore 2.7.0 及之前的版本中计算 MD5 值,在 2.7.0 之后的版本中计算 SHA1 值。
MindSpore 静态图 L0 级别 jit_level=O0/O1场景:仅支持上述配置中"statistics"字段和max、min、mean、l2norm中任意组合搭配的统计项列表。
配置示例:"summary_mode": ["max", "min"]。 **说明**: -1. "summary_mode" 配置为 "md5" 时,所使用的校验算法为 CRC-32 算法。 +1. PyTorch、MSAdapter 以及 MindSpore 动态图场景,"summary_mode" 配置为 "md5" 时,所使用的校验算法为 CRC-32 算法;MindSpore 静态图场景,"summary_mode" 配置为 "md5" 时,所使用的校验算法为 MD5 算法。 **示例**: - [PyTorch场景](03.config_examples.md#11-task-配置为-statistics) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py index 91a6ab93ab..987b80ef12 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py @@ -20,6 +20,9 @@ from msprobe.core.common.file_utils import create_directory, save_json from msprobe.mindspore.common.log import logger from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +import mindspore as ms +ms_version = ms.__version__ + class KernelKbykDump: COMMON_SETTINGS = "common_dump_settings" @@ -39,6 +42,7 @@ class KernelKbykDump: common_set["input_output"] = 0 common_set["kernels"] = [] common_set["support_device"] = [0, 1, 2, 3, 4, 5, 6, 7] + common_set["statistic_category"] = [] if config.stat_cal_mode and config.device_stat_precision_mode: e2e_set = { @@ -71,9 +75,29 @@ class KernelKbykDump: common_set["input_output"] = 1 if config.data_mode[0] == Const.OUTPUT: common_set["input_output"] = 2 + if config.summary_mode: + if isinstance(config.summary_mode, str): + if config.summary_mode == Const.STATISTICS: + common_set["statistic_category"] = ["max", "min", "avg", "l2norm"] + else: + mode = self._process_hash(config.summary_mode) + common_set["statistic_category"] = [mode] + elif isinstance(config.summary_mode, list): + common_set["statistic_category"] = list({ + self._process_hash("avg" if mode == Const.MEAN else mode) + for mode in config.summary_mode + }) self.dump_json[KernelKbykDump.COMMON_SETTINGS] = common_set self.dump_json[KernelKbykDump.E2E_SETTINGS] = e2e_set + + @staticmethod + def 
_process_hash(value): + if ms_version <= "2.7.0" and (value == Const.HASH or value == Const.MD5): + value = "md5" + elif value == Const.MD5: + value = "hash:md5" + return value def handle(self): json_path = self.dump_json[KernelKbykDump.COMMON_SETTINGS]["path"] diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index 4b73ad5bde..d50b177a2f 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -59,11 +59,12 @@ class StatisticsConfig(BaseConfig): raise Exception("Config param [precision] is invalid, expected from [\"high\", \"low\"]") def _check_summary_mode(self): - muti_opt = ["md5", "max", "min", "mean", "l2norm"] + muti_opt = ["max", "min", "mean", "count", "negative zero count", "positive zero count", "nan count", + "negative inf count", "positive inf count", "zero count", "l2norm", "hash", "md5"] if isinstance(self.summary_mode, str) and self.summary_mode not in Const.SUMMARY_MODE: - raise Exception("summary_mode is invalid") + raise Exception("summary_mode is an invalid string") if isinstance(self.summary_mode, list) and not all(opt in muti_opt for opt in self.summary_mode): - raise Exception("summary_mode is invalid") + raise Exception("summary_mode contains invalid option(s)") class OverflowCheckConfig(BaseConfig): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py index 9be887eb4b..1132cc4d73 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py @@ -24,6 +24,11 @@ from msprobe.mindspore.debugger.debugger_config import DebuggerConfig from msprobe.mindspore.ms_config import StatisticsConfig from msprobe.mindspore.dump.kernel_kbyk_dump import KernelKbykDump +from collections import Counter + +import mindspore 
as ms +ms_version = ms.__version__ + class TestKernelKbykDump(TestCase): @patch("msprobe.mindspore.debugger.debugger_config.create_directory") @@ -379,4 +384,42 @@ class TestKernelKbykDump(TestCase): patch("msprobe.mindspore.dump.kernel_kbyk_dump.save_json") as mock_save_json: dumper.handle() mock_info.assert_called_with("/absolute_path/kernel_kbyk_dump.json has been created.") - self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None) \ No newline at end of file + self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None) + + @patch("msprobe.mindspore.debugger.debugger_config.create_directory") + def test_handle_statistics(self, _): + json_config = { + "task": "statistics", + "dump_path": "/absolute_path", + "rank": [], + "step": [0, 2], + "level": "L2", + "statistics": { + "list": [], + "data_mode": ["all"], + "device": "host", + "summary_mode": ["hash", "md5", "max"] + } + } + + common_config = CommonConfig(json_config) + task_config = StatisticsConfig(json_config["statistics"]) + config = DebuggerConfig(common_config, task_config) + dumper = KernelKbykDump(config) + self.assertEqual(dumper.dump_json["e2e_dump_settings"]["stat_calc_mode"], "host") + self.assertEqual(dumper.dump_json["common_dump_settings"]["saved_data"], "statistic") + if ms_version > "2.7.0": + self.assertEqual(Counter(dumper.dump_json["common_dump_settings"]["statistic_category"]), Counter(["max", "hash", "hash:md5"])) + else: + self.assertEqual(Counter(dumper.dump_json["common_dump_settings"]["statistic_category"]), Counter(["max", "md5"])) + os.environ["MS_ACL_DUMP_CFG_PATH"] = "path" + with patch("msprobe.mindspore.dump.kernel_kbyk_dump.create_directory"), \ + patch("msprobe.mindspore.dump.kernel_kbyk_dump.logger.info") as mock_info, \ + patch("msprobe.mindspore.dump.kernel_kbyk_dump.save_json") as mock_save_json: + dumper.handle() + self.assertIn("kernel_kbyk_dump.json", mock_save_json.call_args_list[0][0][0]) + mock_info.assert_called_with("/absolute_path/kernel_kbyk_dump.json 
has been created.") + + self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None) + if "MINDSPORE_DUMP_CONFIG" in os.environ: + del os.environ["MINDSPORE_DUMP_CONFIG"] \ No newline at end of file -- Gitee