diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py
index 10ebcfe20bc4a5c9f7951146fe3aff937c895dc2..353eab0c9a36ddaa4c1dbcafc5d9ed203df57c4b 100644
--- a/debug/accuracy_tools/msprobe/core/common/const.py
+++ b/debug/accuracy_tools/msprobe/core/common/const.py
@@ -73,6 +73,7 @@ class Const:
ONLINE_DUMP_MODE = [ALL, LIST, AUTO, OFF]
SUMMARY = "summary"
MD5 = "md5"
+ HASH = "hash"
VALUE = "value"
SUMMARY_MODE = ["statistics", "md5"]
diff --git a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md
index 7b70cd859ebca13db471837c469e7661e0a774f2..c93f95390579fa356d37698f92eb5cb31af5861c 100644
--- a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md
+++ b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md
@@ -51,13 +51,13 @@
MindSpore 静态图场景:L0 级别 dump 仅支持"all"、"forward"和"backward"参数;L2 级别 dump 仅支持"all"、"input"和"output"参数。且各参数只能单独配置,不支持自由组合。 配置示例:"data_mode": ["all"]。 |
summary_mode | 控制 dump 文件输出的模式,str 类型,支持 PyTorch、MSAdapter、MindSpore 动态图以及 MindSpore 静态图 L2 级别 jit_level=O2 场景和 L0 级别 jit_level=O0/O1 场景。 | 否 |
PyTorch、MSAdapter 以及 MindSpore 动态图场景:可选参数为 md5:dump 输出包含 CRC-32 值以及 API 统计信息的 dump.json 文件,用于验证数据的完整性; statistics:dump 仅输出包含 API 统计信息的 dump.json 文件,默认值。 配置示例:"summary_mode": "md5"。 |
- MindSpore 静态图 L2 级别 jit_level=O2 场景:支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm,可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。 MindSpore 静态图 L0 级别 jit_level=O0/O1场景:仅支持上述配置中"statistics"字段和max、min、mean、l2norm中任意组合搭配的统计项列表。 配置示例:"summary_mode": ["max", "min"]。 |
+ MindSpore 静态图 L2 级别 jit_level=O2 场景:支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm,可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。 MindSpore 静态图 L2 级别 jit_level=O0/O1 场景:支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm、count、negative zero count、zero count、positive zero count、nan count、negative inf count、positive inf count、hash、md5,可从中任意选取组合搭配。注意:hash 统计项在 MindSpore 2.7.0 及以前版本计算 MD5 值,在 2.7.0 之后的版本计算 SHA1 值。 MindSpore 静态图 L0 级别 jit_level=O0/O1场景:仅支持上述配置中"statistics"字段和max、min、mean、l2norm中任意组合搭配的统计项列表。 配置示例:"summary_mode": ["max", "min"]。 |
**说明**:
-1. "summary_mode" 配置为 "md5" 时,所使用的校验算法为 CRC-32 算法。
+1. PyTorch、MSAdapter 以及 MindSpore 动态图场景,"summary_mode" 配置为 "md5" 时,所使用的校验算法为 CRC-32 算法;MindSpore 静态图场景,"summary_mode" 配置为 "md5" 时,所使用的校验算法为 MD5 算法。
**示例**:
- [PyTorch场景](03.config_examples.md#11-task-配置为-statistics)
diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py
index 91a6ab93abad35c39810d2b3e9ec731605694aa5..987b80ef1256331731ebff741ac9e3eae128dd12 100644
--- a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py
+++ b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_kbyk_dump.py
@@ -20,6 +20,9 @@ from msprobe.core.common.file_utils import create_directory, save_json
from msprobe.mindspore.common.log import logger
from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
+import mindspore as ms
+ms_version = ms.__version__
+
class KernelKbykDump:
COMMON_SETTINGS = "common_dump_settings"
@@ -39,6 +42,7 @@ class KernelKbykDump:
common_set["input_output"] = 0
common_set["kernels"] = []
common_set["support_device"] = [0, 1, 2, 3, 4, 5, 6, 7]
+ common_set["statistic_category"] = []
if config.stat_cal_mode and config.device_stat_precision_mode:
e2e_set = {
@@ -71,9 +75,29 @@ class KernelKbykDump:
common_set["input_output"] = 1
if config.data_mode[0] == Const.OUTPUT:
common_set["input_output"] = 2
+ if config.summary_mode:
+ if isinstance(config.summary_mode, str):
+ if config.summary_mode == Const.STATISTICS:
+ common_set["statistic_category"] = ["max", "min", "avg", "l2norm"]
+ else:
+ mode = self._process_hash(config.summary_mode)
+ common_set["statistic_category"] = [mode]
+ elif isinstance(config.summary_mode, list):
+ common_set["statistic_category"] = list({
+ self._process_hash("avg" if mode == Const.MEAN else mode)
+ for mode in config.summary_mode
+ })
self.dump_json[KernelKbykDump.COMMON_SETTINGS] = common_set
self.dump_json[KernelKbykDump.E2E_SETTINGS] = e2e_set
+
+    @staticmethod
+    def _process_hash(value):
+        """Map a hash-related summary option to the statistic name for the installed MindSpore.
+
+        Args:
+            value: A single summary_mode option, e.g. "hash", "md5" or "max".
+
+        Returns:
+            "md5" for Const.HASH / Const.MD5 when the installed MindSpore release is
+            2.7.0 or older, "hash:md5" for Const.MD5 on newer releases, and ``value``
+            unchanged in every other case.
+        """
+        # Compare the release numerically. Comparing the raw strings is lexicographic,
+        # so "2.10.0" <= "2.7.0" would be True and a newer release would wrongly take
+        # the legacy branch. Anything after the leading digits of a component
+        # (e.g. the "rc1" in "2.7.0rc1") is ignored.
+        release = []
+        for part in ms_version.split(".")[:3]:
+            digits = ""
+            for char in part:
+                if not "0" <= char <= "9":
+                    break
+                digits += char
+            release.append(int(digits) if digits else 0)
+        # Pad short versions such as "2.7" so they compare equal to "2.7.0".
+        release.extend([0] * (3 - len(release)))
+
+        if tuple(release) <= (2, 7, 0) and value in (Const.HASH, Const.MD5):
+            return "md5"
+        if value == Const.MD5:
+            return "hash:md5"
+        return value
def handle(self):
json_path = self.dump_json[KernelKbykDump.COMMON_SETTINGS]["path"]
diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py
index 4b73ad5bdebbc3b6b2bdceb2d34b89264aa4f013..d50b177a2fc30feb6b24623d552f4921b432abdd 100644
--- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py
+++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py
@@ -59,11 +59,12 @@ class StatisticsConfig(BaseConfig):
raise Exception("Config param [precision] is invalid, expected from [\"high\", \"low\"]")
def _check_summary_mode(self):
+ # Validate self.summary_mode: a str must be one of Const.SUMMARY_MODE, and a list
+ # may only contain entries from muti_opt. Values of any other type are not
+ # checked here. Raises Exception when either check fails.
- muti_opt = ["md5", "max", "min", "mean", "l2norm"]
+ muti_opt = ["max", "min", "mean", "count", "negative zero count", "positive zero count", "nan count",
+ "negative inf count", "positive inf count", "zero count", "l2norm", "hash", "md5"]
if isinstance(self.summary_mode, str) and self.summary_mode not in Const.SUMMARY_MODE:
- raise Exception("summary_mode is invalid")
+ raise Exception("summary_mode is an invalid string")
if isinstance(self.summary_mode, list) and not all(opt in muti_opt for opt in self.summary_mode):
- raise Exception("summary_mode is invalid")
+ raise Exception("summary_mode contains invalid option(s)")
class OverflowCheckConfig(BaseConfig):
diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py
index 9be887eb4be1be00d68b12ca5181f7371dcf075e..1132cc4d73c4bcfade89a46d90274081eafbd3ed 100644
--- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py
+++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_kernel_kbyk_dump.py
@@ -24,6 +24,11 @@ from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
from msprobe.mindspore.ms_config import StatisticsConfig
from msprobe.mindspore.dump.kernel_kbyk_dump import KernelKbykDump
+from collections import Counter
+
+import mindspore as ms
+ms_version = ms.__version__
+
class TestKernelKbykDump(TestCase):
@patch("msprobe.mindspore.debugger.debugger_config.create_directory")
@@ -379,4 +384,42 @@ class TestKernelKbykDump(TestCase):
patch("msprobe.mindspore.dump.kernel_kbyk_dump.save_json") as mock_save_json:
dumper.handle()
mock_info.assert_called_with("/absolute_path/kernel_kbyk_dump.json has been created.")
- self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None)
\ No newline at end of file
+ self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None)
+
+    @patch("msprobe.mindspore.debugger.debugger_config.create_directory")
+    def test_handle_statistics(self, _):
+        """Check the statistic_category mapping for both MindSpore version branches, then handle()."""
+
+        def build_config():
+            # Build a fresh config per dumper so no state is shared between cases.
+            json_config = {
+                "task": "statistics",
+                "dump_path": "/absolute_path",
+                "rank": [],
+                "step": [0, 2],
+                "level": "L2",
+                "statistics": {
+                    "list": [],
+                    "data_mode": ["all"],
+                    "device": "host",
+                    "summary_mode": ["hash", "md5", "max"]
+                }
+            }
+            common_config = CommonConfig(json_config)
+            task_config = StatisticsConfig(json_config["statistics"])
+            return DebuggerConfig(common_config, task_config)
+
+        # Pin ms_version instead of branching on the installed release: both branches
+        # are then always exercised, and the test no longer relies on comparing version
+        # strings lexicographically (which misorders e.g. "2.10.0" and "2.7.0").
+        expected_by_version = {
+            "2.7.0": ["max", "md5"],
+            "2.8.0": ["max", "hash", "hash:md5"],
+        }
+        for version, expected in expected_by_version.items():
+            with self.subTest(ms_version=version), \
+                    patch("msprobe.mindspore.dump.kernel_kbyk_dump.ms_version", version):
+                versioned_dumper = KernelKbykDump(build_config())
+                common_set = versioned_dumper.dump_json["common_dump_settings"]
+                self.assertEqual(versioned_dumper.dump_json["e2e_dump_settings"]["stat_calc_mode"], "host")
+                self.assertEqual(common_set["saved_data"], "statistic")
+                # Order is irrelevant, so compare as multisets.
+                self.assertEqual(Counter(common_set["statistic_category"]), Counter(expected))
+
+        dumper = KernelKbykDump(build_config())
+        # patch.dict restores os.environ on exit, even when an assertion fails, so
+        # neither MS_ACL_DUMP_CFG_PATH nor anything handle() sets leaks into other tests.
+        with patch.dict(os.environ, {"MS_ACL_DUMP_CFG_PATH": "path"}), \
+                patch("msprobe.mindspore.dump.kernel_kbyk_dump.create_directory"), \
+                patch("msprobe.mindspore.dump.kernel_kbyk_dump.logger.info") as mock_info, \
+                patch("msprobe.mindspore.dump.kernel_kbyk_dump.save_json") as mock_save_json:
+            dumper.handle()
+            self.assertIn("kernel_kbyk_dump.json", mock_save_json.call_args_list[0][0][0])
+            mock_info.assert_called_with("/absolute_path/kernel_kbyk_dump.json has been created.")
+            # The variable was set on entry, so None here means handle() removed it.
+            self.assertEqual(os.environ.get("MS_ACL_DUMP_CFG_PATH"), None)
\ No newline at end of file