From 15a370a1e2a1ed5ff2f82e3d38dcf256c9ec39d4 Mon Sep 17 00:00:00 2001 From: jiandaobao Date: Wed, 25 Jun 2025 14:43:05 +0800 Subject: [PATCH 1/2] threshold zero --- .../pytorch/free_benchmark/result_handlers/base_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py index 47f93ab7b..e0d583dd0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py @@ -186,6 +186,8 @@ class FuzzHandler(ABC): ratio = self.ratio_calculate( origin_output, perturbed_output, norm_type=NormType.ENDLESS_NORM ) + if threshold == 0: + raise ValueError("Threshold cannot be zero. Check `get_threshold` implementation.") if ratio == ThresholdConfig.SYMBOL_FLIPPING: is_consistent = False else: -- Gitee From 1a76ff43df988c1e7f91ce13f86d621817aa4d51 Mon Sep 17 00:00:00 2001 From: jiandaobao Date: Tue, 1 Jul 2025 14:57:03 +0800 Subject: [PATCH 2/2] monvis input supported. --- debug/accuracy_tools/msprobe/core/common/const.py | 5 +++++ debug/accuracy_tools/msprobe/pytorch/monitor/csv2db.py | 7 ++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index b46144e5c..5f27bf467 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -743,6 +743,11 @@ class MonitorConst: DEFAULT_STEP_INTERVAL = 1 OP_LIST = ["norm", "min", "max", "zeros", "nans", "id", "mean", "shape", "dtype"] + OP_MONVIS_SUPPORTED = [ + "norm", "min", "max", "zeros", "nans", "mean", + "entropy", "softmax_max", "sr", "kernel_norm", "std_x", "jacobian", + "proxy", "token_similarity" + ] MONITOR_OUTPUT_DIR = "MONITOR_OUTPUT_DIR" DEFAULT_MONITOR_OUTPUT_DIR = "./monitor_output" DATABASE = "database" diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/csv2db.py b/debug/accuracy_tools/msprobe/pytorch/monitor/csv2db.py index ade9ea6d1..32873b668 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/csv2db.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/csv2db.py @@ -37,7 +37,8 @@ from tqdm import tqdm all_data_type_list = [ "actv", "actv_grad", "exp_avg", "exp_avg_sq", - "grad_unreduced", "grad_reduced", "param_origin", "param_updated" + "grad_unreduced", "grad_reduced", "param_origin", "param_updated", + "linear_hook", "norm_hook", "proxy_model", "token_hook", "attention_hook" ] DEFAULT_INT_VALUE = 0 MAX_PROCESS_NUM = 128 @@ -83,7 +84,7 @@ def update_with_order_dict(main_dict, new_list): def get_ordered_stats(stats): if not isinstance(stats, Iterable): return [] - return [stat for stat in MonitorConst.OP_LIST if stat in stats] + return [stat for stat in MonitorConst.OP_MONVIS_SUPPORTED if stat in stats] def pre_scan_single_rank(rank, files): @@ -106,7 +107,7 @@ def pre_scan_single_rank(rank, files): max_step = step_end if max_step < step_end else max_step data = read_csv(file_path) - stats = [k for k in data.keys() if k in MonitorConst.OP_LIST] + stats = [k for k in data.keys() if k in MonitorConst.OP_MONVIS_SUPPORTED] metric_stats[metric_name].update(stats) for _, row in data.iterrows(): -- Gitee