From 890b14c724b6197ba57962de466914db71ed3164 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 7 Mar 2025 17:24:54 +0800 Subject: [PATCH 01/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 56 ++++++++++++++++++- .../msprobe/mindspore/compare/ms_compare.py | 36 +++--------- .../msprobe/pytorch/compare/pt_compare.py | 26 ++------- 3 files changed, 67 insertions(+), 51 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index f2aa8c479e..b9429be513 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -17,6 +17,7 @@ import multiprocessing import os import re from copy import deepcopy +from dataclasses import dataclass import pandas as pd from tqdm import tqdm @@ -24,16 +25,65 @@ from tqdm import tqdm from msprobe.core.advisor.advisor import Advisor from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import load_json, remove_path +from msprobe.core.common.file_utils import load_json, remove_path, create_directory from msprobe.core.common.log import logger -from msprobe.core.common.utils import CompareException, add_time_with_xlsx, check_op_str_pattern_valid, safe_get_value +from msprobe.core.common.utils import CompareException, add_time_with_xlsx, check_op_str_pattern_valid, safe_get_value, set_dump_path, get_dump_mode, check_compare_param, check_configuration_param from msprobe.core.compare.check import check_dump_json_str, check_graph_mode, check_stack_json_str, \ check_struct_match, fuzzy_check_op from msprobe.core.compare.highlight import find_compare_result_error_rows, highlight_rows_xlsx from msprobe.core.compare.multiprocessing_compute import ComparisonResult, _handle_multi_process, _save_cmp_result from msprobe.core.compare.npy_compare import compare_ops_apply, get_error_flag_and_msg from msprobe.core.compare.utils import get_accuracy, get_rela_diff_summary_mode, get_un_match_accuracy, merge_tensor, \ - print_compare_ends_info, read_op, get_name_and_state, reorder_op_x_list + print_compare_ends_info, read_op, get_name_and_state, reorder_op_x_list, set_stack_json_path + + +@dataclass +class ComparisonConfig: + dump_mode: str + stack_mode: bool + auto_analyze: bool + fuzzy_match: bool + data_mapping: dict + suffix: str + cell_mapping: dict + api_mapping: dict + layer_mapping: dict + + +def setup_comparison(input_param, output_path, **kwargs) -> ComparisonConfig: + """公共的前置处理逻辑,返回封装后的 ComparisonConfig 对象""" + try: + config = ComparisonConfig( + dump_mode='', + stack_mode=False, + auto_analyze=kwargs.get('auto_analyze', True), + fuzzy_match=kwargs.get('fuzzy_match', False), + data_mapping=kwargs.get('data_mapping', {}), + suffix=kwargs.get('suffix', ''), + cell_mapping=kwargs.get('cell_mapping', {}), + api_mapping=kwargs.get('api_mapping', {}), + layer_mapping=kwargs.get('layer_mapping', {}), + ) + + set_dump_path(input_param) + config.dump_mode = get_dump_mode(input_param) + + # set stack_mode and set "stack_json_path" in input_param + if 'stack_json_path' in input_param: + config.stack_mode = kwargs.get('stack_mode', False) + else: + config.stack_mode = set_stack_json_path(input_param) + + check_configuration_param(config.stack_mode, config.auto_analyze, config.fuzzy_match, + input_param.get('is_print_compare_log', True)) + create_directory(output_path) + check_compare_param(input_param, output_path, config.dump_mode, config.stack_mode) + + return config + + except (CompareException, FileCheckException) as error: + logger.error('Compare failed. Please check the arguments and do it again!') + raise CompareException(error.code) from error class ModeConfig: diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 4f158512bb..f45426089c 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -30,6 +30,7 @@ from msprobe.core.compare.acc_compare import Comparator, ModeConfig from msprobe.core.compare.check import dtype_mapping from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping from msprobe.core.compare.utils import set_stack_json_path, reorder_op_x_list +from msprobe.core.compare.acc_compare import setup_comparison class MappingConfig: @@ -399,32 +400,13 @@ def check_cross_framework(bench_json_path): def ms_compare(input_param, output_path, **kwargs): - try: - auto_analyze = kwargs.get('auto_analyze', True) - fuzzy_match = kwargs.get('fuzzy_match', False) - cell_mapping = kwargs.get('cell_mapping', None) - api_mapping = kwargs.get('api_mapping', None) - data_mapping = kwargs.get('data_mapping', None) - layer_mapping = kwargs.get('layer_mapping', None) - suffix = kwargs.get('suffix', '') - - set_dump_path(input_param) - dump_mode = get_dump_mode(input_param) - if 'stack_json_path' in input_param: - stack_mode = kwargs.get('stack_mode', False) - else: - stack_mode = set_stack_json_path(input_param) # set stack_mode and set "stack_json_path" in input_param - check_configuration_param(stack_mode, auto_analyze, fuzzy_match, input_param.get('is_print_compare_log', True)) - create_directory(output_path) - check_compare_param(input_param, output_path, dump_mode, stack_mode) - except (CompareException, FileCheckException) as error: - logger.error('Compare failed. Please check the arguments and do it again!') - raise CompareException(error.code) from error - if layer_mapping: - data_mapping = generate_data_mapping_by_layer_mapping(input_param, layer_mapping, output_path) - - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - mapping_config = MappingConfig(cell_mapping, api_mapping, data_mapping) + config = setup_comparison(input_param, output_path, **kwargs) + + if config.layer_mapping: + config.data_mapping = generate_data_mapping_by_layer_mapping(input_param, config.layer_mapping, output_path) + is_cross_framework = check_cross_framework(input_param.get('bench_json_path')) + mode_config = ModeConfig(config.stack_mode, config.auto_analyze, config.fuzzy_match, config.dump_mode) + mapping_config = MappingConfig(config.cell_mapping, config.api_mapping, config.data_mapping) ms_comparator = MSComparator(mode_config, mapping_config, is_cross_framework) - ms_comparator.compare_core(input_param, output_path, suffix=suffix) + ms_comparator.compare_core(input_param, output_path, suffix=config.suffix) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 308a82b3d6..760553ac84 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -26,6 +26,7 @@ from msprobe.core.compare.acc_compare import Comparator, ModeConfig from msprobe.core.compare.utils import set_stack_json_path from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import load_pt +from msprobe.core.compare.acc_compare import setup_comparison class PTComparator(Comparator): @@ -80,25 +81,8 @@ class PTComparator(Comparator): def compare(input_param, output_path, **kwargs): - try: - auto_analyze = kwargs.get('auto_analyze', True) - fuzzy_match = kwargs.get('fuzzy_match', False) - data_mapping = kwargs.get('data_mapping', None) - suffix = kwargs.get('suffix', '') + config = setup_comparison(input_param, output_path, **kwargs) - set_dump_path(input_param) - dump_mode = get_dump_mode(input_param) - if "stack_json_path" in input_param: - stack_mode = kwargs.get('stack_mode', False) - else: - stack_mode = set_stack_json_path(input_param) # set stack_mode and set "stack_json_path" in input_param - check_configuration_param(stack_mode, auto_analyze, fuzzy_match, input_param.get('is_print_compare_log', True)) - create_directory(output_path) - check_compare_param(input_param, output_path, dump_mode, stack_mode) - except (CompareException, FileCheckException) as error: - logger.error('Compare failed. Please check the arguments and do it again!') - raise CompareException(error.code) from error - - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - pt_comparator = PTComparator(mode_config, data_mapping) - pt_comparator.compare_core(input_param, output_path, suffix=suffix) + mode_config = ModeConfig(config.stack_mode, config.auto_analyze, config.fuzzy_match, config.dump_mode) + pt_comparator = PTComparator(mode_config, config.data_mapping) + pt_comparator.compare_core(input_param, output_path, suffix=config.suffix) -- Gitee From 420066df20636642c3f66d92333e566cb02e96e4 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 7 Mar 2025 17:47:18 +0800 Subject: [PATCH 02/27] compare reconstruct --- .../msprobe/mindspore/compare/ms_compare.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index f45426089c..6451f20dea 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -249,14 +249,14 @@ class MSComparator(Comparator): raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error return api_name - def compare_process(self, file_lists): - npu_json_path, bench_json_path, stack_json_path = file_lists - npu_json_data = load_json(npu_json_path) - bench_json_data = load_json(bench_json_path) - stack_json_data = load_json(stack_json_path) if self.stack_mode else None - - npu_df = self.gen_data_df(npu_json_data, stack_json_data) - bench_df = self.gen_data_df(bench_json_data, stack_json_data) + def assign_npu_df_compare_key(self, npu_df, bench_df): + """ + 处理 npu_df 的 COMPARE_KEY 赋值逻辑。 + + :param npu_df: DataFrame,NPU 计算数据 + :param bench_df: DataFrame,对比数据 + :return: 处理后的 npu_df + """ if self.cell_mapping: npu_df[CompareConst.COMPARE_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) elif self.api_mapping: @@ -265,11 +265,25 @@ class MSComparator(Comparator): self.modify_compare_data_with_user_mapping(npu_df, bench_df) else: npu_df[CompareConst.COMPARE_KEY] = npu_df[CompareConst.OP_NAME] + + return npu_df + + def compare_process(self, file_lists): + npu_json_path, bench_json_path, stack_json_path = file_lists + npu_json_data = load_json(npu_json_path) + bench_json_data = load_json(bench_json_path) + stack_json_data = load_json(stack_json_path) if self.stack_mode else None + + npu_df = self.gen_data_df(npu_json_data, stack_json_data) + bench_df = self.gen_data_df(bench_json_data, stack_json_data) + + npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) npu_df[[Const.DTYPE, Const.SHAPE]] = npu_df[[Const.DTYPE, Const.SHAPE]].astype(str) bench_df[[Const.DTYPE, Const.SHAPE]] = bench_df[[Const.DTYPE, Const.SHAPE]].astype(str) npu_df[CompareConst.COMPARE_SHAPE] = npu_df[Const.SHAPE] bench_df[CompareConst.COMPARE_KEY] = bench_df[CompareConst.OP_NAME] bench_df[CompareConst.COMPARE_SHAPE] = bench_df[Const.SHAPE] + match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY, CompareConst.COMPARE_SHAPE], how='outer') match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) -- Gitee From 33b6771898f3121d46dad6fc9c147c9ee7492cf6 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 10 Mar 2025 10:50:30 +0800 Subject: [PATCH 03/27] compare reconstruct --- .../accuracy_tools/msprobe/core/compare/acc_compare.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index b9429be513..079bcec770 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -474,15 +474,17 @@ class Comparator: Returns: """ + logger.info("Please check whether the input data belongs to you. If not, there may be security risks.") + # get kwargs or set default value suffix = kwargs.get('suffix', '') - logger.info("Please check whether the input data belongs to you. If not, there may be security risks.") + # process output file path file_name = add_time_with_xlsx("compare_result" + suffix) file_path = os.path.join(os.path.realpath(output_path), file_name) remove_path(file_path) - highlight_dict = {"red_rows": set(), "yellow_rows": set(), "red_lines": [], "yellow_lines": []} + # compare general data(name, dtype, shape, statistics, etc.) and initialize the comparsion result table npu_json = input_param.get("npu_json_path") bench_json = input_param.get("bench_json_path") stack_json = input_param.get("stack_json_path") @@ -495,12 +497,16 @@ class Comparator: logger.warning("Can`t match any op.") return + # compare real data if self.dump_mode == Const.ALL: result_df = self.do_multi_process(input_param, result_df) + # highlight suspicious API + highlight_dict = {"red_rows": set(), "yellow_rows": set(), "red_lines": [], "yellow_lines": []} find_compare_result_error_rows(result_df, highlight_dict, self.dump_mode) highlight_rows_xlsx(result_df, highlight_dict, file_path) + # output comparsion analysis suggestions if self.auto_analyze: advisor = Advisor(result_df, output_path, suffix) advisor.analysis() -- Gitee From c898ab1d7423f7d427633c9757f57563e6c126d0 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 10 Mar 2025 11:22:31 +0800 Subject: [PATCH 04/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 079bcec770..7357d0aad4 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -457,6 +457,13 @@ class Comparator: result_list.append(err_msg) return result_list + @staticmethod + def process_output_file(output_path, suffix): + file_name = add_time_with_xlsx("compare_result" + suffix) + file_path = os.path.join(os.path.realpath(output_path), file_name) + remove_path(file_path) + return file_path + def compare_core(self, input_param, output_path, **kwargs): """ Compares data from multiple JSON files and generates a comparison report. @@ -479,12 +486,10 @@ class Comparator: # get kwargs or set default value suffix = kwargs.get('suffix', '') - # process output file path - file_name = add_time_with_xlsx("compare_result" + suffix) - file_path = os.path.join(os.path.realpath(output_path), file_name) - remove_path(file_path) + # process output file + file_path = self.process_output_file(output_path, suffix) - # compare general data(name, dtype, shape, statistics, etc.) and initialize the comparsion result table + # initialize the comparsion result table and compare general data(name, dtype, shape, statistics/md5, etc.) npu_json = input_param.get("npu_json_path") bench_json = input_param.get("bench_json_path") stack_json = input_param.get("stack_json_path") @@ -492,7 +497,6 @@ class Comparator: result_df = self.compare_process_custom([npu_json, bench_json, stack_json]) else: result_df = self.compare_process([npu_json, bench_json, stack_json]) - if not result_df.values.tolist(): logger.warning("Can`t match any op.") return -- Gitee From f7b39bda5d1618b8eb20b5ec96ccc15328a0421f Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 10 Mar 2025 15:35:02 +0800 Subject: [PATCH 05/27] compare reconstruct --- .../msprobe/core/common/const.py | 4 +- .../msprobe/mindspore/compare/ms_compare.py | 185 +++++++++++------- 2 files changed, 111 insertions(+), 78 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index ce72b22d63..e54b2b28d7 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -478,8 +478,8 @@ class CompareConst: OUTPUT_PATTERN = Const.SEP + Const.OUTPUT + Const.SEP PARAMS_PATTERN = Const.SEP + Const.PARAMS + Const.SEP PARAMS_GRAD_PATTERN = Const.SEP + Const.PARAMS_GRAD + Const.SEP - COMPARE_KEY = 'compare_key' - COMPARE_SHAPE = 'compare_shape' + CMP_KEY = 'compare_key' + CMP_SHAPE = 'compare_shape' INTERNAL_API_MAPPING_FILE = 'ms_to_pt_api.yaml' UNREADABLE = 'unreadable data' diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 6451f20dea..25493a0128 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -48,6 +48,7 @@ class MSComparator(Comparator): data_mapping: mindspore的cell或api的入参/出参和pytorch之间的映射关系; is_cross_framework: 是否跨框架。 """ + def __init__(self, mode_config, mapping_config=None, is_cross_framework=False): super().__init__(mode_config) self.frame_name = MSComparator.__name__ @@ -84,48 +85,64 @@ class MSComparator(Comparator): result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1) return result + @staticmethod + def type_check(val): + """ + 检查是否为数值或字符串形式的nan + """ + check_series = pd.Series(False, index=val.index) + val_str = val.astype(str) + check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True + return check_series + + @staticmethod + def get_number(val): + return pd.to_numeric(val.astype(str), errors='coerce') + + def calc_summary_diff(self, result_df, cond_no_bench, stats_index: str): + npu_val = result_df['NPU ' + stats_index] + bench_val = result_df['Bench ' + stats_index] + diff_name = stats_index.capitalize() + ' diff' + rel_err_name = ('norm' if stats_index == 'l2norm' else stats_index).capitalize() + 'RelativeErr' + + # 只要npu、bench有一个不是数字或nan, 该行记为N/A + cond_na = ~self.type_check(npu_val) | ~self.type_check(bench_val) + + # 如果不是数字或nan,就赋值统计量差异为N/A + result_df.loc[cond_na, [diff_name, rel_err_name]] = CompareConst.N_A + result_df.loc[~(cond_no_bench | cond_na), diff_name] = self.get_number(npu_val) - self.get_number(bench_val) + + cond_diff_nan = result_df[diff_name].isna() + cond_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_nan + cond_diff_not_nan = result_df[diff_name].notna() + cond_not_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_not_nan + + result_df.loc[cond_nan_diff, [diff_name, rel_err_name]] = CompareConst.NAN + condition_pt_zero = bench_val == 0 + result_df.loc[cond_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.NAN + cond_ref_err = cond_not_nan_diff & ~condition_pt_zero + # 计算相对误差转成百分比字符串 + result_df.loc[cond_ref_err, rel_err_name] = ( + result_df.loc[cond_ref_err, diff_name] / bench_val[cond_ref_err] * 100) + result_df.loc[cond_ref_err, rel_err_name] = (result_df.loc[cond_ref_err, rel_err_name].abs().astype(str) + '%') + + magnitude = self.get_number(result_df[diff_name]).abs() / (pd.Series( + np.maximum(self.get_number(npu_val), self.get_number(bench_val))).abs() + CompareConst.EPSILON) + return magnitude > CompareConst.MAGNITUDE + def calc_accuracy(self, result_df, header): + # bench name N/A represents no bench data, err_msg adds "No bench data matched." condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A result_df[condition_no_bench] = result_df[condition_no_bench].fillna(CompareConst.N_A) result_df.loc[condition_no_bench, CompareConst.ERROR_MESSAGE] = CompareConst.NO_BENCH - def calc_summary_diff(data_type: str): - def type_check(val): - check_series = pd.Series(False, index=val.index) - val_str = val.astype(str) - check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True - return check_series - - def get_number(val): - return pd.to_numeric(val.astype(str), errors='coerce') - - ms_val = result_df['NPU ' + data_type] - pt_val = result_df['Bench ' + data_type] - diff_name = data_type.capitalize() + ' diff' - rel_err_name = ('norm' if data_type == 'l2norm' else data_type).capitalize() + 'RelativeErr' - condition_na = ~type_check(ms_val) | ~type_check(pt_val) - result_df.loc[condition_na, [diff_name, rel_err_name]] = CompareConst.N_A - result_df.loc[~(condition_no_bench | condition_na), diff_name] = get_number(ms_val) - get_number(pt_val) - condition_nan_diff = ~condition_no_bench & ~condition_na & result_df[diff_name].isna() - condition_not_nan_diff = ~condition_no_bench & ~condition_na & result_df[diff_name].notna() - result_df.loc[condition_nan_diff, [diff_name, rel_err_name]] = CompareConst.NAN - condition_pt_zero = pt_val == 0 - result_df.loc[condition_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.NAN - condition_ref_err = condition_not_nan_diff & ~condition_pt_zero - result_df.loc[condition_ref_err, rel_err_name] = (result_df.loc[condition_ref_err, diff_name] / - pt_val[condition_ref_err] * 100) - result_df.loc[condition_ref_err, rel_err_name] = (result_df.loc[condition_ref_err, rel_err_name] - .abs().astype(str) + '%') - magnitude = get_number(result_df[diff_name]).abs() / ( - pd.Series(np.maximum(get_number(ms_val), get_number(pt_val))).abs() + CompareConst.EPSILON) - return magnitude > CompareConst.MAGNITUDE - if self.dump_mode == Const.MD5: condition_md5_equal = result_df[CompareConst.NPU_MD5] == result_df[CompareConst.BENCH_MD5] result_df.loc[condition_md5_equal, CompareConst.RESULT] = CompareConst.PASS result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF elif self.dump_mode == Const.SUMMARY: - warning_list = [calc_summary_diff(data_type) for data_type in ['max', 'min', 'mean', 'l2norm']] + warning_list = [self.calc_summary_diff(result_df, condition_no_bench, stats_index) for stats_index in + ['max', 'min', 'mean', 'l2norm']] warning_flag = pd.DataFrame(warning_list).all() result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = '' result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING @@ -137,17 +154,33 @@ class MSComparator(Comparator): CompareConst.ERROR_MESSAGE] result_df.loc[~condition_no_bench, fill_cols] = '' result_df.loc[~condition_no_bench, CompareConst.ACCURACY] = CompareConst.ACCURACY_CHECK_YES + return result_df[header] + @staticmethod + def set_summary(summary): + if summary == CompareConst.N_A: + return [CompareConst.N_A] * 4 + summary_list = [] + for i in summary: + if i is None: + summary_list.append(CompareConst.N_A) + elif str(i).lower() == 'nan': + summary_list.append(CompareConst.NAN) + else: + summary_list.append(i) + return summary_list + def make_result_df(self, result): + # get header header = CompareConst.HEAD_OF_COMPARE_MODE[self.dump_mode][:] - if self.stack_mode: header.append(CompareConst.STACK) if self.dump_mode == Const.ALL: header.append(CompareConst.DATA_NAME) result = self.process_data_name(result) + # rename match_result columns result.rename(columns={'op_name_x': CompareConst.NPU_NAME, 'op_name_y': CompareConst.BENCH_NAME, 'dtype_x': CompareConst.NPU_DTYPE, @@ -159,31 +192,18 @@ class MSComparator(Comparator): 'data_name_x': CompareConst.DATA_NAME, 'stack_info_x': CompareConst.STACK}, inplace=True) + # process summary data npu_summary = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM] bench_summary = [CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM] - - def set_summary(summary): - if summary == CompareConst.N_A: - return [CompareConst.N_A] * 4 - summary_list = [] - for i in summary: - if i is None: - summary_list.append(CompareConst.N_A) - elif str(i).lower() == 'nan': - summary_list.append(CompareConst.NAN) - else: - summary_list.append(i) - return summary_list - - result[npu_summary] = result['summary_x'].apply(set_summary).tolist() - result[bench_summary] = result['summary_y'].apply(set_summary).tolist() + result[npu_summary] = result['summary_x'].apply(self.set_summary).tolist() + result[bench_summary] = result['summary_y'].apply(self.set_summary).tolist() result_df = pd.DataFrame(columns=header) for h in header: if h in result.columns: result_df[h] = result[h] - return self.calc_accuracy(result_df, header) + return result_df, header def load_internal_api(self): cur_path = os.path.dirname(os.path.realpath(__file__)) @@ -258,53 +278,66 @@ class MSComparator(Comparator): :return: 处理后的 npu_df """ if self.cell_mapping: - npu_df[CompareConst.COMPARE_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) elif self.api_mapping: - npu_df[CompareConst.COMPARE_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_internal_api_mapping) + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_internal_api_mapping) if isinstance(self.api_mapping, str): self.modify_compare_data_with_user_mapping(npu_df, bench_df) else: - npu_df[CompareConst.COMPARE_KEY] = npu_df[CompareConst.OP_NAME] + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] return npu_df + def gen_dtype_condition(self, match_result): + """ + dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 + """ + npu_dtype = match_result['dtype_x'] + bench_dtype = match_result['dtype_y'] + if self.cross_frame: + npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) + + equal_condition = npu_dtype == bench_dtype + match_condition = ( + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[0])) | + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[1])) + ) + return equal_condition | match_condition + def compare_process(self, file_lists): + # load json data npu_json_path, bench_json_path, stack_json_path = file_lists npu_json_data = load_json(npu_json_path) bench_json_data = load_json(bench_json_path) stack_json_data = load_json(stack_json_path) if self.stack_mode else None + # parse json data and generate df npu_df = self.gen_data_df(npu_json_data, stack_json_data) bench_df = self.gen_data_df(bench_json_data, stack_json_data) - npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) npu_df[[Const.DTYPE, Const.SHAPE]] = npu_df[[Const.DTYPE, Const.SHAPE]].astype(str) bench_df[[Const.DTYPE, Const.SHAPE]] = bench_df[[Const.DTYPE, Const.SHAPE]].astype(str) - npu_df[CompareConst.COMPARE_SHAPE] = npu_df[Const.SHAPE] - bench_df[CompareConst.COMPARE_KEY] = bench_df[CompareConst.OP_NAME] - bench_df[CompareConst.COMPARE_SHAPE] = bench_df[Const.SHAPE] - match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY, CompareConst.COMPARE_SHAPE], - how='outer') + # create new columns for compare op_name and shape + # process npu_df's COMPARE_KEY whether same or different framework + npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) + npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] + bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] + bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] + + # match npu and bench + match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) + bench_columns = [i + '_y' for i in bench_df.columns] + match_result.loc[~self.gen_dtype_condition(match_result), bench_columns] = CompareConst.N_A + + # organize comparsion result table + result_df, header = self.make_result_df(match_result) - def gen_dtype_condition(): - npu_dtype = match_result['dtype_x'] - bench_dtype = match_result['dtype_y'] - if self.cross_frame: - npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) - - equal_condition = npu_dtype == bench_dtype - match_condition = ( - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[0])) | - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[1])) - ) - return equal_condition | match_condition - - match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A - return self.make_result_df(match_result) + # calculate statistics diff + return self.calc_accuracy(result_df, header) def modify_compare_data_with_user_mapping(self, npu_df, bench_df): def get_api_indices_dict(op_name_df): -- Gitee From 12e8d8c9b4a89d13fc8cdcf24ce1361da74ce949 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 10:53:54 +0800 Subject: [PATCH 06/27] compare reconstruct --- .../msprobe/mindspore/compare/ms_compare.py | 11 +++--- .../msprobe/pytorch/compare/pt_compare.py | 35 +++++++++++++++---- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 25493a0128..a9f9f98135 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -152,7 +152,7 @@ class MSComparator(Comparator): CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, CompareConst.ERROR_MESSAGE] - result_df.loc[~condition_no_bench, fill_cols] = '' + result_df.loc[~condition_no_bench, fill_cols] = '' # TODO 注意和pt对齐 result_df.loc[~condition_no_bench, CompareConst.ACCURACY] = CompareConst.ACCURACY_CHECK_YES return result_df[header] @@ -271,11 +271,11 @@ class MSComparator(Comparator): def assign_npu_df_compare_key(self, npu_df, bench_df): """ - 处理 npu_df 的 COMPARE_KEY 赋值逻辑。 + 处理 npu_df 的 COMPARE_KEY 赋值逻辑 - :param npu_df: DataFrame,NPU 计算数据 - :param bench_df: DataFrame,对比数据 - :return: 处理后的 npu_df + :param npu_df: DataFrame,NPU 对比数据 + :param bench_df: DataFrame,Bench 对比数据 + :return: compare_key(name)处理后的 npu_df """ if self.cell_mapping: npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) @@ -285,7 +285,6 @@ class MSComparator(Comparator): self.modify_compare_data_with_user_mapping(npu_df, bench_df) else: npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] - return npu_df def gen_dtype_condition(self, match_result): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 760553ac84..e123195202 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -17,13 +17,11 @@ import os.path import torch -from msprobe.core.common.const import FileCheckConst -from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import FileChecker, create_directory, load_yaml -from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, get_dump_mode, \ - set_dump_path +from msprobe.core.common.const import FileCheckConst, CompareConst, Const +from msprobe.core.common.file_utils import FileChecker, load_yaml +from msprobe.core.common.utils import CompareException from msprobe.core.compare.acc_compare import Comparator, ModeConfig -from msprobe.core.compare.utils import set_stack_json_path +from msprobe.core.compare.utils import rename_api from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import load_pt from msprobe.core.compare.acc_compare import setup_comparison @@ -79,6 +77,31 @@ class PTComparator(Comparator): data_value = data_value.numpy() return data_value + @staticmethod + def process_fuzzy_match(op_name): + if not op_name: + return CompareConst.N_A + if Const.FORWARD in op_name: + renamed_op_name = rename_api(op_name, Const.FORWARD) + elif Const.BACKWARD in op_name: + renamed_op_name = rename_api(op_name, Const.BACKWARD) + else: + renamed_op_name = op_name + return renamed_op_name + + def assign_df_compare_key(self, df): + """ + 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 + + :param df: DataFrame,NPU or Bench 对比数据 + :return: compare_key(name)处理后的 npu_df 或 bench_df + """ + if self.fuzzy_match: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_fuzzy_match) + else: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] + return df + def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) -- Gitee From 3792dd695a537a5cbb30f65373f82345ce8df9de Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 14:48:49 +0800 Subject: [PATCH 07/27] compare reconstruct --- .../msprobe/mindspore/compare/ms_compare.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index a9f9f98135..d9f8095f07 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -21,15 +21,13 @@ import numpy as np import pandas as pd from msprobe.core.common.const import CompareConst, Const -from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import create_directory, load_json, load_npy, load_yaml +from msprobe.core.common.file_utils import load_json, load_npy, load_yaml from msprobe.core.common.log import logger -from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, \ - check_op_str_pattern_valid, get_dump_mode, set_dump_path, detect_framework_by_dump_json +from msprobe.core.common.utils import CompareException, check_op_str_pattern_valid, detect_framework_by_dump_json from msprobe.core.compare.acc_compare import Comparator, ModeConfig from msprobe.core.compare.check import dtype_mapping from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping -from msprobe.core.compare.utils import set_stack_json_path, reorder_op_x_list +from msprobe.core.compare.utils import reorder_op_x_list from msprobe.core.compare.acc_compare import setup_comparison @@ -269,6 +267,9 @@ class MSComparator(Comparator): raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error return api_name + def process_data_mapping(self, npu_op_name): + return self.data_mapping_dict.get(npu_op_name, npu_op_name) + def assign_npu_df_compare_key(self, npu_df, bench_df): """ 处理 npu_df 的 COMPARE_KEY 赋值逻辑 @@ -277,12 +278,17 @@ class MSComparator(Comparator): :param bench_df: DataFrame,Bench 对比数据 :return: compare_key(name)处理后的 npu_df """ - if self.cell_mapping: - npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) - elif self.api_mapping: + # 处理api_mapping映射 + if self.api_mapping: npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_internal_api_mapping) if isinstance(self.api_mapping, str): self.modify_compare_data_with_user_mapping(npu_df, bench_df) + # 处理cell_mapping映射 + elif self.cell_mapping: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) + # 处理data_mapping映射 + elif self.data_mapping: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_data_mapping) else: npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] return npu_df @@ -291,6 +297,10 @@ class MSComparator(Comparator): """ dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 """ + # 如果ms使用了fuzzy_match或data_mapping,不校验dtype,返回全True的DataFrame + if self.fuzzy_match or self.data_mapping: + return pd.Series(True, index=match_result.index) + npu_dtype = match_result['dtype_x'] bench_dtype = match_result['dtype_y'] if self.cross_frame: -- Gitee From e9a7acb51601eb5273763ffaeca1b72bf3437b38 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 17:43:27 +0800 Subject: [PATCH 08/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 440 +++++++++--------- .../msprobe/mindspore/compare/ms_compare.py | 385 ++++----------- .../msprobe/pytorch/compare/pt_compare.py | 65 ++- 3 files changed, 353 insertions(+), 537 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 7357d0aad4..7f1d105ec2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -13,28 +13,28 @@ # See the License for the specific language governing permissions and # limitations under the License. +from abc import ABC, abstractmethod import multiprocessing import os -import re -from copy import deepcopy from dataclasses import dataclass +import numpy as np import pandas as pd -from tqdm import tqdm from msprobe.core.advisor.advisor import Advisor from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.exceptions import FileCheckException from msprobe.core.common.file_utils import load_json, remove_path, create_directory from msprobe.core.common.log import logger -from msprobe.core.common.utils import CompareException, add_time_with_xlsx, check_op_str_pattern_valid, safe_get_value, set_dump_path, get_dump_mode, check_compare_param, check_configuration_param +from msprobe.core.common.utils import CompareException, add_time_with_xlsx, check_op_str_pattern_valid, \ + safe_get_value, set_dump_path, get_dump_mode, check_compare_param, check_configuration_param from msprobe.core.compare.check import check_dump_json_str, check_graph_mode, check_stack_json_str, \ check_struct_match, fuzzy_check_op from msprobe.core.compare.highlight import find_compare_result_error_rows, highlight_rows_xlsx from msprobe.core.compare.multiprocessing_compute import ComparisonResult, _handle_multi_process, _save_cmp_result from msprobe.core.compare.npy_compare import compare_ops_apply, get_error_flag_and_msg -from msprobe.core.compare.utils import get_accuracy, get_rela_diff_summary_mode, get_un_match_accuracy, merge_tensor, \ - print_compare_ends_info, read_op, get_name_and_state, reorder_op_x_list, set_stack_json_path +from msprobe.core.compare.utils import merge_tensor, print_compare_ends_info, read_op, get_name_and_state, \ + reorder_op_x_list, set_stack_json_path @dataclass @@ -94,74 +94,13 @@ class ModeConfig: self.dump_mode = dump_mode -class Comparator: +class Comparator(ABC): def __init__(self, mode_config: ModeConfig): self.stack_mode = mode_config.stack_mode self.auto_analyze = mode_config.auto_analyze self.fuzzy_match = mode_config.fuzzy_match self.dump_mode = mode_config.dump_mode - @staticmethod - def get_result_md5_compare(ms_op_name, bench_op_name, npu_ops_all, bench_ops_all, *args): - npu_struct = npu_ops_all.get(ms_op_name).get('struct', []) - bench_struct = bench_ops_all.get(bench_op_name).get('struct', []) - - if len(npu_struct) < 3 or len(bench_struct) < 3: - logger.error(f"The length of npu_struct and bench_struct must be >= 3, " - f"but got npu_struct={len(npu_struct)} and bench_struct={len(bench_struct)}. Please check!") - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) - - result_item = [ms_op_name, bench_op_name, npu_struct[0], bench_struct[0], - npu_struct[1], bench_struct[1], npu_struct[2], bench_struct[2], - CompareConst.PASS if npu_struct[2] == bench_struct[2] else CompareConst.DIFF] - - if len(args) >= 2 and args[0]: - result_item.extend(args[1]) - else: - result_item.append(CompareConst.NONE) - return result_item - - @staticmethod - def calculate_summary_data(npu_summary_data, bench_summary_data, result_item): - err_msg = "" - result_item, accuracy_check, err_msg = get_rela_diff_summary_mode(result_item, npu_summary_data, - bench_summary_data, err_msg) - result_item.append(accuracy_check) - result_item.append(err_msg) - - @staticmethod - def _generate_na_data(ops_all): - if not ops_all: - return {} - key = next(iter(ops_all)) - value = deepcopy(ops_all[key]) - for k, v in value.items(): - if isinstance(v, tuple): - value[k] = tuple(CompareConst.N_A for _ in range(len(v))) - elif isinstance(v, list): - value[k] = [CompareConst.N_A] * len(v) - else: - value[k] = CompareConst.N_A - return value - - def make_result_table(self, result): - header = CompareConst.HEAD_OF_COMPARE_MODE[self.dump_mode][:] - - if self.stack_mode: - header.append(CompareConst.STACK) - if self.dump_mode == Const.ALL: - header.append(CompareConst.DATA_NAME) - else: - if self.dump_mode == Const.ALL: - for row in result: - del row[-2] # 输出结果不要堆栈信息时,删除中间结果result中的stack info,真实数据时为倒数第2列 - header.append(CompareConst.DATA_NAME) - else: - for row in result: - del row[-1] # 输出结果不要堆栈信息时,删除中间结果result中的stack info,非真实数据时为倒数第1列 - result_df = pd.DataFrame(result, columns=header, dtype='object') - return result_df - def gen_merge_list(self, json_data, op_name, stack_json_data): op_data = json_data['data'][op_name] check_dump_json_str(op_data, op_name) @@ -213,86 +152,6 @@ class Comparator: return n_index, len(bench_queue) - 1 return -1, -1 - def compare_process(self, file_lists): - npu_json_path, bench_json_path, stack_json_path = file_lists - npu_json_data = load_json(npu_json_path) - bench_json_data = load_json(bench_json_path) - stack_json_data = load_json(stack_json_path) if self.stack_mode else None - - if self.fuzzy_match: - logger.warning("This task uses fuzzy matching, which may affect the accuracy of the comparison.") - - npu_ops_queue = [] - bench_ops_queue = [] - result = [] - - ops_npu_iter = iter(npu_json_data['data']) - ops_bench_iter = iter(bench_json_data['data']) - read_err_npu = True - read_err_bench = True - last_npu_ops_len = 0 - last_bench_ops_len = 0 - - npu_api_nums = len(npu_json_data['data']) - progress_bar = tqdm(total=npu_api_nums, desc="API/Module Read Progress", unit="item", ncols=100) - - while True: - if not read_err_npu and not read_err_bench: - break - try: - last_npu_ops_len = len(npu_ops_queue) - op_name_npu = next(ops_npu_iter) - check_op_str_pattern_valid(op_name_npu) - npu_merge_list = self.gen_merge_list(npu_json_data, op_name_npu, stack_json_data) - if npu_merge_list: - npu_ops_queue.append(npu_merge_list) - except StopIteration: - read_err_npu = False - try: - last_bench_ops_len = len(bench_ops_queue) - op_name_bench = next(ops_bench_iter) - check_op_str_pattern_valid(op_name_bench) - bench_merge_list = self.gen_merge_list(bench_json_data, op_name_bench, stack_json_data) - if bench_merge_list: - bench_ops_queue.append(bench_merge_list) - except StopIteration: - read_err_bench = False - - progress_bar.update(1) - - # merge all boolean expressions - both_empty = not npu_ops_queue and not bench_ops_queue - no_change = (len(npu_ops_queue) == last_npu_ops_len) and (len(bench_ops_queue) == last_bench_ops_len) - if both_empty or no_change: - continue - - # APIs in NPU and Bench models unconsistent judgment - if bool(npu_ops_queue) ^ bool(bench_ops_queue): - logger.info("Please check whether the number and calls of APIs in NPU and Bench models are consistent.") - break - - n_match_point, b_match_point = self.match_op(npu_ops_queue, bench_ops_queue) - - # 如果没有匹配到,数据放到队列中,跳过,直到后面匹配到,把匹配之前的api放到不匹配中 - if n_match_point == -1 and b_match_point == -1: - continue - - n_match_data = npu_ops_queue[n_match_point] - b_match_data = bench_ops_queue[b_match_point] - un_match_data = npu_ops_queue[0: n_match_point] - for npu_data in un_match_data: - get_un_match_accuracy(result, npu_data, self.dump_mode) - get_accuracy(result, n_match_data, b_match_data, self.dump_mode) - del npu_ops_queue[0: n_match_point + 1] - del bench_ops_queue[0: b_match_point + 1] - progress_bar.close() - if npu_ops_queue: - for npu_data in npu_ops_queue: - get_un_match_accuracy(result, npu_data, self.dump_mode) - - result_df = self.make_result_table(result) - return result_df - def merge_data(self, json_data, stack_json_data): ops_all = {} for op_name in json_data.get('data', {}): @@ -328,79 +187,6 @@ class Comparator: struct_to_index_mapping[struct_key] += 1 return ops_all - def get_accuracy(self, npu_ops_all, bench_ops_all): - result = [] - bench_ops_all[CompareConst.N_A] = self._generate_na_data(bench_ops_all) - for ms_op_name, bench_op_name in self.data_mapping_dict.items(): - if ms_op_name in npu_ops_all and bench_op_name in bench_ops_all: - npu_stack_info = npu_ops_all.get(ms_op_name).get("stack_info", None) - bench_stack_info = bench_ops_all.get(bench_op_name).get("stack_info", None) - has_stack = npu_stack_info and bench_stack_info - if self.dump_mode == Const.MD5: - result.append(self.get_result_md5_compare(ms_op_name, bench_op_name, npu_ops_all, - bench_ops_all, has_stack, npu_stack_info)) - continue - - npu_struct = npu_ops_all.get(ms_op_name).get('struct', []) - bench_struct = bench_ops_all.get(bench_op_name).get('struct', []) - - if len(npu_struct) < 2 or len(bench_struct) < 2: - logger.error( - f"The length of npu_struct and bench_struct must be >= 2, " - f"but got npu_struct={len(npu_struct)} and bench_struct={len(bench_struct)}. " - f"Please check!" - ) - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) - - base_result_item = [ - ms_op_name, bench_op_name, - npu_struct[0], - bench_struct[0], - npu_struct[1], - bench_struct[1] - ] - - if self.dump_mode == Const.SUMMARY: - result_item = base_result_item + [" "] * 8 # 8个统计量数据情况的比对指标 - else: - result_item = base_result_item + [" "] * 6 # 6个真实数据情况的比对指标 - - npu_summary_data = npu_ops_all.get(ms_op_name).get("summary") - result_item.extend(npu_summary_data) - bench_summary_data = bench_ops_all.get(bench_op_name).get("summary") - result_item.extend(bench_summary_data) - if self.dump_mode == Const.SUMMARY: - self.calculate_summary_data(npu_summary_data, bench_summary_data, result_item) - else: - result_item.append(CompareConst.ACCURACY_CHECK_YES) - result_item.append("") - if has_stack: - result_item.extend(npu_stack_info) - else: - result_item.append(CompareConst.NONE) - if self.dump_mode == Const.ALL: - ms_data_name = npu_ops_all.get(ms_op_name).get("data_name", None) - pt_data_name = bench_ops_all.get(bench_op_name).get("data_name", None) - result_item.append([ms_data_name, pt_data_name]) - result.append(result_item) - elif ms_op_name not in npu_ops_all: - logger.warning(f'Can not find npu op name : `{ms_op_name}` in npu dump json file.') - elif bench_op_name not in npu_ops_all: - logger.warning(f'Can not find bench op name : `{bench_op_name}` in bench dump json file.') - return result - - def compare_process_custom(self, file_lists): - npu_json_path, bench_json_path, stack_json_path = file_lists - npu_json_data = load_json(npu_json_path) - bench_json_data = load_json(bench_json_path) - stack_json_data = load_json(stack_json_path) if self.stack_mode else None - npu_ops_all = self.merge_data(npu_json_data, stack_json_data) - bench_ops_all = self.merge_data(bench_json_data, stack_json_data) - - result = self.get_accuracy(npu_ops_all, bench_ops_all) - result_df = self.make_result_table(result) - return result_df - def compare_by_op(self, npu_op_name, bench_op_name, op_name_mapping_dict, input_param): """ :param npu_op_name: excel中的NPU_Name,例如:MintFunctional.conv2d.0.forward.input.3.0 @@ -464,6 +250,213 @@ class Comparator: remove_path(file_path) return file_path + def gen_data_df(self, data_json, stack_json_data): + result = { + CompareConst.OP_NAME: [], + Const.DTYPE: [], + Const.SHAPE: [], + Const.SUMMARY: [], + 'stack_info': [] + } + if self.dump_mode == Const.ALL: + result['data_name'] = [] + elif self.dump_mode == Const.MD5: + result[Const.MD5] = [] + for data_name in data_json['data']: + check_op_str_pattern_valid(data_name) + merge_list = self.gen_merge_list(data_json, data_name, stack_json_data) + if not merge_list: + continue + + op_name_list = merge_list.get(CompareConst.OP_NAME) + summary_list = merge_list.get(Const.SUMMARY) + data_name_list = merge_list.get('data_name') + op_name_reorder, summary_reorder, data_name_reorder = reorder_op_x_list(op_name_list, + summary_list, + data_name_list) + for op_name in op_name_reorder: + result[CompareConst.OP_NAME].append(op_name) + if (CompareConst.INPUT_PATTERN in op_name) or (CompareConst.KWARGS_PATTERN in op_name): + struct = merge_list[CompareConst.INPUT_STRUCT].pop(0) + elif CompareConst.OUTPUT_PATTERN in op_name: + struct = merge_list[CompareConst.OUTPUT_STRUCT].pop(0) + elif CompareConst.PARAMS_PATTERN in op_name: + struct = merge_list[CompareConst.PARAMS_STRUCT].pop(0) + else: + struct = merge_list[CompareConst.PARAMS_GRAD_STRUCT].pop(0) + result[Const.DTYPE].append(struct[0]) + result[Const.SHAPE].append(struct[1]) + if self.dump_mode == Const.MD5: + result[Const.MD5].append(struct[2]) + result[Const.SUMMARY].append(summary_reorder.pop(0)) + result['stack_info'].append(merge_list['stack_info'][0] if self.stack_mode else None) + if self.dump_mode == Const.ALL: + result['data_name'].append(data_name_reorder.pop(0)) + return pd.DataFrame(result) + + @abstractmethod + def process_compare_key_and_shape(self, npu_df, bench_df): + pass + + @abstractmethod + def gen_dtype_condition(self, match_result): + pass + + @staticmethod + def process_data_name(result): + result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1) + return result + + @staticmethod + def set_summary(summary): + if summary == CompareConst.N_A: + return [CompareConst.N_A] * 4 + summary_list = [] + for i in summary: + if i is None: + summary_list.append(CompareConst.N_A) + elif str(i).lower() == 'nan': + summary_list.append(CompareConst.NAN) + else: + summary_list.append(i) + return summary_list + + def make_result_df(self, result): + # get header + header = CompareConst.HEAD_OF_COMPARE_MODE[self.dump_mode][:] + if self.stack_mode: + header.append(CompareConst.STACK) + if self.dump_mode == Const.ALL: + header.append(CompareConst.DATA_NAME) + result = self.process_data_name(result) + + # rename match_result columns + result.rename(columns={'op_name_x': CompareConst.NPU_NAME, + 'op_name_y': CompareConst.BENCH_NAME, + 'dtype_x': CompareConst.NPU_DTYPE, + 'dtype_y': CompareConst.BENCH_DTYPE, + 'shape_x': CompareConst.NPU_SHAPE, + 'shape_y': CompareConst.BENCH_SHAPE, + 'md5_x': CompareConst.NPU_MD5, + 'md5_y': CompareConst.BENCH_MD5, + 'data_name_x': CompareConst.DATA_NAME, + 'stack_info_x': CompareConst.STACK}, inplace=True) + + # process summary data + npu_summary = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM] + bench_summary = [CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN, + CompareConst.BENCH_NORM] + result[npu_summary] = result['summary_x'].apply(self.set_summary).tolist() + result[bench_summary] = result['summary_y'].apply(self.set_summary).tolist() + + result_df = pd.DataFrame(columns=header) + for h in header: + if h in result.columns: + result_df[h] = result[h] + return result_df, header + + @staticmethod + def type_check(val): + """ + 检查是否为数值或字符串形式的nan + """ + check_series = pd.Series(False, index=val.index) + val_str = val.astype(str) + check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True + return check_series + + @staticmethod + def get_number(val): + return pd.to_numeric(val.astype(str), errors='coerce') + + def calc_summary_diff(self, result_df, cond_no_bench, stats_index: str): + npu_val = result_df['NPU ' + stats_index] + bench_val = result_df['Bench ' + stats_index] + diff_name = stats_index.capitalize() + ' diff' + rel_err_name = ('norm' if stats_index == 'l2norm' else stats_index).capitalize() + 'RelativeErr' + + # 只要npu、bench有一个不是数字或nan, 该行记为N/A + cond_na = ~self.type_check(npu_val) | ~self.type_check(bench_val) + + # 如果不是数字或nan,就赋值统计量差异为N/A + result_df.loc[cond_na, [diff_name, rel_err_name]] = CompareConst.N_A + result_df.loc[~(cond_no_bench | cond_na), diff_name] = self.get_number(npu_val) - self.get_number(bench_val) + + cond_diff_nan = result_df[diff_name].isna() + cond_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_nan + cond_diff_not_nan = result_df[diff_name].notna() + cond_not_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_not_nan + + result_df.loc[cond_nan_diff, [diff_name, rel_err_name]] = CompareConst.NAN + condition_pt_zero = bench_val == 0 + result_df.loc[cond_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.NAN + cond_ref_err = cond_not_nan_diff & ~condition_pt_zero + # 计算相对误差转成百分比字符串 + result_df.loc[cond_ref_err, rel_err_name] = ( + result_df.loc[cond_ref_err, diff_name] / bench_val[cond_ref_err] * 100) + result_df.loc[cond_ref_err, rel_err_name] = (result_df.loc[cond_ref_err, rel_err_name].abs().astype(str) + '%') + + magnitude = self.get_number(result_df[diff_name]).abs() / (pd.Series( + np.maximum(self.get_number(npu_val), self.get_number(bench_val))).abs() + CompareConst.EPSILON) + return magnitude > CompareConst.MAGNITUDE + + def calc_accuracy(self, result_df, header): + # bench name N/A represents no bench data, err_msg adds "No bench data matched." + condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A + result_df[condition_no_bench] = result_df[condition_no_bench].fillna(CompareConst.N_A) + result_df.loc[condition_no_bench, CompareConst.ERROR_MESSAGE] = CompareConst.NO_BENCH + + if self.dump_mode == Const.MD5: + condition_md5_equal = result_df[CompareConst.NPU_MD5] == result_df[CompareConst.BENCH_MD5] + result_df.loc[condition_md5_equal, CompareConst.RESULT] = CompareConst.PASS + result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF + elif self.dump_mode == Const.SUMMARY: + warning_list = [self.calc_summary_diff(result_df, condition_no_bench, stats_index) for stats_index in + ['max', 'min', 'mean', 'l2norm']] + warning_flag = pd.DataFrame(warning_list).all() + result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = '' + result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING + result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.' + else: + fill_cols = [CompareConst.COSINE, CompareConst.EUC_DIST, + CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, + CompareConst.ERROR_MESSAGE] + result_df.loc[~condition_no_bench, fill_cols] = '' # TODO 注意和pt对齐 + result_df.loc[~condition_no_bench, CompareConst.ACCURACY] = CompareConst.ACCURACY_CHECK_YES + + return result_df[header] + + def compare_process(self, file_lists): + # load json data + npu_json_path, bench_json_path, stack_json_path = file_lists + npu_json_data = load_json(npu_json_path) + bench_json_data = load_json(bench_json_path) + stack_json_data = load_json(stack_json_path) if self.stack_mode else None + + # parse json data and generate df + npu_df = self.gen_data_df(npu_json_data, stack_json_data) + bench_df = self.gen_data_df(bench_json_data, stack_json_data) + + npu_df[[Const.DTYPE, Const.SHAPE]] = npu_df[[Const.DTYPE, Const.SHAPE]].astype(str) + bench_df[[Const.DTYPE, Const.SHAPE]] = bench_df[[Const.DTYPE, Const.SHAPE]].astype(str) + + # create new columns for compare op_name and shape + # process npu_df's COMPARE_KEY whether same or different framework + npu_df, bench_df = self.process_compare_key_and_shape(npu_df, bench_df) + + # match npu and bench + match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') + match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) + bench_columns = [i + '_y' for i in bench_df.columns] + match_result.loc[~self.gen_dtype_condition(match_result), bench_columns] = CompareConst.N_A + + # organize comparsion result table + result_df, header = self.make_result_df(match_result) + + # calculate statistics diff + return self.calc_accuracy(result_df, header) + def compare_core(self, input_param, output_path, **kwargs): """ Compares data from multiple JSON files and generates a comparison report. @@ -493,10 +486,7 @@ class Comparator: npu_json = input_param.get("npu_json_path") bench_json = input_param.get("bench_json_path") stack_json = input_param.get("stack_json_path") - if self.data_mapping: - result_df = self.compare_process_custom([npu_json, bench_json, stack_json]) - else: - result_df = self.compare_process([npu_json, bench_json, stack_json]) + result_df = self.compare_process([npu_json, bench_json, stack_json]) if not result_df.values.tolist(): logger.warning("Can`t match any op.") return diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index d9f8095f07..4654ba21b8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -79,171 +79,27 @@ class MSComparator(Comparator): f"{type(self.data_mapping)}") @staticmethod - def process_data_name(result): - result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1) - return result - - @staticmethod - def type_check(val): - """ - 检查是否为数值或字符串形式的nan - """ - check_series = pd.Series(False, index=val.index) - val_str = val.astype(str) - check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True - return check_series - - @staticmethod - def get_number(val): - return pd.to_numeric(val.astype(str), errors='coerce') - - def calc_summary_diff(self, result_df, cond_no_bench, stats_index: str): - npu_val = result_df['NPU ' + stats_index] - bench_val = result_df['Bench ' + stats_index] - diff_name = stats_index.capitalize() + ' diff' - rel_err_name = ('norm' if stats_index == 'l2norm' else stats_index).capitalize() + 'RelativeErr' - - # 只要npu、bench有一个不是数字或nan, 该行记为N/A - cond_na = ~self.type_check(npu_val) | ~self.type_check(bench_val) - - # 如果不是数字或nan,就赋值统计量差异为N/A - result_df.loc[cond_na, [diff_name, rel_err_name]] = CompareConst.N_A - result_df.loc[~(cond_no_bench | cond_na), diff_name] = self.get_number(npu_val) - self.get_number(bench_val) - - cond_diff_nan = result_df[diff_name].isna() - cond_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_nan - cond_diff_not_nan = result_df[diff_name].notna() - cond_not_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_not_nan - - result_df.loc[cond_nan_diff, [diff_name, rel_err_name]] = CompareConst.NAN - condition_pt_zero = bench_val == 0 - result_df.loc[cond_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.NAN - cond_ref_err = cond_not_nan_diff & ~condition_pt_zero - # 计算相对误差转成百分比字符串 - result_df.loc[cond_ref_err, rel_err_name] = ( - result_df.loc[cond_ref_err, diff_name] / bench_val[cond_ref_err] * 100) - result_df.loc[cond_ref_err, rel_err_name] = (result_df.loc[cond_ref_err, rel_err_name].abs().astype(str) + '%') - - magnitude = self.get_number(result_df[diff_name]).abs() / (pd.Series( - np.maximum(self.get_number(npu_val), self.get_number(bench_val))).abs() + CompareConst.EPSILON) - return magnitude > CompareConst.MAGNITUDE - - def calc_accuracy(self, result_df, header): - # bench name N/A represents no bench data, err_msg adds "No bench data matched." - condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A - result_df[condition_no_bench] = result_df[condition_no_bench].fillna(CompareConst.N_A) - result_df.loc[condition_no_bench, CompareConst.ERROR_MESSAGE] = CompareConst.NO_BENCH - - if self.dump_mode == Const.MD5: - condition_md5_equal = result_df[CompareConst.NPU_MD5] == result_df[CompareConst.BENCH_MD5] - result_df.loc[condition_md5_equal, CompareConst.RESULT] = CompareConst.PASS - result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF - elif self.dump_mode == Const.SUMMARY: - warning_list = [self.calc_summary_diff(result_df, condition_no_bench, stats_index) for stats_index in - ['max', 'min', 'mean', 'l2norm']] - warning_flag = pd.DataFrame(warning_list).all() - result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = '' - result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING - result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.' - else: - fill_cols = [CompareConst.COSINE, CompareConst.EUC_DIST, - CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, - CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, - CompareConst.ERROR_MESSAGE] - result_df.loc[~condition_no_bench, fill_cols] = '' # TODO 注意和pt对齐 - result_df.loc[~condition_no_bench, CompareConst.ACCURACY] = CompareConst.ACCURACY_CHECK_YES - - return result_df[header] - - @staticmethod - def set_summary(summary): - if summary == CompareConst.N_A: - return [CompareConst.N_A] * 4 - summary_list = [] - for i in summary: - if i is None: - summary_list.append(CompareConst.N_A) - elif str(i).lower() == 'nan': - summary_list.append(CompareConst.NAN) - else: - summary_list.append(i) - return summary_list - - def make_result_df(self, result): - # get header - header = CompareConst.HEAD_OF_COMPARE_MODE[self.dump_mode][:] - if self.stack_mode: - header.append(CompareConst.STACK) - if self.dump_mode == Const.ALL: - header.append(CompareConst.DATA_NAME) - result = self.process_data_name(result) - - # rename match_result columns - result.rename(columns={'op_name_x': CompareConst.NPU_NAME, - 'op_name_y': CompareConst.BENCH_NAME, - 'dtype_x': CompareConst.NPU_DTYPE, - 'dtype_y': CompareConst.BENCH_DTYPE, - 'shape_x': CompareConst.NPU_SHAPE, - 'shape_y': CompareConst.BENCH_SHAPE, - 'md5_x': CompareConst.NPU_MD5, - 'md5_y': CompareConst.BENCH_MD5, - 'data_name_x': CompareConst.DATA_NAME, - 'stack_info_x': CompareConst.STACK}, inplace=True) - - # process summary data - npu_summary = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM] - bench_summary = [CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN, - CompareConst.BENCH_NORM] - result[npu_summary] = result['summary_x'].apply(self.set_summary).tolist() - result[bench_summary] = result['summary_y'].apply(self.set_summary).tolist() - - result_df = pd.DataFrame(columns=header) - for h in header: - if h in result.columns: - result_df[h] = result[h] - return result_df, header - - def load_internal_api(self): - cur_path = os.path.dirname(os.path.realpath(__file__)) - yaml_path = os.path.abspath(os.path.join(cur_path, CompareConst.INTERNAL_API_MAPPING_FILE)) - return load_yaml(yaml_path) - - def load_mapping_file(self, mapping_file): + def load_mapping_file(mapping_file): if isinstance(mapping_file, str): mapping_dict = load_yaml(mapping_file) else: mapping_dict = {} return mapping_dict - def process_cell_mapping(self, npu_op_name): - if not npu_op_name: - return CompareConst.N_A - param_grad_flag = Const.PARAMS_GRAD in npu_op_name.split(Const.SEP) - if not param_grad_flag and not re.search(Const.REGEX_FORWARD_BACKWARD, npu_op_name): - return CompareConst.N_A - npu_op_name = npu_op_name.replace("Cell", "Module", 1) - if self.cell_mapping_dict: - # get cell name & class name from op_name - # Cell.fc1.Dense.forward.0.input.0 - cell_name = re.split(r'\.(?:forward|backward|parameters_grad)\.', npu_op_name.split(Const.SEP, 1)[-1])[0] - if cell_name in self.cell_mapping_dict: - npu_op_name = npu_op_name.replace(cell_name, self.cell_mapping_dict[cell_name], 1) - return npu_op_name + @staticmethod + def load_internal_api(): + cur_path = os.path.dirname(os.path.realpath(__file__)) + yaml_path = os.path.abspath(os.path.join(cur_path, CompareConst.INTERNAL_API_MAPPING_FILE)) + return load_yaml(yaml_path) - def read_npy_data(self, dir_path, file_name, load_pt_file=False): - if not file_name: - return None - data_path = os.path.join(dir_path, file_name) - if load_pt_file: - import torch - from msprobe.pytorch.common.utils import load_pt - data_value = load_pt(data_path, True).detach() - if data_value.dtype == torch.bfloat16: - data_value = data_value.to(torch.float32) - data_value = data_value.numpy() - else: - data_value = load_npy(data_path) - return data_value + @staticmethod + def get_api_name(api_list): + try: + api_name = api_list[0] + Const.SEP + api_list[1] + except IndexError as error: + logger.error(f'Failed to retrieve API name, please check if the dump data is reasonable') + raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error + return api_name def process_internal_api_mapping(self, npu_op_name): # get api name & class name from op_name @@ -259,95 +115,6 @@ class MSComparator(Comparator): else: return npu_op_name - def get_api_name(self, api_list): - try: - api_name = api_list[0] + Const.SEP + api_list[1] - except IndexError as error: - logger.error(f'Failed to retrieve API name, please check if the dump data is reasonable') - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error - return api_name - - def process_data_mapping(self, npu_op_name): - return self.data_mapping_dict.get(npu_op_name, npu_op_name) - - def assign_npu_df_compare_key(self, npu_df, bench_df): - """ - 处理 npu_df 的 COMPARE_KEY 赋值逻辑 - - :param npu_df: DataFrame,NPU 对比数据 - :param bench_df: DataFrame,Bench 对比数据 - :return: compare_key(name)处理后的 npu_df - """ - # 处理api_mapping映射 - if self.api_mapping: - npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_internal_api_mapping) - if isinstance(self.api_mapping, str): - self.modify_compare_data_with_user_mapping(npu_df, bench_df) - # 处理cell_mapping映射 - elif self.cell_mapping: - npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) - # 处理data_mapping映射 - elif self.data_mapping: - npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_data_mapping) - else: - npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] - return npu_df - - def gen_dtype_condition(self, match_result): - """ - dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 - """ - # 如果ms使用了fuzzy_match或data_mapping,不校验dtype,返回全True的DataFrame - if self.fuzzy_match or self.data_mapping: - return pd.Series(True, index=match_result.index) - - npu_dtype = match_result['dtype_x'] - bench_dtype = match_result['dtype_y'] - if self.cross_frame: - npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) - - equal_condition = npu_dtype == bench_dtype - match_condition = ( - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[0])) | - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[1])) - ) - return equal_condition | match_condition - - def compare_process(self, file_lists): - # load json data - npu_json_path, bench_json_path, stack_json_path = file_lists - npu_json_data = load_json(npu_json_path) - bench_json_data = load_json(bench_json_path) - stack_json_data = load_json(stack_json_path) if self.stack_mode else None - - # parse json data and generate df - npu_df = self.gen_data_df(npu_json_data, stack_json_data) - bench_df = self.gen_data_df(bench_json_data, stack_json_data) - - npu_df[[Const.DTYPE, Const.SHAPE]] = npu_df[[Const.DTYPE, Const.SHAPE]].astype(str) - bench_df[[Const.DTYPE, Const.SHAPE]] = bench_df[[Const.DTYPE, Const.SHAPE]].astype(str) - - # create new columns for compare op_name and shape - # process npu_df's COMPARE_KEY whether same or different framework - npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) - npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] - bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] - bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] - - # match npu and bench - match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') - match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) - bench_columns = [i + '_y' for i in bench_df.columns] - match_result.loc[~self.gen_dtype_condition(match_result), bench_columns] = CompareConst.N_A - - # organize comparsion result table - result_df, header = self.make_result_df(match_result) - - # calculate statistics diff - return self.calc_accuracy(result_df, header) - def modify_compare_data_with_user_mapping(self, npu_df, bench_df): def get_api_indices_dict(op_name_df): api_indices_dict = defaultdict(list) @@ -402,50 +169,90 @@ class MSComparator(Comparator): if is_abandoned: npu_df.loc[index, CompareConst.COMPARE_KEY] = op_name + 'abandoned' - def gen_data_df(self, data_json, stack_json_data): - result = { - CompareConst.OP_NAME: [], - Const.DTYPE: [], - Const.SHAPE: [], - Const.SUMMARY: [], - 'stack_info': [] - } - if self.dump_mode == Const.ALL: - result['data_name'] = [] - elif self.dump_mode == Const.MD5: - result[Const.MD5] = [] - for data_name in data_json['data']: - check_op_str_pattern_valid(data_name) - merge_list = self.gen_merge_list(data_json, data_name, stack_json_data) - if not merge_list: - continue + def process_cell_mapping(self, npu_op_name): + if not npu_op_name: + return CompareConst.N_A + param_grad_flag = Const.PARAMS_GRAD in npu_op_name.split(Const.SEP) + if not param_grad_flag and not re.search(Const.REGEX_FORWARD_BACKWARD, npu_op_name): + return CompareConst.N_A + npu_op_name = npu_op_name.replace("Cell", "Module", 1) + if self.cell_mapping_dict: + # get cell name & class name from op_name + # Cell.fc1.Dense.forward.0.input.0 + cell_name = re.split(r'\.(?:forward|backward|parameters_grad)\.', npu_op_name.split(Const.SEP, 1)[-1])[0] + if cell_name in self.cell_mapping_dict: + npu_op_name = npu_op_name.replace(cell_name, self.cell_mapping_dict[cell_name], 1) + return npu_op_name - op_name_list = merge_list.get(CompareConst.OP_NAME) - summary_list = merge_list.get(Const.SUMMARY) - data_name_list = merge_list.get('data_name') - op_name_reorder, summary_reorder, data_name_reorder = reorder_op_x_list(op_name_list, - summary_list, - data_name_list) - for op_name in op_name_reorder: - result[CompareConst.OP_NAME].append(op_name) - if (CompareConst.INPUT_PATTERN in op_name) or (CompareConst.KWARGS_PATTERN in op_name): - struct = merge_list[CompareConst.INPUT_STRUCT].pop(0) - elif CompareConst.OUTPUT_PATTERN in op_name: - struct = merge_list[CompareConst.OUTPUT_STRUCT].pop(0) - elif CompareConst.PARAMS_PATTERN in op_name: - struct = merge_list[CompareConst.PARAMS_STRUCT].pop(0) - else: - struct = merge_list[CompareConst.PARAMS_GRAD_STRUCT].pop(0) - result[Const.DTYPE].append(struct[0]) - result[Const.SHAPE].append(struct[1]) - if self.dump_mode == Const.MD5: - result[Const.MD5].append(struct[2]) - result[Const.SUMMARY].append(summary_reorder.pop(0)) - result['stack_info'].append(merge_list['stack_info'][0] if self.stack_mode else None) - if self.dump_mode == Const.ALL: - result['data_name'].append(data_name_reorder.pop(0)) - return pd.DataFrame(result) + def process_data_mapping(self, npu_op_name): + return self.data_mapping_dict.get(npu_op_name, npu_op_name) + def assign_npu_df_compare_key(self, npu_df, bench_df): + """ + 处理 npu_df 的 COMPARE_KEY 赋值逻辑 + + :param npu_df: DataFrame,NPU 对比数据 + :param bench_df: DataFrame,Bench 对比数据 + :return: compare_key(name)处理后的 npu_df + """ + # 处理api_mapping映射 + if self.api_mapping: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_internal_api_mapping) + if isinstance(self.api_mapping, str): + self.modify_compare_data_with_user_mapping(npu_df, bench_df) + # 处理cell_mapping映射 + elif self.cell_mapping: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_cell_mapping) + # 处理data_mapping映射 + elif self.data_mapping: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_data_mapping) + else: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] + return npu_df + + def process_compare_key_and_shape(self, npu_df, bench_df): + npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) + npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] + bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] + bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] + return npu_df, bench_df + + def gen_dtype_condition(self, match_result): + """ + dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 + """ + # 如果ms使用了fuzzy_match或data_mapping,不校验dtype,返回全True的DataFrame + if self.fuzzy_match or self.data_mapping: + return pd.Series(True, index=match_result.index) + + npu_dtype = match_result['dtype_x'] + bench_dtype = match_result['dtype_y'] + if self.cross_frame: + npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) + + equal_condition = npu_dtype == bench_dtype + match_condition = ( + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[0])) | + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[1])) + ) + return equal_condition | match_condition + + def read_npy_data(self, dir_path, file_name, load_pt_file=False): + if not file_name: + return None + data_path = os.path.join(dir_path, file_name) + if load_pt_file: + import torch + from msprobe.pytorch.common.utils import load_pt + data_value = load_pt(data_path, True).detach() + if data_value.dtype == torch.bfloat16: + data_value = data_value.to(torch.float32) + data_value = data_value.numpy() + else: + data_value = load_npy(data_path) + return data_value def check_cross_framework(bench_json_path): framework = detect_framework_by_dump_json(bench_json_path) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index e123195202..a331447500 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -54,29 +54,6 @@ class PTComparator(Comparator): mapping_dict = {} return mapping_dict - def read_npy_data(self, dir_path, file_name): - if not file_name: - return None - data_path = os.path.join(dir_path, file_name) - path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE, - FileCheckConst.PT_SUFFIX, False) - data_path = path_checker.common_check() - try: - # detach because numpy can not process gradient information - data_value = load_pt(data_path, to_cpu=True).detach() - except RuntimeError as e: - # 这里捕获 load_pt 中抛出的异常 - logger.error(f"Failed to load the .pt file at {data_path}.") - raise CompareException(CompareException.INVALID_FILE_ERROR) from e - except AttributeError as e: - # 这里捕获 detach 方法抛出的异常 - logger.error(f"Failed to detach the loaded tensor.") - raise CompareException(CompareException.DETACH_ERROR) from e - if data_value.dtype == torch.bfloat16: - data_value = data_value.to(torch.float32) - data_value = data_value.numpy() - return data_value - @staticmethod def process_fuzzy_match(op_name): if not op_name: @@ -102,6 +79,48 @@ class PTComparator(Comparator): df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] return df + def process_compare_key_and_shape(self, npu_df, bench_df): + npu_df = self.assign_df_compare_key(npu_df) + npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] + bench_df = self.assign_df_compare_key(bench_df) + bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] + return npu_df, bench_df + + def gen_dtype_condition(self, match_result): + """ + dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 + """ + npu_dtype = match_result['dtype_x'] + bench_dtype = match_result['dtype_y'] + + equal_condition = npu_dtype == bench_dtype + match_condition = ( + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]))) + return equal_condition | match_condition + + def read_npy_data(self, dir_path, file_name): + if not file_name: + return None + data_path = os.path.join(dir_path, file_name) + path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE, + FileCheckConst.PT_SUFFIX, False) + data_path = path_checker.common_check() + try: + # detach because numpy can not process gradient information + data_value = load_pt(data_path, to_cpu=True).detach() + except RuntimeError as e: + # 这里捕获 load_pt 中抛出的异常 + logger.error(f"Failed to load the .pt file at {data_path}.") + raise CompareException(CompareException.INVALID_FILE_ERROR) from e + except AttributeError as e: + # 这里捕获 detach 方法抛出的异常 + logger.error(f"Failed to detach the loaded tensor.") + raise CompareException(CompareException.DETACH_ERROR) from e + if data_value.dtype == torch.bfloat16: + data_value = data_value.to(torch.float32) + data_value = data_value.numpy() + return data_value + def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) -- Gitee From c4535afbe2cd03bebde6a258968be7ffb5d61685 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 19:42:10 +0800 Subject: [PATCH 09/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 191 ++++++------------ .../msprobe/mindspore/compare/ms_compare.py | 31 ++- .../msprobe/mindspore/compare/utils.py | 30 +++ .../msprobe/pytorch/compare/pt_compare.py | 9 +- .../msprobe/pytorch/compare/utils.py | 47 +++++ 5 files changed, 159 insertions(+), 149 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/mindspore/compare/utils.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/compare/utils.py diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 7f1d105ec2..407dd46ce2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -27,13 +27,12 @@ from msprobe.core.common.exceptions import FileCheckException from msprobe.core.common.file_utils import load_json, remove_path, create_directory from msprobe.core.common.log import logger from msprobe.core.common.utils import CompareException, add_time_with_xlsx, check_op_str_pattern_valid, \ - safe_get_value, set_dump_path, get_dump_mode, check_compare_param, check_configuration_param -from msprobe.core.compare.check import check_dump_json_str, check_graph_mode, check_stack_json_str, \ - check_struct_match, fuzzy_check_op + set_dump_path, get_dump_mode, check_compare_param, check_configuration_param +from msprobe.core.compare.check import check_dump_json_str, check_stack_json_str from msprobe.core.compare.highlight import find_compare_result_error_rows, highlight_rows_xlsx from msprobe.core.compare.multiprocessing_compute import ComparisonResult, _handle_multi_process, _save_cmp_result from msprobe.core.compare.npy_compare import compare_ops_apply, get_error_flag_and_msg -from msprobe.core.compare.utils import merge_tensor, print_compare_ends_info, read_op, get_name_and_state, \ +from msprobe.core.compare.utils import merge_tensor, print_compare_ends_info, read_op, \ reorder_op_x_list, set_stack_json_path @@ -119,130 +118,6 @@ class Comparator(ABC): merge_list = merge_tensor(op_parsed_list, self.dump_mode) return merge_list - def check_op(self, npu_dict, bench_dict): - npu_op_name = npu_dict[CompareConst.OP_NAME] - bench_op_name = bench_dict[CompareConst.OP_NAME] - graph_mode = check_graph_mode(safe_get_value(npu_op_name, 0, "npu_op_name"), - safe_get_value(bench_op_name, 0, "bench_op_name")) - - frame_name = getattr(self, "frame_name") - if frame_name == "PTComparator": - from msprobe.pytorch.compare.match import graph_mapping - if graph_mode: - return graph_mapping.match(npu_op_name[0], bench_op_name[0]) - struct_match = check_struct_match(npu_dict, bench_dict) - if not self.fuzzy_match: - name_match = npu_op_name == bench_op_name - return name_match and struct_match - try: - name_match = fuzzy_check_op(npu_op_name, bench_op_name) - except Exception as err: - logger.warning("%s and %s can not fuzzy match." % (npu_op_name, bench_op_name)) - name_match = False - return name_match and struct_match - - def match_op(self, npu_queue, bench_queue): - for b_index, b_op in enumerate(bench_queue[0: -1]): - if self.check_op(npu_queue[-1], b_op): - return len(npu_queue) - 1, b_index - if self.check_op(npu_queue[-1], bench_queue[-1]): - return len(npu_queue) - 1, len(bench_queue) - 1 - for n_index, n_op in enumerate(npu_queue[0: -1]): - if self.check_op(n_op, bench_queue[-1]): - return n_index, len(bench_queue) - 1 - return -1, -1 - - def merge_data(self, json_data, stack_json_data): - ops_all = {} - for op_name in json_data.get('data', {}): - merge_list = self.gen_merge_list(json_data, op_name, stack_json_data) - if merge_list: - struct_to_index_mapping = { - CompareConst.INPUT_STRUCT: 0, - CompareConst.OUTPUT_STRUCT: 0, - CompareConst.PARAMS_STRUCT: 0, - CompareConst.PARAMS_GRAD_STRUCT: 0 - } - - op_name_list = merge_list.get(CompareConst.OP_NAME) - summary_list = merge_list.get(Const.SUMMARY) - data_name_list = merge_list.get('data_name') - op_name_reorder, summary_reorder, data_name_reorder = reorder_op_x_list(op_name_list, - summary_list, - data_name_list) - for index, op_full_name in enumerate(op_name_reorder): - data_name = data_name_reorder[index] if data_name_reorder else None - - _, state = get_name_and_state(op_full_name) - struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state) - if not struct_key: - continue - ops_all[op_full_name] = { - CompareConst.STRUCT: safe_get_value(merge_list, struct_to_index_mapping.get(struct_key), - "merge_list", key=struct_key), - CompareConst.SUMMARY: safe_get_value(summary_reorder, index, "summary_reorder"), - 'data_name': data_name, - 'stack_info': merge_list.get('stack_info') - } - struct_to_index_mapping[struct_key] += 1 - return ops_all - - def compare_by_op(self, npu_op_name, bench_op_name, op_name_mapping_dict, input_param): - """ - :param npu_op_name: excel中的NPU_Name,例如:MintFunctional.conv2d.0.forward.input.3.0 - :param bench_op_name: excel中的Bench_Name,例如:Functional.conv2d.0.forward.input.3.0 - :param op_name_mapping_dict: op_name和npy或pt文件的映射关系 - :param input_param: npu_json_path/bench_json_path/stack_json_path等参数 - :return: result_list,包含余弦相似度、最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率和错误信息 - 用于读取excel中的NPU_Name和Bench_Name,根据映射关系找到npy或pt文件,然后读取文件中的数据进行比较,计算余弦相似度、欧式距离 - 最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率并生成错误信息 - """ - error_file, relative_err, error_flag = None, None, False - - data_name_pair = op_name_mapping_dict.get(npu_op_name) - npu_data_name = data_name_pair[0] - bench_data_name = data_name_pair[1] - - if str(npu_data_name) == '-1': # 没有npu真实数据 - n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True - elif str(bench_data_name) == '-1': # 没有bench真实数据 - n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True - error_file = 'no_bench_data' - else: - npu_dir = input_param.get("npu_dump_data_dir") - bench_dir = input_param.get("bench_dump_data_dir") - try: - frame_name = getattr(self, "frame_name") - read_npy_data = getattr(self, "read_npy_data") - if frame_name == "MSComparator": - n_value = read_npy_data(npu_dir, npu_data_name) - if self.cross_frame: - b_value = read_npy_data(bench_dir, bench_data_name, load_pt_file=True) - else: - b_value = read_npy_data(bench_dir, bench_data_name) - else: - n_value = read_npy_data(npu_dir, npu_data_name) - b_value = read_npy_data(bench_dir, bench_data_name) - except IOError as error: - error_file = error.filename - n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE - error_flag = True - except (FileCheckException, CompareException): - error_file = npu_data_name - n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE - error_flag = True - - # 通过n_value, b_value同时得到错误标志和错误信息 - n_value, b_value, error_flag, err_msg = get_error_flag_and_msg(n_value, b_value, - error_flag=error_flag, error_file=error_file) - - result_list, err_msg = compare_ops_apply(n_value, b_value, error_flag, err_msg) - - if self.fuzzy_match and npu_op_name != bench_op_name and bench_op_name != CompareConst.N_A: - err_msg += " Fuzzy matching data, the comparison accuracy may be affected." - result_list.append(err_msg) - return result_list - @staticmethod def process_output_file(output_path, suffix): file_name = add_time_with_xlsx("compare_result" + suffix) @@ -507,6 +382,62 @@ class Comparator(ABC): print_compare_ends_info() + def compare_by_op(self, npu_op_name, bench_op_name, op_name_mapping_dict, input_param): + """ + :param npu_op_name: excel中的NPU_Name,例如:MintFunctional.conv2d.0.forward.input.3.0 + :param bench_op_name: excel中的Bench_Name,例如:Functional.conv2d.0.forward.input.3.0 + :param op_name_mapping_dict: op_name和npy或pt文件的映射关系 + :param input_param: npu_json_path/bench_json_path/stack_json_path等参数 + :return: result_list,包含余弦相似度、最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率和错误信息 + 用于读取excel中的NPU_Name和Bench_Name,根据映射关系找到npy或pt文件,然后读取文件中的数据进行比较,计算余弦相似度、欧式距离 + 最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率并生成错误信息 + """ + error_file, relative_err, error_flag = None, None, False + + data_name_pair = op_name_mapping_dict.get(npu_op_name) + npu_data_name = data_name_pair[0] + bench_data_name = data_name_pair[1] + + if str(npu_data_name) == '-1': # 没有npu真实数据 + n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True + elif str(bench_data_name) == '-1': # 没有bench真实数据 + n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True + error_file = 'no_bench_data' + else: + npu_dir = input_param.get("npu_dump_data_dir") + bench_dir = input_param.get("bench_dump_data_dir") + try: + frame_name = getattr(self, "frame_name") + read_npy_data = getattr(self, "read_npy_data") + if frame_name == "MSComparator": + n_value = read_npy_data(npu_dir, npu_data_name) + if self.cross_frame: + b_value = read_npy_data(bench_dir, bench_data_name, load_pt_file=True) + else: + b_value = read_npy_data(bench_dir, bench_data_name) + else: + n_value = read_npy_data(npu_dir, npu_data_name) + b_value = read_npy_data(bench_dir, bench_data_name) + except IOError as error: + error_file = error.filename + n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE + error_flag = True + except (FileCheckException, CompareException): + error_file = npu_data_name + n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE + error_flag = True + + # 通过n_value, b_value同时得到错误标志和错误信息 + n_value, b_value, error_flag, err_msg = get_error_flag_and_msg(n_value, b_value, + error_flag=error_flag, error_file=error_file) + + result_list, err_msg = compare_ops_apply(n_value, b_value, error_flag, err_msg) + + if self.fuzzy_match and npu_op_name != bench_op_name and bench_op_name != CompareConst.N_A: + err_msg += " Fuzzy matching data, the comparison accuracy may be affected." + result_list.append(err_msg) + return result_list + def compare_ops(self, idx, dump_path_dict, result_df, lock, input_param): cos_result = [] euc_dist_result = [] @@ -561,3 +492,7 @@ class Comparator(ABC): except ValueError as e: logger.error('result dataframe is not found.') raise CompareException(CompareException.INVALID_DATA_ERROR) from e + + @abstractmethod + def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: + pass diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 4654ba21b8..30874ae0bd 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -17,18 +17,17 @@ import os import re from collections import defaultdict -import numpy as np import pandas as pd from msprobe.core.common.const import CompareConst, Const -from msprobe.core.common.file_utils import load_json, load_npy, load_yaml +from msprobe.core.common.file_utils import load_yaml from msprobe.core.common.log import logger -from msprobe.core.common.utils import CompareException, check_op_str_pattern_valid, detect_framework_by_dump_json -from msprobe.core.compare.acc_compare import Comparator, ModeConfig +from msprobe.core.common.utils import CompareException, detect_framework_by_dump_json +from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison from msprobe.core.compare.check import dtype_mapping from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping -from msprobe.core.compare.utils import reorder_op_x_list -from msprobe.core.compare.acc_compare import setup_comparison +from msprobe.mindspore.compare.utils import read_npy_data +from msprobe.pytorch.compare.utils import read_pt_data class MappingConfig: @@ -239,20 +238,14 @@ class MSComparator(Comparator): ) return equal_condition | match_condition - def read_npy_data(self, dir_path, file_name, load_pt_file=False): - if not file_name: - return None - data_path = os.path.join(dir_path, file_name) - if load_pt_file: - import torch - from msprobe.pytorch.common.utils import load_pt - data_value = load_pt(data_path, True).detach() - if data_value.dtype == torch.bfloat16: - data_value = data_value.to(torch.float32) - data_value = data_value.numpy() + def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: + n_value = read_npy_data(npu_dir, npu_data_name) + if self.cross_frame: + b_value = read_pt_data(bench_dir, bench_data_name) else: - data_value = load_npy(data_path) - return data_value + b_value = read_npy_data(bench_dir, bench_data_name) + return n_value, b_value + def check_cross_framework(bench_json_path): framework = detect_framework_by_dump_json(bench_json_path) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/utils.py b/debug/accuracy_tools/msprobe/mindspore/compare/utils.py new file mode 100644 index 0000000000..737cdb55d2 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/compare/utils.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from msprobe.core.common.file_utils import load_npy, FileChecker, FileCheckConst + + +def read_npy_data(dir_path, file_name): + if not file_name: + return None + + data_path = os.path.join(dir_path, file_name) + path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE, + FileCheckConst.NUMPY_SUFFIX, False) + data_path = path_checker.common_check() + data_value = load_npy(data_path) + return data_value diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index a331447500..f4ccfe6543 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -20,11 +20,11 @@ import torch from msprobe.core.common.const import FileCheckConst, CompareConst, Const from msprobe.core.common.file_utils import FileChecker, load_yaml from msprobe.core.common.utils import CompareException -from msprobe.core.compare.acc_compare import Comparator, ModeConfig +from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison from msprobe.core.compare.utils import rename_api from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import load_pt -from msprobe.core.compare.acc_compare import setup_comparison +from msprobe.pytorch.compare.utils import read_pt_data class PTComparator(Comparator): @@ -98,6 +98,11 @@ class PTComparator(Comparator): (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]))) return equal_condition | match_condition + def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: + n_value = read_pt_data(npu_dir, npu_data_name) + b_value = read_pt_data(bench_dir, bench_data_name) + return n_value, b_value + def read_npy_data(self, dir_path, file_name): if not file_name: return None diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/utils.py b/debug/accuracy_tools/msprobe/pytorch/compare/utils.py new file mode 100644 index 0000000000..16473ff386 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/compare/utils.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch + +from msprobe.core.common.utils import logger, CompareException +from msprobe.core.common.file_utils import FileChecker, FileCheckConst +from msprobe.pytorch.common.utils import load_pt + + +def read_pt_data(dir_path, file_name): + if not file_name: + return None + + data_path = os.path.join(dir_path, file_name) + path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE, + FileCheckConst.PT_SUFFIX, False) + data_path = path_checker.common_check() + try: + # detach because numpy can not process gradient information + data_value = load_pt(data_path, to_cpu=True).detach() + except RuntimeError as e: + # 这里捕获 load_pt 中抛出的异常 + logger.error(f"Failed to load the .pt file at {data_path}.") + raise CompareException(CompareException.INVALID_FILE_ERROR) from e + except AttributeError as e: + # 这里捕获 detach 方法抛出的异常 + logger.error(f"Failed to detach the loaded tensor.") + raise CompareException(CompareException.DETACH_ERROR) from e + if data_value.dtype == torch.bfloat16: + data_value = data_value.to(torch.float32) + data_value = data_value.numpy() + return data_value -- Gitee From 373507a44325d75c154f4e273895b07e2889405a Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 20:08:37 +0800 Subject: [PATCH 10/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 98 +++---- .../msprobe/core/compare/check.py | 87 +----- .../msprobe/core/compare/utils.py | 276 ------------------ .../msprobe/mindspore/compare/ms_compare.py | 32 +- .../msprobe/mindspore/compare/utils.py | 10 + .../msprobe/pytorch/compare/pt_compare.py | 34 +-- 6 files changed, 69 insertions(+), 468 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 407dd46ce2..28e6742e60 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -49,42 +49,6 @@ class ComparisonConfig: layer_mapping: dict -def setup_comparison(input_param, output_path, **kwargs) -> ComparisonConfig: - """公共的前置处理逻辑,返回封装后的 ComparisonConfig 对象""" - try: - config = ComparisonConfig( - dump_mode='', - stack_mode=False, - auto_analyze=kwargs.get('auto_analyze', True), - fuzzy_match=kwargs.get('fuzzy_match', False), - data_mapping=kwargs.get('data_mapping', {}), - suffix=kwargs.get('suffix', ''), - cell_mapping=kwargs.get('cell_mapping', {}), - api_mapping=kwargs.get('api_mapping', {}), - layer_mapping=kwargs.get('layer_mapping', {}), - ) - - set_dump_path(input_param) - config.dump_mode = get_dump_mode(input_param) - - # set stack_mode and set "stack_json_path" in input_param - if 'stack_json_path' in input_param: - config.stack_mode = kwargs.get('stack_mode', False) - else: - config.stack_mode = set_stack_json_path(input_param) - - check_configuration_param(config.stack_mode, config.auto_analyze, config.fuzzy_match, - input_param.get('is_print_compare_log', True)) - create_directory(output_path) - check_compare_param(input_param, output_path, config.dump_mode, config.stack_mode) - - return config - - except (CompareException, FileCheckException) as error: - logger.error('Compare failed. Please check the arguments and do it again!') - raise CompareException(error.code) from error - - class ModeConfig: def __init__(self, stack_mode=False, auto_analyze=True, fuzzy_match=False, dump_mode=None): self.stack_mode = stack_mode @@ -100,6 +64,13 @@ class Comparator(ABC): self.fuzzy_match = mode_config.fuzzy_match self.dump_mode = mode_config.dump_mode + @staticmethod + def process_output_file(output_path, suffix): + file_name = add_time_with_xlsx("compare_result" + suffix) + file_path = os.path.join(os.path.realpath(output_path), file_name) + remove_path(file_path) + return file_path + def gen_merge_list(self, json_data, op_name, stack_json_data): op_data = json_data['data'][op_name] check_dump_json_str(op_data, op_name) @@ -118,13 +89,6 @@ class Comparator(ABC): merge_list = merge_tensor(op_parsed_list, self.dump_mode) return merge_list - @staticmethod - def process_output_file(output_path, suffix): - file_name = add_time_with_xlsx("compare_result" + suffix) - file_path = os.path.join(os.path.realpath(output_path), file_name) - remove_path(file_path) - return file_path - def gen_data_df(self, data_json, stack_json_data): result = { CompareConst.OP_NAME: [], @@ -407,17 +371,7 @@ class Comparator(ABC): npu_dir = input_param.get("npu_dump_data_dir") bench_dir = input_param.get("bench_dump_data_dir") try: - frame_name = getattr(self, "frame_name") - read_npy_data = getattr(self, "read_npy_data") - if frame_name == "MSComparator": - n_value = read_npy_data(npu_dir, npu_data_name) - if self.cross_frame: - b_value = read_npy_data(bench_dir, bench_data_name, load_pt_file=True) - else: - b_value = read_npy_data(bench_dir, bench_data_name) - else: - n_value = read_npy_data(npu_dir, npu_data_name) - b_value = read_npy_data(bench_dir, bench_data_name) + n_value, b_value = self.read_real_data(npu_dir, npu_data_name, bench_dir, bench_data_name) except IOError as error: error_file = error.filename n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE @@ -496,3 +450,39 @@ class Comparator(ABC): @abstractmethod def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: pass + + +def setup_comparison(input_param, output_path, **kwargs) -> ComparisonConfig: + """公共的前置处理逻辑,返回封装后的 ComparisonConfig 对象""" + try: + config = ComparisonConfig( + dump_mode='', + stack_mode=False, + auto_analyze=kwargs.get('auto_analyze', True), + fuzzy_match=kwargs.get('fuzzy_match', False), + data_mapping=kwargs.get('data_mapping', {}), + suffix=kwargs.get('suffix', ''), + cell_mapping=kwargs.get('cell_mapping', {}), + api_mapping=kwargs.get('api_mapping', {}), + layer_mapping=kwargs.get('layer_mapping', {}), + ) + + set_dump_path(input_param) + config.dump_mode = get_dump_mode(input_param) + + # set stack_mode and set "stack_json_path" in input_param + if 'stack_json_path' in input_param: + config.stack_mode = kwargs.get('stack_mode', False) + else: + config.stack_mode = set_stack_json_path(input_param) + + check_configuration_param(config.stack_mode, config.auto_analyze, config.fuzzy_match, + input_param.get('is_print_compare_log', True)) + create_directory(output_path) + check_compare_param(input_param, output_path, config.dump_mode, config.stack_mode) + + return config + + except (CompareException, FileCheckException) as error: + logger.error('Compare failed. Please check the arguments and do it again!') + raise CompareException(error.code) from error diff --git a/debug/accuracy_tools/msprobe/core/compare/check.py b/debug/accuracy_tools/msprobe/core/compare/check.py index 9429d7ffa1..0ce58aaf32 100644 --- a/debug/accuracy_tools/msprobe/core/compare/check.py +++ b/debug/accuracy_tools/msprobe/core/compare/check.py @@ -14,9 +14,8 @@ # limitations under the License. from msprobe.core.common.log import logger -from msprobe.core.compare.utils import rename_api from msprobe.core.common.utils import check_op_str_pattern_valid, CompareException -from msprobe.core.common.const import CompareConst, Const +from msprobe.core.common.const import Const dtype_mapping = { "Int8": "torch.int8", @@ -37,90 +36,6 @@ dtype_mapping = { } -def compare_op_dict_struct(npu_dict, bench_dict): - return all(npu_dict.get(key) == bench_dict.get(key) for key in CompareConst.STRUCT_COMPARE_KEY) - - -def check_struct_match(npu_dict, bench_dict): - is_match = compare_op_dict_struct(npu_dict, bench_dict) - if not is_match: - struct_match_list = [] - try: - for i, key in enumerate(CompareConst.STRUCT_COMPARE_KEY): - # 首先额外检查input_struct是否空,input_struct不可能为空 - if i == 0 and (not npu_dict.get(key, []) or not bench_dict.get(key, [])): - return False - struct_match_list.append(check_type_shape_match(npu_dict.get(key, []), bench_dict.get(key, []))) - except CompareException as error: - err_msg = f'index out of bounds error occurs in npu or bench api, please check!\n' \ - f'npu_dict: {npu_dict}' \ - f'bench_dict: {bench_dict}' - logger.error(err_msg) - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error - is_match = all(struct_match_list) - return is_match - - -def check_type_shape_match(npu_struct, bench_struct): - """ - further check dtypes with a dtype mapping list when dtypes are not entirely consistent. - """ - if len(npu_struct) != len(bench_struct): - return False - if not npu_struct and not bench_struct: - return True - - struct_match = False - for npu_type_shape, bench_type_shape in zip(npu_struct, bench_struct): - try: - npu_type = npu_type_shape[0] - npu_shape = npu_type_shape[1] - bench_type = bench_type_shape[0] - bench_shape = bench_type_shape[1] - except IndexError as error: - logger.error(f'length of npu_type_shape: {npu_type_shape} and bench_type_shape: {bench_type_shape} ' - f'should both be 2, please check!') - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error - shape_match = npu_shape == bench_shape - type_match = ((npu_type == bench_type) or - any(npu_type in group and bench_type in group for group in CompareConst.DTYPE_MATCH_GROUPS)) - struct_match = shape_match and type_match - if not struct_match: - return False - return struct_match - - -def check_graph_mode(a_op_name, b_op_name): - if Const.ATEN in a_op_name and Const.ATEN not in b_op_name: - return True - if Const.ATEN not in a_op_name and Const.ATEN in b_op_name: - return True - return False - - -def fuzzy_check_op(npu_name_list, bench_name_list): - # 先检查api里的item长度是否相等,如果不是parameters_grad, 必然有input或者output,长度不可能为0 - # 如果是parameters_grad, "parameters_grad"字段的字典不会是空字典,因此len>=1 - if len(npu_name_list) == 0 or len(bench_name_list) == 0 or len(npu_name_list) != len(bench_name_list): - return False - is_match = True - for npu_name, bench_name in zip(npu_name_list, bench_name_list): - is_match = fuzzy_check_name(npu_name, bench_name) - if not is_match: - break - return is_match - - -def fuzzy_check_name(npu_name, bench_name): - if Const.FORWARD in npu_name and Const.FORWARD in bench_name: - is_match = rename_api(npu_name, Const.FORWARD) == rename_api(bench_name, Const.FORWARD) - elif Const.BACKWARD in npu_name and Const.BACKWARD in bench_name: - is_match = rename_api(npu_name, Const.BACKWARD) == rename_api(bench_name, Const.BACKWARD) - else: - is_match = npu_name == bench_name - return is_match - - def check_dump_json_str(op_data, op_name): input_list = op_data.get(Const.INPUT_ARGS, None) if op_data.get(Const.INPUT_ARGS, None) else op_data.get( Const.INPUT, None) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index e93ff775e7..bcd3c3c454 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -191,73 +191,6 @@ def gen_op_item(op_data, op_name): return op_item -def resolve_api_special_parameters(data_dict, full_op_name, item_list): - """ - Function Description: - 解析下面格式的数据, 是api参数的一种特殊格式 - { - "last_hidden_state": { - "type": "torch.Tensor", - "dtype": "torch.bfloat16", - ... - }, - "loss": { - "type": "torch.Tensor", - "dtype": "torch.float32", - ... - } - } - Parameter: - data_dict: 字典格式的数据 - full_op_name: 参数的全名字符串 - item_list: 参数信息集合 - """ - for key, value in data_dict.items(): - if isinstance(value, dict): - parsed_item = value - parts = full_op_name.split(Const.SEP) - parts.insert(-1, key) - full_op_name_new = ".".join(parts) - parsed_item['full_op_name'] = full_op_name_new - item_list.append(parsed_item) - - -def process_summary_data(summary_data): - """处理summary_data中的nan值,返回处理后的列表""" - return [CompareConst.NAN if isinstance(x, float) and math.isnan(x) else x for x in summary_data] - - -def get_rela_diff_summary_mode(result_item, npu_summary_data, bench_summary_data, err_msg): - start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) - warning_flag = False - for i, (npu_val, bench_val) in enumerate(zip(npu_summary_data, bench_summary_data)): - if all(isinstance(val, (float, int)) and not isinstance(val, bool) for val in [npu_val, bench_val]): - diff = npu_val - bench_val - if math.isnan(diff): - diff = CompareConst.NAN - relative = CompareConst.NAN - else: - if bench_val != 0: - relative = str(abs((diff / bench_val) * 100)) + '%' - else: - relative = CompareConst.N_A - magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + CompareConst.EPSILON) - if magnitude_diff > CompareConst.MAGNITUDE: - warning_flag = True - result_item[start_idx + i] = diff - result_item[start_idx + i + CompareConst.STATISTICS_INDICATOR_NUM] = relative - else: - result_item[start_idx + i] = CompareConst.N_A - result_item[start_idx + i + CompareConst.STATISTICS_INDICATOR_NUM] = CompareConst.N_A - - accuracy_check = CompareConst.WARNING if warning_flag else "" - err_msg += "Need double check api accuracy." if warning_flag else "" - for i in range(start_idx, len(result_item)): - if str(result_item[i]) in ('inf', '-inf', 'nan'): - result_item[i] = f'{result_item[i]}\t' - return result_item, accuracy_check, err_msg - - @dataclass class ApiItemInfo: name: str @@ -265,215 +198,6 @@ class ApiItemInfo: stack_info: list -def stack_column_process(result_item, has_stack, index, key, npu_stack_info): - if has_stack and index == 0 and key == CompareConst.INPUT_STRUCT: - result_item.extend(npu_stack_info) - else: - result_item.append(CompareConst.NONE) - return result_item - - -def result_item_init(n_info, b_info, dump_mode): - n_len = len(n_info.struct) - b_len = len(b_info.struct) - struct_long_enough = (n_len > 2 and b_len > 2) if dump_mode == Const.MD5 else (n_len > 1 and b_len > 1) - if struct_long_enough: - result_item = [ - n_info.name, b_info.name, n_info.struct[0], b_info.struct[0], n_info.struct[1], b_info.struct[1] - ] - if dump_mode == Const.MD5: - md5_compare_result = CompareConst.PASS if n_info.struct[2] == b_info.struct[2] else CompareConst.DIFF - result_item.extend([n_info.struct[2], b_info.struct[2], md5_compare_result]) - elif dump_mode == Const.SUMMARY: - result_item.extend([" "] * 8) # 8个统计量数据情况的比对指标 - else: - result_item.extend([" "] * 6) # 6个真实数据情况的比对指标 - else: - err_msg = "index out of bounds error will occur in result_item_init, please check!\n" \ - f"npu_info_struct is {n_info.struct}\n" \ - f"bench_info_struct is {b_info.struct}" - logger.error(err_msg) - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) - return result_item - - -def count_struct(op_dict): - parts = [ - CompareConst.OP_NAME, - CompareConst.INPUT_STRUCT, - CompareConst.OUTPUT_STRUCT, - CompareConst.PARAMS_STRUCT, - CompareConst.PARAMS_GRAD_STRUCT - ] - lengths = [len(op_dict.get(part, [])) for part in parts] - num = lengths[0] - if num != sum(lengths[1:]): - logger.error(f"Length of names and structs of op_dict not match. Please check! op_dict: {op_dict}") - raise CompareException(CompareException.NAMES_STRUCTS_MATCH_ERROR) - return tuple(lengths) - - -def get_accuracy(result, n_dict, b_dict, dump_mode): - def get_accuracy_core(n_start, n_len, b_start, b_len, key): - min_len = min(n_len, b_len) - npu_stack_info = n_dict.get("stack_info", None) - bench_stack_info = b_dict.get("stack_info", None) - has_stack = npu_stack_info and bench_stack_info - - if dump_mode == Const.ALL: - npu_data_name_list = n_dict.get("data_name", None) - bench_data_name_list = b_dict.get("data_name", None) - - for index in range(min_len): - n_name = safe_get_value(n_dict, n_start + index, "n_dict", key="op_name") - b_name = safe_get_value(b_dict, b_start + index, "b_dict", key="op_name") - n_struct = safe_get_value(n_dict, index, "n_dict", key=key) - b_struct = safe_get_value(b_dict, index, "b_dict", key=key) - err_msg = "" - - npu_info = ApiItemInfo(n_name, n_struct, npu_stack_info) - bench_info = ApiItemInfo(b_name, b_struct, bench_stack_info) - result_item = result_item_init(npu_info, bench_info, dump_mode) - - if dump_mode == Const.MD5: - result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) - result.append(result_item) - continue - - npu_summary_data = safe_get_value(n_dict, n_start + index, "n_dict", key=CompareConst.SUMMARY) - bench_summary_data = safe_get_value(b_dict, b_start + index, "b_dict", key=CompareConst.SUMMARY) - result_item.extend(process_summary_data(npu_summary_data)) - result_item.extend(process_summary_data(bench_summary_data)) - - if dump_mode == Const.SUMMARY: - result_item, accuracy_check, err_msg = get_rela_diff_summary_mode(result_item, npu_summary_data, - bench_summary_data, err_msg) - - result_item.append(accuracy_check if dump_mode == Const.SUMMARY else CompareConst.ACCURACY_CHECK_YES) - result_item.append(err_msg) - result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) - if dump_mode == Const.ALL: - npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") - bench_data_name = safe_get_value(bench_data_name_list, b_start + index, "bench_data_name_list") - result_item.append([npu_data_name, bench_data_name]) - - result.append(result_item) - - if n_len > b_len: - for index in range(b_len, n_len): - try: - n_name = n_dict['op_name'][n_start + index] - n_struct = n_dict[key][index] - if dump_mode == Const.MD5: - result_item = [ - n_name, CompareConst.NAN, n_struct[0], CompareConst.NAN, n_struct[1], CompareConst.NAN, - n_struct[2], CompareConst.NAN, CompareConst.NAN - ] - result.append(result_item) - continue - result_item = [ - n_name, CompareConst.NAN, n_struct[0], CompareConst.NAN, n_struct[1], CompareConst.NAN, - " ", " ", " ", " ", " ", " " - ] - summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index] - result_item.extend(summary_data) - summary_data = [CompareConst.NAN for _ in range(len(n_dict.get(CompareConst.SUMMARY)[0]))] - result_item.extend(summary_data) - except IndexError as e: - err_msg = "index out of bounds error occurs, please check!\n" \ - f"n_dict is {n_dict}" - logger.error(err_msg) - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from e - - err_msg = "" - result_item.append(CompareConst.ACCURACY_CHECK_YES) - result_item.append(err_msg) - result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) - if dump_mode == Const.ALL: - npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") - result_item.append([npu_data_name, "-1"]) - - result.append(result_item) - - n_num, n_num_input, n_num_output, n_num_params, n_num_params_grad = count_struct(n_dict) - b_num, b_num_input, b_num_output, b_num_params, b_num_params_grad = count_struct(b_dict) - - get_accuracy_core(0, n_num_input, 0, b_num_input, CompareConst.INPUT_STRUCT) - get_accuracy_core(n_num_input + n_num_output, n_num_params, b_num_input + b_num_output, b_num_params, - CompareConst.PARAMS_STRUCT) - get_accuracy_core(n_num_input, n_num_output, b_num_input, b_num_output, CompareConst.OUTPUT_STRUCT) - get_accuracy_core(n_num_input + n_num_output + n_num_params, n_num_params_grad, - b_num_input + b_num_output + b_num_params, b_num_params_grad, - CompareConst.PARAMS_GRAD_STRUCT) - - -def append_stack_info(result_item, npu_stack_info, index): - """添加堆栈信息到 result_item""" - if npu_stack_info and index == 0: - result_item.extend(npu_stack_info) - else: - result_item.append(CompareConst.NONE) - - -def get_un_match_accuracy(result, n_dict, dump_mode): - npu_stack_info = n_dict.get("stack_info", None) - bench_name, bench_type, bench_shape = CompareConst.N_A, CompareConst.N_A, CompareConst.N_A - - struct_to_index_mapping = { - CompareConst.INPUT_STRUCT: 0, - CompareConst.OUTPUT_STRUCT: 0, - CompareConst.PARAMS_STRUCT: 0, - CompareConst.PARAMS_GRAD_STRUCT: 0 - } - - op_name_list = n_dict.get(CompareConst.OP_NAME) - summary_list = n_dict.get(Const.SUMMARY) - data_name_list = n_dict.get('data_name') - op_name_reorder, summary_reorder, _ = reorder_op_x_list(op_name_list, - summary_list, - data_name_list) - for index, n_name in enumerate(op_name_reorder): - _, state = get_name_and_state(n_name) - struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state) - if not struct_key: - continue - n_struct = safe_get_value(n_dict, struct_to_index_mapping.get(struct_key), "n_dict", key=struct_key) - struct_to_index_mapping[struct_key] += 1 - - try: - result_item = [n_name, bench_name, n_struct[0], bench_type, n_struct[1], bench_shape] - except IndexError as e: - err_msg = "index out of bounds error occurs, please check!\n" \ - f"op_name of n_dict is {n_dict['op_name']}\n" \ - f"input_struct of n_dict is {n_dict[CompareConst.INPUT_STRUCT]}\n" \ - f"output_struct of n_dict is {n_dict[CompareConst.OUTPUT_STRUCT]}" - logger.error(err_msg) - raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from e - - if dump_mode == Const.MD5: - result_item.extend([CompareConst.N_A] * 3) - append_stack_info(result_item, npu_stack_info, index) - result.append(result_item) - continue - if dump_mode == Const.SUMMARY: - result_item.extend([CompareConst.N_A] * 8) # 8个统计量数据情况的比对指标 - if dump_mode == Const.ALL: - result_item.extend([CompareConst.N_A] * 6) # 6个真实数据情况的比对指标 - - npu_summary_data = safe_get_value(summary_reorder, index, "summary_reorder") - bench_summary_data = [CompareConst.N_A] * 4 - result_item.extend(npu_summary_data) - result_item.extend(bench_summary_data) - err_msg = CompareConst.NO_BENCH - accuracy_check_res = CompareConst.N_A - result_item.append(accuracy_check_res) - result_item.append(err_msg) - append_stack_info(result_item, npu_stack_info, index) - if dump_mode == Const.ALL and result_item[1] == CompareConst.N_A: - result_item.extend([["-1", "-1"]]) - result.append(result_item) - - def merge_tensor(tensor_list, dump_mode): op_dict = {} op_dict["op_name"] = [] diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 30874ae0bd..fc4fe2d930 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -22,11 +22,11 @@ import pandas as pd from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.file_utils import load_yaml from msprobe.core.common.log import logger -from msprobe.core.common.utils import CompareException, detect_framework_by_dump_json +from msprobe.core.common.utils import CompareException from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison from msprobe.core.compare.check import dtype_mapping from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping -from msprobe.mindspore.compare.utils import read_npy_data +from msprobe.mindspore.compare.utils import read_npy_data, check_cross_framework from msprobe.pytorch.compare.utils import read_pt_data @@ -114,17 +114,14 @@ class MSComparator(Comparator): else: return npu_op_name - def modify_compare_data_with_user_mapping(self, npu_df, bench_df): - def get_api_indices_dict(op_name_df): - api_indices_dict = defaultdict(list) - for op_index, name in enumerate(op_name_df[CompareConst.OP_NAME]): - api = self.get_api_name(name.split(Const.SEP)) - api_indices_dict[api].append(op_index) - return api_indices_dict - - ms_api_indices_dict = get_api_indices_dict(npu_df) - pt_api_indices_dict = get_api_indices_dict(bench_df) + def get_api_indices_dict(self, op_name_df): + api_indices_dict = defaultdict(list) + for op_index, name in enumerate(op_name_df[CompareConst.OP_NAME]): + api = self.get_api_name(name.split(Const.SEP)) + api_indices_dict[api].append(op_index) + return api_indices_dict + def modify_compare_data_with_user_mapping(self, npu_df, bench_df): def gen_input_compare_key(pattern, term): flag = True for i, prefix in enumerate(mapping_dict.get(f'ms_{term}')): @@ -135,6 +132,9 @@ class MSComparator(Comparator): flag = False return flag + ms_api_indices_dict = self.get_api_indices_dict(npu_df) + pt_api_indices_dict = self.get_api_indices_dict(bench_df) + for mapping_dict in self.api_mapping_dict: keys_to_compare = [ ('ms_args', 'pt_args'), @@ -247,14 +247,6 @@ class MSComparator(Comparator): return n_value, b_value -def check_cross_framework(bench_json_path): - framework = detect_framework_by_dump_json(bench_json_path) - if framework == Const.PT_FRAMEWORK: - return True - else: - return False - - def ms_compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/utils.py b/debug/accuracy_tools/msprobe/mindspore/compare/utils.py index 737cdb55d2..f8c0a810eb 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/utils.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/utils.py @@ -15,7 +15,9 @@ import os +from msprobe.core.common.const import Const from msprobe.core.common.file_utils import load_npy, FileChecker, FileCheckConst +from msprobe.core.common.utils import detect_framework_by_dump_json def read_npy_data(dir_path, file_name): @@ -28,3 +30,11 @@ def read_npy_data(dir_path, file_name): data_path = path_checker.common_check() data_value = load_npy(data_path) return data_value + + +def check_cross_framework(bench_json_path): + framework = detect_framework_by_dump_json(bench_json_path) + if framework == Const.PT_FRAMEWORK: + return True + else: + return False diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index f4ccfe6543..99a87e7e43 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -13,17 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os.path - -import torch - -from msprobe.core.common.const import FileCheckConst, CompareConst, Const -from msprobe.core.common.file_utils import FileChecker, load_yaml -from msprobe.core.common.utils import CompareException +from msprobe.core.common.const import CompareConst, Const +from msprobe.core.common.file_utils import load_yaml from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison from msprobe.core.compare.utils import rename_api -from msprobe.pytorch.common.log import logger -from msprobe.pytorch.common.utils import load_pt from msprobe.pytorch.compare.utils import read_pt_data @@ -103,29 +96,6 @@ class PTComparator(Comparator): b_value = read_pt_data(bench_dir, bench_data_name) return n_value, b_value - def read_npy_data(self, dir_path, file_name): - if not file_name: - return None - data_path = os.path.join(dir_path, file_name) - path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE, - FileCheckConst.PT_SUFFIX, False) - data_path = path_checker.common_check() - try: - # detach because numpy can not process gradient information - data_value = load_pt(data_path, to_cpu=True).detach() - except RuntimeError as e: - # 这里捕获 load_pt 中抛出的异常 - logger.error(f"Failed to load the .pt file at {data_path}.") - raise CompareException(CompareException.INVALID_FILE_ERROR) from e - except AttributeError as e: - # 这里捕获 detach 方法抛出的异常 - logger.error(f"Failed to detach the loaded tensor.") - raise CompareException(CompareException.DETACH_ERROR) from e - if data_value.dtype == torch.bfloat16: - data_value = data_value.to(torch.float32) - data_value = data_value.numpy() - return data_value - def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) -- Gitee From f9711eec7f2d6b039da05a34561f1b746eb446d2 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 20:40:19 +0800 Subject: [PATCH 11/27] compare reconstruct --- debug/accuracy_tools/msprobe/core/compare/utils.py | 3 +-- .../msprobe/mindspore/compare/ms_compare.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index bcd3c3c454..c8598fc5ac 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -15,14 +15,13 @@ import os import re -import math import zlib from dataclasses import dataclass import numpy as np from msprobe.core.common.const import Const, CompareConst, FileCheckConst -from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger, safe_get_value +from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger from msprobe.core.common.file_utils import check_file_or_directory_path diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index fc4fe2d930..022f05c479 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -115,10 +115,17 @@ class MSComparator(Comparator): return npu_op_name def get_api_indices_dict(self, op_name_df): + """ + 生成一个api对应的所有的input、output等的index的键值对字典 + 示例: + {'Functional.conv2d': [0, 1, 2, 3], + 'Functional.batch_norm': [4, 5, 6, 7, 8] + } + """ api_indices_dict = defaultdict(list) for op_index, name in enumerate(op_name_df[CompareConst.OP_NAME]): - api = self.get_api_name(name.split(Const.SEP)) - api_indices_dict[api].append(op_index) + api_name = self.get_api_name(name.split(Const.SEP)) + api_indices_dict[api_name].append(op_index) return api_indices_dict def modify_compare_data_with_user_mapping(self, npu_df, bench_df): @@ -196,7 +203,9 @@ class MSComparator(Comparator): """ # 处理api_mapping映射 if self.api_mapping: + # 如果用户不传api_mapping.yaml,先使用内置api_mapping.yaml替换npu_op_name npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_internal_api_mapping) + # 如果用户传入api_mapping.yaml,再使用传入api_mapping.yaml进一步替换npu_op_name if isinstance(self.api_mapping, str): self.modify_compare_data_with_user_mapping(npu_df, bench_df) # 处理cell_mapping映射 -- Gitee From 006cf558886a0e89598e467e08e8ff45051123de Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 12 Mar 2025 11:27:37 +0800 Subject: [PATCH 12/27] compare reconstruct --- .../msprobe/core/compare/utils.py | 208 +++++++++++++++++- .../msprobe/pytorch/compare/pt_compare.py | 2 +- .../visualization/builder/msprobe_adapter.py | 18 +- .../visualization/compare/graph_comparator.py | 7 +- .../msprobe/visualization/utils.py | 12 +- 5 files changed, 226 insertions(+), 21 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index c8598fc5ac..a62fb85815 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -15,13 +15,15 @@ import os import re +import math import zlib from dataclasses import dataclass import numpy as np +import pandas as pd from msprobe.core.common.const import Const, CompareConst, FileCheckConst -from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger +from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger, safe_get_value from msprobe.core.common.file_utils import check_file_or_directory_path @@ -323,6 +325,210 @@ def reorder_op_x_list(op_name_list, summary_list, data_name_list): return op_name_reorder, summary_reorder, data_name_reorder +def process_summary_data(summary_data): + """处理summary_data中的nan值,返回处理后的列表""" + return [CompareConst.NAN if isinstance(x, float) and math.isnan(x) else x for x in summary_data] + + +def get_rela_diff_summary_mode(result_item, npu_summary_data, bench_summary_data, err_msg): + start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) + warning_flag = False + for i, (npu_val, bench_val) in enumerate(zip(npu_summary_data, bench_summary_data)): + if all(isinstance(val, (float, int)) and not isinstance(val, bool) for val in [npu_val, bench_val]): + diff = npu_val - bench_val + if math.isnan(diff): + diff = CompareConst.NAN + relative = CompareConst.NAN + else: + if bench_val != 0: + relative = str(abs((diff / bench_val) * 100)) + '%' + else: + relative = CompareConst.N_A + magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + CompareConst.EPSILON) + if magnitude_diff > CompareConst.MAGNITUDE: + warning_flag = True + result_item[start_idx + i] = diff + result_item[start_idx + i + CompareConst.STATISTICS_INDICATOR_NUM] = relative + else: + result_item[start_idx + i] = CompareConst.N_A + result_item[start_idx + i + CompareConst.STATISTICS_INDICATOR_NUM] = CompareConst.N_A + + accuracy_check = CompareConst.WARNING if warning_flag else "" + err_msg += "Need double check api accuracy." if warning_flag else "" + for i in range(start_idx, len(result_item)): + if str(result_item[i]) in ('inf', '-inf', 'nan'): + result_item[i] = f'{result_item[i]}\t' + return result_item, accuracy_check, err_msg + + +@dataclass +class ApiItemInfo: + name: str + struct: tuple + stack_info: list + + +def stack_column_process(result_item, has_stack, index, key, npu_stack_info): + if has_stack and index == 0 and key == CompareConst.INPUT_STRUCT: + result_item.extend(npu_stack_info) + else: + result_item.append(CompareConst.NONE) + return result_item + + +def result_item_init(n_info, b_info, dump_mode): + n_len = len(n_info.struct) + b_len = len(b_info.struct) + struct_long_enough = (n_len > 2 and b_len > 2) if dump_mode == Const.MD5 else (n_len > 1 and b_len > 1) + if struct_long_enough: + result_item = [ + n_info.name, b_info.name, n_info.struct[0], b_info.struct[0], n_info.struct[1], b_info.struct[1] + ] + if dump_mode == Const.MD5: + md5_compare_result = CompareConst.PASS if n_info.struct[2] == b_info.struct[2] else CompareConst.DIFF + result_item.extend([n_info.struct[2], b_info.struct[2], md5_compare_result]) + elif dump_mode == Const.SUMMARY: + result_item.extend([" "] * 8) # 8个统计量数据情况的比对指标 + else: + result_item.extend([" "] * 6) # 6个真实数据情况的比对指标 + else: + err_msg = "index out of bounds error will occur in result_item_init, please check!\n" \ + f"npu_info_struct is {n_info.struct}\n" \ + f"bench_info_struct is {b_info.struct}" + logger.error(err_msg) + raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) + return result_item + + +def count_struct(op_dict): + parts = [ + CompareConst.OP_NAME, + CompareConst.INPUT_STRUCT, + CompareConst.OUTPUT_STRUCT, + CompareConst.PARAMS_STRUCT, + CompareConst.PARAMS_GRAD_STRUCT + ] + lengths = [len(op_dict.get(part, [])) for part in parts] + num = lengths[0] + if num != sum(lengths[1:]): + logger.error(f"Length of names and structs of op_dict not match. Please check! op_dict: {op_dict}") + raise CompareException(CompareException.NAMES_STRUCTS_MATCH_ERROR) + return tuple(lengths) + + +def get_accuracy(result, n_dict, b_dict, dump_mode): + def get_accuracy_core(n_start, n_len, b_start, b_len, key): + min_len = min(n_len, b_len) + npu_stack_info = n_dict.get("stack_info", None) + bench_stack_info = b_dict.get("stack_info", None) + has_stack = npu_stack_info and bench_stack_info + + if dump_mode == Const.ALL: + npu_data_name_list = n_dict.get("data_name", None) + bench_data_name_list = b_dict.get("data_name", None) + + for index in range(min_len): + n_name = safe_get_value(n_dict, n_start + index, "n_dict", key="op_name") + b_name = safe_get_value(b_dict, b_start + index, "b_dict", key="op_name") + n_struct = safe_get_value(n_dict, index, "n_dict", key=key) + b_struct = safe_get_value(b_dict, index, "b_dict", key=key) + err_msg = "" + + npu_info = ApiItemInfo(n_name, n_struct, npu_stack_info) + bench_info = ApiItemInfo(b_name, b_struct, bench_stack_info) + result_item = result_item_init(npu_info, bench_info, dump_mode) + + if dump_mode == Const.MD5: + result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) + result.append(result_item) + continue + + npu_summary_data = safe_get_value(n_dict, n_start + index, "n_dict", key=CompareConst.SUMMARY) + bench_summary_data = safe_get_value(b_dict, b_start + index, "b_dict", key=CompareConst.SUMMARY) + result_item.extend(process_summary_data(npu_summary_data)) + result_item.extend(process_summary_data(bench_summary_data)) + + if dump_mode == Const.SUMMARY: + result_item, accuracy_check, err_msg = get_rela_diff_summary_mode(result_item, npu_summary_data, + bench_summary_data, err_msg) + + result_item.append(accuracy_check if dump_mode == Const.SUMMARY else CompareConst.ACCURACY_CHECK_YES) + result_item.append(err_msg) + result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) + if dump_mode == Const.ALL: + npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") + bench_data_name = safe_get_value(bench_data_name_list, b_start + index, "bench_data_name_list") + result_item.append([npu_data_name, bench_data_name]) + + result.append(result_item) + + if n_len > b_len: + for index in range(b_len, n_len): + try: + n_name = n_dict['op_name'][n_start + index] + n_struct = n_dict[key][index] + if dump_mode == Const.MD5: + result_item = [ + n_name, CompareConst.NAN, n_struct[0], CompareConst.NAN, n_struct[1], CompareConst.NAN, + n_struct[2], CompareConst.NAN, CompareConst.NAN + ] + result.append(result_item) + continue + result_item = [ + n_name, CompareConst.NAN, n_struct[0], CompareConst.NAN, n_struct[1], CompareConst.NAN, + " ", " ", " ", " ", " ", " " + ] + summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index] + result_item.extend(summary_data) + summary_data = [CompareConst.NAN for _ in range(len(n_dict.get(CompareConst.SUMMARY)[0]))] + result_item.extend(summary_data) + except IndexError as e: + err_msg = "index out of bounds error occurs, please check!\n" \ + f"n_dict is {n_dict}" + logger.error(err_msg) + raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from e + + err_msg = "" + result_item.append(CompareConst.ACCURACY_CHECK_YES) + result_item.append(err_msg) + result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) + if dump_mode == Const.ALL: + npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") + result_item.append([npu_data_name, "-1"]) + + result.append(result_item) + + n_num, n_num_input, n_num_output, n_num_params, n_num_params_grad = count_struct(n_dict) + b_num, b_num_input, b_num_output, b_num_params, b_num_params_grad = count_struct(b_dict) + + get_accuracy_core(0, n_num_input, 0, b_num_input, CompareConst.INPUT_STRUCT) + get_accuracy_core(n_num_input + n_num_output, n_num_params, b_num_input + b_num_output, b_num_params, + CompareConst.PARAMS_STRUCT) + get_accuracy_core(n_num_input, n_num_output, b_num_input, b_num_output, CompareConst.OUTPUT_STRUCT) + get_accuracy_core(n_num_input + n_num_output + n_num_params, n_num_params_grad, + b_num_input + b_num_output + b_num_params, b_num_params_grad, + CompareConst.PARAMS_GRAD_STRUCT) + + +def make_result_table(result, dump_mode, stack_mode): + header = CompareConst.HEAD_OF_COMPARE_MODE[dump_mode][:] + + if stack_mode: + header.append(CompareConst.STACK) + if dump_mode == Const.ALL: + header.append(CompareConst.DATA_NAME) + else: + if dump_mode == Const.ALL: + for row in result: + del row[-2] # 输出结果不要堆栈信息时,删除中间结果result中的stack info,真实数据时为倒数第2列 + header.append(CompareConst.DATA_NAME) + else: + for row in result: + del row[-1] # 输出结果不要堆栈信息时,删除中间结果result中的stack info,非真实数据时为倒数第1列 + result_df = pd.DataFrame(result, columns=header, dtype='object') + return result_df + + def _compare_parser(parser): parser.add_argument("-i", "--input_path", dest="input_path", type=str, help=" The compare input path, a dict json.", required=True) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 99a87e7e43..7622c4ae23 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py index ee5e3f519e..eb80bd9c43 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,12 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import re -import math -from msprobe.core.compare.acc_compare import read_op, merge_tensor, get_accuracy + +from msprobe.core.compare.acc_compare import ModeConfig +from msprobe.core.compare.utils import read_op, merge_tensor, get_accuracy, make_result_table from msprobe.core.common.utils import set_dump_path, get_dump_mode from msprobe.visualization.utils import GraphConst from msprobe.core.common.const import Const -from msprobe.core.compare.acc_compare import ModeConfig + # 用于将节点名字解析成对应的NodeOp的规则 op_patterns = [ @@ -227,3 +228,12 @@ def _format_data(data_dict): if all_null: data_dict.clear() data_dict[GraphConst.VALUE] = GraphConst.NULL + + +def get_csv_df(stack_mode, csv_data, compare_mode): + """ + 调用acc接口写入csv + """ + + dump_mode = GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING.get(compare_mode) + return make_result_table(csv_data, dump_mode, stack_mode) diff --git a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py index 902d721a8d..8c89b6d7ce 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,10 +14,9 @@ # limitations under the License. import re -from msprobe.visualization.builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data -from msprobe.visualization.utils import GraphConst, load_json_file, load_data_json_file, get_csv_df +from msprobe.visualization.builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data, get_csv_df +from msprobe.visualization.utils import GraphConst, load_json_file, load_data_json_file from msprobe.visualization.graph.graph import Graph, NodeOp -from msprobe.visualization.graph.node_colors import NodeColors from msprobe.visualization.compare.mode_adapter import ModeAdapter from msprobe.core.common.const import Const diff --git a/debug/accuracy_tools/msprobe/visualization/utils.py b/debug/accuracy_tools/msprobe/visualization/utils.py index f6e8258bb6..22b91276c9 100644 --- a/debug/accuracy_tools/msprobe/visualization/utils.py +++ b/debug/accuracy_tools/msprobe/visualization/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,6 @@ import re import json from msprobe.core.common.file_utils import FileOpen from msprobe.core.common.const import CompareConst, Const -from msprobe.core.compare.acc_compare import Comparator, ModeConfig def load_json_file(file_path): @@ -50,15 +49,6 @@ def save_json_file(file_path, data): f.write(json.dumps(data, indent=4)) -def get_csv_df(stack_mode, csv_data, compare_mode): - """ - 调用acc接口写入csv - """ - dump_mode = GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING.get(compare_mode) - mode_config = ModeConfig(stack_mode=stack_mode, dump_mode=dump_mode) - return Comparator(mode_config).make_result_table(csv_data) - - def str2float(percentage_str): """ 百分比字符串转换转换为浮点型 -- Gitee From b4deca3a6191392dac5ca292d67285e40df61bbb Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 12 Mar 2025 14:21:49 +0800 Subject: [PATCH 13/27] compare reconstruct --- .../accuracy_tools/msprobe/core/common/const.py | 16 ++++++++-------- .../msprobe/mindspore/compare/ms_compare.py | 11 +++-------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index e54b2b28d7..1c440ea6d9 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -364,7 +364,6 @@ class CompareConst: MAX_RELATIVE_ERR, MIN_RELATIVE_ERR, MEAN_RELATIVE_ERR, NORM_RELATIVE_ERR] # dtype match - DTYPE_MATCH_GROUPS = [ {Const.FLOAT16, Const.FLOAT32, Const.BFLOAT16}, {Const.TORCH_FLOAT16, Const.TORCH_FLOAT32, Const.TORCH_BFLOAT16} @@ -388,13 +387,6 @@ class CompareConst: Const.PARAMS_GRAD: PARAMS_GRAD_STRUCT } - STRUCT_COMPARE_KEY = [ - INPUT_STRUCT, - OUTPUT_STRUCT, - PARAMS_STRUCT, - PARAMS_GRAD_STRUCT - ] - # compare standard HUNDRED_RATIO_THRESHOLD = 0.01 THOUSAND_RATIO_THRESHOLD = 0.001 @@ -473,6 +465,14 @@ class CompareConst: MAX_DIFF: None, MIN_DIFF: None, MEAN_DIFF: None, NORM_DIFF: None, MAX_RELATIVE_ERR: None, MIN_RELATIVE_ERR: None, MEAN_RELATIVE_ERR: None, NORM_RELATIVE_ERR: None } + + API_MAPPING_KEYS_TO_COMPARE = [ + ('ms_args', 'pt_args'), + ('ms_output', 'pt_output'), + ('ms_parameters', 'pt_parameters'), + ('ms_parameters_grad', 'pt_parameters_grad'), + ] + INPUT_PATTERN = Const.SEP + Const.INPUT + Const.SEP KWARGS_PATTERN = Const.SEP + Const.KWARGS + Const.SEP OUTPUT_PATTERN = Const.SEP + Const.OUTPUT + Const.SEP diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 022f05c479..9affa822d8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -116,7 +116,7 @@ class MSComparator(Comparator): def get_api_indices_dict(self, op_name_df): """ - 生成一个api对应的所有的input、output等的index的键值对字典 + 生成多个api对应的各自的所有的input、output等的index的键值对字典 示例: {'Functional.conv2d': [0, 1, 2, 3], 'Functional.batch_norm': [4, 5, 6, 7, 8] @@ -143,13 +143,8 @@ class MSComparator(Comparator): pt_api_indices_dict = self.get_api_indices_dict(bench_df) for mapping_dict in self.api_mapping_dict: - keys_to_compare = [ - ('ms_args', 'pt_args'), - ('ms_output', 'pt_output'), - ('ms_parameters', 'pt_parameters'), - ('ms_parameters_grad', 'pt_parameters_grad'), - ] - if not all(len(mapping_dict.get(k1, [])) == len(mapping_dict.get(k2, [])) for k1, k2 in keys_to_compare): + if not all(len(mapping_dict.get(k1, [])) == len(mapping_dict.get(k2, [])) for k1, k2 in + CompareConst.API_MAPPING_KEYS_TO_COMPARE): logger.warning('The user-defined mapping table is incorrect,\ make sure that the number of parameters is equal') continue -- Gitee From 84a44e623ec10a34e554487a2785a8f61a7ab023 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 13 Mar 2025 10:51:54 +0800 Subject: [PATCH 14/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 90 +++++++++---------- .../msprobe/mindspore/compare/ms_compare.py | 74 +++++++-------- .../msprobe/pytorch/compare/match.py | 49 ---------- .../msprobe/pytorch/compare/pt_compare.py | 26 +++--- 4 files changed, 95 insertions(+), 144 deletions(-) delete mode 100644 debug/accuracy_tools/msprobe/pytorch/compare/match.py diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 28e6742e60..70e272974c 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -71,6 +71,51 @@ class Comparator(ABC): remove_path(file_path) return file_path + @staticmethod + def process_data_name(result): + result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1) + return result + + @staticmethod + def set_summary(summary): + if summary == CompareConst.N_A: + return [CompareConst.N_A] * 4 + summary_list = [] + for i in summary: + if i is None: + summary_list.append(CompareConst.N_A) + elif str(i).lower() == 'nan': + summary_list.append(CompareConst.NAN) + else: + summary_list.append(i) + return summary_list + + @staticmethod + def type_check(val): + """ + 检查是否为数值或字符串形式的nan + """ + check_series = pd.Series(False, index=val.index) + val_str = val.astype(str) + check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True + return check_series + + @staticmethod + def get_number(val): + return pd.to_numeric(val.astype(str), errors='coerce') + + @abstractmethod + def process_compare_key_and_shape(self, npu_df, bench_df): + pass + + @abstractmethod + def gen_dtype_condition(self, match_result): + pass + + @abstractmethod + def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: + pass + def gen_merge_list(self, json_data, op_name, stack_json_data): op_data = json_data['data'][op_name] check_dump_json_str(op_data, op_name) @@ -133,33 +178,6 @@ class Comparator(ABC): result['data_name'].append(data_name_reorder.pop(0)) return pd.DataFrame(result) - @abstractmethod - def process_compare_key_and_shape(self, npu_df, bench_df): - pass - - @abstractmethod - def gen_dtype_condition(self, match_result): - pass - - @staticmethod - def process_data_name(result): - result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1) - return result - - @staticmethod - def set_summary(summary): - if summary == CompareConst.N_A: - return [CompareConst.N_A] * 4 - summary_list = [] - for i in summary: - if i is None: - summary_list.append(CompareConst.N_A) - elif str(i).lower() == 'nan': - summary_list.append(CompareConst.NAN) - else: - summary_list.append(i) - return summary_list - def make_result_df(self, result): # get header header = CompareConst.HEAD_OF_COMPARE_MODE[self.dump_mode][:] @@ -194,20 +212,6 @@ class Comparator(ABC): result_df[h] = result[h] return result_df, header - @staticmethod - def type_check(val): - """ - 检查是否为数值或字符串形式的nan - """ - check_series = pd.Series(False, index=val.index) - val_str = val.astype(str) - check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True - return check_series - - @staticmethod - def get_number(val): - return pd.to_numeric(val.astype(str), errors='coerce') - def calc_summary_diff(self, result_df, cond_no_bench, stats_index: str): npu_val = result_df['NPU ' + stats_index] bench_val = result_df['Bench ' + stats_index] @@ -447,10 +451,6 @@ class Comparator(ABC): logger.error('result dataframe is not found.') raise CompareException(CompareException.INVALID_DATA_ERROR) from e - @abstractmethod - def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: - pass - def setup_comparison(input_param, output_path, **kwargs) -> ComparisonConfig: """公共的前置处理逻辑,返回封装后的 ComparisonConfig 对象""" diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 9affa822d8..a921fec94e 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -100,6 +100,43 @@ class MSComparator(Comparator): raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error return api_name + def process_compare_key_and_shape(self, npu_df, bench_df): + npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) + npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] + bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] + bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] + return npu_df, bench_df + + def gen_dtype_condition(self, match_result): + """ + dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 + """ + # 如果ms使用了fuzzy_match或data_mapping,不校验dtype,返回全True的DataFrame + if self.fuzzy_match or self.data_mapping: + return pd.Series(True, index=match_result.index) + + npu_dtype = match_result['dtype_x'] + bench_dtype = match_result['dtype_y'] + if self.cross_frame: + npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) + + equal_condition = npu_dtype == bench_dtype + match_condition = ( + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[0])) | + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[1])) + ) + return equal_condition | match_condition + + def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: + n_value = read_npy_data(npu_dir, npu_data_name) + if self.cross_frame: + b_value = read_pt_data(bench_dir, bench_data_name) + else: + b_value = read_npy_data(bench_dir, bench_data_name) + return n_value, b_value + def process_internal_api_mapping(self, npu_op_name): # get api name & class name from op_name # Functional.addcmul.0.forward.input.0 @@ -213,43 +250,6 @@ class MSComparator(Comparator): npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] return npu_df - def process_compare_key_and_shape(self, npu_df, bench_df): - npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) - npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] - bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] - bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] - return npu_df, bench_df - - def gen_dtype_condition(self, match_result): - """ - dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 - """ - # 如果ms使用了fuzzy_match或data_mapping,不校验dtype,返回全True的DataFrame - if self.fuzzy_match or self.data_mapping: - return pd.Series(True, index=match_result.index) - - npu_dtype = match_result['dtype_x'] - bench_dtype = match_result['dtype_y'] - if self.cross_frame: - npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) - - equal_condition = npu_dtype == bench_dtype - match_condition = ( - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[0])) | - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[1])) - ) - return equal_condition | match_condition - - def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: - n_value = read_npy_data(npu_dir, npu_data_name) - if self.cross_frame: - b_value = read_pt_data(bench_dir, bench_data_name) - else: - b_value = read_npy_data(bench_dir, bench_data_name) - return n_value, b_value - def ms_compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/match.py b/debug/accuracy_tools/msprobe/pytorch/compare/match.py deleted file mode 100644 index d676b85f20..0000000000 --- a/debug/accuracy_tools/msprobe/pytorch/compare/match.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from msprobe.core.common.utils import CompareException -from msprobe.core.common.file_utils import load_yaml - - -class AtenIrMapping(): - def __init__(self): - cur_path = os.path.dirname(os.path.realpath(__file__)) - yaml_path = os.path.join(cur_path, "mapping.yaml") - self.aten_mapping = load_yaml(yaml_path) - - def match(self, op1, op2): - if "Aten" in op1 and "Aten" not in op2: - return self.match_op(op1, op2) - else: - return self.match_op(op2, op1) - - def match_op(self, aten_op, torch_op): - try: - aten_op_raw_name_overload = '_'.join(aten_op.split("_")[1:-3]) - aten_op_raw_name = aten_op_raw_name_overload.split('.')[0] - torch_op_raw_name = '_'.join(torch_op.split("_")[1:-3]).lower() - except IndexError as e: - err_msg = f"Dump op name format error: {aten_op}, {torch_op}. Your dump data may be corrupted." - raise CompareException.INVALID_DATA_ERROR(err_msg) from e - matching_op = self.aten_mapping.get(aten_op_raw_name) - if matching_op is None: - return False - if matching_op.lower() == torch_op_raw_name: - return True - return False - - -graph_mapping = AtenIrMapping() diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 7622c4ae23..013714341d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -59,19 +59,6 @@ class PTComparator(Comparator): renamed_op_name = op_name return renamed_op_name - def assign_df_compare_key(self, df): - """ - 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 - - :param df: DataFrame,NPU or Bench 对比数据 - :return: compare_key(name)处理后的 npu_df 或 bench_df - """ - if self.fuzzy_match: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_fuzzy_match) - else: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] - return df - def process_compare_key_and_shape(self, npu_df, bench_df): npu_df = self.assign_df_compare_key(npu_df) npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] @@ -96,6 +83,19 @@ class PTComparator(Comparator): b_value = read_pt_data(bench_dir, bench_data_name) return n_value, b_value + def assign_df_compare_key(self, df): + """ + 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 + + :param df: DataFrame,NPU or Bench 对比数据 + :return: compare_key(name)处理后的 npu_df 或 bench_df + """ + if self.fuzzy_match: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_fuzzy_match) + else: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] + return df + def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) -- Gitee From fb39b74b39858216e8ab09c60818033a9afeb65a Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 13 Mar 2025 11:10:15 +0800 Subject: [PATCH 15/27] compare reconstruct --- debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py | 3 +-- debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index a921fec94e..407fff98ac 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -48,7 +48,6 @@ class MSComparator(Comparator): def __init__(self, mode_config, mapping_config=None, is_cross_framework=False): super().__init__(mode_config) - self.frame_name = MSComparator.__name__ self.stack_mode = mode_config.stack_mode self.auto_analyze = mode_config.auto_analyze @@ -64,11 +63,11 @@ class MSComparator(Comparator): self.cross_frame = is_cross_framework else: self.cross_frame = self.cell_mapping is not None or self.api_mapping is not None + self.cell_mapping_dict = self.load_mapping_file(self.cell_mapping) self.api_mapping_dict = self.load_mapping_file(self.api_mapping) if self.api_mapping is not None: self.ms_to_pt_mapping = self.load_internal_api() - if isinstance(self.data_mapping, str) or self.data_mapping is None: self.data_mapping_dict = self.load_mapping_file(self.data_mapping) elif isinstance(self.data_mapping, dict): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 013714341d..e32f033b3d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -29,7 +29,6 @@ class PTComparator(Comparator): self.fuzzy_match = mode_config.fuzzy_match self.dump_mode = mode_config.dump_mode - self.frame_name = PTComparator.__name__ self.data_mapping = data_mapping if isinstance(self.data_mapping, str) or self.data_mapping is None: self.data_mapping_dict = self.load_mapping_file(self.data_mapping) -- Gitee From d51dc132d98d3d1ab02b9a1e4ed53e6eef5c3742 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 17 Mar 2025 11:31:17 +0800 Subject: [PATCH 16/27] compare reconstruct --- .../msprobe/pytorch/compare/pt_compare.py | 42 ++-- .../test/core_ut/compare/test_acc_compare.py | 232 ------------------ .../core_ut/compare/test_acc_compare_check.py | 83 +------ .../core_ut/compare/test_acc_compare_utils.py | 62 +---- .../mindspore_ut/compare/test_ms_compare.py | 133 ---------- .../pytorch_ut/compare/test_pt_compare.py | 30 --- 6 files changed, 31 insertions(+), 551 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index e32f033b3d..0825e8981b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd + from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.file_utils import load_yaml from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison @@ -58,10 +60,30 @@ class PTComparator(Comparator): renamed_op_name = op_name return renamed_op_name + def process_data_mapping(self, npu_op_name): + return self.data_mapping_dict.get(npu_op_name, npu_op_name) + + def assign_df_compare_key(self, df, is_npu=True): + """ + 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 + + :param df: DataFrame,NPU or Bench 对比数据 + :param is_npu: 是否npu数据 + :return: compare_key(name)处理后的 npu_df 或 bench_df + """ + if is_npu and self.data_mapping: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_data_mapping) + elif self.fuzzy_match: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_fuzzy_match) + else: + df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] + + return df + def process_compare_key_and_shape(self, npu_df, bench_df): - npu_df = self.assign_df_compare_key(npu_df) + npu_df = self.assign_df_compare_key(npu_df, is_npu=True) npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] - bench_df = self.assign_df_compare_key(bench_df) + bench_df = self.assign_df_compare_key(bench_df, is_npu=False) bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] return npu_df, bench_df @@ -69,6 +91,9 @@ class PTComparator(Comparator): """ dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 """ + if self.data_mapping: + return pd.Series(True, index=match_result.index) + npu_dtype = match_result['dtype_x'] bench_dtype = match_result['dtype_y'] @@ -82,19 +107,6 @@ class PTComparator(Comparator): b_value = read_pt_data(bench_dir, bench_data_name) return n_value, b_value - def assign_df_compare_key(self, df): - """ - 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 - - :param df: DataFrame,NPU or Bench 对比数据 - :return: compare_key(name)处理后的 npu_df 或 bench_df - """ - if self.fuzzy_match: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_fuzzy_match) - else: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] - return df - def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py index 94244be326..b1d621310a 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py @@ -300,11 +300,6 @@ class TestUtilsMethods(unittest.TestCase): if os.path.exists(base_dir3): shutil.rmtree(base_dir3) - def test_get_accuracy_graph_mode(self): - result = [] - get_accuracy(result, npu_dict_aten, bench_dict_functional, dump_mode=Const.SUMMARY) - self.assertEqual(result, aten_result) - def test_find_error_rows(self): api_batch = ApiBatch("Functional_batch_norm_0_forward", 0) api_batch.input_len = 1 @@ -337,108 +332,6 @@ class TestUtilsMethods(unittest.TestCase): ] }) - def test_calculate_summary_data(self): - npu_summary_data = [1, 1, 1, 1] - bench_summary_data = [2, 2, 2, 2] - result_item = ['', '', '', '', '', '', '', '', '', '', '', '', '', ''] - - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - comparator = Comparator(mode_config) - comparator.calculate_summary_data(npu_summary_data, bench_summary_data, result_item) - self.assertEqual(result_item, - ['', '', '', '', '', '', -1, -1, -1, -1, '50.0%', '50.0%', '50.0%', '50.0%', '', '']) - - bench_summary_data = [0, 0, 0, 0] - result_item = ['', '', '', '', '', '', '', '', '', '', '', '', '', ''] - - comparator.calculate_summary_data(npu_summary_data, bench_summary_data, result_item) - self.assertEqual(result_item, ['', '', '', '', '', '', 1, 1, 1, 1, 'N/A', 'N/A', 'N/A', 'N/A', 'Warning', - 'Need double check api accuracy.']) - - def test_make_result_table_stack_mode_True(self): - result_md5 = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', 'File']] - result_summary = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '', - 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', 'File']] - result_all = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', - 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', 'File', '-1']] - columns_md5_stack_mode_true = CompareConst.MD5_COMPARE_RESULT_HEADER + ['NPU_Stack_Info'] - result_table_md5_true = pd.DataFrame(result_md5, columns=columns_md5_stack_mode_true, dtype=object) - columns_summary_stack_mode_true = CompareConst.SUMMARY_COMPARE_RESULT_HEADER + ['NPU_Stack_Info'] - result_table_summary_true = pd.DataFrame(result_summary, columns=columns_summary_stack_mode_true, dtype=object) - columns_all_stack_mode_true = CompareConst.COMPARE_RESULT_HEADER + ['NPU_Stack_Info'] + ['Data_name'] - result_table_all_true = pd.DataFrame(result_all, columns=columns_all_stack_mode_true, dtype=object) - - stack_mode = True - auto_analyze = True - fuzzy_match = False - - dump_mode = Const.MD5 - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result_df = Comparator(mode_config).make_result_table(result_md5) - self.assertTrue(result_df.equals(result_table_md5_true)) - - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result_df = Comparator(mode_config).make_result_table(result_summary) - self.assertTrue(result_df.equals(result_table_summary_true)) - - dump_mode = Const.ALL - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result_df = Comparator(mode_config).make_result_table(result_all) - self.assertTrue(result_df.equals(result_table_all_true)) - - def test_make_result_table_stack_mode_False(self): - result_md5_test = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '']] - result_md5 = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '']] - result_summary_test = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '', - 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '']] - result_summary = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '', - 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '']] - result_all_test = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', - 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '', '-1']] - result_all = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', - 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']] - columns_md5_stack_mode_true = CompareConst.MD5_COMPARE_RESULT_HEADER - result_table_md5_true = pd.DataFrame(result_md5, columns=columns_md5_stack_mode_true, dtype='object') - columns_summary_stack_mode_true = CompareConst.SUMMARY_COMPARE_RESULT_HEADER - result_table_summary_true = pd.DataFrame(result_summary, columns=columns_summary_stack_mode_true, - dtype='object') - columns_all_stack_mode_true = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] - result_table_all_true = pd.DataFrame(result_all, columns=columns_all_stack_mode_true, dtype='object') - - stack_mode = False - auto_analyze = True - fuzzy_match = False - - dump_mode = Const.MD5 - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result_df = Comparator(mode_config).make_result_table(result_md5_test) - self.assertTrue(result_df.equals(result_table_md5_true)) - - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result_df = Comparator(mode_config).make_result_table(result_summary_test) - self.assertTrue(result_df.equals(result_table_summary_true)) - - dump_mode = Const.ALL - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result_df = Comparator(mode_config).make_result_table(result_all_test) - self.assertTrue(result_df.equals(result_table_all_true)) - def test_gen_merge_list(self): op_data = { 'input_args': [ @@ -472,66 +365,6 @@ class TestUtilsMethods(unittest.TestCase): result = Comparator(mode_config).gen_merge_list(json_data, op_name, stack_json_data) self.assertEqual(result, merge_list) - def test_check_op_fuzzy_false(self): - stack_mode = False - auto_analyze = True - dump_mode = Const.SUMMARY - - fuzzy_match = False - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - result = pt_comparator.check_op(npu_dict, bench_dict) - self.assertEqual(result, True) - - def test_check_op_fuzzy_true(self): - stack_mode = False - auto_analyze = True - dump_mode = Const.SUMMARY - - fuzzy_match = True - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - result = pt_comparator.check_op(npu_dict2, bench_dict) - self.assertEqual(result, True) - - def test_match_op_both_last_element(self): - stack_mode = False - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - a, b = pt_comparator.match_op([npu_dict], [bench_dict]) - self.assertEqual(a, 0) - self.assertEqual(b, 0) - - def test_match_op_only_npu_last_element(self): - stack_mode = False - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - a, b = pt_comparator.match_op([npu_dict], [bench_dict, 1]) - self.assertEqual(a, 0) - self.assertEqual(b, 0) - - def test_match_op_only_bench_last_element(self): - stack_mode = False - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - a, b = pt_comparator.match_op([npu_dict, npu_dict2], [bench_dict]) - self.assertEqual(a, 0) - self.assertEqual(b, 0) - def test_compare_process(self): generate_dump_json(base_dir) generate_stack_json(base_dir) @@ -555,35 +388,6 @@ class TestUtilsMethods(unittest.TestCase): o_result = pd.DataFrame(o_data, columns=columns, dtype=object) self.assertTrue(result.equals(o_result)) - def test_merge_data(self): - op_data = { - 'input_args': [ - { - 'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [2, 2], - 'Max': 1, 'Min': 1, 'Mean': 1, 'Norm': 1, 'requires_grad': False, - 'data_name': 'Functional.linear.0.forward.input.0.pt', - 'full_op_name': 'Functional.linear.0.forward.input.0' - } - ] - } - json_data = {'data': {'Functional.linear.0.forward': op_data}} - stack_json_data = {'Functional.linear.0.forward': ['File']} - - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - result = Comparator(mode_config).merge_data(json_data, stack_json_data) - ops_all = { - 'Functional.linear.0.forward.input.0': { - 'data_name': None, 'stack_info': [['File']], - 'struct': ('torch.float32', [2, 2]), 'summary': [1, 1, 1, 1] - } - } - self.assertEqual(result, ops_all) - def test_compare_core_basic(self): generate_dump_json(base_dir2) generate_stack_json(base_dir2) @@ -704,39 +508,3 @@ class TestUtilsMethods(unittest.TestCase): generate_pt(base_dir) result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, '']) - - -class TestComparator(unittest.TestCase): - def setUp(self): - mode_config = ModeConfig(dump_mode=Const.MD5) - self.comparator = Comparator(mode_config=mode_config) - self.npu_ops_all = { - 'op1': {'struct': ['float32', [1, 96, 2], '83dcefb7']}, - } - self.bench_ops_all = { - 'op1': {'struct': ['float32', [1, 96, 2], '83dcefb7']}, - } - - def test_normal(self): - expected_result = ['op1', 'op1', 'float32', 'float32', [1, 96, 2], [1, 96, 2], '83dcefb7', '83dcefb7', - CompareConst.PASS, CompareConst.NONE] - result = self.comparator.get_result_md5_compare('op1', 'op1', - self.npu_ops_all, self.bench_ops_all) - self.assertEqual(result, expected_result) - - @patch('msprobe.core.compare.acc_compare.logger') - def test_length_exception(self, mock_logger): - self.npu_ops_all['op1']['struct'] = ['npu_val1', 'npu_val2'] - with self.assertRaises(CompareException) as context: - self.comparator.get_result_md5_compare('op1', 'op1', - self.npu_ops_all, self.bench_ops_all) - self.assertEqual(context.exception.code, CompareException.INDEX_OUT_OF_BOUNDS_ERROR) - mock_logger.error.assert_called_once_with("The length of npu_struct and bench_struct must be >= 3, " - "but got npu_struct=2 and bench_struct=3. Please check!") - - def test_with_extra_args(self): - expected_result = ['op1', 'op1', 'float32', 'float32', [1, 96, 2], [1, 96, 2], '83dcefb7', '83dcefb7', - CompareConst.PASS, 'extra_data'] - result = self.comparator.get_result_md5_compare('op1', 'op1', - self.npu_ops_all, self.bench_ops_all, True, ['extra_data']) - self.assertEqual(result, expected_result) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py index a1e5f8eee1..fdfd124222 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py @@ -1,7 +1,6 @@ # coding=utf-8 import unittest -from msprobe.core.compare.check import check_struct_match, check_type_shape_match, check_graph_mode, fuzzy_check_op, \ - fuzzy_check_name, check_dump_json_str, check_json_key_value, valid_key_value, check_stack_json_str +from msprobe.core.compare.check import check_dump_json_str, check_json_key_value, valid_key_value, check_stack_json_str from msprobe.core.common.utils import CompareException @@ -66,86 +65,6 @@ op_name = 'Functional.conv2d.0.backward.input.0' class TestUtilsMethods(unittest.TestCase): - def test_check_struct_match_success(self): - result = check_struct_match(npu_dict, bench_dict) - self.assertTrue(result) - - def test_check_struct_match_fail(self): - npu_dict2 = {'input_struct': [('torch.float32', [1, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), - ('torch.float32', [16])], - 'output_struct': [('torch.float32', [1, 16, 28, 28])] - } - - bench_dict2 = {'input_struct': [('torch.float32', [2, 1, 28, 28]), ('torch.float32', [16, 1, 5, 5]), - ('torch.float32', [16])], - 'output_struct': [('torch.float32', [1, 16, 28, 28])] - } - result = check_struct_match(npu_dict2, bench_dict2) - self.assertFalse(result) - - def test_check_struct_index_error(self): - npu_dict3 = {'input_struct': [('a'), ('torch.float32'), - ('torch.float32')], - 'output_struct': [('torch.float32')] - } - - bench_dict3 = {'input_struct': [('torch.float32'), ('torch.float32'), - ('torch.float32')], - 'output_struct': [('torch.float32')] - } - with self.assertRaises(CompareException) as context: - result = check_struct_match(npu_dict3, bench_dict3) - self.assertEqual(context.exception.code, CompareException.INDEX_OUT_OF_BOUNDS_ERROR) - - def test_check_type_shape_match_success(self): - result = check_type_shape_match(npu_struct, bench_struct) - self.assertTrue(result) - - def test_check_type_shape_match_index_error(self): - npu_struct2 = [('a'), ('torch.float32'), ('torch.float32')] - bench_struct2 = [('torch.float32'), ('torch.float32'), ('torch.float32')] - with self.assertRaises(CompareException) as context: - result = check_type_shape_match(npu_struct2, bench_struct2) - self.assertEqual(context.exception.code, CompareException.INDEX_OUT_OF_BOUNDS_ERROR) - - def test_check_graph_mode(self): - op1 = "Aten" - op2 = "torch" - self.assertTrue(check_graph_mode(op1, op2)) - self.assertTrue(check_graph_mode(op2, op1)) - self.assertFalse(check_graph_mode(op1, op1)) - self.assertFalse(check_graph_mode(op2, op2)) - - def test_fuzzy_check_op_1(self): - npu_name_list = [] - bench_name_list = [] - result = fuzzy_check_op(npu_name_list, bench_name_list) - self.assertFalse(result) - - def test_fuzzy_check_op_2(self): - npu_name_list = [] - bench_name_list = ['Functional.conv2d.0.forward.input.0'] - result = fuzzy_check_op(npu_name_list, bench_name_list) - self.assertFalse(result) - - def test_fuzzy_check_op_3(self): - npu_name_list = ['Functional.conv2d.0.forward.input.0'] - bench_name_list = ['Functional.conv2d.1.forward.input.0'] - result = fuzzy_check_op(npu_name_list, bench_name_list) - self.assertTrue(result) - - def test_fuzzy_check_name_1(self): - npu_name = 'Functional.conv2d.0.backward.input.0' - bench_name = 'Functional.conv2d.1.backward.input.0' - result = fuzzy_check_name(npu_name, bench_name) - self.assertTrue(result) - - def test_fuzzy_check_name_2(self): - npu_name = 'Functional.conv2d.0.backward.input.0' - bench_name = 'Functional.conv2d.1.backward.input.1' - result = fuzzy_check_name(npu_name, bench_name) - self.assertFalse(result) - def test_check_dump_json_str(self): with self.assertRaises(CompareException) as context: check_dump_json_str(op_data, op_name) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py index bf23f4de1d..a4839ae051 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py @@ -12,9 +12,9 @@ import numpy as np from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.utils import CompareException from msprobe.core.compare.utils import ApiItemInfo, _compare_parser, check_and_return_dir_contents, extract_json, \ - count_struct, get_accuracy, append_stack_info, get_rela_diff_summary_mode, get_un_match_accuracy, merge_tensor, \ - op_item_parse, read_op, rename_api, resolve_api_special_parameters, result_item_init, stack_column_process, \ - table_value_is_valid, get_name_and_state, reorder_op_name_list, reorder_op_x_list, gen_op_item + count_struct, get_accuracy, get_rela_diff_summary_mode, merge_tensor, op_item_parse, read_op, rename_api, \ + result_item_init, stack_column_process, table_value_is_valid, get_name_and_state, reorder_op_name_list, \ + reorder_op_x_list, gen_op_item # test_read_op_1 op_data = { @@ -379,11 +379,6 @@ class TestUtilsMethods(unittest.TestCase): op_item_parse(parse_item, parse_op_name, depth=11) self.assertEqual(context.exception.code, CompareException.RECURSION_LIMIT_ERROR) - def test_resolve_api_special_parameters(self): - item_list = [] - resolve_api_special_parameters(data_dict, full_op_name, item_list) - self.assertEqual(item_list, o_result_api_special) - def test_get_rela_diff_summary_mode_float_or_int(self): result_item = [0] * 14 err_msg = '' @@ -449,57 +444,6 @@ class TestUtilsMethods(unittest.TestCase): get_accuracy(result, npu_dict, bench_dict, dump_mode=Const.SUMMARY) self.assertEqual(result, o_result) - def test_append_stack_info_stack_exist_index_0(self): - result_item = ['item1'] - npu_stack_info = ['stack_info1'] - index = 0 - - append_stack_info(result_item, npu_stack_info, index) - - self.assertEqual(result_item, ['item1', 'stack_info1']) - - def test_append_stack_info_stack_exist_index_not_0(self): - result_item = ['item1'] - npu_stack_info = ['stack_info1'] - index = 1 - - append_stack_info(result_item, npu_stack_info, index) - - self.assertEqual(result_item, ['item1', CompareConst.NONE]) - - def test_append_stack_info_stack_empty_index_0(self): - result_item = ['item1'] - npu_stack_info = [] - index = 0 - - append_stack_info(result_item, npu_stack_info, index) - - self.assertEqual(result_item, ['item1', CompareConst.NONE]) - - def test_append_stack_info_stack_empty_index_not_0(self): - result_item = ['item1'] - npu_stack_info = [] - index = 1 - - append_stack_info(result_item, npu_stack_info, index) - - self.assertEqual(result_item, ['item1', CompareConst.NONE]) - - def test_get_un_match_accuracy_md5(self): - result = [] - get_un_match_accuracy(result, npu_dict, dump_mode=Const.MD5) - self.assertEqual(result, o_result_unmatch_1) - - def test_get_un_match_accuracy_summary(self): - result = [] - get_un_match_accuracy(result, npu_dict, dump_mode=Const.SUMMARY) - self.assertEqual(result, o_result_unmatch_2) - - def test_get_un_match_accuracy_all(self): - result = [] - get_un_match_accuracy(result, npu_dict, dump_mode=Const.ALL) - self.assertEqual(result, o_result_unmatch_3) - def test_merge_tensor_summary(self): op_dict = merge_tensor(tensor_list, dump_mode=Const.SUMMARY) self.assertEqual(op_dict, result_op_dict) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py index 6f73778940..886d96063c 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py @@ -246,112 +246,6 @@ def gen_api_mapping_test_data(need_user_mapping=False): class TestUtilsMethods(unittest.TestCase): - def test_check_op_ms(self): - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.ALL - - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - mapping_config = MappingConfig() - - ms_comparator = MSComparator(mode_config, mapping_config) - result = ms_comparator.check_op(npu_dict, bench_dict) - self.assertTrue(result) - - def test_data_mapping(self): - stack_json_data = {} - - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - mapping_config = MappingConfig(data_mapping=data_mapping) - ms_comparator = MSComparator(mode_config, mapping_config) - - npu_ops_all = ms_comparator.merge_data(npu_json_data, stack_json_data) - npu_ops_all_correct = { - 'Functional.flash_attention_score.4.forward.input.0': { - 'struct': ('BFloat16', [4096, 1, 2048]), - 'summary': [4.1875, -4.4375, -4.550282028503716e-05, 2316.379150390625], - 'data_name': None, - 'stack_info': [None] - }, - 'Functional.flash_attention_score.4.forward.output.0': { - 'struct': ('BFloat16', [4096, 1, 2048]), - 'summary': [4.1875, -4.4375, -4.550282028503716e-05, 2316.379150390625], - 'data_name': None, - 'stack_info': [None] - } - } - self.assertDictEqual(npu_ops_all, npu_ops_all_correct) - - bench_ops_all = ms_comparator.merge_data(bench_json_data, stack_json_data) - bench_ops_all_correct = { - 'NPU.npu_fusion_attention.4.forward.input.0': { - 'struct': ('torch.bfloat16', [4096, 1, 2048]), - 'summary': [4.1875, -4.4375, -4.553794860839844e-05, 2320.0], - 'data_name': None, - 'stack_info': [None] - }, - 'NPU.npu_fusion_attention.4.forward.output.0': { - 'struct': ('torch.bfloat16', [4096, 1, 2048]), - 'summary': [4.1875, -4.4375, -4.553794860839844e-05, 2320.0], - 'data_name': None, - 'stack_info': [None] - } - } - self.assertDictEqual(bench_ops_all, bench_ops_all_correct) - - result = ms_comparator.get_accuracy(npu_ops_all, bench_ops_all) - result_correct = [['Functional.flash_attention_score.4.forward.input.0', - 'NPU.npu_fusion_attention.4.forward.input.0', - 'BFloat16', 'torch.bfloat16', [4096, 1, 2048], [4096, 1, 2048], 0.0, 0.0, - 3.512832336127758e-08, -3.620849609375, '0.0%', '0.0%', '0.07714076816099476%', - '0.1560711038523707%', 4.1875, -4.4375, -4.550282028503716e-05, 2316.379150390625, - 4.1875, -4.4375, -4.553794860839844e-05, 2320.0, '', '', None], - ['Functional.flash_attention_score.4.forward.output.0', - 'NPU.npu_fusion_attention.4.forward.output.0', - 'BFloat16', 'torch.bfloat16', [4096, 1, 2048], [4096, 1, 2048], 0.0, 0.0, - 3.512832336127758e-08, -3.620849609375, '0.0%', '0.0%', '0.07714076816099476%', - '0.1560711038523707%', 4.1875, -4.4375, -4.550282028503716e-05, 2316.379150390625, - 4.1875, -4.4375, -4.553794860839844e-05, 2320.0, '', '', None] - ] - self.assertListEqual(result, result_correct) - - def test_dm_tensor_task(self): - self.compare_process_custom(dump_mode=Const.ALL) - - def compare_process_custom(self, dump_mode): - data_path = tempfile.mkdtemp(prefix='dump_data', dir='/tmp') - try: - npu_dump_path = os.path.join(data_path, 'npu_dump.json') - bench_dump_path = os.path.join(data_path, 'bench_dump.json') - npu_stack_path = os.path.join(data_path, 'npu_stack.json') - - with open(npu_dump_path, 'w') as n_d_f: - json.dump(npu_json_data, n_d_f) - with open(bench_dump_path, 'w') as b_d_f: - json.dump(bench_json_data, b_d_f) - with open(npu_stack_path, 'w') as n_s_f: - json.dump({}, n_s_f) - - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.SUMMARY - - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - mapping_config = MappingConfig() - - ms_comparator = MSComparator(mode_config, mapping_config) - result_df = ms_comparator.compare_process_custom((npu_dump_path, bench_dump_path, npu_stack_path)) - self.assertListEqual(result_df.values.tolist(), []) - finally: - shutil.rmtree(data_path) - @patch('msprobe.mindspore.compare.ms_compare.detect_framework_by_dump_json') def test_check_cross_framework_valid_pytorch(self, mock_detect_framework): mock_detect_framework.return_value = Const.PT_FRAMEWORK @@ -468,33 +362,6 @@ class TestUtilsMethods(unittest.TestCase): npu_op_name = ms_comparator.process_cell_mapping(npu_cell_dict.get('op_name')[0]) self.assertEqual(npu_op_name, 'Module.fc1.Linear.forward.0.input.0') - def test_read_npy_data(self): - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.ALL - - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - mapping_config = MappingConfig() - - ms_comparator = MSComparator(mode_config, mapping_config) - - self.temp_file = tempfile.NamedTemporaryFile(suffix='.pt') - tensor = torch.Tensor([1, 2, 3]) - filename = self.temp_file.name.split('/')[-1] - torch.save(tensor, self.temp_file.name) - result = ms_comparator.read_npy_data('/tmp', filename, load_pt_file=True) - self.assertTrue(np.array_equal(result, np.array([1, 2, 3]))) - self.temp_file.close() - - self.temp_file = tempfile.NamedTemporaryFile(suffix='.npy') - tensor = np.array([1, 2, 3]) - filename = self.temp_file.name.split('/')[-1] - np.save(self.temp_file.name, tensor) - result = ms_comparator.read_npy_data('/tmp', filename, load_pt_file=False) - self.assertTrue(np.array_equal(result, np.array([1, 2, 3]))) - self.temp_file.close() - def test_process_internal_api_mapping(self): stack_mode = True auto_analyze = True diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py index b079e646c4..edb9979cc3 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py @@ -40,36 +40,6 @@ class TestUtilsMethods(unittest.TestCase): if os.path.exists(base_dir2): shutil.rmtree(base_dir2) - def test_read_npy_data_bf16(self): - generate_bf16_pt(base_dir1) - - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.ALL - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - result = pt_comparator.read_npy_data(base_dir1, 'bf16.pt') - - target_result = torch.tensor([1, 2, 3, 4], dtype=torch.float32).numpy() - self.assertTrue(np.array_equal(result, target_result)) - - def test_read_npy_data_dict(self): - generate_dict_pt(base_dir1) - - stack_mode = True - auto_analyze = True - fuzzy_match = False - dump_mode = Const.ALL - mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - - pt_comparator = PTComparator(mode_config) - - with self.assertRaises(CompareException) as context: - result = pt_comparator.read_npy_data(base_dir1, 'dict.pt') - self.assertEqual(context.exception.code, CompareException.DETACH_ERROR) - def test_compare(self): generate_dump_json(base_dir2) generate_stack_json(base_dir2) -- Gitee From 79605057aed2fe38711cb571cd9ad36f89bcbfca Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 17 Mar 2025 15:58:38 +0800 Subject: [PATCH 17/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 27 ++++++++++--------- .../msprobe/mindspore/compare/ms_compare.py | 4 +-- .../test/core_ut/compare/test_acc_compare.py | 9 +++---- .../mindspore_ut/compare/test_ms_compare.py | 5 ++-- .../pytorch_ut/compare/test_pt_compare.py | 5 +--- 5 files changed, 22 insertions(+), 28 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 70e272974c..d286d54dd6 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -93,7 +93,7 @@ class Comparator(ABC): @staticmethod def type_check(val): """ - 检查是否为数值或字符串形式的nan + 检查是否为数值或字符串形式的nan, 如果是返回True """ check_series = pd.Series(False, index=val.index) val_str = val.astype(str) @@ -218,23 +218,24 @@ class Comparator(ABC): diff_name = stats_index.capitalize() + ' diff' rel_err_name = ('norm' if stats_index == 'l2norm' else stats_index).capitalize() + 'RelativeErr' - # 只要npu、bench有一个不是数字或nan, 该行记为N/A - cond_na = ~self.type_check(npu_val) | ~self.type_check(bench_val) + # npu、bench中统计量均为数字或nan + cond_num_nan = self.type_check(npu_val) & self.type_check(bench_val) - # 如果不是数字或nan,就赋值统计量差异为N/A - result_df.loc[cond_na, [diff_name, rel_err_name]] = CompareConst.N_A - result_df.loc[~(cond_no_bench | cond_na), diff_name] = self.get_number(npu_val) - self.get_number(bench_val) - - cond_diff_nan = result_df[diff_name].isna() - cond_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_nan - cond_diff_not_nan = result_df[diff_name].notna() - cond_not_nan_diff = ~cond_no_bench & ~cond_na & cond_diff_not_nan + # 如果统计量不是数字或nan,就赋值统计量差异为N/A + result_df.loc[~cond_num_nan, [diff_name, rel_err_name]] = CompareConst.N_A + cond_valid_stat = ~cond_no_bench & cond_num_nan # 有效统计条件:bench_name不是N/A,并且NPU和bench的统计量都是数字或nan + result_df.loc[cond_valid_stat, diff_name] = self.get_number(npu_val) - self.get_number(bench_val) + cond_diff_nan = result_df[diff_name].isna() # 统计量差异是nan + cond_nan_diff = cond_valid_stat & cond_diff_nan result_df.loc[cond_nan_diff, [diff_name, rel_err_name]] = CompareConst.NAN + + cond_not_nan_diff = cond_valid_stat & ~cond_diff_nan condition_pt_zero = bench_val == 0 - result_df.loc[cond_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.NAN + result_df.loc[cond_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.N_A + + # 相对误差转成百分比字符串 cond_ref_err = cond_not_nan_diff & ~condition_pt_zero - # 计算相对误差转成百分比字符串 result_df.loc[cond_ref_err, rel_err_name] = ( result_df.loc[cond_ref_err, diff_name] / bench_val[cond_ref_err] * 100) result_df.loc[cond_ref_err, rel_err_name] = (result_df.loc[cond_ref_err, rel_err_name].abs().astype(str) + '%') diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 407fff98ac..4acd256693 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -169,7 +169,7 @@ class MSComparator(Comparator): flag = True for i, prefix in enumerate(mapping_dict.get(f'ms_{term}')): if op_name.split(pattern)[1].startswith(str(prefix)): - npu_df.loc[index, CompareConst.COMPARE_KEY] = ( + npu_df.loc[index, CompareConst.CMP_KEY] = ( op_name.replace(pattern + str(prefix), pattern + str(mapping_dict.get(f'pt_{term}')[i]))) flag = False @@ -204,7 +204,7 @@ class MSComparator(Comparator): logger.error(f'Excepted op_name: {op_name}') raise CompareException(CompareException.INVALID_DATA_ERROR) if is_abandoned: - npu_df.loc[index, CompareConst.COMPARE_KEY] = op_name + 'abandoned' + npu_df.loc[index, CompareConst.CMP_KEY] = op_name + 'abandoned' def process_cell_mapping(self, npu_op_name): if not npu_op_name: diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py index b1d621310a..971537315d 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py @@ -4,16 +4,13 @@ import os import shutil import threading import unittest -from unittest.mock import patch import pandas as pd import torch from msprobe.core.common.const import CompareConst, Const -from msprobe.core.common.utils import CompareException -from msprobe.core.compare.acc_compare import Comparator, ModeConfig +from msprobe.core.compare.acc_compare import ModeConfig from msprobe.core.compare.highlight import find_error_rows, find_compare_result_error_rows, ApiBatch -from msprobe.core.compare.utils import get_accuracy from msprobe.pytorch.compare.pt_compare import PTComparator npu_dict = {'op_name': ['Functional.conv2d.0.forward.input.0', 'Functional.conv2d.0.forward.input.1', @@ -362,7 +359,7 @@ class TestUtilsMethods(unittest.TestCase): dump_mode = Const.SUMMARY mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - result = Comparator(mode_config).gen_merge_list(json_data, op_name, stack_json_data) + result = PTComparator(mode_config).gen_merge_list(json_data, op_name, stack_json_data) self.assertEqual(result, merge_list) def test_compare_process(self): @@ -457,7 +454,7 @@ class TestUtilsMethods(unittest.TestCase): dump_mode = Const.ALL mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - comparator = Comparator(mode_config) + comparator = PTComparator(mode_config) result = comparator.do_multi_process(input_param, result_df) self.assertTrue(result.equals(o_result)) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py index 886d96063c..85ed78b87d 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py @@ -9,7 +9,6 @@ from unittest.mock import patch import numpy as np import pandas as pd -import torch import yaml from msprobe.core.common.utils import CompareException @@ -246,7 +245,7 @@ def gen_api_mapping_test_data(need_user_mapping=False): class TestUtilsMethods(unittest.TestCase): - @patch('msprobe.mindspore.compare.ms_compare.detect_framework_by_dump_json') + @patch('msprobe.mindspore.compare.utils.detect_framework_by_dump_json') def test_check_cross_framework_valid_pytorch(self, mock_detect_framework): mock_detect_framework.return_value = Const.PT_FRAMEWORK @@ -254,7 +253,7 @@ class TestUtilsMethods(unittest.TestCase): self.assertTrue(result) - @patch('msprobe.mindspore.compare.ms_compare.detect_framework_by_dump_json') + @patch('msprobe.mindspore.compare.utils.detect_framework_by_dump_json') def test_check_cross_framework_invalid_framework(self, mock_detect_framework): mock_detect_framework.return_value = Const.MS_FRAMEWORK diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py index edb9979cc3..4eda1d6d97 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_pt_compare.py @@ -3,13 +3,10 @@ import os import shutil import unittest -import numpy as np import torch -from msprobe.core.common.const import Const from msprobe.core.common.utils import CompareException -from msprobe.core.compare.acc_compare import ModeConfig -from msprobe.pytorch.compare.pt_compare import PTComparator, compare +from msprobe.pytorch.compare.pt_compare import compare from msprobe.test.core_ut.compare.test_acc_compare import generate_dump_json, generate_stack_json -- Gitee From 257b409e4b0106a3b64832f00941ed95fcdc327d Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 17 Mar 2025 17:13:27 +0800 Subject: [PATCH 18/27] compare reconstruct --- .../msprobe/test/core_ut/compare/test_acc_compare.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py index 971537315d..97d6331fb3 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py @@ -5,6 +5,7 @@ import shutil import threading import unittest +import numpy as np import pandas as pd import torch @@ -377,13 +378,13 @@ class TestUtilsMethods(unittest.TestCase): result = PTComparator(mode_config).compare_process(file_lists) o_data = [ ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], 0, 0, 0, 0, '0.0%', 'N/A', '0.0%', '0.0%', + 'torch.float32', 'torch.float32', '[2, 2]', '[2, 2]', 0, 0, 0, 0, '0.0%', 'N/A', '0.0%', '0.0%', 2, 0, 1, 1, 2, 0, 1, 1, '', '', ['File'] ] ] columns = CompareConst.SUMMARY_COMPARE_RESULT_HEADER + ['NPU_Stack_Info'] o_result = pd.DataFrame(o_data, columns=columns, dtype=object) - self.assertTrue(result.equals(o_result)) + self.assertTrue(np.array_equal(result.to_numpy(), o_result.to_numpy())) def test_compare_core_basic(self): generate_dump_json(base_dir2) -- Gitee From 738155eea3064aebee15af6b3f6e775860bf4d8c Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 17 Mar 2025 20:32:35 +0800 Subject: [PATCH 19/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 25 ++++++++++++++++++- .../msprobe/mindspore/compare/ms_compare.py | 19 +------------- .../msprobe/pytorch/compare/pt_compare.py | 20 +-------------- 3 files changed, 26 insertions(+), 38 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index d286d54dd6..3b141da03e 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -24,7 +24,7 @@ import pandas as pd from msprobe.core.advisor.advisor import Advisor from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import load_json, remove_path, create_directory +from msprobe.core.common.file_utils import load_json, remove_path, create_directory, load_yaml from msprobe.core.common.log import logger from msprobe.core.common.utils import CompareException, add_time_with_xlsx, check_op_str_pattern_valid, \ set_dump_path, get_dump_mode, check_compare_param, check_configuration_param @@ -64,6 +64,26 @@ class Comparator(ABC): self.fuzzy_match = mode_config.fuzzy_match self.dump_mode = mode_config.dump_mode + def _init_data_mapping(self, data_mapping): + """ + 初始化data_mapping_dict + """ + if isinstance(data_mapping, str) or data_mapping is None: + self.data_mapping_dict = self.load_mapping_file(data_mapping) + elif isinstance(data_mapping, dict): + self.data_mapping_dict = data_mapping + else: + raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " + f"{type(data_mapping)}") + + @staticmethod + def load_mapping_file(mapping_file): + if isinstance(mapping_file, str): + mapping_dict = load_yaml(mapping_file) + else: + mapping_dict = {} + return mapping_dict + @staticmethod def process_output_file(output_path, suffix): file_name = add_time_with_xlsx("compare_result" + suffix) @@ -116,6 +136,9 @@ class Comparator(ABC): def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: pass + def process_data_mapping(self, npu_op_name): + return self.data_mapping_dict.get(npu_op_name, npu_op_name) + def gen_merge_list(self, json_data, op_name, stack_json_data): op_data = json_data['data'][op_name] check_dump_json_str(op_data, op_name) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 4acd256693..f2abfefbb9 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -68,21 +68,7 @@ class MSComparator(Comparator): self.api_mapping_dict = self.load_mapping_file(self.api_mapping) if self.api_mapping is not None: self.ms_to_pt_mapping = self.load_internal_api() - if isinstance(self.data_mapping, str) or self.data_mapping is None: - self.data_mapping_dict = self.load_mapping_file(self.data_mapping) - elif isinstance(self.data_mapping, dict): - self.data_mapping_dict = self.data_mapping - else: - raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " - f"{type(self.data_mapping)}") - - @staticmethod - def load_mapping_file(mapping_file): - if isinstance(mapping_file, str): - mapping_dict = load_yaml(mapping_file) - else: - mapping_dict = {} - return mapping_dict + self.data_mapping_dict = self._init_data_mapping(self.data_mapping) @staticmethod def load_internal_api(): @@ -221,9 +207,6 @@ class MSComparator(Comparator): npu_op_name = npu_op_name.replace(cell_name, self.cell_mapping_dict[cell_name], 1) return npu_op_name - def process_data_mapping(self, npu_op_name): - return self.data_mapping_dict.get(npu_op_name, npu_op_name) - def assign_npu_df_compare_key(self, npu_df, bench_df): """ 处理 npu_df 的 COMPARE_KEY 赋值逻辑 diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 0825e8981b..e0262493f8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -16,7 +16,6 @@ import pandas as pd from msprobe.core.common.const import CompareConst, Const -from msprobe.core.common.file_utils import load_yaml from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison from msprobe.core.compare.utils import rename_api from msprobe.pytorch.compare.utils import read_pt_data @@ -32,21 +31,7 @@ class PTComparator(Comparator): self.dump_mode = mode_config.dump_mode self.data_mapping = data_mapping - if isinstance(self.data_mapping, str) or self.data_mapping is None: - self.data_mapping_dict = self.load_mapping_file(self.data_mapping) - elif isinstance(self.data_mapping, dict): - self.data_mapping_dict = self.data_mapping - else: - raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " - f"{type(self.data_mapping)}") - - @staticmethod - def load_mapping_file(mapping_file): - if isinstance(mapping_file, str): - mapping_dict = load_yaml(mapping_file) - else: - mapping_dict = {} - return mapping_dict + self.data_mapping_dict = self._init_data_mapping(self.data_mapping) @staticmethod def process_fuzzy_match(op_name): @@ -60,9 +45,6 @@ class PTComparator(Comparator): renamed_op_name = op_name return renamed_op_name - def process_data_mapping(self, npu_op_name): - return self.data_mapping_dict.get(npu_op_name, npu_op_name) - def assign_df_compare_key(self, df, is_npu=True): """ 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 -- Gitee From a62f34e8e2dfa25bdf06a63e5f9955a70939d4e6 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 19 Mar 2025 16:29:15 +0800 Subject: [PATCH 20/27] compare reconstruct --- .../msprobe/core/common/const.py | 2 +- .../msprobe/core/compare/acc_compare.py | 32 ++++++++++--------- .../msprobe/mindspore/compare/ms_compare.py | 7 ++-- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index ce81e655ff..ce1d6a95ff 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -554,7 +554,7 @@ class CompareConst: API_MAPPING_KEYS_TO_COMPARE = [ ('ms_args', 'pt_args'), - ('ms_output', 'pt_output'), + ('ms_outputs', 'pt_outputs'), ('ms_parameters', 'pt_parameters'), ('ms_parameters_grad', 'pt_parameters_grad'), ] diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 3b141da03e..adec6111f0 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -64,18 +64,6 @@ class Comparator(ABC): self.fuzzy_match = mode_config.fuzzy_match self.dump_mode = mode_config.dump_mode - def _init_data_mapping(self, data_mapping): - """ - 初始化data_mapping_dict - """ - if isinstance(data_mapping, str) or data_mapping is None: - self.data_mapping_dict = self.load_mapping_file(data_mapping) - elif isinstance(data_mapping, dict): - self.data_mapping_dict = data_mapping - else: - raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " - f"{type(data_mapping)}") - @staticmethod def load_mapping_file(mapping_file): if isinstance(mapping_file, str): @@ -278,8 +266,10 @@ class Comparator(ABC): result_df.loc[condition_md5_equal, CompareConst.RESULT] = CompareConst.PASS result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF elif self.dump_mode == Const.SUMMARY: - warning_list = [self.calc_summary_diff(result_df, condition_no_bench, stats_index) for stats_index in - ['max', 'min', 'mean', 'l2norm']] + warning_list = [ + self.calc_summary_diff(result_df, condition_no_bench, stats_index) + for stats_index in ['max', 'min', 'mean', 'l2norm'] + ] warning_flag = pd.DataFrame(warning_list).all() result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = '' result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING @@ -289,7 +279,7 @@ class Comparator(ABC): CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, CompareConst.ERROR_MESSAGE] - result_df.loc[~condition_no_bench, fill_cols] = '' # TODO 注意和pt对齐 + result_df.loc[~condition_no_bench, fill_cols] = '' result_df.loc[~condition_no_bench, CompareConst.ACCURACY] = CompareConst.ACCURACY_CHECK_YES return result_df[header] @@ -475,6 +465,18 @@ class Comparator(ABC): logger.error('result dataframe is not found.') raise CompareException(CompareException.INVALID_DATA_ERROR) from e + def _init_data_mapping(self, data_mapping): + """ + 初始化data_mapping_dict + """ + if isinstance(data_mapping, str) or data_mapping is None: + self.data_mapping_dict = self.load_mapping_file(data_mapping) + elif isinstance(data_mapping, dict): + self.data_mapping_dict = data_mapping + else: + raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " + f"{type(data_mapping)}") + def setup_comparison(input_param, output_path, **kwargs) -> ComparisonConfig: """公共的前置处理逻辑,返回封装后的 ComparisonConfig 对象""" diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index f2abfefbb9..f2cbbc990f 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -165,8 +165,11 @@ class MSComparator(Comparator): pt_api_indices_dict = self.get_api_indices_dict(bench_df) for mapping_dict in self.api_mapping_dict: - if not all(len(mapping_dict.get(k1, [])) == len(mapping_dict.get(k2, [])) for k1, k2 in - CompareConst.API_MAPPING_KEYS_TO_COMPARE): + all_length_equal = True + for k1, k2 in CompareConst.API_MAPPING_KEYS_TO_COMPARE: + if len(mapping_dict.get(k1, [])) != len(mapping_dict.get(k2, [])): + all_length_equal = False + if not all_length_equal: logger.warning('The user-defined mapping table is incorrect,\ make sure that the number of parameters is equal') continue -- Gitee From b2a46ddae2aedc0d2ed0ae2c0ad86f71a4dda92d Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 19 Mar 2025 16:43:59 +0800 Subject: [PATCH 21/27] compare reconstruct --- .../test/pytorch_ut/compare/test_match.py | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py deleted file mode 100644 index ac28e994e9..0000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/compare/test_match.py +++ /dev/null @@ -1,20 +0,0 @@ -# coding=utf-8 -import unittest -from msprobe.pytorch.compare import match - - -class TestMatch(unittest.TestCase): - def test_graph_mapping(self): - op1 = "Aten_convolution_1_forward_0.input.0" - op2 = "Torch_conv2d_0_forward_0.input.0" - op3 = "Torch_batch_norm_0_forward_0.input.0" - op4 = "Aten_convolution.default_1_forward_0.input.0" - op5 = "Aten_foo_1_forward_0.input.0" - self.assertTrue(match.graph_mapping.match(op1, op2)) - self.assertTrue(match.graph_mapping.match(op2, op1)) - self.assertTrue(match.graph_mapping.match(op4, op2)) - self.assertTrue(match.graph_mapping.match(op2, op4)) - self.assertFalse(match.graph_mapping.match(op1, op3)) - self.assertFalse(match.graph_mapping.match(op3, op1)) - self.assertFalse(match.graph_mapping.match(op5, op2)) - self.assertFalse(match.graph_mapping.match(op2, op5)) -- Gitee From 8ba42136fb6be878fc67958aa29e18e03f198fcd Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 19 Mar 2025 16:58:41 +0800 Subject: [PATCH 22/27] compare reconstruct --- .../test/core_ut/compare/test_cmp_multiprocessing_compute.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py index 49f084ce07..debf8ab868 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py @@ -9,9 +9,10 @@ import pandas as pd from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.utils import CompareException -from msprobe.core.compare.acc_compare import Comparator, ModeConfig +from msprobe.core.compare.acc_compare import ModeConfig from msprobe.core.compare.multiprocessing_compute import ComparisonResult, _handle_multi_process, _save_cmp_result, \ check_accuracy, read_dump_data +from msprobe.pytorch.compare.pt_compare import PTComparator from test_acc_compare import generate_dump_json data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', @@ -52,7 +53,7 @@ class TestUtilsMethods(unittest.TestCase): dump_mode = Const.ALL mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) - func = Comparator(mode_config).compare_ops + func = PTComparator(mode_config).compare_ops generate_dump_json(base_dir) input_param = {'bench_json_path': os.path.join(base_dir, 'dump.json')} lock = multiprocessing.Manager().RLock() -- Gitee From bb64201eefc1c75b71aae40d3340e07f04a77883 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 20 Mar 2025 17:21:28 +0800 Subject: [PATCH 23/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index adec6111f0..4c9c5f3486 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -90,9 +90,7 @@ class Comparator(ABC): return [CompareConst.N_A] * 4 summary_list = [] for i in summary: - if i is None: - summary_list.append(CompareConst.N_A) - elif str(i).lower() == 'nan': + if str(i).lower() == 'nan': summary_list.append(CompareConst.NAN) else: summary_list.append(i) @@ -157,6 +155,8 @@ class Comparator(ABC): result['data_name'] = [] elif self.dump_mode == Const.MD5: result[Const.MD5] = [] + + # 从json中循环解析API数据,遍历所有API for data_name in data_json['data']: check_op_str_pattern_valid(data_name) merge_list = self.gen_merge_list(data_json, data_name, stack_json_data) @@ -169,7 +169,8 @@ class Comparator(ABC): op_name_reorder, summary_reorder, data_name_reorder = reorder_op_x_list(op_name_list, summary_list, data_name_list) - for op_name in op_name_reorder: + # 遍历单个API的所有item + for index, op_name in enumerate(op_name_reorder): result[CompareConst.OP_NAME].append(op_name) if (CompareConst.INPUT_PATTERN in op_name) or (CompareConst.KWARGS_PATTERN in op_name): struct = merge_list[CompareConst.INPUT_STRUCT].pop(0) @@ -184,7 +185,7 @@ class Comparator(ABC): if self.dump_mode == Const.MD5: result[Const.MD5].append(struct[2]) result[Const.SUMMARY].append(summary_reorder.pop(0)) - result['stack_info'].append(merge_list['stack_info'][0] if self.stack_mode else None) + result['stack_info'].append(merge_list['stack_info'][0] if index == 0 and self.stack_mode else None) if self.dump_mode == Const.ALL: result['data_name'].append(data_name_reorder.pop(0)) return pd.DataFrame(result) @@ -270,7 +271,7 @@ class Comparator(ABC): self.calc_summary_diff(result_df, condition_no_bench, stats_index) for stats_index in ['max', 'min', 'mean', 'l2norm'] ] - warning_flag = pd.DataFrame(warning_list).all() + warning_flag = pd.DataFrame(warning_list).any() result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = '' result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.' @@ -302,13 +303,14 @@ class Comparator(ABC): # process npu_df's COMPARE_KEY whether same or different framework npu_df, bench_df = self.process_compare_key_and_shape(npu_df, bench_df) - # match npu and bench + # match npu and bench, match_result contains both npu_info and bench_info match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') + # 筛选出npu_name存在的行并填充筛选出行中的缺失值为N/A match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) bench_columns = [i + '_y' for i in bench_df.columns] match_result.loc[~self.gen_dtype_condition(match_result), bench_columns] = CompareConst.N_A - # organize comparsion result table + # organize compare result table by renaming columns result_df, header = self.make_result_df(match_result) # calculate statistics diff -- Gitee From e61e3f8c9ca1eaa3ef256f6b474ce312e5e9b6b9 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Mon, 24 Mar 2025 10:31:39 +0800 Subject: [PATCH 24/27] compare reconstruct --- debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py | 2 +- debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index f2cbbc990f..3ef4bfaa13 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -68,7 +68,7 @@ class MSComparator(Comparator): self.api_mapping_dict = self.load_mapping_file(self.api_mapping) if self.api_mapping is not None: self.ms_to_pt_mapping = self.load_internal_api() - self.data_mapping_dict = self._init_data_mapping(self.data_mapping) + self._init_data_mapping(self.data_mapping) @staticmethod def load_internal_api(): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index e0262493f8..52330efe29 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -31,7 +31,7 @@ class PTComparator(Comparator): self.dump_mode = mode_config.dump_mode self.data_mapping = data_mapping - self.data_mapping_dict = self._init_data_mapping(self.data_mapping) + self._init_data_mapping(self.data_mapping) @staticmethod def process_fuzzy_match(op_name): -- Gitee From b74915afd84c17444fe0f0b9d7f4391260c5604b Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 25 Mar 2025 16:09:09 +0800 Subject: [PATCH 25/27] compare reconstruct --- .../msprobe/core/common/const.py | 7 + .../msprobe/core/compare/acc_compare.py | 124 +++++++++++++++++- .../msprobe/core/compare/utils.py | 13 +- .../msprobe/mindspore/compare/ms_compare.py | 52 ++------ .../pytorch/compare/distributed_compare.py | 12 +- .../msprobe/pytorch/compare/pt_compare.py | 61 ++------- 6 files changed, 163 insertions(+), 106 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index ce1d6a95ff..2a4c6ace77 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -566,6 +566,13 @@ class CompareConst: PARAMS_GRAD_PATTERN = Const.SEP + Const.PARAMS_GRAD + Const.SEP CMP_KEY = 'compare_key' CMP_SHAPE = 'compare_shape' + + MATCH_RESULT_COLUMNS = [ + 'op_name_x', 'dtype_x', 'shape_x', 'summary_x', 'stack_info_x', 'data_name_x', + CMP_KEY, CMP_SHAPE, + 'op_name_y', 'dtype_y', 'shape_y', 'summary_y', 'stack_info_y', 'data_name_y', + ] + INTERNAL_API_MAPPING_FILE = 'ms_to_pt_api.yaml' UNREADABLE = 'unreadable data' diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index fefa3e0ce7..d1b6703e73 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -33,7 +33,7 @@ from msprobe.core.compare.highlight import find_compare_result_error_rows, highl from msprobe.core.compare.multiprocessing_compute import ComparisonResult, _handle_multi_process, _save_cmp_result from msprobe.core.compare.npy_compare import compare_ops_apply, get_error_flag_and_msg from msprobe.core.compare.utils import merge_tensor, print_compare_ends_info, read_op, \ - reorder_op_x_list, set_stack_json_path + reorder_op_x_list, set_stack_json_path, rename_api @dataclass @@ -112,18 +112,74 @@ class Comparator(ABC): def get_number(val): return pd.to_numeric(val.astype(str), errors='coerce') + @staticmethod + def put_unmatched_in_table(match_result, npu_op_item): + new_columns = ['op_name_y', 'dtype_y', 'shape_y', 'summary_y', 'stack_info_y', 'data_name_y'] + na_series = pd.Series([CompareConst.N_A] * len(new_columns), index=new_columns) + new_result_item = pd.concat([npu_op_item, na_series]).to_frame().T + new_result_item.columns = CompareConst.MATCH_RESULT_COLUMNS + match_result = pd.concat([match_result, new_result_item]) + return match_result + + @staticmethod + def put_matched_in_table(match_result, npu_op_item, bench_op_item): + new_result_item = pd.concat([npu_op_item, bench_op_item]).head(14).to_frame().T + new_result_item.columns = CompareConst.MATCH_RESULT_COLUMNS + match_result = pd.concat([match_result, new_result_item]) + return match_result + + @staticmethod + def check_op_item(npu_op_item, bench_op_item): + name_match = rename_api(npu_op_item[CompareConst.CMP_KEY]) == rename_api(bench_op_item[CompareConst.CMP_KEY]) + shape_match = npu_op_item[CompareConst.CMP_SHAPE] == bench_op_item[CompareConst.CMP_SHAPE] + if name_match and shape_match: + return True + else: + check_op_str_pattern_valid(npu_op_item['op_name']) + check_op_str_pattern_valid(bench_op_item['op_name']) + logger.warning(f"{npu_op_item['op_name']} and {bench_op_item['op_name']} can not fuzzy match") + return False + @abstractmethod - def process_compare_key_and_shape(self, npu_df, bench_df): + def assign_npu_df_compare_key(self, npu_df, bench_df): pass @abstractmethod - def gen_dtype_condition(self, match_result): + def process_cross_frame_npu_dtype(self, npu_dtype): pass @abstractmethod def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: pass + def process_compare_key_and_shape(self, npu_df, bench_df): + npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) + npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] + bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] + bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] + return npu_df, bench_df + + def gen_dtype_condition(self, match_result): + """ + dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 + """ + # 如果使用了data_mapping,不校验dtype,返回全True的DataFrame + if self.data_mapping: + return pd.Series(True, index=match_result.index) + + npu_dtype = match_result['dtype_x'] + bench_dtype = match_result['dtype_y'] + npu_dtype = self.process_cross_frame_npu_dtype(npu_dtype) + + equal_condition = npu_dtype == bench_dtype + match_condition = ( + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[0])) | + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[1])) + ) + return equal_condition | match_condition + def process_data_mapping(self, npu_op_name): return self.data_mapping_dict.get(npu_op_name, npu_op_name) @@ -192,6 +248,66 @@ class Comparator(ABC): result['data_name'].append(data_name_reorder.pop(0)) return pd.DataFrame(result) + def match_api_infos(self, npu_df, bench_df): + if not self.fuzzy_match: + match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') + else: + match_result = self.process_fuzzy_match(npu_df, bench_df) + return match_result + + def process_fuzzy_match(self, npu_df, bench_df): + npu_ops_queue = [] + bench_ops_queue = [] + match_result = pd.DataFrame(columns=CompareConst.MATCH_RESULT_COLUMNS) + + max_len = max(len(npu_df), len(bench_df)) + min_len = min(len(npu_df), len(bench_df)) + for i in range(max_len): + if i < min_len: + npu_ops_queue.append(npu_df.iloc[i]) + bench_ops_queue.append(bench_df.iloc[i]) + else: + try: + npu_ops_queue.append(npu_df.iloc[i]) + except IndexError: + npu_ops_queue.append(None) + try: + bench_ops_queue.append(bench_df.iloc[i]) + except IndexError: + bench_ops_queue.append(None) + + npu_match_point, bench_match_point = self.match_op(npu_ops_queue, bench_ops_queue) + + # 如果没有匹配到,数据放到队列中,跳过。直到后面匹配到,把匹配之前的api放到不匹配中 + if npu_match_point == -1 and bench_match_point == -1: + continue + + npu_op_item = npu_ops_queue[npu_match_point] + bench_op_item = bench_ops_queue[bench_match_point] + unmatched_data = npu_ops_queue[0: npu_match_point] + for op_item in unmatched_data: + match_result = self.put_unmatched_in_table(match_result, op_item) + match_result = self.put_matched_in_table(match_result, npu_op_item, bench_op_item) + del npu_ops_queue[0: npu_match_point + 1] + del bench_ops_queue[0: bench_match_point + 1] + + if npu_ops_queue: + for op_item in npu_ops_queue: + match_result = self.put_unmatched_in_table(match_result, op_item) + + return match_result + + def match_op(self, npu_queue, bench_queue): + for b_index, b_op in enumerate(bench_queue[0: -1]): + if self.check_op_item(npu_queue[-1], b_op): + return len(npu_queue) - 1, b_index + if self.check_op_item(npu_queue[-1], bench_queue[-1]): + return len(npu_queue) - 1, len(bench_queue) - 1 + for n_index, n_op in enumerate(npu_queue[0: -1]): + if self.check_op_item(n_op, bench_queue[-1]): + return n_index, len(bench_queue) - 1 + return -1, -1 + def make_result_df(self, result): # get header header = CompareConst.HEAD_OF_COMPARE_MODE[self.dump_mode][:] @@ -306,7 +422,7 @@ class Comparator(ABC): npu_df, bench_df = self.process_compare_key_and_shape(npu_df, bench_df) # match npu and bench, match_result contains both npu_info and bench_info - match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') + match_result = self.match_api_infos(npu_df, bench_df) # 筛选出npu_name存在的行并填充筛选出行中的缺失值为N/A match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) bench_columns = [i + '_y' for i in bench_df.columns] diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 5c1fb5b7b5..b9a8ad637f 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -82,19 +82,20 @@ def check_and_return_dir_contents(dump_dir, prefix): return contents -def rename_api(npu_name, process): +def rename_api(op_name): """ 原api: {api_type}.{api_name}.{API调用次数}.{前向反向}.{input/output}.{参数序号} - rename后: {api_type}.{api_name}.{input/output}.{参数序号} + rename后: {api_type}.{api_name}.{API调用次数}.{input/output}.{参数序号} """ - npu_split = npu_name.split(process) + process = Const.FORWARD if Const.FORWARD in op_name else Const.BACKWARD + name_split = op_name.split(process) try: - torch_func_index, in_out = npu_split[0], npu_split[1] + torch_func_index, in_out = name_split[0], name_split[1] except IndexError as error: - logger.error(f'{npu_name} can not be split with {process}, please check!') + logger.error(f'{op_name} can not be split with {process}, please check!') raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error torch_func_split = torch_func_index.rsplit(Const.SEP, 2) - torch_func = str(torch_func_split[0]) + str(in_out) + torch_func = str(torch_func_split[0]) + Const.SEP + process + str(in_out) return torch_func diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 3ef4bfaa13..cb29ea8254 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -17,8 +17,6 @@ import os import re from collections import defaultdict -import pandas as pd - from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.file_utils import load_yaml from msprobe.core.common.log import logger @@ -85,43 +83,6 @@ class MSComparator(Comparator): raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error return api_name - def process_compare_key_and_shape(self, npu_df, bench_df): - npu_df = self.assign_npu_df_compare_key(npu_df, bench_df) - npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] - bench_df[CompareConst.CMP_KEY] = bench_df[CompareConst.OP_NAME] - bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] - return npu_df, bench_df - - def gen_dtype_condition(self, match_result): - """ - dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 - """ - # 如果ms使用了fuzzy_match或data_mapping,不校验dtype,返回全True的DataFrame - if self.fuzzy_match or self.data_mapping: - return pd.Series(True, index=match_result.index) - - npu_dtype = match_result['dtype_x'] - bench_dtype = match_result['dtype_y'] - if self.cross_frame: - npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) - - equal_condition = npu_dtype == bench_dtype - match_condition = ( - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[0])) | - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( - CompareConst.DTYPE_MATCH_GROUPS[1])) - ) - return equal_condition | match_condition - - def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: - n_value = read_npy_data(npu_dir, npu_data_name) - if self.cross_frame: - b_value = read_pt_data(bench_dir, bench_data_name) - else: - b_value = read_npy_data(bench_dir, bench_data_name) - return n_value, b_value - def process_internal_api_mapping(self, npu_op_name): # get api name & class name from op_name # Functional.addcmul.0.forward.input.0 @@ -235,6 +196,19 @@ class MSComparator(Comparator): npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] return npu_df + def process_cross_frame_npu_dtype(self, npu_dtype): + if self.cross_frame: + npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) + return npu_dtype + + def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: + n_value = read_npy_data(npu_dir, npu_data_name) + if self.cross_frame: + b_value = read_pt_data(bench_dir, bench_data_name) + else: + b_value = read_npy_data(bench_dir, bench_data_name) + return n_value, b_value + def ms_compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py index de62af421b..a484ad5cee 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,14 +15,10 @@ import os -from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import create_directory -from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, get_dump_mode, \ - set_dump_path -from msprobe.core.compare.acc_compare import ModeConfig -from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json, set_stack_json_path +from msprobe.core.common.utils import CompareException +from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json from msprobe.pytorch.common.log import logger -from msprobe.pytorch.compare.pt_compare import PTComparator, compare +from msprobe.pytorch.compare.pt_compare import compare def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs): diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 52330efe29..d628111de6 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -13,11 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd - -from msprobe.core.common.const import CompareConst, Const +from msprobe.core.common.const import CompareConst from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison -from msprobe.core.compare.utils import rename_api from msprobe.pytorch.compare.utils import read_pt_data @@ -33,63 +30,29 @@ class PTComparator(Comparator): self.data_mapping = data_mapping self._init_data_mapping(self.data_mapping) - @staticmethod - def process_fuzzy_match(op_name): - if not op_name: - return CompareConst.N_A - if Const.FORWARD in op_name: - renamed_op_name = rename_api(op_name, Const.FORWARD) - elif Const.BACKWARD in op_name: - renamed_op_name = rename_api(op_name, Const.BACKWARD) - else: - renamed_op_name = op_name - return renamed_op_name - - def assign_df_compare_key(self, df, is_npu=True): + def assign_npu_df_compare_key(self, npu_df, bench_df): """ - 处理 npu_df 或 bench_df 的 COMPARE_KEY 赋值逻辑 + 处理 npu_df 的 COMPARE_KEY 赋值逻辑 - :param df: DataFrame,NPU or Bench 对比数据 - :param is_npu: 是否npu数据 - :return: compare_key(name)处理后的 npu_df 或 bench_df - """ - if is_npu and self.data_mapping: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_data_mapping) - elif self.fuzzy_match: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME].apply(self.process_fuzzy_match) - else: - df[CompareConst.CMP_KEY] = df[CompareConst.OP_NAME] - - return df - - def process_compare_key_and_shape(self, npu_df, bench_df): - npu_df = self.assign_df_compare_key(npu_df, is_npu=True) - npu_df[CompareConst.CMP_SHAPE] = npu_df[Const.SHAPE] - bench_df = self.assign_df_compare_key(bench_df, is_npu=False) - bench_df[CompareConst.CMP_SHAPE] = bench_df[Const.SHAPE] - return npu_df, bench_df - - def gen_dtype_condition(self, match_result): - """ - dtype匹配条件为npu、bench的dtype一致或属于规定的映射关系 + :param npu_df: DataFrame,NPU 对比数据 + :param bench_df: DataFrame,Bench 对比数据 + :return: compare_key(name)处理后的 npu_df """ if self.data_mapping: - return pd.Series(True, index=match_result.index) + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME].apply(self.process_data_mapping) + else: + npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] - npu_dtype = match_result['dtype_x'] - bench_dtype = match_result['dtype_y'] + return npu_df - equal_condition = npu_dtype == bench_dtype - match_condition = ( - (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]))) - return equal_condition | match_condition + def process_cross_frame_npu_dtype(self, npu_dtype): + return npu_dtype def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: n_value = read_pt_data(npu_dir, npu_data_name) b_value = read_pt_data(bench_dir, bench_data_name) return n_value, b_value - def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) -- Gitee From d0785d33f3d75a73e70cd4e827dc45427ee9c843 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 25 Mar 2025 16:23:02 +0800 Subject: [PATCH 26/27] compare reconstruct --- debug/accuracy_tools/msprobe/core/compare/acc_compare.py | 6 ++++++ .../accuracy_tools/msprobe/pytorch/compare/pt_compare.py | 1 + .../test/core_ut/compare/test_acc_compare_utils.py | 8 ++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index d1b6703e73..62715ba8f4 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -249,6 +249,9 @@ class Comparator(ABC): return pd.DataFrame(result) def match_api_infos(self, npu_df, bench_df): + """ + 正常匹配和模糊匹配 + """ if not self.fuzzy_match: match_result = pd.merge(npu_df, bench_df, on=[CompareConst.CMP_KEY, CompareConst.CMP_SHAPE], how='outer') else: @@ -256,6 +259,9 @@ class Comparator(ABC): return match_result def process_fuzzy_match(self, npu_df, bench_df): + """ + 模糊匹配通过循环方式匹配api + """ npu_ops_queue = [] bench_ops_queue = [] match_result = pd.DataFrame(columns=CompareConst.MATCH_RESULT_COLUMNS) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index d628111de6..811e829777 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -53,6 +53,7 @@ class PTComparator(Comparator): b_value = read_pt_data(bench_dir, bench_data_name) return n_value, b_value + def compare(input_param, output_path, **kwargs): config = setup_comparison(input_param, output_path, **kwargs) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py index a4839ae051..d93eda0990 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py @@ -352,14 +352,14 @@ class TestUtilsMethods(unittest.TestCase): def test_rename_api_1(self): test_name_1 = "Distributed.broadcast.0.forward.input.0" - expect_name_1 = "Distributed.broadcast.input.0" - actual_name_1 = rename_api(test_name_1, "forward") + expect_name_1 = "Distributed.broadcast.forward.input.0" + actual_name_1 = rename_api(test_name_1) self.assertEqual(actual_name_1, expect_name_1) def test_rename_api_2(self): test_name_2 = "Torch.sum.0.backward.output.0" - expect_name_2 = "Torch.sum.output.0" - actual_name_2 = rename_api(test_name_2, "backward") + expect_name_2 = "Torch.sum.backward.output.0" + actual_name_2 = rename_api(test_name_2) self.assertEqual(actual_name_2, expect_name_2) def test_read_op(self): -- Gitee From 0ecd307bc3e3b997543b1c2d7c0e7bc2845b7fd8 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 25 Mar 2025 16:55:11 +0800 Subject: [PATCH 27/27] compare reconstruct --- .../msprobe/core/compare/acc_compare.py | 5 +- .../msprobe/core/compare/check.py | 50 +++++++++++++------ .../msprobe/mindspore/compare/ms_compare.py | 8 +-- .../msprobe/pytorch/compare/pt_compare.py | 4 +- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 62715ba8f4..e92296579e 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -145,7 +145,7 @@ class Comparator(ABC): pass @abstractmethod - def process_cross_frame_npu_dtype(self, npu_dtype): + def process_cross_frame_dtype(self, npu_dtype): pass @abstractmethod @@ -169,7 +169,8 @@ class Comparator(ABC): npu_dtype = match_result['dtype_x'] bench_dtype = match_result['dtype_y'] - npu_dtype = self.process_cross_frame_npu_dtype(npu_dtype) + npu_dtype = self.process_cross_frame_dtype(npu_dtype) + bench_dtype = self.process_cross_frame_dtype(bench_dtype) equal_condition = npu_dtype == bench_dtype match_condition = ( diff --git a/debug/accuracy_tools/msprobe/core/compare/check.py b/debug/accuracy_tools/msprobe/core/compare/check.py index 0ce58aaf32..a88ddb8f5e 100644 --- a/debug/accuracy_tools/msprobe/core/compare/check.py +++ b/debug/accuracy_tools/msprobe/core/compare/check.py @@ -17,22 +17,40 @@ from msprobe.core.common.log import logger from msprobe.core.common.utils import check_op_str_pattern_valid, CompareException from msprobe.core.common.const import Const -dtype_mapping = { - "Int8": "torch.int8", - "UInt8": "torch.uint8", - "Int16": "torch.int16", - "UInt16": "torch.uint16", - "Int32": "torch.int32", - "UInt32": "torch.uint32", - "Int64": "torch.int64", - "UInt64": "torch.uint64", - "Float16": "torch.float16", - "Float32": "torch.float32", - "Float64": "torch.float64", - "Bool": "torch.bool", - "BFloat16": "torch.bfloat16", - "Complex64": "torch.complex64", - "Complex128": "torch.complex128" +cross_dtype_mapping = { + "Int8": "int", + "torch.int8": "int", + "UInt8": "int", + "torch.uint8": "int", + "Int16": "int", + "torch.int16": "int", + "UInt16": "int", + "torch.uint16": "int", + "Int32": "int", + "torch.int32": "int", + "UInt32": "int", + "torch.uint32": "int", + "Int64": "int", + "torch.int64": "int", + "UInt64": "int", + "torch.uint64": "int", + + "Float16": "float", + "torch.float16": "float", + "Float32": "float", + "torch.float32": "float", + "Float64": "float", + "torch.float64": "float", + "BFloat16": "float", + "torch.bfloat16": "float", + + "Bool": "bool", + "torch.bool": "bool", + + "Complex64": "complex", + "torch.complex64": "complex", + "Complex128": "complex", + "torch.complex128": "complex", } diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index cb29ea8254..5b271a3ae4 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -22,7 +22,7 @@ from msprobe.core.common.file_utils import load_yaml from msprobe.core.common.log import logger from msprobe.core.common.utils import CompareException from msprobe.core.compare.acc_compare import Comparator, ModeConfig, setup_comparison -from msprobe.core.compare.check import dtype_mapping +from msprobe.core.compare.check import cross_dtype_mapping from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping from msprobe.mindspore.compare.utils import read_npy_data, check_cross_framework from msprobe.pytorch.compare.utils import read_pt_data @@ -196,10 +196,10 @@ class MSComparator(Comparator): npu_df[CompareConst.CMP_KEY] = npu_df[CompareConst.OP_NAME] return npu_df - def process_cross_frame_npu_dtype(self, npu_dtype): + def process_cross_frame_dtype(self, dtype): if self.cross_frame: - npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) - return npu_dtype + dtype = dtype.map(cross_dtype_mapping).fillna(dtype) + return dtype def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: n_value = read_npy_data(npu_dir, npu_data_name) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py index 811e829777..9ffcecc72a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/pt_compare.py @@ -45,8 +45,8 @@ class PTComparator(Comparator): return npu_df - def process_cross_frame_npu_dtype(self, npu_dtype): - return npu_dtype + def process_cross_frame_dtype(self, dtype): + return dtype def read_real_data(self, npu_dir, npu_data_name, bench_dir, bench_data_name) -> tuple: n_value = read_pt_data(npu_dir, npu_data_name) -- Gitee