diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 9e92f7d0a7a7b14b38ab7098dbd863263dd9f807..71d9014a8e4fbb802fd08983c7375903748b0a5e 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -121,8 +121,8 @@ class Comparator: return merge_list def check_op(self, npu_dict, bench_dict, fuzzy_match): - npu_op_name = npu_dict["op_name"] - bench_op_name = bench_dict["op_name"] + npu_op_name = npu_dict[CompareConst.OP_NAME] + bench_op_name = bench_dict[CompareConst.OP_NAME] graph_mode = check_graph_mode(safe_get_value(npu_op_name, 0, "npu_op_name"), safe_get_value(bench_op_name, 0, "bench_op_name")) @@ -237,7 +237,7 @@ class Comparator: merge_list = self.gen_merge_list(json_data, op_name, stack_json_data, dump_mode) if merge_list: input_index, output_index = 0, 0 - for index, input_or_output in enumerate(merge_list['op_name']): + for index, input_or_output in enumerate(merge_list[CompareConst.OP_NAME]): input_or_output_list = input_or_output.split(Const.SEP) data_name = merge_list.get('data_name') data_name = data_name[index] if data_name else None diff --git a/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md b/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md index e8f762b66bfd5a3542b3baefec99cea1d646944b..3d609c67d69eb57cb7f93b48cda59f923476f0f3 100644 --- a/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md @@ -37,10 +37,10 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s | -s或--stack_mode | 比对结果展示调用栈信息(NPU_Stack_Info)的开关,bool 类型。单卡场景开启时,需要使用[比对文件](#31-比对文件)的单卡场景配置stack_path指定stack.json文件,才能生成详细调用栈信息,否则在比对时会报错;暂不支持多卡场景。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | | -c或--compare_only | 仅比对开关,bool 类型。该参数默认未配置,会启用自动精度分析,工具自动针对比对结果进行分析,识别到第一个精度可能不达标节点(在比对结果文件中的 Accuracy Reached or Not 列显示为 
No),并给出问题可能产生的原因(打屏展示并生成 `advisor_{timestamp}.txt` 文件)。通过配置该参数取消自动精度分析,仅输出比对结果表格。 | 否 | | -f或--fuzzy_match | 模糊匹配。开启后,对于网络中同一层级且命名仅调用次数不同的API,可匹配并进行比对。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | -| -am或--api_mapping | 跨框架比对。配置该参数时表示开启跨框架API比对功能。仅[跨框架的API比对](#25-跨框架的api比对)场景需要配置。 | 否 | -| -cm或--cell_mapping | 跨框架比对。配置该参数时表示开启跨框架cell模块比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(cell)](#33-自定义映射文件cell)。仅[跨框架的cell模块比对](#26-跨框架的cell模块比对)场景需要配置。 | 否 | -| -dm或--data_mapping | 跨框架比对。配置该参数时表示开启跨框架API或模块的比对功能,需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(API和模块)](#34-自定义映射文件api和模块)。仅[跨框架的API或模块比对](#27-跨框架的api或模块比对)场景需要配置。 | 否 | -| -lm或--layer_mapping | 跨框架比对。配置该参数时表示开启跨框架Layer层的比对功能,指定模型代码中的Layer层后,可以识别对应dump数据中的模块或API。需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(Layer)](#35-自定义映射文件layer)。仅[跨框架的Layer层比对](#28-跨框架的layer层比对)场景需要配置。 | 否 | +| -am或--api_mapping | 跨框架比对。配置该参数时表示开启跨框架API比对功能。仅[跨框架的API比对](#25-跨框架的api比对api-mapping)场景需要配置。 | 否 | +| -cm或--cell_mapping | 跨框架比对。配置该参数时表示开启跨框架cell模块比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(cell)](#33-自定义映射文件cell)。仅[跨框架的cell模块比对](#26-跨框架的cell模块比对cell-mapping)场景需要配置。 | 否 | +| -dm或--data_mapping | 跨框架比对。配置该参数时表示开启跨框架API或模块的比对功能,需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(API和模块)](#34-自定义映射文件api和模块)。仅[跨框架的API或模块比对](#27-跨框架的api或模块比对data-mapping)场景需要配置。 | 否 | +| -lm或--layer_mapping | 跨框架比对。配置该参数时表示开启跨框架Layer层的比对功能,指定模型代码中的Layer层后,可以识别对应dump数据中的模块或API。需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(Layer)](#35-自定义映射文件layer)。仅[跨框架的Layer层比对](#28-跨框架的layer层比对layer-mapping)场景需要配置。 | 否 | ### 2.2 不同版本下的全量API比对 @@ -86,9 +86,9 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s msprobe -f mindspore compare -i ./compare.json -o ./output -s ``` -5. 查看比对结果,请详见PyTorch目录下的《[PyTorch 场景的精度比对-精度比对结果分析](./10.accuracy_compare_PyTorch.md#3-精度比对结果分析)》章节。 +5. 
查看比对结果,请详见PyTorch目录下的《[PyTorch 场景的精度比对-精度比对结果分析](./10.accuracy_compare_PyTorch.md#3-精度比对结果分析)》章节。在比对结果中,如果未采集到数据或匹配不到的将记为N/A,如果采集到的数据为`null`将记为None,如果采集到的数据是`nan`将记为Nan。 -### 2.5 跨框架的API比对 +### 2.5 跨框架的API比对(api-mapping) 1. 配置[config.json](../config.json)文件level配置为L1、task配置为tensor或statistics。 @@ -101,10 +101,15 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s ```shell msprobe -f mindspore compare -i ./compare.json -o ./output -s -am ``` +5. 自定义api_mapping.yaml: 当内置的默认映射无法正确识别匹配,可以手动配置映射文件api_mapping.yaml,执行命令为: + ```shell + msprobe -f mindspore compare -i ./compare.json -o ./output -s -am api_mapping.yaml + ``` + api_mapping.yaml的写法参见[自定义映射文件api_mapping.yaml](#36-自定义映射文件api_mapping)。 -5. 查看比对结果,请详见PyTorch目录下的《[PyTorch 场景的精度比对-精度比对结果分析](./10.accuracy_compare_PyTorch.md#3-精度比对结果分析)》章节。 +6. 查看比对结果,请详见PyTorch目录下的《[PyTorch 场景的精度比对-精度比对结果分析](./10.accuracy_compare_PyTorch.md#3-精度比对结果分析)》章节。在比对结果中,如果未采集到数据或匹配不到的将记为N/A,如果采集到的数据为`null`将记为None,如果采集到的数据是`nan`将记为Nan。 -### 2.6 跨框架的cell模块比对 +### 2.6 跨框架的cell模块比对(cell-mapping) 1. 配置[config.json](../config.json)文件level配置为L0、task配置为tensor或statistics并指定需要dump的cell模块名。 @@ -128,7 +133,7 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s 5. 查看比对结果,请详见PyTorch目录下的《[PyTorch 场景的精度比对-精度比对结果分析](./10.accuracy_compare_PyTorch.md#3-精度比对结果分析)》章节。 -### 2.7 跨框架的API或模块比对 +### 2.7 跨框架的API或模块比对(data-mapping) 该场景可用于在“**跨框架的API比对**”和“**跨框架的cell模块比对**”场景均无法完全覆盖模型中的API和模块时,通过手动指定映射关系来补全未被比对的API或模块。 @@ -148,7 +153,7 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s 5. 
查看比对结果,请详见PyTorch目录下的《[PyTorch 场景的精度比对-精度比对结果分析](./10.accuracy_compare_PyTorch.md#3-精度比对结果分析)》章节。 -### 2.8 跨框架的Layer层比对 +### 2.8 跨框架的Layer层比对(layer-mapping) 该场景可简化API或模块场景的配置,从Layer层识别整网的API和模块。 @@ -332,3 +337,146 @@ yaml文件中只需配置MindSpore与PyTorch模型代码中功能一致但名称 模型代码示例: ![ms_dump](./img/ms_layer.png) + +### 3.6 自定义映射文件api_mapping +例: +mindspore的dump数据 +```json +{ + "task": "statistics", + "level": "L1", + "dump_data_dir": null, + "data": { + "Functional.interpolate.0.backward": { + "input": [ + { + "type": "mindspore.Tensor", + "dtype": "Float32", + "shape": [ + 42, + 256, + 44, + 80 + ], + "Max": 2.2106464156418151e-05, + "Min": -2.3546815186461534e-05, + "Mean": -5.1346841321564535e-11, + "Norm": 0.00548641216546450613 + }, + null, + { + "type": "mindspore.Tensor", + "dtype": "Float32", + "shape": [ + 42, + 256, + 44, + 80 + ], + "Max": 5.2115615611148151e-05, + "Min": -2.3546616512061534e-05, + "Mean": -1.1346060626014535e-11, + "Norm": 0.00548060560641416113 + } + ], + "input_kwargs": { + "x_value": { + "type": "int", + "value": 10 + } + }, + "output": [ + { + "type": "mindspore.Tensor", + "dtype": "Float32", + "shape": [], + "Max": 0.00621970560641416113, + "Min": 0.00621970560641416113, + "Mean": 0.00621970560641416113, + "Norm": 0.00621970560641416113 + } + ] + } + } +} +``` +标杆数据: +```json +{ + "task": "statistics", + "level": "L1", + "dump_data_dir": null, + "data": { + "Functional.interpolate.0.backward": { + "input": [ + { + "type": "torch.Tensor", + "dtype": "torch.float32", + "shape": [ + 42, + 256, + 44, + 80 + ], + "Max": 2.2106464156418151e-05, + "Min": -2.3546815186461534e-05, + "Mean": -5.1346841321564535e-11, + "Norm": 0.00548641216546450613, + "requires_grad": false + }, + { + "type": "torch.Tensor", + "dtype": "torch.float32", + "shape": [ + 42, + 256, + 44, + 80 + ], + "Max": 5.2115615611148151e-05, + "Min": -2.3546616512061534e-05, + "Mean": -1.1346060626014535e-11, + "Norm": 0.00548060560641416113, + "requires_grad": false + } + ], +
"input_kwargs": { + "x": { + "type": "int", + "value": 10 + } + }, + "output": [ + { + "type": "torch.Tensor", + "dtype": "torch.float32", + "shape": [], + "Max": 0.00621970560641416113, + "Min": 0.00621970560641416113, + "Mean": 0.00621970560641416113, + "Norm": 0.00621970560641416113, + "requires_grad": false + } + ] + } + } +} +``` +其中,mindspore的`Functional.interpolate.0.backward`的input中第0、2个数据对应PyTorch中`Functional.interpolate.0.backward`的input中第0、1个数据,input_kwargs中mindspore的x_value对应PyTorch的x,output中mindspore的第0个数据对应PyTorch的第0个数据,因此api_mapping.yaml写为: +```yaml +- ms_api: Functional.interpolate + pt_api: Functional.interpolate + ms_args: + - '0' + - '2' + - x_value + pt_args: + - '0' + - '1' + - x + ms_output: + - 0 + pt_output: + - 0 +``` +请注意,如果配置了api_mapping.yaml,没有在api_mapping.yaml中指定对应关系的参数将不会被匹配,如mindspore的`Functional.interpolate.0.backward`的input中第1个数据在结果中对应的标杆数据为N/A。 \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index db1a594da25a03a8848ab999785eaf2c5db40ffa..b4f83a8f808417d7c8dbf16557fa8ad595f2fa21 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -13,24 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import copy import os import re +from collections import defaultdict + +import numpy as np +import pandas as pd + from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import (FileOpen, create_directory, +from msprobe.core.common.file_utils import (FileOpen, create_directory, load_json, load_npy, load_yaml) from msprobe.core.common.log import logger from msprobe.core.common.utils import (CompareException, check_compare_param, check_configuration_param, - get_dump_mode, set_dump_path) + get_dump_mode, set_dump_path, check_op_str_pattern_valid) +from msprobe.core.compare.check import dtype_mapping from msprobe.core.compare.acc_compare import Comparator -from msprobe.core.compare.check import check_struct_match, fuzzy_check_op from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping +INPUT_PATTERN = Const.SEP + Const.INPUT + Const.SEP +OUTPUT_PATTERN = Const.SEP + Const.OUTPUT + Const.SEP +COMPARE_KEY = 'compare_key' +COMPARE_SHAPE = 'compare_shape' +INTERNAL_API_MAPPING_FILE = 'ms_to_pt_api.yaml' + + class MSComparator(Comparator): + """ + 用于mindspore动态图同框架/跨框架精度比对,支持md5/summary/all模式。 + cell_mapping: mindspore在cell级别(L0)dump数据和pytorch的module之间的映射关系; + api_mapping: mindspore在api级别(L1)dump数据和pytorch的api之间的映射关系; + data_mapping: mindspore的cell或api的入参/出参和pytorch之间的映射关系; + is_cross_framework: 是否跨框架。 + """ def __init__(self, cell_mapping=None, api_mapping=None, data_mapping=None, is_cross_framework=False): self.frame_name = MSComparator.__name__ self.cell_mapping = cell_mapping @@ -52,10 +70,108 @@ class MSComparator(Comparator): else: raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " f"{type(self.data_mapping)}") + + @classmethod + def calc_accuracy(cls, result_df, dump_mode, header): + condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A + result_df[condition_no_bench] = 
result_df[condition_no_bench].fillna(CompareConst.N_A) + result_df.loc[condition_no_bench, CompareConst.ERROR_MESSAGE] = CompareConst.NO_BENCH + + def calc_summary_diff(data_type: str): + def type_check(val): + check_series = pd.Series(False, index=val.index) + val_str = val.astype(str) + check_series[pd.to_numeric(val_str, errors='coerce').notna() | val_str.str.lower().eq('nan')] = True + return check_series + + def get_number(val): + return pd.to_numeric(val.astype(str), errors='coerce') + + ms_val = result_df['NPU ' + data_type] + pt_val = result_df['Bench ' + data_type] + diff_name = data_type.capitalize() + ' diff' + rel_err_name = ('norm' if data_type == 'l2norm' else data_type).capitalize() + 'RelativeErr' + condition_na = ~type_check(ms_val) | ~type_check(pt_val) + result_df.loc[condition_na, [diff_name, rel_err_name]] = CompareConst.N_A + result_df.loc[~(condition_no_bench | condition_na), diff_name] = get_number(ms_val) - get_number(pt_val) + condition_nan_diff = ~condition_no_bench & ~condition_na & result_df[diff_name].isna() + condition_not_nan_diff = ~condition_no_bench & ~condition_na & result_df[diff_name].notna() + result_df.loc[condition_nan_diff, [diff_name, rel_err_name]] = CompareConst.NAN + condition_pt_zero = pt_val == 0 + result_df.loc[condition_not_nan_diff & condition_pt_zero, rel_err_name] = CompareConst.NAN + condition_ref_err = condition_not_nan_diff & ~condition_pt_zero + result_df.loc[condition_ref_err, rel_err_name] = (result_df.loc[condition_ref_err, diff_name] / + pt_val[condition_ref_err] * 100) + result_df.loc[condition_ref_err, rel_err_name] = (result_df.loc[condition_ref_err, rel_err_name] + .abs().astype(str) + '%') + magnitude = get_number(result_df[diff_name]).abs() / ( + pd.Series(np.maximum(get_number(ms_val), get_number(pt_val))).abs() + CompareConst.EPSILON) + return magnitude > CompareConst.MAGNITUDE + + if dump_mode == Const.MD5: + condition_md5_equal = result_df[CompareConst.NPU_MD5] == 
result_df[CompareConst.BENCH_MD5] + result_df.loc[condition_md5_equal, CompareConst.RESULT] = CompareConst.PASS + result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF + elif dump_mode == Const.SUMMARY: + warning_list = [calc_summary_diff(data_type) for data_type in ['max', 'min', 'mean', 'l2norm']] + warning_flag = pd.DataFrame(warning_list).all() + result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = '' + result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING + result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.' + else: + fill_cols = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, + CompareConst.ERROR_MESSAGE] + result_df.loc[~condition_no_bench, fill_cols] = '' + result_df.loc[~condition_no_bench, CompareConst.ACCURACY] = CompareConst.ACCURACY_CHECK_YES + return result_df[header] + + @classmethod + def make_result_df(cls, result, stack_mode, dump_mode): + header = CompareConst.HEAD_OF_COMPARE_MODE[dump_mode] + + if stack_mode: + header.append(CompareConst.STACK) + if dump_mode == Const.ALL: + header.append(CompareConst.DATA_NAME) + result.rename(columns={'op_name_x': CompareConst.NPU_NAME, + 'op_name_y': CompareConst.BENCH_NAME, + 'dtype_x': CompareConst.NPU_DTYPE, + 'dtype_y': CompareConst.BENCH_DTYPE, + 'shape_x': CompareConst.NPU_SHAPE, + 'shape_y': CompareConst.BENCH_SHAPE, + 'md5_x': CompareConst.NPU_MD5, + 'md5_y': CompareConst.BENCH_MD5, + 'data_name_x': CompareConst.DATA_NAME, + 'stack_info_x': CompareConst.STACK}, inplace=True) + + npu_summary = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM] + bench_summary = [CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN, + CompareConst.BENCH_NORM] + def set_summary(summary): + if summary == 
CompareConst.N_A: + return [CompareConst.N_A] * 4 + summary_list = [] + for i in summary: + if i is None: + summary_list.append(CompareConst.N_A) + elif str(i).lower() == 'nan': + summary_list.append(CompareConst.NAN) + else: + summary_list.append(i) + return summary_list + + result[npu_summary] = result['summary_x'].apply(set_summary).tolist() + result[bench_summary] = result['summary_y'].apply(set_summary).tolist() + result_df = pd.DataFrame(columns=header) + for h in header: + if h in result.columns: + result_df[h] = result[h] + return cls.calc_accuracy(result_df, dump_mode, header) def load_internal_api(self): cur_path = os.path.dirname(os.path.realpath(__file__)) - yaml_path = os.path.join(cur_path, "ms_to_pt_api.yaml") + yaml_path = os.path.abspath(os.path.join(cur_path, INTERNAL_API_MAPPING_FILE)) return load_yaml(yaml_path) def load_mapping_file(self, mapping_file): @@ -66,41 +182,17 @@ class MSComparator(Comparator): return mapping_dict def process_cell_mapping(self, npu_op_name): - npu_op_name = [op_name.replace("Cell", "Module", 1) for op_name in npu_op_name] + if not npu_op_name or not re.match(r'.+(?:for|back)ward\..+', npu_op_name): + return CompareConst.N_A + npu_op_name = npu_op_name.replace("Cell", "Module", 1) if self.cell_mapping_dict: - for index, op_name in enumerate(npu_op_name): - # get cell name & class name from op_name - # Cell.fc1.Dense.forward.0.input.0 - cell_name = op_name.split(Const.SEP, 1)[-1].rsplit(Const.SEP, 4)[0] - if cell_name in self.cell_mapping_dict: - npu_op_name[index] = op_name.replace(cell_name, self.cell_mapping_dict[cell_name], 1) + # get cell name & class name from op_name + # Cell.fc1.Dense.forward.0.input.0 + cell_name = re.split(r'\.(?:for|back)ward\.', npu_op_name.split(Const.SEP, 1)[-1])[0] + if cell_name in self.cell_mapping_dict: + npu_op_name = npu_op_name.replace(cell_name, self.cell_mapping_dict[cell_name], 1) return npu_op_name - def check_op(self, npu_dict, bench_dict, fuzzy_match): - npu_dict_new, 
bench_dict_new = copy.deepcopy(npu_dict), copy.deepcopy(bench_dict) - npu_op_name, bench_op_name = npu_dict_new.get(CompareConst.OP_NAME), bench_dict_new.get(CompareConst.OP_NAME) - if self.cell_mapping is not None: - npu_op_name = self.process_cell_mapping(npu_op_name) - if self.api_mapping is not None: - npu_op_name = self.process_internal_api_mapping(npu_op_name, bench_op_name) - if isinstance(self.api_mapping, str): - npu_dict_new, bench_dict_new, target_dict = self.transform_user_mapping_api(npu_dict_new, - bench_dict_new) - if target_dict: - bench_dict = self.reconstitution_bench_dict(npu_dict, copy.deepcopy(bench_dict_new), target_dict) - npu_op_name = npu_dict_new.get(CompareConst.OP_NAME) - bench_op_name = bench_dict_new.get(CompareConst.OP_NAME) - struct_match = check_struct_match(npu_dict_new, bench_dict_new, cross_frame=self.cross_frame) - if not fuzzy_match: - return npu_op_name == bench_op_name and struct_match - is_match = True - try: - is_match = fuzzy_check_op(npu_op_name, bench_op_name) - except Exception as err: - logger.warning("%s and %s can not fuzzy match." 
% (npu_op_name, bench_op_name)) - is_match = False - return is_match and struct_match - def read_npy_data(self, dir_path, file_name, load_pt_file=False): data_path = os.path.join(dir_path, file_name) if load_pt_file: @@ -118,20 +210,18 @@ class MSComparator(Comparator): for idx, _ in enumerate(npu_op_name): npu_op_name[idx] = npu_op_name[idx].replace(target, para) return npu_op_name - - def process_internal_api_mapping(self, npu_op_name, bench_op_name): + + def process_internal_api_mapping(self, npu_op_name): # get api name & class name from op_name # Functional.addcmul.0.forward.input.0 - npu_op_name, bench_op_name = npu_op_name.copy(), bench_op_name.copy() - ms_api_name = self.get_api_name(npu_op_name[0].split(Const.SEP)) - pt_api_name = self.get_api_name(bench_op_name[0].split(Const.SEP)) + ms_api_name = self.get_api_name(npu_op_name.split(Const.SEP)) class_name = ms_api_name.split(Const.SEP)[0] if class_name == "Mint": - return self.api_replace(npu_op_name, "Mint", "Torch") + return npu_op_name.replace("Mint", "Torch") elif class_name == "MintFunctional": - return self.api_replace(npu_op_name, "MintFunctional", "Functional") - elif self.ms_to_pt_mapping.get(ms_api_name) == pt_api_name: - return self.api_replace(npu_op_name, ms_api_name, pt_api_name) + return npu_op_name.replace("MintFunctional", "Functional") + elif self.ms_to_pt_mapping.get(ms_api_name): + return npu_op_name.replace(ms_api_name, self.ms_to_pt_mapping.get(ms_api_name)) else: return npu_op_name @@ -147,109 +237,123 @@ class MSComparator(Comparator): logger.error(f'Failed to retrieve API name, please check if the dump data is reasonable') raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error return api_name - - def transform_user_mapping_api(self, new_npu_dict, new_bench_dict): - """ - Transform user mapping API based on new NPU and benchmark dictionaries. - Parameters: - new_npu_dict (dict): New NPU operation dictionary. 
- new_bench_dict (dict): New benchmark operation dictionary. - Returns: - tuple: Updated NPU and benchmark dictionaries, along with the target dictionary. - """ - npu_op_name, bench_op_name = new_npu_dict.get(CompareConst.OP_NAME), new_bench_dict.get(CompareConst.OP_NAME) - npu_struct_in = new_npu_dict.get(CompareConst.INPUT_STRUCT) - bench_struct_in = new_bench_dict.get(CompareConst.INPUT_STRUCT) - npu_struct_out = new_npu_dict.get(CompareConst.OUTPUT_STRUCT) - bench_struct_out = new_bench_dict.get(CompareConst.OUTPUT_STRUCT) - npu_summary, bench_summary = new_npu_dict.get(CompareConst.SUMMARY), new_bench_dict.get(CompareConst.SUMMARY) - npu_in_len, bench_in_len = len(npu_struct_in), len(bench_struct_in) - npu_out_len, bench_out_len = len(npu_struct_out), len(bench_struct_out) - ms_api_list, pt_api_list = npu_op_name[0].split(Const.SEP), bench_op_name[0].split(Const.SEP) - ms_api_name = self.get_api_name(ms_api_list) - pt_api_name = self.get_api_name(pt_api_list) - target_dict = {} - for api_dict in self.api_mapping_dict: - if api_dict.get("pt_api") == pt_api_name and api_dict.get("ms_api") == ms_api_name: - ms_user_args_len, pt_user_args_len = len(api_dict.get("ms_args")), len(api_dict.get("pt_args")) - ms_user_output_len, pt_user_output_len = len(api_dict.get("ms_output")), len(api_dict.get("pt_output")) - if ms_user_args_len != pt_user_args_len or ms_user_output_len != pt_user_output_len: - logger.warning("The user-defined mapping table is incorrect,\ - make sure that the number of parameters is equal") - break - ms_out_list = api_dict.get("ms_output", []) - for idx in reversed(range(npu_out_len)): - if idx not in ms_out_list: - del npu_struct_out[idx] - if idx + npu_in_len < len(npu_summary) and idx + npu_in_len < len(npu_op_name): - del npu_summary[idx + npu_in_len] - del npu_op_name[idx + npu_in_len] - pt_out_list = api_dict.get("pt_output", []) - for idx in reversed(range(bench_out_len)): - if idx not in pt_out_list: - del bench_struct_out[idx] - if idx + 
bench_in_len < len(bench_summary) and idx + bench_in_len < len(bench_op_name): - del bench_summary[idx + bench_in_len] - del bench_op_name[idx + bench_in_len] - ms_para_list = api_dict.get("ms_args", []) - for idx in reversed(range(npu_in_len)): - if idx not in ms_para_list: - self.remove_element(npu_op_name, npu_struct_in, npu_summary, idx) - pt_para_list = api_dict.get("pt_args", []) - for idx in reversed(range(bench_in_len)): - if idx not in pt_para_list: - self.remove_element(bench_op_name, bench_struct_in, bench_summary, idx) - npu_op_name = self.api_replace(npu_op_name, ms_api_name, pt_api_name) - if len(npu_op_name) != len(bench_op_name): - logger.warning( - "The total number of input and output parameters of \ - npu_op_name and bench_op_name are not equal.") - break - npu_op_name = self.para_sequence_update(npu_op_name, bench_op_name) - target_dict = api_dict - break - if target_dict: - new_npu_dict.update({CompareConst.OP_NAME: npu_op_name, CompareConst.INPUT_STRUCT: npu_struct_in, - CompareConst.OUTPUT_STRUCT: npu_struct_out, CompareConst.SUMMARY: npu_summary}) - new_bench_dict.update({CompareConst.OP_NAME: bench_op_name, CompareConst.INPUT_STRUCT: bench_struct_in, - CompareConst.OUTPUT_STRUCT: bench_struct_out, CompareConst.SUMMARY: bench_summary}) - return new_npu_dict, new_bench_dict, target_dict - - def para_sequence_update(self, npu_op_name, bench_op_name): - for idx, _ in enumerate(npu_op_name): - bench_op_name_list = bench_op_name[idx].rsplit(Const.SEP, 1) - if len(bench_op_name_list) != 0: - npu_op_name[idx] = npu_op_name[idx].rsplit(Const.SEP, 1)[0] + Const.SEP + bench_op_name_list[-1] - return npu_op_name - def reconstitution_bench_dict(self, npu_dict, del_bench_dict, api_dict): - ms_user_args_list = api_dict.get("ms_args", []) - ms_user_output_list = api_dict.get("ms_output", []) - npu_struct_in = npu_dict.get(CompareConst.INPUT_STRUCT) - npu_struct_out = npu_dict.get(CompareConst.OUTPUT_STRUCT) - npu_in_len = len(npu_struct_in) - npu_out_len = 
len(npu_struct_out) - if npu_in_len == len(ms_user_args_list) and npu_out_len == len(ms_user_output_list): - return del_bench_dict - ms_input_args_list = [i for i in range(npu_in_len)] - input_sub_list = list(set(ms_input_args_list) - set(ms_user_args_list)) - ms_output_args_list = [i for i in range(npu_out_len)] - output_sub_list = list(set(ms_output_args_list) - set(ms_user_output_list)) - bench_op_name = del_bench_dict.get(CompareConst.OP_NAME, []) - bench_struct_in = del_bench_dict.get(CompareConst.INPUT_STRUCT, []) - bench_struct_out = del_bench_dict.get(CompareConst.OUTPUT_STRUCT, []) - bench_summary = del_bench_dict.get(CompareConst.SUMMARY, []) - for idx in input_sub_list: # Fill in the blank value field in the pt dictionary - bench_op_name.insert(idx, CompareConst.N_A) - bench_struct_in.insert(idx, CompareConst.N_A) - bench_summary.insert(idx, CompareConst.N_A) - for idx in output_sub_list: # Fill in the blank value field in the pt dictionary - bench_op_name.insert(npu_in_len + idx, CompareConst.N_A) - bench_struct_out.insert(idx, CompareConst.N_A) - bench_summary.insert(npu_in_len + idx, CompareConst.N_A) - del_bench_dict.update({CompareConst.OP_NAME: bench_op_name, CompareConst.INPUT_STRUCT: bench_struct_in, - CompareConst.OUTPUT_STRUCT: bench_struct_out, CompareConst.SUMMARY: bench_summary}) - return del_bench_dict + def compare_process(self, file_lists, stack_mode, fuzzy_match, dump_mode): + npu_json_path, bench_json_path, stack_json_path = file_lists + npu_json_data = load_json(npu_json_path) + bench_json_data = load_json(bench_json_path) + stack_json_data = load_json(stack_json_path) + + npu_df = self.gen_data_df(npu_json_data, stack_json_data, dump_mode) + bench_df = self.gen_data_df(bench_json_data, stack_json_data, dump_mode) + if self.cell_mapping: + npu_df[COMPARE_KEY] = npu_df.apply(lambda row: self.process_cell_mapping(row[CompareConst.OP_NAME]), + axis=1) + elif self.api_mapping: + npu_df[COMPARE_KEY] = npu_df.apply( + lambda row: 
self.process_internal_api_mapping(row[CompareConst.OP_NAME]), axis=1) + if isinstance(self.api_mapping, str): + self.modify_compare_data_with_user_mapping(npu_df, bench_df) + else: + npu_df[COMPARE_KEY] = npu_df[CompareConst.OP_NAME] + npu_df[[Const.DTYPE, Const.SHAPE]] = npu_df[[Const.DTYPE, Const.SHAPE]].astype(str) + bench_df[[Const.DTYPE, Const.SHAPE]] = bench_df[[Const.DTYPE, Const.SHAPE]].astype(str) + npu_df[COMPARE_SHAPE] = npu_df[Const.SHAPE] + bench_df[COMPARE_SHAPE] = bench_df[Const.SHAPE] + bench_df[COMPARE_KEY] = bench_df[CompareConst.OP_NAME] + match_result = pd.merge(npu_df, bench_df, on=[COMPARE_KEY, COMPARE_SHAPE], how='outer') + match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) + + def gen_dtype_condition(): + npu_dtype = match_result['dtype_x'] + bench_dtype = match_result['dtype_y'] + if self.cross_frame: + npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) + return ((npu_dtype == bench_dtype) | + ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.FLOAT32)) | + ((npu_dtype == Const.FLOAT32) & (bench_dtype == Const.FLOAT16)) | + ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.BFLOAT16)) | + ((npu_dtype == Const.BFLOAT16) & (bench_dtype == Const.FLOAT16)) | + ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_FLOAT32)) | + ((npu_dtype == Const.TORCH_FLOAT32) & (bench_dtype == Const.TORCH_FLOAT16)) | + ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_BFLOAT16)) | + ((npu_dtype == Const.TORCH_BFLOAT16) & (bench_dtype == Const.TORCH_FLOAT16))) + + match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A + return MSComparator.make_result_df(match_result, stack_mode, dump_mode) + + def modify_compare_data_with_user_mapping(self, npu_df, bench_df): + def get_api_indices_dict(op_name_df): + api_indices_dict = defaultdict(list) + for op_index, name in enumerate(op_name_df[CompareConst.OP_NAME]): + api = 
self.get_api_name(name.split(Const.SEP)) + api_indices_dict[api].append(op_index) + return api_indices_dict + + ms_api_indices_dict = get_api_indices_dict(npu_df) + pt_api_indices_dict = get_api_indices_dict(bench_df) + + for mapping_dict in self.api_mapping_dict: + if (len(mapping_dict.get('ms_args')) != len(mapping_dict.get('pt_args')) or + len(mapping_dict.get('ms_output')) != len(mapping_dict.get('pt_output'))): + logger.warning('The user-defined mapping table is incorrect,\ + make sure that the number of parameters is equal') + continue + ms_api, pt_api = mapping_dict.get('ms_api'), mapping_dict.get('pt_api') + if ms_api not in ms_api_indices_dict or pt_api not in pt_api_indices_dict: + continue + for index in ms_api_indices_dict.get(ms_api): + op_name = npu_df.loc[index, CompareConst.OP_NAME].replace(ms_api, pt_api, 1) + is_abandoned = True + if INPUT_PATTERN in op_name: + for i, prefix in enumerate(mapping_dict.get('ms_args')): + if op_name.split(INPUT_PATTERN)[1].startswith(str(prefix)): + npu_df.loc[index, COMPARE_KEY] = ( + op_name.replace(INPUT_PATTERN + str(prefix), + INPUT_PATTERN + str(mapping_dict.get('pt_args')[i]))) + is_abandoned = False + else: + for i, prefix in enumerate(mapping_dict.get('ms_output')): + if op_name.split(OUTPUT_PATTERN)[1].startswith(str(prefix)): + npu_df.loc[index, COMPARE_KEY] = ( + op_name.replace(OUTPUT_PATTERN + str(prefix), + OUTPUT_PATTERN + str(mapping_dict.get('pt_output')[i]))) + is_abandoned = False + if is_abandoned: + npu_df.loc[index, COMPARE_KEY] = op_name + 'abandoned' + + def gen_data_df(self, data_json, stack_json, dump_mode): + result = { + CompareConst.OP_NAME: [], + Const.DTYPE: [], + Const.SHAPE: [], + Const.SUMMARY: [], + 'stack_info': [] + } + if dump_mode == Const.ALL: + result['data_name'] = [] + elif dump_mode == Const.MD5: + result[Const.MD5] = [] + for data in data_json['data']: + check_op_str_pattern_valid(data) + merge_list = self.gen_merge_list(data_json, data, stack_json, dump_mode) + if not 
merge_list: + continue + for op_name in merge_list[CompareConst.OP_NAME]: + result[CompareConst.OP_NAME].append(op_name) + if INPUT_PATTERN in op_name: + struct = merge_list[CompareConst.INPUT_STRUCT].pop(0) + else: + struct = merge_list[CompareConst.OUTPUT_STRUCT].pop(0) + result[Const.DTYPE].append(struct[0]) + result[Const.SHAPE].append(struct[1]) + if dump_mode == Const.MD5: + result[Const.MD5].append(struct[2]) + result[Const.SUMMARY].append(merge_list[Const.SUMMARY].pop(0)) + result['stack_info'].append(merge_list['stack_info'][0]) + if dump_mode == Const.ALL: + result['data_name'].append(merge_list['data_name'].pop(0)) + return pd.DataFrame(result) def check_cross_framework(bench_json_path): diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py index 95065ff7b798d514e9a8d783aebead38772173ca..a1e5f8eee1bce9b170e6f4f7fdfeda65d47252c9 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_check.py @@ -67,7 +67,7 @@ op_name = 'Functional.conv2d.0.backward.input.0' class TestUtilsMethods(unittest.TestCase): def test_check_struct_match_success(self): - result = check_struct_match(npu_dict, bench_dict, cross_frame=False) + result = check_struct_match(npu_dict, bench_dict) self.assertTrue(result) def test_check_struct_match_fail(self): @@ -80,7 +80,7 @@ class TestUtilsMethods(unittest.TestCase): ('torch.float32', [16])], 'output_struct': [('torch.float32', [1, 16, 28, 28])] } - result = check_struct_match(npu_dict2, bench_dict2, cross_frame=False) + result = check_struct_match(npu_dict2, bench_dict2) self.assertFalse(result) def test_check_struct_index_error(self): @@ -94,7 +94,7 @@ class TestUtilsMethods(unittest.TestCase): 'output_struct': [('torch.float32')] } with self.assertRaises(CompareException) as context: - result = 
check_struct_match(npu_dict3, bench_dict3, cross_frame=False) + result = check_struct_match(npu_dict3, bench_dict3) self.assertEqual(context.exception.code, CompareException.INDEX_OUT_OF_BOUNDS_ERROR) def test_check_type_shape_match_success(self):