diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index a86b87ce0d064bdd1019b551968ad48d9381512c..bd1292ee47b27a79e4a19a22ea58ad87a5aa0fd3 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -238,6 +238,7 @@ class Const: NORM = 'Norm' DATA_NAME = 'data_name' STATE = 'state' + REQ_GRAD = 'requires_grad' API_ORIGIN_NAME = 'api_origin_name' TENSOR_STAT_INDEX = 'tensor_stat_index' SUMMARY_METRICS_LIST = [MAX, MIN, MEAN, NORM] @@ -419,6 +420,9 @@ class CompareConst: MIN_RELATIVE_ERR = "MinRelativeErr" MEAN_RELATIVE_ERR = "MeanRelativeErr" NORM_RELATIVE_ERR = "NormRelativeErr" + REQ_GRAD_CONSIST = "Requires_grad Consistent" + NPU_REQ_GRAD = "NPU Requires_grad" + BENCH_REQ_GRAD = "Bench Requires_grad" ACCURACY = "Accuracy Reached or Not" STACK = "NPU_Stack_Info" DATA_NAME = "Data_name" @@ -491,17 +495,23 @@ class CompareConst: COMPARE_RESULT_HEADER = [ NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, COSINE, EUC_DIST, MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO, - NPU_MAX, NPU_MIN, NPU_MEAN, NPU_NORM, BENCH_MAX, BENCH_MIN, BENCH_MEAN, BENCH_NORM, ACCURACY, ERROR_MESSAGE + NPU_MAX, NPU_MIN, NPU_MEAN, NPU_NORM, BENCH_MAX, BENCH_MIN, BENCH_MEAN, BENCH_NORM, + REQ_GRAD_CONSIST, NPU_REQ_GRAD, BENCH_REQ_GRAD, + ACCURACY, ERROR_MESSAGE ] SUMMARY_COMPARE_RESULT_HEADER = [ NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, MAX_DIFF, MIN_DIFF, MEAN_DIFF, NORM_DIFF, MAX_RELATIVE_ERR, MIN_RELATIVE_ERR, MEAN_RELATIVE_ERR, NORM_RELATIVE_ERR, - NPU_MAX, NPU_MIN, NPU_MEAN, NPU_NORM, BENCH_MAX, BENCH_MIN, BENCH_MEAN, BENCH_NORM, RESULT, ERROR_MESSAGE + NPU_MAX, NPU_MIN, NPU_MEAN, NPU_NORM, BENCH_MAX, BENCH_MIN, BENCH_MEAN, BENCH_NORM, + REQ_GRAD_CONSIST, NPU_REQ_GRAD, BENCH_REQ_GRAD, + RESULT, ERROR_MESSAGE ] MD5_COMPARE_RESULT_HEADER = [ - NPU_NAME, BENCH_NAME, NPU_DTYPE, 
BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, NPU_MD5, BENCH_MD5, RESULT + NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, NPU_MD5, BENCH_MD5, + REQ_GRAD_CONSIST, NPU_REQ_GRAD, BENCH_REQ_GRAD, + RESULT ] COMPARE_RESULT_HEADER_STACK = COMPARE_RESULT_HEADER + [STACK] diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 7b34b73d2e054ed9c73095b21997e407a81b7232..f92088de09b95c7686dfc8005bca8f152dea2c56 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -182,7 +182,8 @@ class ParseData: Const.SUMMARY: [], Const.STACK_INFO: [], Const.STATE: [], - Const.API_ORIGIN_NAME: [] + Const.API_ORIGIN_NAME: [], + Const.REQ_GRAD: [] } if self.mode_config.dump_mode == Const.ALL: result[Const.DATA_NAME] = [] @@ -208,10 +209,9 @@ class ParseData: summary_list = merge_list.get(Const.SUMMARY) data_name_list = merge_list.get(Const.DATA_NAME) state_list = merge_list.get(Const.STATE) - op_name_reorder, summary_reorder, data_name_reorder, state_reorder = reorder_op_x_list(op_name_list, - summary_list, - data_name_list, - state_list) + requires_grad_list = merge_list.get(Const.REQ_GRAD) + op_name_reorder, summary_reorder, data_name_reorder, state_reorder, requires_grad_reorder = ( + reorder_op_x_list(op_name_list, summary_list, data_name_list, state_list, requires_grad_list)) # 遍历单个API的所有item for index, (op_name, state) in enumerate(zip(op_name_reorder, state_reorder)): result[CompareConst.OP_NAME].append(op_name) @@ -231,9 +231,6 @@ class ParseData: check_api_info_len(op_name, struct, 2) result[Const.DTYPE].append(struct[0]) result[Const.SHAPE].append(struct[1]) - if self.mode_config.dump_mode == Const.MD5: - check_api_info_len(op_name, struct, 3) - result[Const.MD5].append(struct[2]) check_api_info_len(op_name, summary_reorder, 1) result[Const.SUMMARY].append(summary_reorder.pop(0)) @@ -244,12 +241,18 @@ class 
ParseData: else: result[Const.STACK_INFO].append(None) + if self.mode_config.dump_mode == Const.MD5: + check_api_info_len(op_name, struct, 3) + result[Const.MD5].append(struct[2]) if self.mode_config.dump_mode == Const.ALL: check_api_info_len(op_name, data_name_reorder, 1) result[Const.DATA_NAME].append(data_name_reorder.pop(0)) result[Const.STATE].append(state) result[Const.API_ORIGIN_NAME].append(data_name) + check_api_info_len(op_name, requires_grad_reorder, 1) + result[Const.REQ_GRAD].append(requires_grad_reorder.pop(0)) + progress_bar.update(1) progress_bar.close() return pd.DataFrame(result) @@ -620,12 +623,19 @@ class CreateTable: 'data_name_x': CompareConst.DATA_NAME, 'stack_info_x': CompareConst.STACK, 'state_x': Const.STATE, - 'api_origin_name_x': Const.API_ORIGIN_NAME}, inplace=True) + 'api_origin_name_x': Const.API_ORIGIN_NAME, + 'requires_grad_x': CompareConst.NPU_REQ_GRAD, + 'requires_grad_y': CompareConst.BENCH_REQ_GRAD + }, + inplace=True) # process summary data npu_summary = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM] bench_summary = [CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM] + # process requires_grad + result[CompareConst.REQ_GRAD_CONSIST] = result[CompareConst.NPU_REQ_GRAD] == result[CompareConst.BENCH_REQ_GRAD] + if result.empty: result[npu_summary] = pd.DataFrame(columns=npu_summary) result[bench_summary] = pd.DataFrame(columns=bench_summary) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 0f5ea4a50d0501e0f1ff142f2a34c88648acaa71..bca188f1cb14c18c4adbdb4f13d8115fd250ca5c 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -115,17 +115,18 @@ def op_item_parse(op_data, op_name: str, state: str, depth: int = 0) -> list: state = Const.INPUT default_item = { 'full_op_name': op_name, - 'type': None, - 
'Max': None, - 'Min': None, - 'Mean': None, - 'Norm': None, - 'dtype': None, - 'shape': None, - 'md5': None, - 'value': None, - 'data_name': '-1', - 'state': state + Const.TYPE: None, + Const.MAX: None, + Const.MIN: None, + Const.MEAN: None, + Const.NORM: None, + Const.DTYPE: None, + Const.SHAPE: None, + Const.MD5: None, + Const.VALUE: None, + Const.DATA_NAME: '-1', + Const.STATE: state, + Const.REQ_GRAD: None } if depth > Const.MAX_DEPTH: @@ -163,6 +164,8 @@ def gen_op_item(op_data, op_name, state): op_item[Const.DATA_NAME] = data_name op_item['full_op_name'] = data_name.rsplit(Const.SEP, 1)[0] if data_name != '-1' else op_name op_item[Const.STATE] = state + if Const.REQ_GRAD not in op_item: + op_item[Const.REQ_GRAD] = None params = [Const.MAX, Const.MIN, Const.MEAN, Const.NORM] for i in params: @@ -215,12 +218,13 @@ def merge_tensor(tensor_list, dump_mode): CompareConst.DEBUG_STRUCT, Const.SUMMARY, Const.STACK_INFO, - Const.STATE + Const.STATE, + Const.REQ_GRAD ] op_dict = {key: [] for key in keys} if dump_mode == Const.ALL: - op_dict["data_name"] = [] + op_dict[Const.DATA_NAME] = [] for tensor in tensor_list: # A dict(len=2) with 'full_op_name' and 'full_info' is added to the tensor only if self.stack_mode is True @@ -231,6 +235,7 @@ def merge_tensor(tensor_list, dump_mode): op_dict[CompareConst.OP_NAME].append(tensor.get('full_op_name')) state = tensor.get(Const.STATE) op_dict[Const.STATE].append(state) + op_dict[Const.REQ_GRAD].append(tensor.get(Const.REQ_GRAD)) struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state) if not struct_key: @@ -245,7 +250,7 @@ def merge_tensor(tensor_list, dump_mode): [str(tensor[key]) if tensor[key] is None else tensor[key] for key in Const.SUMMARY_METRICS_LIST]) if dump_mode == Const.ALL: - op_dict["data_name"].append(tensor['data_name']) + op_dict[Const.DATA_NAME].append(tensor.get(Const.DATA_NAME)) if not op_dict[CompareConst.KWARGS_STRUCT]: del op_dict[CompareConst.KWARGS_STRUCT] @@ -308,23 +313,25 @@ def 
def reorder_op_x_list(op_name_list, summary_list, data_name_list, state_list, requires_grad_list):
    """
    Reorder the per-item lists of a single API so that they share one item order.

    The target order is whatever ``reorder_op_name_list(op_name_list, state_list)``
    returns (per the original note on this function: parameters are placed after
    the inputs and before the outputs). ``summary_list`` and ``requires_grad_list``
    are rearranged to follow that order; ``data_name_list`` is handled separately
    because, per the original note, it is None in statistics comparison.

    Args:
        op_name_list: item names of the API.
        summary_list: entries indexed in parallel with ``op_name_list``.
        data_name_list: entries indexed in parallel with ``op_name_list``; when
            it is empty or None it is returned unchanged.
        state_list: states, forwarded to ``reorder_op_name_list``.
        requires_grad_list: entries indexed in parallel with ``op_name_list``.

    Returns:
        A 5-tuple ``(op_name_reorder, summary_reorder, data_name_reorder,
        state_reorder, requires_grad_reorder)``. When ``op_name_list`` or
        ``summary_list`` is empty/None, the five arguments are returned
        untouched in that same order.
    """
    if not op_name_list or not summary_list:
        # Must have the same arity as the normal path below: the result is
        # unpacked into five names, so a 4-tuple here raises ValueError.
        return op_name_list, summary_list, data_name_list, state_list, requires_grad_list

    # name -> position in the incoming lists (for a repeated name the last position wins)
    index_map = {name: index for index, name in enumerate(op_name_list)}

    op_name_reorder, state_reorder = reorder_op_name_list(op_name_list, state_list)
    summary_reorder = [summary_list[index_map.get(name)] for name in op_name_reorder]
    requires_grad_reorder = [requires_grad_list[index_map.get(name)] for name in op_name_reorder]
    if data_name_list:
        data_name_reorder = [data_name_list[index_map.get(name)] for name in op_name_reorder]
    else:
        # Nothing to reorder: hand the empty/None value back as received.
        data_name_reorder = data_name_list

    return op_name_reorder, summary_reorder, data_name_reorder, state_reorder, requires_grad_reorder