From 622aa28913273c83c6e9ec49827d600ff76f54a6 Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Fri, 29 Mar 2024 17:13:20 +0800 Subject: [PATCH 01/28] =?UTF-8?q?[ptdbg]=E8=B5=84=E6=96=99=E4=BD=8E?= =?UTF-8?q?=E9=94=99=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" index aa88541c3e..9145972762 100644 --- "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" +++ "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" @@ -858,7 +858,7 @@ bn1_BatchNorm2d_0_backward_output.2.npy dump: ```python -debugger.configure_hook(mode="api_stack", scope=[], api_list=[], filter_switch="OFF", acl_config=None, backward_input=[], input_output_mode=["all"], summary_only=False) +debugger.configure_hook(mode="api_stack", scope=[], api_list=[], filter_switch="OFF", acl_config=None, backward_input=[], input_output_mode=["all"], summary_only=False, summary_mode=all) ``` 溢出检测: -- Gitee From d2e771dc4f045665dee0cdd78a57995fcba730fa Mon Sep 17 00:00:00 2001 From: cai-weiwei1989 <734267852@qq.com> Date: Fri, 29 Mar 2024 18:14:26 +0800 Subject: [PATCH 02/28] =?UTF-8?q?[ptdbg]=E6=A3=80=E8=A7=86=E6=84=8F?= =?UTF-8?q?=E8=A7=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" index 9145972762..523878963e 100644 --- "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" +++ "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" @@ -858,7 +858,7 @@ bn1_BatchNorm2d_0_backward_output.2.npy dump: ```python -debugger.configure_hook(mode="api_stack", scope=[], api_list=[], filter_switch="OFF", acl_config=None, backward_input=[], input_output_mode=["all"], summary_only=False, summary_mode=all) +debugger.configure_hook(mode="api_stack", scope=[], api_list=[], filter_switch="OFF", acl_config=None, backward_input=[], input_output_mode=["all"], summary_only=False, summary_mode="all") ``` 溢出检测: @@ -878,7 +878,7 @@ debugger.configure_hook(mode=None, acl_config=None, overflow_nums=1, need_replic | backward_input | 该输入文件为首次运行训练dump得到反向API输入的.npy文件。例如若需要dump Functional_conv2d_1 API的反向过程的输入输出,则需要在dump目录下查找命名包含Functional_conv2d_1、backward和input字段的.npy文件。 | 否 | | input_output_mode | 
dump数据过滤。可取值"all"、"forward"、"backward"、"input"和"output",表示仅保存dump的数据中文件名包含"forward"、"backward"、"input"和"output"的前向、反向、输入或输出的.npy文件。参数示例input_output_mode=["backward"]或input_output_mode=["forward", "backward"]。默认为all,即保存所有dump的数据。除了all参数只能单独配置外,其他参数可以自由组合。 | 否 | | summary_only | dump npy文件过滤,可取值True或False,配置为True后仅dump保存API统计信息的pkl文件,参数示例:summary_only=False,默认为False。 | 否 | -| summary_mode | 控制dump文件输出的模式,可取值md5(dump仅输出包含md5值的pkl文件,用于验证数据的完整性)、summary(dump仅输出包含API统计信息的pkl文件)、all(dump输出包含API统计信息的pkl文件以及具体的npy文件),参数示例:summary_mode=md5,默认为all。summary_only=True时,不允许配置该参数。 | 否 | +| summary_mode | 控制dump文件输出的模式,可取值md5(dump仅输出包含md5值的pkl文件,用于验证数据的完整性)、summary(dump仅输出包含API统计信息的pkl文件)、all(dump输出包含API统计信息的pkl文件以及具体的npy文件),参数示例:summary_mode="md5",默认为all。summary_only=True时,不允许配置该参数。 | 否 | | overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | | need_replicate | 过程dump数据生成开关,执行溢出检测时,dump目录下会生成forward_real_data和backward_real_data的过程dump数据目录,可取值True(生成)或False(不生成),默认不生成。 | 否 | @@ -1689,7 +1689,7 @@ dump过程中,npy文件在对应算子或者模块被执行后就会落盘, 精度比对dump场景的结果如下: -* dump.pkl文件:包含dump数据的API名称(命名格式为:`{api_type}_{api_name}_{API调用次数}_{前向反向}_{input/output}.{参数序号}`)、dtype、 shape、各数据的max、min、mean、L2norm统计信息以及当配置summary_mode=md5时的md5数据。 +* dump.pkl文件:包含dump数据的API名称(命名格式为:`{api_type}_{api_name}_{API调用次数}_{前向反向}_{input/output}.{参数序号}`)、dtype、 shape、各数据的max、min、mean、L2norm统计信息以及当配置summary_mode="md5"时的md5数据。 其中,“参数序号”表示该API下的第n个参数,例如1,则为第一个参数,若该参数为list格式,则根据list继续排序,例如1.1,表示该API的第1个参数的第1个子参数;L2norm表示2范数(平方根)。 -- Gitee From 1f2b8415dea56ca3b616db75e966f9d66727a5b9 Mon Sep 17 00:00:00 2001 From: l30036321 Date: Sat, 30 Mar 2024 17:21:51 +0800 Subject: [PATCH 03/28] doc fix --- ...\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" index 523878963e..972c986841 100644 --- "a/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" +++ "b/debug/accuracy_tools/ptdbg_ascend/doc/ptdbg_ascend\347\262\276\345\272\246\345\267\245\345\205\267\345\212\237\350\203\275\350\257\264\346\230\216_v5.0.T4.md" @@ -878,7 +878,7 @@ debugger.configure_hook(mode=None, acl_config=None, overflow_nums=1, need_replic | backward_input | 该输入文件为首次运行训练dump得到反向API输入的.npy文件。例如若需要dump Functional_conv2d_1 API的反向过程的输入输出,则需要在dump目录下查找命名包含Functional_conv2d_1、backward和input字段的.npy文件。 | 否 | | input_output_mode | dump数据过滤。可取值"all"、"forward"、"backward"、"input"和"output",表示仅保存dump的数据中文件名包含"forward"、"backward"、"input"和"output"的前向、反向、输入或输出的.npy文件。参数示例input_output_mode=["backward"]或input_output_mode=["forward", "backward"]。默认为all,即保存所有dump的数据。除了all参数只能单独配置外,其他参数可以自由组合。 | 否 | | summary_only | dump npy文件过滤,可取值True或False,配置为True后仅dump保存API统计信息的pkl文件,参数示例:summary_only=False,默认为False。 | 否 | -| summary_mode | 控制dump文件输出的模式,可取值md5(dump仅输出包含md5值的pkl文件,用于验证数据的完整性)、summary(dump仅输出包含API统计信息的pkl文件)、all(dump输出包含API统计信息的pkl文件以及具体的npy文件),参数示例:summary_mode="md5",默认为all。summary_only=True时,不允许配置该参数。 | 否 | +| summary_mode | 
控制dump文件输出的模式,可取值md5(dump仅输出包含md5值的pkl文件,用于验证数据的完整性)、summary(dump仅输出包含API统计信息的pkl文件)、all(dump输出包含API统计信息的pkl文件以及具体的npy文件),参数示例:summary_mode="md5",默认为"all"。summary_only=True时,不允许配置该参数。 | 否 | | overflow_nums | 控制溢出次数,表示第N次溢出时,停止训练,过程中检测到溢出API对应ACL数据均dump。参数示例:overflow_nums=3。配置overflow_check时可配置,默认不配置,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | | need_replicate | 过程dump数据生成开关,执行溢出检测时,dump目录下会生成forward_real_data和backward_real_data的过程dump数据目录,可取值True(生成)或False(不生成),默认不生成。 | 否 | -- Gitee From 930f236b71f35638001df4888b46a8fa64e8bf52 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 11:36:34 +0800 Subject: [PATCH 04/28] infnan --- .../api_accuracy_checker/dump/api_info.py | 32 +++++++++++-- .../run_ut/data_generate.py | 46 +++++++++++++++---- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index e14405fe14..0bff80aea8 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -19,9 +19,29 @@ def get_tensor_extremum(data, operator): return False not in data data_clone = data.clone().detach() if operator == 'max': - return torch._C._VariableFunctionsClass.max(data_clone.float()).item() + max_result = torch._C._VariableFunctionsClass.max(data_clone.float()) + if torch.isinf(max_result) or torch.isnan(max_result): + return handle_tensor_extremum_nan_inf(data_clone, operator), max_result.item() + else: + return max_result.item(), max_result.item() else: - return torch._C._VariableFunctionsClass.min(data_clone.float()).item() + min_result = torch._C._VariableFunctionsClass.min(data_clone.float()) + if torch.isinf(min_result) or torch.isnan(min_result): + return handle_tensor_extremum_nan_inf(data_clone, operator), min_result.item() + else: + return min_result.item(), min_result.item() + + +def handle_tensor_extremum_nan_inf(data_clone, operator): + data_no_nan = data_clone[~torch.isnan(data_clone)] + if len(data_no_nan) == 0: + return float('nan') + data_no_inf = data_no_nan[~torch.isinf(data_no_nan)] + if len(data_no_inf) == 0: + return torch._C._VariableFunctionsClass.max(data_no_nan.float()).item() if operator =='max' else \ + torch._C._VariableFunctionsClass.min(data_no_nan.float()).item() + return torch._C._VariableFunctionsClass.max(data_no_inf.float()).item() if operator =='max' else \ + torch._C._VariableFunctionsClass.min(data_no_inf.float()).item() def get_type_name(name): @@ -118,8 +138,12 @@ class APIInfo: single_arg.update({'type': 'torch.Tensor'}) single_arg.update({'dtype': str(arg.dtype)}) single_arg.update({'shape': arg.shape}) - single_arg.update({'Max': transfer_types(get_tensor_extremum(arg, 'max'), str(arg.dtype))}) - single_arg.update({'Min': transfer_types(get_tensor_extremum(arg, 'min'), str(arg.dtype))}) + max_handle, max_origin = get_tensor_extremum(arg,'max') + single_arg.update({'Max': transfer_types(max_handle, str(arg.dtype))}) + single_arg.update({'Max_origin': transfer_types(max_origin, str(arg.dtype))}) + min_handle, min_origin = get_tensor_extremum(arg,'min') + single_arg.update({'Min': transfer_types(min_handle, str(arg.dtype))}) + single_arg.update({'Min_origin': transfer_types(min_origin, str(arg.dtype))}) single_arg.update({'requires_grad': arg.requires_grad}) else: api_args = self.api_name + '.' 
+ str(self.args_num) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index ec3c539f7f..32ad7b3d62 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -18,6 +18,7 @@ import os import torch import numpy +import math from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, \ print_error_log, get_full_data_path, CompareException @@ -105,6 +106,9 @@ def gen_random_tensor(info, convert_type): """ check_object_type(info, dict) low, high = info.get('Min'), info.get('Max') + low_origin, high_origin = info.get('Min_origin'), info.get('Max_origin') + low_info = [low, low_origin] + high_info = [high, high_origin] data_dtype = info.get('dtype') shape = tuple(info.get('shape')) if not isinstance(low, (int, float)) or not isinstance(high, (int, float)): @@ -113,11 +117,11 @@ def gen_random_tensor(info, convert_type): if data_dtype == "torch.bool": data = gen_bool_tensor(low, high, shape) else: - data = gen_common_tensor(low, high, shape, data_dtype, convert_type) + data = gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type) return data -def gen_common_tensor(low, high, shape, data_dtype, convert_type): +def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): """ Function Description: Based on API basic information, generate int or float tensor @@ -133,10 +137,23 @@ def gen_common_tensor(low, high, shape, data_dtype, convert_type): if ori_dtype == data_dtype: data_dtype = Const.CONVERT.get(convert_type)[1] if data_dtype in FLOAT_TYPE: - if high in [float('inf'), float('-inf')] or low in [float('inf'), float('-inf')]: - error_info = 'Parameter contains inf, skip comparison.' 
- raise CompareException(CompareException.INVALID_PARAM_ERROR, error_info) - scale = high - low + low, low_origin = low_info[0], low_info[1] + high, high_origin = high_info[0], high_info[1] + if math.isnan(high): + tensor = torch.full(shape, float('nan'), dtype=eval(data_dtype)) + return tensor + low_scale, high_scale = low, high + dtype_finio = torch.finio(eval(data_dtype)) + if high == float('inf'): + high_scale = dtype_finio.max + elif high == float('-inf'): + high_scale = dtype_finio.min + if low == float('inf'): + low_scale = dtype_finio.max + elif low == float('-inf'): + low_scale = dtype_finio.min + + scale = high_scale - low_scale rand01 = torch.rand(shape, dtype=eval(data_dtype)) tensor = rand01 * scale + low elif 'int' in data_dtype or 'long' in data_dtype: @@ -148,8 +165,21 @@ def gen_common_tensor(low, high, shape, data_dtype, convert_type): if tensor.nelement() == 0: return tensor tmp_tensor = tensor.reshape(-1) - tmp_tensor[0] = low - tmp_tensor[-1] = high + if math.isnan(high_origin): + if tmp_tensor.numel() <= 2: + tmp_tensor[0] = float('nan') + tmp_tensor[-1] = high + else: + tmp_tensor[0] = low + tmp_tensor[1] = float('nan') + tmp_tensor[-1] = high + else: + tmp_tensor[0] = low + tmp_tensor[-1] = high + if high_origin in [float('inf'), float('-inf')]: + tmp_tensor[-1] = high_origin + if low_origin in [float('inf'), float('-inf')]: + tmp_tensor[0] = low_origin data = tmp_tensor.reshape(shape) return data -- Gitee From 1764771a25b3436de0a43612800b38f87e4de81f Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 11:43:37 +0800 Subject: [PATCH 05/28] fix --- .../accuracy_tools/api_accuracy_checker/run_ut/data_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 32ad7b3d62..2b44ac94f6 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -143,7 +143,7 @@ def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): tensor = torch.full(shape, float('nan'), dtype=eval(data_dtype)) return tensor low_scale, high_scale = low, high - dtype_finio = torch.finio(eval(data_dtype)) + dtype_finio = torch.finfo(eval(data_dtype)) if high == float('inf'): high_scale = dtype_finio.max elif high == float('-inf'): -- Gitee From bfe717eca7f8f7eb3fe972b5e6844a7daa6fe975 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 11:45:42 +0800 Subject: [PATCH 06/28] fix --- .../api_accuracy_checker/run_ut/data_generate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 2b44ac94f6..e325dd5173 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -143,15 +143,15 @@ def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): tensor = torch.full(shape, float('nan'), dtype=eval(data_dtype)) return tensor low_scale, high_scale = low, high - dtype_finio = torch.finfo(eval(data_dtype)) + dtype_finfo = torch.finfo(eval(data_dtype)) if high == float('inf'): - high_scale = dtype_finio.max + high_scale = dtype_finfo.max elif high == float('-inf'): - high_scale = dtype_finio.min + high_scale = dtype_finfo.min if low == float('inf'): - low_scale = 
dtype_finio.max + low_scale = dtype_finfo.max elif low == float('-inf'): - low_scale = dtype_finio.min + low_scale = dtype_finfo.min scale = high_scale - low_scale rand01 = torch.rand(shape, dtype=eval(data_dtype)) -- Gitee From e404c3b13e90729c25531722f4d21b9597515035 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 15:24:03 +0800 Subject: [PATCH 07/28] fix --- debug/accuracy_tools/api_accuracy_checker/compare/compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index 15bfb1904c..addf2996d0 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -160,9 +160,9 @@ class Comparator: _, api_name, _ = full_api_name.split("*") compare_func = self._compare_dropout if "dropout" in full_api_name else self._compare_core_wrapper fwd_success_status, fwd_compare_alg_results = compare_func(api_name, bench_output, device_output) - bwd_success_status, bwd_compare_alg_results = (CompareConst.PASS, []) if not (bench_grad and npu_grad) else compare_func(api_name, bench_grad[0], npu_grad[0]) if "dropout" in full_api_name else compare_func(api_name, bench_grad, npu_grad) + bwd_success_status, bwd_compare_alg_results = (CompareConst.SPACE, []) if not (bench_grad and npu_grad) else compare_func(api_name, bench_grad[0], npu_grad[0]) if "dropout" in full_api_name else compare_func(api_name, bench_grad, npu_grad) self.record_results(full_api_name, fwd_success_status, bwd_success_status if bwd_compare_alg_results is not None else CompareConst.SPACE, fwd_compare_alg_results, bwd_compare_alg_results) - return fwd_success_status == CompareConst.PASS, bwd_success_status == CompareConst.PASS + return fwd_success_status == CompareConst.PASS, bwd_success_status == CompareConst.PASS or bwd_success_status == CompareConst.SPACE def _compare_core_wrapper(self, api_name, bench_output, device_output): detailed_result_total = [] -- Gitee From c6b9e0bd1532a9dbe82bb787aff261782660ee9a Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 15:27:22 +0800 Subject: [PATCH 08/28] fix --- .../accuracy_tools/api_accuracy_checker/dump/api_info.py | 8 ++++---- .../api_accuracy_checker/run_ut/data_generate.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 0bff80aea8..01549932ac 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -38,9 +38,9 @@ def handle_tensor_extremum_nan_inf(data_clone, operator): return float('nan') data_no_inf = data_no_nan[~torch.isinf(data_no_nan)] if len(data_no_inf) == 0: - return torch._C._VariableFunctionsClass.max(data_no_nan.float()).item() if operator =='max' else \ + return torch._C._VariableFunctionsClass.max(data_no_nan.float()).item() if operator == 'max' else \ torch._C._VariableFunctionsClass.min(data_no_nan.float()).item() - return torch._C._VariableFunctionsClass.max(data_no_inf.float()).item() if operator =='max' else \ + return torch._C._VariableFunctionsClass.max(data_no_inf.float()).item() if operator == 'max' else \ torch._C._VariableFunctionsClass.min(data_no_inf.float()).item() @@ -138,10 +138,10 @@ class APIInfo: single_arg.update({'type': 'torch.Tensor'}) single_arg.update({'dtype': str(arg.dtype)}) 
single_arg.update({'shape': arg.shape}) - max_handle, max_origin = get_tensor_extremum(arg,'max') + max_handle, max_origin = get_tensor_extremum(arg, 'max') single_arg.update({'Max': transfer_types(max_handle, str(arg.dtype))}) single_arg.update({'Max_origin': transfer_types(max_origin, str(arg.dtype))}) - min_handle, min_origin = get_tensor_extremum(arg,'min') + min_handle, min_origin = get_tensor_extremum(arg, 'min') single_arg.update({'Min': transfer_types(min_handle, str(arg.dtype))}) single_arg.update({'Min_origin': transfer_types(min_origin, str(arg.dtype))}) single_arg.update({'requires_grad': arg.requires_grad}) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index e325dd5173..525831fe5e 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -16,9 +16,9 @@ """ import os +import math import torch import numpy -import math from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, \ print_error_log, get_full_data_path, CompareException -- Gitee From 9867ee5135fca339b1225eac8ed889057293e6bb Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 19:27:56 +0800 Subject: [PATCH 09/28] fix --- debug/accuracy_tools/api_accuracy_checker/dump/api_info.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 01549932ac..0439df5ec4 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -12,11 +12,11 @@ from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import check_path_before_ def get_tensor_extremum(data, operator): if data.dtype is torch.bool: if data.numel() == 0: - return False + return False, False if operator == 'max': - return True in data + return True in data, True in data elif operator == 'min': - return False not in data + return False not in data, False not in data data_clone = data.clone().detach() if operator == 'max': max_result = torch._C._VariableFunctionsClass.max(data_clone.float()) -- Gitee From 7cd7f3d42a9a8e19d031b735213ee921cbe2442e Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 1 Apr 2024 20:55:23 +0800 Subject: [PATCH 10/28] fix --- .../api_accuracy_checker/dump/api_info.py | 16 ++++++++-------- .../api_accuracy_checker/run_ut/data_generate.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 0439df5ec4..e179c3a538 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -19,17 +19,17 @@ def get_tensor_extremum(data, operator): return False not in data, False not in data data_clone = data.clone().detach() if operator == 'max': - max_result = torch._C._VariableFunctionsClass.max(data_clone.float()) - if torch.isinf(max_result) or torch.isnan(max_result): - return handle_tensor_extremum_nan_inf(data_clone, operator), max_result.item() + max_result = torch._C._VariableFunctionsClass.max(data_clone.float()).item() + if np.isinf(max_result) or np.isnan(max_result): + return handle_tensor_extremum_nan_inf(data_clone, operator), max_result else: - return 
max_result.item(), max_result.item() + return max_result, max_result else: - min_result = torch._C._VariableFunctionsClass.min(data_clone.float()) - if torch.isinf(min_result) or torch.isnan(min_result): - return handle_tensor_extremum_nan_inf(data_clone, operator), min_result.item() + min_result = torch._C._VariableFunctionsClass.min(data_clone.float()).item() + if np.isinf(min_result) or np.isnan(min_result): + return handle_tensor_extremum_nan_inf(data_clone, operator), min_result else: - return min_result.item(), min_result.item() + return min_result, min_result def handle_tensor_extremum_nan_inf(data_clone, operator): diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 525831fe5e..e32927527f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -140,7 +140,7 @@ def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): low, low_origin = low_info[0], low_info[1] high, high_origin = high_info[0], high_info[1] if math.isnan(high): - tensor = torch.full(shape, float('nan'), dtype=eval(data_dtype)) + tensor = torch._C._VariableFunctionsClass.full(shape, float('nan'), dtype=eval(data_dtype)) return tensor low_scale, high_scale = low, high dtype_finfo = torch.finfo(eval(data_dtype)) -- Gitee From e1ab1ce7f7fb4fd98da941899611676849cad10c Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 10:10:57 +0800 Subject: [PATCH 11/28] fix --- .../accuracy_tools/api_accuracy_checker/dump/api_info.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index e179c3a538..31e1309088 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -38,10 +38,11 @@ def handle_tensor_extremum_nan_inf(data_clone, operator): return float('nan') data_no_inf = data_no_nan[~torch.isinf(data_no_nan)] if len(data_no_inf) == 0: - return torch._C._VariableFunctionsClass.max(data_no_nan.float()).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(data_no_nan.float()).item() - return torch._C._VariableFunctionsClass.max(data_no_inf.float()).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(data_no_inf.float()).item() + float_data = data_no_nan.float() + else: + float_data = data_no_inf.float() + return torch._C._VariableFunctionsClass.max(float_data).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(float_data).item() def get_type_name(name): -- Gitee From e14762b19abe06c1bd8e91b8a9e6db13e4f87079 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 10:19:32 +0800 Subject: [PATCH 12/28] fix --- .../api_accuracy_checker/compare/compare.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index addf2996d0..64d5dde312 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -160,7 +160,13 @@ class Comparator: _, api_name, _ = full_api_name.split("*") compare_func = self._compare_dropout if "dropout" in full_api_name else self._compare_core_wrapper fwd_success_status, 
fwd_compare_alg_results = compare_func(api_name, bench_output, device_output) - bwd_success_status, bwd_compare_alg_results = (CompareConst.SPACE, []) if not (bench_grad and npu_grad) else compare_func(api_name, bench_grad[0], npu_grad[0]) if "dropout" in full_api_name else compare_func(api_name, bench_grad, npu_grad) + if not (bench_grad and npu_grad): + bwd_success_status, bwd_compare_alg_results = (CompareConst.SPACE, []) + else: + if "dropout" in full_api_name: + bwd_success_status, bwd_compare_alg_results = compare_func(api_name, bench_grad[0], npu_grad[0]) + else: + bwd_success_status, bwd_compare_alg_results = compare_func(api_name, bench_grad, npu_grad) self.record_results(full_api_name, fwd_success_status, bwd_success_status if bwd_compare_alg_results is not None else CompareConst.SPACE, fwd_compare_alg_results, bwd_compare_alg_results) return fwd_success_status == CompareConst.PASS, bwd_success_status == CompareConst.PASS or bwd_success_status == CompareConst.SPACE -- Gitee From 0cb2524ebd00289f920191d8fc4823bd44bd5208 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 10:45:24 +0800 Subject: [PATCH 13/28] fix --- debug/accuracy_tools/api_accuracy_checker/dump/api_info.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 31e1309088..bd532d9555 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -33,10 +33,10 @@ def get_tensor_extremum(data, operator): def handle_tensor_extremum_nan_inf(data_clone, operator): - data_no_nan = data_clone[~torch.isnan(data_clone)] + data_no_nan = data_clone[~torch._C._VariableFunctionsClass.isnan(data_clone)] if len(data_no_nan) == 0: return float('nan') - data_no_inf = data_no_nan[~torch.isinf(data_no_nan)] + data_no_inf = data_no_nan[~torch._C._VariableFunctionsClass.isinf(data_no_nan)] if len(data_no_inf) == 0: float_data = data_no_nan.float() else: -- Gitee From 2ae7577bd958e7b65aca7d400df8f6e4ff3be38d Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 11:21:34 +0800 Subject: [PATCH 14/28] fix --- .../api_accuracy_checker/dump/api_info.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index bd532d9555..41615f23f9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -33,16 +33,19 @@ def get_tensor_extremum(data, operator): def handle_tensor_extremum_nan_inf(data_clone, operator): - data_no_nan = data_clone[~torch._C._VariableFunctionsClass.isnan(data_clone)] - if len(data_no_nan) == 0: + + data_nan = torch._C._VariableFunctionsClass.isnan(data_clone) + if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel(): return float('nan') - data_no_inf = data_no_nan[~torch._C._VariableFunctionsClass.isinf(data_no_nan)] - if len(data_no_inf) == 0: - float_data = data_no_nan.float() + finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone) + if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0: + finite_values = data_clone[finite_mask] + return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(finite_values).item() else: - float_data = data_no_inf.float() - 
return torch._C._VariableFunctionsClass.max(float_data).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(float_data).item() + data_no_nan = data_clone[~torch._C._VariableFunctionsClass.isnan(data_clone)] + return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(data_no_nan).item() def get_type_name(name): -- Gitee From b8c9230a48f7740bc4ca3adbf329308bc30936c4 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 11:21:52 +0800 Subject: [PATCH 15/28] fix --- debug/accuracy_tools/api_accuracy_checker/dump/api_info.py | 1 - 1 file changed, 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 41615f23f9..672ebe409a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -33,7 +33,6 @@ def get_tensor_extremum(data, operator): def handle_tensor_extremum_nan_inf(data_clone, operator): - data_nan = torch._C._VariableFunctionsClass.isnan(data_clone) if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel(): return float('nan') -- Gitee From ccbf5984809c9dffa70405daca4608f2161307bd Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 14:40:07 +0800 Subject: [PATCH 16/28] fix --- .../accuracy_tools/api_accuracy_checker/dump/api_info.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 672ebe409a..50ad39166f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -17,15 +17,15 @@ def get_tensor_extremum(data, operator): return True in data, True in data elif operator == 'min': return False not in data, False not in data - data_clone = data.clone().detach() + data_clone = data.float().clone().detach() if operator == 'max': - max_result = torch._C._VariableFunctionsClass.max(data_clone.float()).item() + max_result = torch._C._VariableFunctionsClass.max(data_clone).item() if np.isinf(max_result) or np.isnan(max_result): return handle_tensor_extremum_nan_inf(data_clone, operator), max_result else: return max_result, max_result else: - min_result = torch._C._VariableFunctionsClass.min(data_clone.float()).item() + min_result = torch._C._VariableFunctionsClass.min(data_clone).item() if np.isinf(min_result) or np.isnan(min_result): return handle_tensor_extremum_nan_inf(data_clone, operator), min_result else: @@ -42,7 +42,7 @@ def handle_tensor_extremum_nan_inf(data_clone, operator): return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \ torch._C._VariableFunctionsClass.min(finite_values).item() else: - data_no_nan = data_clone[~torch._C._VariableFunctionsClass.isnan(data_clone)] + data_no_nan = data_clone[~data_nan] return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \ torch._C._VariableFunctionsClass.min(data_no_nan).item() -- Gitee From 5a1de26081dbcbd2055e983b064a75676f58c1b0 Mon Sep 17 00:00:00 2001 From: manyny Date: Tue, 2 Apr 2024 15:49:47 +0800 Subject: [PATCH 17/28] add debug/weight_convert --- README.md | 3 + debug/weight_convert/README.md | 90 +++++ debug/weight_convert/bloom.py | 526 +++++++++++++++++++++++++ debug/weight_convert/convert_ckpt.py | 90 +++++ 
debug/weight_convert/diff.patch | 76 ++++ debug/weight_convert/llama.py | 560 +++++++++++++++++++++++++++ debug/weight_convert/load_utils.py | 371 ++++++++++++++++++ 7 files changed, 1716 insertions(+) create mode 100644 debug/weight_convert/README.md create mode 100644 debug/weight_convert/bloom.py create mode 100644 debug/weight_convert/convert_ckpt.py create mode 100644 debug/weight_convert/diff.patch create mode 100644 debug/weight_convert/llama.py create mode 100644 debug/weight_convert/load_utils.py diff --git a/README.md b/README.md index cb203544c7..87a1a03725 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,9 @@ Ascend Training Tools,昇腾训练工具链。针对训练&大模型场景, 脚本迁移工具提供后端命令行用于将GPU上训练的PyTorch脚本迁移至NPU上,得到新的训练脚本用于训练。 +4. [训推一体权重转换工具](https://gitee.com/Ascend/att/wikis/%E5%B7%A5%E5%85%B7%E4%BB%8B%E7%BB%8D/%E5%88%86%E6%9E%90%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7/%E8%AE%AD%E6%8E%A8%E4%B8%80%E4%BD%93%E6%9D%83%E9%87%8D%E8%BD%AC%E6%8D%A2%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC) + + 训推一体权重转换工具,支持在GPU和NPU上训练好的模型转成加速推理支持的格式。 ### [精度工具](https://gitee.com/ascend/att/tree/master/debug/accuracy_tools) diff --git a/debug/weight_convert/README.md b/debug/weight_convert/README.md new file mode 100644 index 0000000000..6cc4e2481f --- /dev/null +++ b/debug/weight_convert/README.md @@ -0,0 +1,90 @@ +## 训推一体权重转换工具 + +推理场景基于Huggingface的权重进行推理,Huggingface主要基于GPU训练,而昇腾主要在NPU上进行训练,不同硬件平台对应的模型权重格式存在差异。需要支持在NPU和GPU上训练好的模型转成Huggingface格式safetensors文件,用于推理使用。 + + +#### 前提条件 +准备以下权重: +1. 复训前huggingface权重 +2. 复训后权重 + +##### 依赖安装 + +如下命令如果使用非root用户安装,需要在安装命令后加上--user,例如:**pip3 install xxx** **--user**,安装命令可在任意路径下执行。 + +```shell +# python=3.8 +pip install torch-2.1.0-cp38-cp38m-linux_aarch64.whl +pip install torch_npu-2.1.0.post5_XXXXXX-cp38-cp38m-linux_aarch64.whl +source /path/to/Ascend/ascend-toolkit/set_env.sh + +git clone https://gitee.com/ascend/ModelLink.git +git clone https://gitee.com/ascend/AscendSpeed.git + +cd AscendSpeed +pip3 install -r requirements.txt +pip3 install -e . +cd .. +cd ModelLink +pip3 install -r requirements.txt +export PYTHONPATH=`pwd`:$PYTHONPATH +cd .. +``` + +##### 代码获取 + +```shell +git clone https://gitee.com/Ascend/att.git +cd att +git checkout develop + +cd ../ModelLink +git reset --hard c566ce4fa99cf3ea179b163355fca2c2aedfc471 +cp ../att/debug/weight_convert/diff.patch . +git apply --check diff.patch +git apply diff.patch +cd ../att/debug/weight_convert/ +``` + +#### 启动工具 + +1. 参考表1 参数说明配置信息,执行如下命令启动分析任务。转换后权重会保存在`原始huggingface权重存放位置/mg2hf`下 + +```shell +python3 convert_ckpt.py -i 待转换权重路径 -o 原始huggingface权重存放位置 -m 模型类型,可选项:llama/bloom\ + [--target-tensor-parallel-size 张量并行数 \ + --target-pipeline-parallel-size 流水线并行数\ + --embed-layernorm] +``` + + **表1 参数说明** + + | 参数 | 参数说明 | 取值示例 | + | ---------------------------------- | -------------------------------------- | ------------------------------------------------------------ | + | -i
--input-model-dir | **必选** 待转换权重文件的存放位置 | /home/*xxx*/*input_weight* | + | -o
--output-model-dir | **必选** 导出权重文件的存放位置(要求目录下有原始huggingface权重) | /home/*xxx*/*output_weight* | + | -m
--model | **必选** 转换的模型类型 | llama
bloom | + | --target-tensor-parallel-size | 转换后张量并行数 | 1 | + | --target-pipeline-parallel-size | 转换后流水线并行数 | 1 | + | --embed-layernorm | 模型中是否存在embedding layernorm结构 | False(默认)
True | + | -h
--help | 显示帮助信息。 | - | + + +2. 模型转换命令参考 + + **Llama 7/13/65B**、 **Llama2 7/13/70B** +```shell +python3 convert_ckpt.py -o "your huggingface checkpoint output path" \ + -i "your megatron checkpoint path" \ + --model llama +``` + + **Bloom 7B** +```shell +python3 convert_ckpt.py -o "your huggingface checkpoint output path" \ + -i "your megatron checkpoint path" \ + --model bloom +``` + + +3. 分析完成后,进入输出路径,查看转换结果。 \ No newline at end of file diff --git a/debug/weight_convert/bloom.py b/debug/weight_convert/bloom.py new file mode 100644 index 0000000000..d884d451f3 --- /dev/null +++ b/debug/weight_convert/bloom.py @@ -0,0 +1,526 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +from collections.abc import Mapping +import concurrent.futures +import os +import gc +import sys +import shutil +import torch +import torch_npu + + +def add_arguments(parser): + group = parser.add_argument_group(title='Megatron saver') + group.add_argument('--target-tensor-parallel-size', type=int, + help='Target tensor model parallel size, defaults to the tensor parallel size ' + 'in the input checkpoint if provided by the loader, otherwise to 1') + group.add_argument('--target-pipeline-parallel-size', type=int, + help='Target tensor model parallel size, default to the pipeline parall size ' + 'in the input checkpoint if provided by the loader, otherwise to 1') + group.add_argument("--w-pack", type=bool, + help='True is w_pack weight for llm', + default=False) + + +def save_huggingface_bloom(args, model, model_args): + hf2mg_map = {} + for name_param_m in model.named_parameters(): + print("name_param_m", name_param_m[0]) + layer_num = name_param_m[0].split(".")[3] if len(name_param_m[0].split(".")) > 3 else name_param_m[0].split(".")[1] + nh = model_args.num_attention_heads + ng = ( + model_args.checkpoint_args.num_query_groups + if model_args.checkpoint_args.group_query_attention + else model_args.num_attention_heads + ) + repeats = nh // ng + # word embedding + if name_param_m[0] == "language_model.embedding.word_embeddings.weight": + hf2mg_map["word_embeddings.weight"] = name_param_m[1] + continue + if name_param_m[0] == "language_model.embedding.word_embeddings.norm.weight": + hf2mg_map["word_embeddings_layernorm.weight"] = name_param_m[1] + continue + if name_param_m[0] == "language_model.embedding.word_embeddings.norm.bias": + hf2mg_map["word_embeddings_layernorm.bias"] = name_param_m[1] + continue + + # input layernorm + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.input_norm.weight": + hf2mg_map[f"h.{layer_num}.input_layernorm.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.input_norm.bias": + hf2mg_map[f"h.{layer_num}.input_layernorm.bias"] = name_param_m[1] + continue + + # qkv + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.query_key_value.weight": + hf2mg_map[f"h.{layer_num}.self_attention.query_key_value.weight"] = name_param_m[1] + 
continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.query_key_value.bias": + hf2mg_map[f"h.{layer_num}.self_attention.query_key_value.bias"] = name_param_m[1] + continue + + # post attention norm + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.post_attention_norm.weight": + hf2mg_map[f"h.{layer_num}.post_attention_layernorm.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.post_attention_norm.bias": + hf2mg_map[f"h.{layer_num}.post_attention_layernorm.bias"] = name_param_m[1] + continue + + # dense + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.dense.weight": + hf2mg_map[f"h.{layer_num}.self_attention.dense.weight"] = name_param_m[ + 1 + ] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.dense.bias": + hf2mg_map[f"h.{layer_num}.self_attention.dense.bias"] = name_param_m[1] + continue + # mlp + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_h_to_4h.weight": + hf2mg_map[f"h.{layer_num}.mlp.dense_h_to_4h.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_h_to_4h.bias": + hf2mg_map[f"h.{layer_num}.mlp.dense_h_to_4h.bias"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_4h_to_h.weight": + hf2mg_map[f"h.{layer_num}.mlp.dense_4h_to_h.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_4h_to_h.bias": + hf2mg_map[f"h.{layer_num}.mlp.dense_4h_to_h.bias"] = name_param_m[1] + continue + # final norm + if name_param_m[0] == "language_model.encoder.final_norm.weight": + hf2mg_map[f"ln_f.weight"] = name_param_m[1] + continue + if name_param_m[0] == "language_model.encoder.final_norm.bias": + hf2mg_map[f"ln_f.bias"] = name_param_m[1] + continue + print('hf2mg_map.keys', hf2mg_map.keys()) + + gc.collect() + file_format = "pytorch_model-{:05d}-of-{:05d}.bin" + file_list = os.listdir(args.output_model_dir) + output_mg2hg_path = os.path.join(args.output_model_dir, 'mg2hg') + os.makedirs(output_mg2hg_path, exist_ok=True) + for filename in file_list: + if filename.startswith("pytorch_model-") and filename.endswith(".bin"): + try: + start_index = len("pytorch_model-") + end_index = filename.index("-of-") + index1 = int(filename[start_index:end_index]) + index2 = int(filename[end_index + len("-of-"): -len(".bin")]) + if 0 <= index1 <= index2 <= 99999: + formatted_filename = file_format.format(index1, index2) + file_path = os.path.join(args.output_model_dir, formatted_filename) + hf_model = torch.load(file_path, map_location=torch.device('cpu')) + for key in hf_model.keys(): + print(key) + if key in hf2mg_map.keys(): + hf_model[key].data.copy_(hf2mg_map[key]) + output_file_path = os.path.join(output_mg2hg_path, formatted_filename) + print(f'save weight to {output_file_path}') + # torch.save(hf_model, output_file_path) + else: + print("Invalid file format:", filename) + except ValueError: + print("Unable to save file:", filename) + elif filename.endswith('.json') or filename.endswith('.model'): + source_file = os.path.join(args.output_model_dir, filename) + destination_file = os.path.join(output_mg2hg_path, filename) + shutil.copyfile(source_file, destination_file) + output_model_dir = os.path.join(args.output_model_dir, 'mg2hg') + print(f'save weight to {output_model_dir}') + + +def 
save_model_checkpoint(queue, args): + + # Search in directory above this + # sys.path.append(os.path.abspath( + # os.path.join(os.path.dirname(__file__), + # os.path.pardir, + # os.path.pardir))) + + from pretrain_gpt import model_provider + from ascendspeed import megatron_adaptor + from megatron.arguments import validate_args + from modellink.utils import parse_args + from megatron.checkpointing import save_checkpoint + from megatron.global_vars import set_global_variables, get_args + from megatron.core.enums import ModelType + from megatron.tokenizer.tokenizer import _vocab_size_with_padding + from megatron import fused_kernels + from megatron.core import mpu + + def queue_get(name=None): + val = queue.get() + if val == "exit": + raise RuntimeError('Loader exited, exiting saver') + if name is not None and args.checking and val["name"] != name: + val_name = val["name"] + raise RuntimeError(f'Unexpected message. Expecting "{name}" but got "{val_name}". Exiting saver.') + if name is not None: + print(f"received {name}") + return val + + def check_message(msg): + if not args.checking: + return + msg_name = msg.pop("name") + if len(msg.keys()) > 0: + print(f"Unexpected values in {msg_name}:") + for key in msg.keys(): + print(f" {key}") + raise RuntimeError(f"Exiting. If you want to ignore this, use the argument --no-checking.") + + + md = queue_get() + + if args.target_tensor_parallel_size is None: + if hasattr(md, 'previous_tensor_parallel_size'): + args.target_tensor_parallel_size = md.previous_tensor_parallel_size + else: + print("loader did not provide a tensor parallel size and --target-tensor-parallel-size not provided on command line. " + "Default to 1.") + args.target_tensor_parallel_size = 1 + + if args.target_pipeline_parallel_size is None: + if hasattr(md, 'previous_pipeline_parallel_size'): + args.target_pipeline_parallel_size = md.previous_pipeline_parallel_size + else: + print("loader did not provide a pipeline parallel size and --target-pipeline-parallel-size not provided on command line. 
" + "Default to 1.") + args.target_pipeline_parallel_size = 1 + + + # Arguments do sanity checks on the world size, but we don't care, + # so trick it into thinking we are plenty of processes + if args.target_tensor_parallel_size is not None and args.target_pipeline_parallel_size is not None: + os.environ["WORLD_SIZE"] = f'{args.target_tensor_parallel_size * args.target_pipeline_parallel_size}' + + # We want all arguments to come from us + sys.argv = ['script.py', + '--num-layers', str(md.num_layers), + '--hidden-size', str(md.hidden_size), + '--seq-length', str(md.seq_length), + '--num-attention-heads', str(md.num_attention_heads), + '--max-position-embeddings', str(md.max_position_embeddings), + '--position-embedding-type', str(md.position_embedding_type), + '--tokenizer-type', str(md.tokenizer_type), + '--tensor-model-parallel-size', str(args.target_tensor_parallel_size), + '--pipeline-model-parallel-size', str(args.target_pipeline_parallel_size), + '--no-masked-softmax-fusion', + '--no-bias-gelu-fusion', + '--no-bias-dropout-fusion', + '--no-async-tensor-model-parallel-allreduce', + '--use-cpu-initialization', + '--micro-batch-size', '1', + '--no-load-optim', + '--no-load-rng', + '--no-save-optim', + '--no-save-rng', + '--no-initialization', + '--save-interval', '1', + '--save', args.output_model_dir, + '--fp16' + ] + + if md.make_vocab_size_divisible_by is not None: + sys.argv.extend(['--make-vocab-size-divisible-by', str(md.make_vocab_size_divisible_by)]) + if md.output_layer: + sys.argv.append('--untie-embeddings-and-output-weights') + if not md.linear_bias: + sys.argv.append('--disable-bias-linear') + + margs = parse_args() + margs.w_pack = args.w_pack + + + if hasattr(md, 'checkpoint_args'): + # These are arguments that we are either changing, or cause problems for validation if they are set + # Note that some of these deal with T5 so will need to be changed if we support T5. 
+ args_to_keep = ['tensor_model_parallel_size', 'pipeline_model_parallel_size', 'world_size', 'params_dtype', + 'num_layers_per_virtual_pipeline_stage', 'virtual_pipeline_model_parallel_size', + 'masked_softmax_fusion', 'bias_gelu_fusion', 'bias_dropout_fusion', + 'sequence_parallel', 'async_tensor_model_parallel_allreduce', + 'no_load_optim', 'no_load_rng', 'no_save_optim', 'no_save_rng', + 'vocab_file', 'tokenizer_model', + 'save_interval', 'save', + 'perform_initialization', 'use_cpu_initialization', + 'recompute_granularity', 'recompute_num_layers', 'recompute_method', + 'encoder_num_layers', 'encoder_seq_length', + 'distribute_saved_activations', + 'train_iters', 'lr_decay_iters', 'lr_warmup_iters', 'lr_warmup_fraction', + 'start_weight_decay', 'end_weight_decay'] + + + for arg, value in vars(md.checkpoint_args).items(): + if arg in args_to_keep: + continue + if not hasattr(margs, arg): + print(f"Checkpoint had argument {arg} but new arguments does not have this.") + continue + if getattr(margs, arg) != value: + print(f"Overwriting default {arg} value {getattr(margs, arg)} with value from checkpoint {value}.") + setattr(margs, arg, value) + + validate_args(margs) + + set_global_variables(margs, build_tokenizer=False) + + # margs = megatron args + margs = get_args() + + margs.model_type = ModelType.encoder_or_decoder + + if hasattr(md, 'consumed_train_samples'): + margs.consumed_train_samples = md.consumed_train_samples + margs.consumed_valid_samples = md.consumed_valid_samples + print(f"Setting consumed_train_samples to {margs.consumed_train_samples}" + f" and consumed_valid_samples to {margs.consumed_valid_samples}") + else: + print("consumed_train_samples not provided.") + + def get_models(count, dtype, pre_process, post_process): + models = [model_provider(pre_process, post_process).to(dtype) for _ in range(count)] + return models + + # fake initializing distributed + mpu.set_tensor_model_parallel_world_size(args.target_tensor_parallel_size) + mpu.set_pipeline_model_parallel_world_size(args.target_pipeline_parallel_size) + mpu.set_tensor_model_parallel_rank(0) + mpu.set_pipeline_model_parallel_rank(0) + # Embeddings + #----------- + embeddings_msg = queue_get("embeddings") + + pos_embed = None + if md.position_embedding_type == 'learned_absolute': + pos_embed = embeddings_msg.pop("position embeddings") + orig_word_embed = embeddings_msg.pop("word embeddings") + orig_word_embed_n_w, orig_word_embed_n_b = None, None + if "word embeddings norm_w" in embeddings_msg and "word embeddings norm_b" in embeddings_msg: + orig_word_embed_n_w = embeddings_msg.pop("word embeddings norm_w") + orig_word_embed_n_b = embeddings_msg.pop("word embeddings norm_b") + check_message(embeddings_msg) + + # Deal with padding + if md.true_vocab_size is not None: + # figure out what our padded vocab size is + orig_vocab_size = orig_word_embed.shape[0] + margs.padded_vocab_size = _vocab_size_with_padding(md.true_vocab_size, margs) + + # Cut out extra padding we don't need + if orig_vocab_size > margs.padded_vocab_size: + full_word_embed = orig_word_embed[0:margs.padded_vocab_size, :] + + # Expanding embedding to larger size by replicating final entry + elif orig_vocab_size < margs.padded_vocab_size: + padding_size = margs.padded_vocab_size - orig_vocab_size + + full_word_embed = torch.cat(( + orig_word_embed, + orig_word_embed[-1].unsqueeze(0).expand(padding_size, -1))) + + # Same size! + else: + full_word_embed = orig_word_embed + else: + print("Original vocab size not specified, leaving embedding table as-is. 
" + "If you've changed the tensor parallel size this could cause problems.") + margs.padded_vocab_size = orig_word_embed.shape[0] + full_word_embed = orig_word_embed + + # Split into new tensor model parallel sizes + out_word_embed = torch.chunk(full_word_embed, args.target_tensor_parallel_size, dim=0) + + # Make models for first pipeline stage and fill in embeddings + mpu.set_pipeline_model_parallel_rank(0) + post_process = args.target_pipeline_parallel_size == 1 + models = get_models(args.target_tensor_parallel_size, md.params_dtype, True, post_process) + for tp_rank, model in enumerate(models): + model.language_model.embedding.word_embeddings.weight.data.copy_(out_word_embed[tp_rank]) + if orig_word_embed_n_w is not None: + model.language_model.embedding.word_embeddings.norm.weight.data.copy_(orig_word_embed_n_w) + model.language_model.embedding.word_embeddings.norm.bias.data.copy_(orig_word_embed_n_b) + if pos_embed is not None: + model.language_model.embedding.position_embeddings.weight.data.copy_(pos_embed) + else: + if hasattr(model.language_model.embedding, 'position_embeddings'): + raise ValueError("model should have position_embeddings") + + # Transformer layers + #------------------- + total_layer_num = 0 + for pp_rank in range(args.target_pipeline_parallel_size): + # For later pipeline parallel ranks, make the new models + if pp_rank > 0: + mpu.set_pipeline_model_parallel_rank(pp_rank) + post_process = pp_rank == args.target_pipeline_parallel_size - 1 + models = get_models(args.target_tensor_parallel_size, md.params_dtype, False, post_process) + + encoder_layer_num = len(models[0].language_model.encoder.layers) + for layer in range(encoder_layer_num): + msg = queue_get(f"transformer layer {total_layer_num}") + + # duplicated tensors + input_norm_weight = msg.pop("input norm weight") + if md.norm_has_bias: + input_norm_bias = msg.pop("input norm bias") + post_norm_weight = msg.pop("post norm weight") + if md.norm_has_bias: + post_norm_bias = msg.pop("post norm bias") + if md.linear_bias: + dense_bias = msg.pop("dense bias") + mlp_l1_bias = msg.pop("mlp l1 bias") + + if args.add_qkv_bias: + qkv_bias = torch.chunk(msg.pop("qkv bias"), args.target_tensor_parallel_size, dim=0) + if args.add_dense_bias: + dense_bias = msg.pop("dense bias") + + qkv_org = msg.pop("qkv weight") + qkv_weight = torch.chunk(qkv_org, args.target_tensor_parallel_size, dim=0) + + # Split up the parallel tensors + dense_weight = torch.chunk(msg.pop("dense weight"), args.target_tensor_parallel_size, dim=1) + mlp_l1_weight = torch.chunk(msg.pop("mlp l1 weight"), args.target_tensor_parallel_size, dim=1) + + # Special handling for swiglu + if md.swiglu: + mlp_l0_weight_W = torch.chunk(msg.pop("mlp l0 weight W"), args.target_tensor_parallel_size, dim=0) + mlp_l0_weight_V = torch.chunk(msg.pop("mlp l0 weight V"), args.target_tensor_parallel_size, dim=0) + mlp_l0_weight = [torch.cat(weights, dim=0) for weights in zip(mlp_l0_weight_W, mlp_l0_weight_V)] + else: + mlp_l0_weight = torch.chunk(msg.pop("mlp l0 weight"), args.target_tensor_parallel_size, dim=0) + + if md.linear_bias: + qkv_bias = torch.chunk(msg.pop("qkv bias"), args.target_tensor_parallel_size, dim=0) + if md.swiglu: + mlp_l0_bias_W = torch.chunk(msg.pop("mlp l0 bias W"), args.target_tensor_parallel_size, dim=0) + mlp_l0_bias_V = torch.chunk(msg.pop("mlp l0 bias V"), args.target_tensor_parallel_size, dim=0) + mlp_l0_weight = [] + for weights in zip(mlp_l0_weight_W, mlp_l0_weight_V): + mlp_l0_weight.append(torch.cat(weights, dim=0)) + else: + mlp_l0_bias = 
torch.chunk(msg.pop("mlp l0 bias"), args.target_tensor_parallel_size, dim=0) + + # Save them to the model + for tp_rank in range(args.target_tensor_parallel_size): + layer_encoder = models[tp_rank].language_model.encoder.layers[layer] + layer_encoder.input_norm.weight.data.copy_(input_norm_weight) + if md.norm_has_bias: + layer_encoder.input_norm.bias.data.copy_(input_norm_bias) + layer_encoder.self_attention.query_key_value.weight.data.copy_(qkv_weight[tp_rank]) + layer_encoder.self_attention.dense.weight.data.copy_(dense_weight[tp_rank]) + layer_encoder.post_attention_norm.weight.data.copy_(post_norm_weight) + if md.norm_has_bias: + layer_encoder.post_attention_norm.bias.data.copy_(post_norm_bias) + layer_encoder.mlp.dense_h_to_4h.weight.data.copy_(mlp_l0_weight[tp_rank]) + layer_encoder.mlp.dense_4h_to_h.weight.data.copy_(mlp_l1_weight[tp_rank]) + if md.linear_bias: + layer_encoder.self_attention.query_key_value.bias.data.copy_(qkv_bias[tp_rank]) + layer_encoder.self_attention.dense.bias.data.copy_(dense_bias) + layer_encoder.mlp.dense_h_to_4h.bias.data.copy_(mlp_l0_bias[tp_rank]) + layer_encoder.mlp.dense_4h_to_h.bias.data.copy_(mlp_l1_bias) + if args.add_qkv_bias: + layer_encoder.self_attention.query_key_value.bias.data.copy_(qkv_bias[tp_rank]) + if args.add_dense_bias: + layer_encoder.self_attention.dense.bias.data.copy_(dense_bias) + + total_layer_num = total_layer_num + 1 + check_message(msg) + + if post_process: + msg = queue_get("final norm") + final_norm_weight = msg.pop("weight") + if md.norm_has_bias: + final_norm_bias = msg.pop("bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].language_model.encoder.final_norm.weight.data.copy_(final_norm_weight) + if md.norm_has_bias: + models[tp_rank].language_model.encoder.final_norm.bias.data.copy_(final_norm_bias) + if pp_rank != 0 and not md.output_layer: + # Copy word embeddings to final pipeline rank + models[tp_rank].word_embeddings.weight.data.copy_(out_word_embed[tp_rank]) + del final_norm_weight + if md.norm_has_bias: + del final_norm_bias + check_message(msg) + + if md.output_layer: + msg = queue_get("output layer") + if not hasattr(models[0].language_model, 'output_layer'): + raise RuntimeError("ERROR: got an output layer, but model does not have one") + output_layer_weight = torch.chunk(msg.pop("weight"), args.target_tensor_parallel_size, dim=0) + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].language_model.output_layer.weight.data.copy_(output_layer_weight[tp_rank]) + del output_layer_weight + check_message(msg) + + msg = queue_get() + if msg != "done" and msg["name"] == "pooler": + if not hasattr(models[0].language_model, 'pooler'): + raise RuntimeError("ERROR: got a pooler, but model does not have one") + print("received pooler") + pooler_weight = msg.pop("weight") + pooler_bias = msg.pop("bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].language_model.pooler.dense.weight.data.copy_(pooler_weight) + models[tp_rank].language_model.pooler.dense.bias.data.copy_(pooler_bias) + del pooler_weight + del pooler_bias + check_message(msg) + msg = queue_get() + + if msg != "done" and msg["name"] == "lm head": + if not hasattr(models[0], 'lm_head'): + raise RuntimeError("ERROR: got an lm head, but model does not have one") + print("received lm head") + lm_head_dense_weight = msg.pop("dense weight") + lm_head_dense_bias = msg.pop("dense bias") + lm_head_norm_weight = msg.pop("norm weight") + if md.norm_has_bias: + lm_head_norm_bias = 
msg.pop("norm bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].lm_head.dense.weight.data.copy_(lm_head_dense_weight) + models[tp_rank].lm_head.dense.bias.data.copy_(lm_head_dense_bias) + models[tp_rank].lm_head.norm.weight.data.copy_(lm_head_norm_weight) + if md.norm_has_bias: + models[tp_rank].lm_head.norm.bias.data.copy_(lm_head_norm_bias) + check_message(msg) + msg = queue_get() + + if msg != "done" and msg["name"] == "binary head": + if not hasattr(models[0], 'binary_head'): + raise RuntimeError("ERROR: got a binary head, but model does not have one") + print("received binary head") + binary_head_weight = msg.pop("weight") + binary_head_bias = msg.pop("bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].binary_head.weight.data.copy_(binary_head_weight) + models[tp_rank].binary_head.bias.data.copy_(binary_head_bias) + check_message(msg) + msg = queue_get() + + if msg != "done": + print("ERROR: got some more data but was expecting to be done") + + for tp_rank in range(args.target_tensor_parallel_size): + mpu.set_tensor_model_parallel_rank(tp_rank) + save_huggingface_bloom(args, models[tp_rank], md) diff --git a/debug/weight_convert/convert_ckpt.py b/debug/weight_convert/convert_ckpt.py new file mode 100644 index 0000000000..b88a73c361 --- /dev/null +++ b/debug/weight_convert/convert_ckpt.py @@ -0,0 +1,90 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
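+# This script drives Megatron -> Hugging Face weight conversion: a loader
+# process (load_utils) reads the Megatron checkpoint and streams weights over
+# a multiprocessing queue to a model-specific saver (llama, bloom or gptneox),
+# which writes the Hugging Face checkpoint and can optionally repack the .bin
+# shards into safetensors.
+# Example invocation (paths here are hypothetical placeholders):
+#   python convert_ckpt.py -m llama -i ./megatron_ckpt -o ./hf_output \
+#       --target-tensor-parallel-size 1 --target-pipeline-parallel-size 1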
+import os +import sys +import argparse +import importlib +import torch.multiprocessing as mp + + +def check_and_convert_weight(args): + import torch + from transformers import AutoModelForCausalLM + try: + output_mg2hg_path = os.path.join(args.output_model_dir, 'mg2hg') + hf_model = AutoModelForCausalLM.from_pretrained( + output_mg2hg_path, device_map="cpu", torch_dtype=torch.float16) + hf_model.save_pretrained(output_mg2hg_path, safe_serialization=True) + except ModuleNotFoundError as e: + print('failed to convert bin 2 safetensors') + raise exc from e + + +def load_model(model_name): + module_name = f"{model_name}" + try: + converter = importlib.import_module(module_name) + except ModuleNotFoundError as e: + raise exc from e + return converter + + +def main(): + parser = argparse.ArgumentParser( + description="convert weight to huggingface format") + + parser.add_argument('-m', '--model', type=str, required=True, + choices=['llama', 'bloom', 'gptneox'], + help='Type of the model') + parser.add_argument('-i', '--input-model-dir', type=str, required=True, + help='Directory to load model checkpoint from') + parser.add_argument('-o', '--output-model-dir', type=str, required=True, + help='Directory to save model checkpoint to') + parser.add_argument('--no-checking', action='store_false', + help='Do not perform checking on the name and ordering of weights', + dest='checking') + parser.add_argument('--convert-to-safetensors', action='store_false', + help='convert .bin to safetensors') + + known_args, _ = parser.parse_known_args() + loader = importlib.import_module('load_utils') + saver = load_model(known_args.model) + + loader.add_arguments(parser) + saver.add_arguments(parser) + + args = parser.parse_args() + + queue = mp.Queue(maxsize=50) + + print("Starting saver...") + saver_proc = mp.Process( + target=saver.save_model_checkpoint, args=(queue, args)) + saver_proc.start() + + print("Starting loader...") + loader.load_checkpoint(queue, args) + + print("Waiting for saver to complete...") + saver_proc.join() + + if args.convert_to_safetensors: + print("converting .bin to safetensors...") + check_and_convert_weight(args) + + print("Done!") + + +if __name__ == '__main__': + main() diff --git a/debug/weight_convert/diff.patch b/debug/weight_convert/diff.patch new file mode 100644 index 0000000000..134890cee4 --- /dev/null +++ b/debug/weight_convert/diff.patch @@ -0,0 +1,76 @@ +diff --git a/modellink/__init__.py b/modellink/__init__.py +index ceae8c4..837386e 100644 +--- a/modellink/__init__.py ++++ b/modellink/__init__.py +@@ -22,7 +22,7 @@ except Exception as e: + logging.warning("Warning: You did not install torch_npu") + + +-from .model_adaptor import exe_adaptor ++# from .model_adaptor import exe_adaptor + + +-exe_adaptor() ++# exe_adaptor() +diff --git a/modellink/model_adaptor.py b/modellink/model_adaptor.py +index 7c1bda1..7376b98 100644 +--- a/modellink/model_adaptor.py ++++ b/modellink/model_adaptor.py +@@ -26,32 +26,32 @@ from .utils import ALL_MODULE_WRAPPER_CLASSNAMES + from .checkpointing import _load_base_checkpoint_wrapper, load_checkpoint_wrapper + + +-def exe_adaptor(): +- import megatron +- megatron.utils.ALL_MODULE_WRAPPER_CLASSNAMES = ALL_MODULE_WRAPPER_CLASSNAMES +- megatron.initialize.parse_args = parse_args_decorator(megatron.initialize.parse_args) +- megatron.arguments.parse_args = parse_args_decorator(megatron.arguments.parse_args) +- megatron.global_vars.build_tokenizer = build_tokenizer ++# def exe_adaptor(): ++# import megatron ++# 
megatron.utils.ALL_MODULE_WRAPPER_CLASSNAMES = ALL_MODULE_WRAPPER_CLASSNAMES ++# megatron.initialize.parse_args = parse_args_decorator(megatron.initialize.parse_args) ++# megatron.arguments.parse_args = parse_args_decorator(megatron.arguments.parse_args) ++# megatron.global_vars.build_tokenizer = build_tokenizer + +- import megatron.training +- megatron.training.get_model = get_model_wrapper(megatron.training.get_model) +- megatron.training.build_pretraining_data_loader = build_pretraining_data_loader ++# import megatron.training ++# megatron.training.get_model = get_model_wrapper(megatron.training.get_model) ++# megatron.training.build_pretraining_data_loader = build_pretraining_data_loader + +- megatron.model.GPTModel = GPTModel +- megatron.model.transformer.SwitchMLP = SwitchMLP +- megatron.model.transformer.ParallelTransformer.__init__ = parallel_transformer_init +- megatron.model.transformer.ParallelTransformer.state_dict_for_save_checkpoint \ +- = state_dict_for_save_checkpoint_wrapper( +- megatron.model.transformer.ParallelTransformer.state_dict_for_save_checkpoint) +- megatron.model.language_model.TransformerLanguageModel.forward = (seq_length_wrapper( +- megatron.model.language_model.TransformerLanguageModel.forward)) ++# megatron.model.GPTModel = GPTModel ++# megatron.model.transformer.SwitchMLP = SwitchMLP ++# megatron.model.transformer.ParallelTransformer.__init__ = parallel_transformer_init ++# megatron.model.transformer.ParallelTransformer.state_dict_for_save_checkpoint \ ++# = state_dict_for_save_checkpoint_wrapper( ++# megatron.model.transformer.ParallelTransformer.state_dict_for_save_checkpoint) ++# megatron.model.language_model.TransformerLanguageModel.forward = (seq_length_wrapper( ++# megatron.model.language_model.TransformerLanguageModel.forward)) + +- megatron.core.tensor_parallel.layers.VocabParallelEmbedding.forward = vocab_embedding_wrapper( +- megatron.core.tensor_parallel.layers.VocabParallelEmbedding.forward) +- megatron.core.tensor_parallel.layers.VocabParallelEmbedding.__init__ = norm_wrapper( +- megatron.core.tensor_parallel.layers.VocabParallelEmbedding.__init__) ++# megatron.core.tensor_parallel.layers.VocabParallelEmbedding.forward = vocab_embedding_wrapper( ++# megatron.core.tensor_parallel.layers.VocabParallelEmbedding.forward) ++# megatron.core.tensor_parallel.layers.VocabParallelEmbedding.__init__ = norm_wrapper( ++# megatron.core.tensor_parallel.layers.VocabParallelEmbedding.__init__) + +- megatron.checkpointing._load_base_checkpoint = _load_base_checkpoint_wrapper( +- megatron.checkpointing._load_base_checkpoint) +- megatron.training.load_checkpoint = load_checkpoint_wrapper( +- megatron.checkpointing.load_checkpoint) ++# megatron.checkpointing._load_base_checkpoint = _load_base_checkpoint_wrapper( ++# megatron.checkpointing._load_base_checkpoint) ++# megatron.training.load_checkpoint = load_checkpoint_wrapper( ++# megatron.checkpointing.load_checkpoint) diff --git a/debug/weight_convert/llama.py b/debug/weight_convert/llama.py new file mode 100644 index 0000000000..0ae2173c6c --- /dev/null +++ b/debug/weight_convert/llama.py @@ -0,0 +1,560 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +from collections.abc import Mapping +import concurrent.futures +import os +import gc +import sys +import shutil +import torch +import torch_npu + + +def add_arguments(parser): + group = parser.add_argument_group(title='Megatron saver') + group.add_argument('--target-tensor-parallel-size', type=int, + help='Target tensor model parallel size, defaults to the tensor parallel size ' + 'in the input checkpoint if provided by the loader, otherwise to 1') + group.add_argument('--target-pipeline-parallel-size', type=int, + help='Target tensor model parallel size, default to the pipeline parall size ' + 'in the input checkpoint if provided by the loader, otherwise to 1') + group.add_argument("--w-pack", type=bool, + help='True is w_pack weight for llm', + default=False) + + +def save_huggingface_llama(args, model, model_args): + hf2mg_map = {} + for name_param_m in model.named_parameters(): + layer_num = name_param_m[0].split(".")[3] if len( + name_param_m[0].split(".")) > 3 else name_param_m[0].split(".")[1] + nh = model_args.num_attention_heads + ng = ( + model_args.checkpoint_args.num_query_groups + if model_args.checkpoint_args.group_query_attention + else model_args.num_attention_heads + ) + repeats = nh // ng + if name_param_m[0] == "language_model.embedding.word_embeddings.weight": + hf2mg_map["model.embed_tokens.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.post_attention_norm.weight": + hf2mg_map[f"model.layers.{layer_num}.post_attention_layernorm.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.input_norm.weight": + hf2mg_map[f"model.layers.{layer_num}.input_layernorm.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.post_attention_norm.weight": + hf2mg_map[f"model.layers.{layer_num}.post_attention_layernorm.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.query_key_value.weight": + qkv_weight = name_param_m[1].reshape( + ng, + repeats + 2, + name_param_m[1].shape[0] // ng // (repeats + 2), + name_param_m[1].shape[1], + ) + w = qkv_weight.shape[-1] + qw = qkv_weight[:, :repeats, ...].reshape(-1, w) + kw = qkv_weight[:, repeats: repeats + 1, ...].reshape(-1, w) + vw = qkv_weight[:, repeats + 1:, ...].reshape(-1, w) + if args.w_pack: + qkv = torch.cat((qw, kw, vw), dim=0) + hf2mg_map[f"model.layers.{layer_num}.self_attn.W_pack.weight"] = qkv + else: + hf2mg_map[f"model.layers.{layer_num}.self_attn.q_proj.weight"] = qw + hf2mg_map[f"model.layers.{layer_num}.self_attn.k_proj.weight"] = kw + hf2mg_map[f"model.layers.{layer_num}.self_attn.v_proj.weight"] = vw + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.query_key_value.bias": + bias_weight = name_param_m[1].reshape( + ng, repeats + + 2, name_param_m[1].shape[0] // ng // (repeats + 2) + ) + w = bias_weight.shape[-1] + qw = bias_weight[:, :repeats, ...].reshape(-1) + kw = bias_weight[:, repeats: repeats + 1, ...].reshape(-1) + vw = 
bias_weight[:, repeats + 1:, ...].reshape(-1) + hf2mg_map[f"model.layers.{layer_num}.self_attn.q_proj.bias"] = qw + hf2mg_map[f"model.layers.{layer_num}.self_attn.k_proj.bias"] = kw + hf2mg_map[f"model.layers.{layer_num}.self_attn.v_proj.bias"] = vw + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.dense.bias": + hf2mg_map[f"model.layers.{layer_num}.self_attn.dense.bias"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.self_attention.dense.weight": + hf2mg_map[f"model.layers.{layer_num}.self_attn.o_proj.weight"] = name_param_m[1] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_h_to_4h.weight": + proj_read_h_half = name_param_m[1].shape[0] // 2 + hf2mg_map[f"model.layers.{layer_num}.mlp.gate_proj.weight"] = name_param_m[1][:proj_read_h_half, ...] + hf2mg_map[f"model.layers.{layer_num}.mlp.up_proj.weight"] = name_param_m[1][proj_read_h_half:, ...] + continue + if name_param_m[0] == f"language_model.encoder.layers.{layer_num}.mlp.dense_4h_to_h.weight": + hf2mg_map[f"model.layers.{layer_num}.mlp.down_proj.weight"] = name_param_m[1] + continue + if name_param_m[0] == "language_model.encoder.final_norm.weight": + hf2mg_map[f"model.norm.weight"] = name_param_m[1] + continue + if name_param_m[0] == "language_model.output_layer.weight": + hf2mg_map[f"lm_head.weight"] = name_param_m[1] + continue + + gc.collect() + file_format = "pytorch_model-{:05d}-of-{:05d}.bin" + file_list = os.listdir(args.output_model_dir) + output_mg2hg_path = os.path.join(args.output_model_dir, 'mg2hg') + os.makedirs(output_mg2hg_path, exist_ok=True) + for filename in file_list: + if filename.startswith("pytorch_model-") and filename.endswith(".bin"): + try: + start_index = len("pytorch_model-") + end_index = filename.index("-of-") + index1 = int(filename[start_index:end_index]) + index2 = int(filename[end_index + len("-of-"): -len(".bin")]) + if 0 <= index1 <= index2 <= 99999: + formatted_filename = file_format.format(index1, index2) + file_path = os.path.join( + args.output_model_dir, formatted_filename) + hf_model = torch.load( + file_path, map_location=torch.device('cpu')) + for key in hf_model.keys(): + if key in hf2mg_map.keys(): + hf_model[key].data.copy_(hf2mg_map[key]) + output_file_path = os.path.join( + output_mg2hg_path, formatted_filename) + print(f'save weight to {output_file_path}') + torch.save(hf_model, output_file_path) + else: + print("Invalid file format:", filename) + except ValueError: + print("Unable to save file:", filename) + elif (filename.endswith('.json') or filename.endswith('.mode')) and 'safetensors' not in filename: + source_file = os.path.join(args.output_model_dir, filename) + destination_file = os.path.join(output_mg2hg_path, filename) + shutil.copyfile(source_file, destination_file) + + +def save_model_checkpoint(queue, args): + from pretrain_gpt import model_provider + from ascendspeed import megatron_adaptor + from megatron.arguments import validate_args + from modellink.utils import parse_args + from megatron.checkpointing import save_checkpoint + from megatron.global_vars import set_global_variables, get_args + from megatron.core.enums import ModelType + from megatron.tokenizer.tokenizer import _vocab_size_with_padding + from megatron import fused_kernels + from megatron.core import mpu + + def queue_get(name=None): + val = queue.get() + if val == "exit": + raise RuntimeError('Loader exited, exiting saver') + if name is not None and args.checking and 
val["name"] != name: + val_name = val["name"] + raise RuntimeError(f'Unexpected message. Expecting "{name}" but got "{val_name}". Exiting saver.') + if name is not None: + print(f"received {name}") + return val + + def check_message(msg): + if not args.checking: + return + msg_name = msg.pop("name") + if len(msg.keys()) > 0: + print(f"Unexpected values in {msg_name}:") + for key in msg.keys(): + print(f" {key}") + raise RuntimeError(f"Exiting. If you want to ignore this, use the argument --no-checking.") + + md = queue_get() + + if args.target_tensor_parallel_size is None: + if hasattr(md, 'previous_tensor_parallel_size'): + args.target_tensor_parallel_size = md.previous_tensor_parallel_size + else: + print("loader did not provide a tensor parallel size and --target-tensor-parallel-size not provided on command line. " + "Default to 1.") + args.target_tensor_parallel_size = 1 + + if args.target_pipeline_parallel_size is None: + if hasattr(md, 'previous_pipeline_parallel_size'): + args.target_pipeline_parallel_size = md.previous_pipeline_parallel_size + else: + print("loader did not provide a pipeline parallel size and --target-pipeline-parallel-size not provided on command line. " + "Default to 1.") + args.target_pipeline_parallel_size = 1 + + # Arguments do sanity checks on the world size, but we don't care, + # so trick it into thinking we are plenty of processes + if args.target_tensor_parallel_size is not None and args.target_pipeline_parallel_size is not None: + os.environ["WORLD_SIZE"] = f'{args.target_tensor_parallel_size * args.target_pipeline_parallel_size}' + + # We want all arguments to come from us + sys.argv = ['script.py', + '--num-layers', str(md.num_layers), + '--hidden-size', str(md.hidden_size), + '--seq-length', str(md.seq_length), + '--num-attention-heads', str(md.num_attention_heads), + '--max-position-embeddings', str(md.max_position_embeddings), + '--position-embedding-type', str(md.position_embedding_type), + '--tokenizer-type', str(md.tokenizer_type), + '--tensor-model-parallel-size', str( + args.target_tensor_parallel_size), + '--pipeline-model-parallel-size', str( + args.target_pipeline_parallel_size), + '--no-masked-softmax-fusion', + '--no-bias-gelu-fusion', + '--no-bias-dropout-fusion', + '--no-async-tensor-model-parallel-allreduce', + '--use-cpu-initialization', + '--micro-batch-size', '1', + '--no-load-optim', + '--no-load-rng', + '--no-save-optim', + '--no-save-rng', + '--no-initialization', + '--save-interval', '1', + '--save', args.output_model_dir, + '--fp16' + ] + + if md.make_vocab_size_divisible_by is not None: + sys.argv.extend(['--make-vocab-size-divisible-by', + str(md.make_vocab_size_divisible_by)]) + if md.output_layer: + sys.argv.append('--untie-embeddings-and-output-weights') + if not md.linear_bias: + sys.argv.append('--disable-bias-linear') + + margs = parse_args() + margs.w_pack = args.w_pack + + if hasattr(md, 'checkpoint_args'): + # These are arguments that we are either changing, or cause problems for validation if they are set + # Note that some of these deal with T5 so will need to be changed if we support T5. 
+ args_to_keep = ['tensor_model_parallel_size', 'pipeline_model_parallel_size', 'world_size', 'params_dtype', + 'num_layers_per_virtual_pipeline_stage', 'virtual_pipeline_model_parallel_size', + 'masked_softmax_fusion', 'bias_gelu_fusion', 'bias_dropout_fusion', + 'sequence_parallel', 'async_tensor_model_parallel_allreduce', + 'no_load_optim', 'no_load_rng', 'no_save_optim', 'no_save_rng', + 'vocab_file', 'tokenizer_model', + 'save_interval', 'save', + 'perform_initialization', 'use_cpu_initialization', + 'recompute_granularity', 'recompute_num_layers', 'recompute_method', + 'encoder_num_layers', 'encoder_seq_length', + 'distribute_saved_activations', + 'train_iters', 'lr_decay_iters', 'lr_warmup_iters', 'lr_warmup_fraction', + 'start_weight_decay', 'end_weight_decay'] + + for arg, value in vars(md.checkpoint_args).items(): + if arg in args_to_keep: + continue + if not hasattr(margs, arg): + print( + f"Checkpoint had argument {arg} but new arguments does not have this.") + continue + if getattr(margs, arg) != value: + print( + f"Overwriting default {arg} value {getattr(margs, arg)} with value from checkpoint {value}.") + setattr(margs, arg, value) + + validate_args(margs) + + set_global_variables(margs, build_tokenizer=False) + + # margs means megatron args + margs = get_args() + + margs.model_type = ModelType.encoder_or_decoder + + if hasattr(md, 'consumed_train_samples'): + margs.consumed_train_samples = md.consumed_train_samples + margs.consumed_valid_samples = md.consumed_valid_samples + print(f"Setting consumed_train_samples to {margs.consumed_train_samples}" + f" and consumed_valid_samples to {margs.consumed_valid_samples}") + else: + print("consumed_train_samples not provided.") + + def get_models(count, dtype, pre_process, post_process): + models = [model_provider(pre_process, post_process).to( + dtype) for _ in range(count)] + return models + + # fake initializing distributed + mpu.set_tensor_model_parallel_world_size(args.target_tensor_parallel_size) + mpu.set_pipeline_model_parallel_world_size( + args.target_pipeline_parallel_size) + mpu.set_tensor_model_parallel_rank(0) + mpu.set_pipeline_model_parallel_rank(0) + # Embeddings + # ----------- + embeddings_msg = queue_get("embeddings") + + pos_embed = None + if md.position_embedding_type == 'learned_absolute': + pos_embed = embeddings_msg.pop("position embeddings") + orig_word_embed = embeddings_msg.pop("word embeddings") + orig_word_embed_n_w, orig_word_embed_n_b = None, None + if "word embeddings norm_w" in embeddings_msg and "word embeddings norm_b" in embeddings_msg: + orig_word_embed_n_w = embeddings_msg.pop("word embeddings norm_w") + orig_word_embed_n_b = embeddings_msg.pop("word embeddings norm_b") + check_message(embeddings_msg) + + # Deal with padding + if md.true_vocab_size is not None: + # figure out what our padded vocab size is + orig_vocab_size = orig_word_embed.shape[0] + margs.padded_vocab_size = _vocab_size_with_padding( + md.true_vocab_size, margs) + + # Cut out extra padding we don't need + if orig_vocab_size > margs.padded_vocab_size: + full_word_embed = orig_word_embed[0:margs.padded_vocab_size, :] + + # Expanding embedding to larger size by replicating final entry + elif orig_vocab_size < margs.padded_vocab_size: + padding_size = margs.padded_vocab_size - orig_vocab_size + + full_word_embed = torch.cat(( + orig_word_embed, + orig_word_embed[-1].unsqueeze(0).expand(padding_size, -1))) + + # Same size! 
+ else: + full_word_embed = orig_word_embed + else: + print("Original vocab size not specified, leaving embedding table as-is. " + "If you've changed the tensor parallel size this could cause problems.") + margs.padded_vocab_size = orig_word_embed.shape[0] + full_word_embed = orig_word_embed + + # Split into new tensor model parallel sizes + out_word_embed = torch.chunk( + full_word_embed, args.target_tensor_parallel_size, dim=0) + + # Make models for first pipeline stage and fill in embeddings + mpu.set_pipeline_model_parallel_rank(0) + post_process = args.target_pipeline_parallel_size == 1 + models = get_models(args.target_tensor_parallel_size, + md.params_dtype, True, post_process) + for tp_rank, model in enumerate(models): + model.language_model.embedding.word_embeddings.weight.data.copy_( + out_word_embed[tp_rank]) + if orig_word_embed_n_w is not None: + model.language_model.embedding.word_embeddings.norm.weight.data.copy_( + orig_word_embed_n_w) + model.language_model.embedding.word_embeddings.norm.bias.data.copy_( + orig_word_embed_n_b) + if pos_embed is not None: + model.language_model.embedding.position_embeddings.weight.data.copy_( + pos_embed) + else: + if hasattr(model.language_model.embedding, 'position_embeddings'): + raise ValueError("model should have position_embeddings") + + # Transformer layers + # ------------------- + total_layer_num = 0 + for pp_rank in range(args.target_pipeline_parallel_size): + # For later pipeline parallel ranks, make the new models + if pp_rank > 0: + mpu.set_pipeline_model_parallel_rank(pp_rank) + post_process = pp_rank == args.target_pipeline_parallel_size - 1 + models = get_models(args.target_tensor_parallel_size, + md.params_dtype, False, post_process) + + encoder_layer_num = len(models[0].language_model.encoder.layers) + for layer in range(encoder_layer_num): + msg = queue_get(f"transformer layer {total_layer_num}") + + # duplicated tensors + input_norm_weight = msg.pop("input norm weight") + if md.norm_has_bias: + input_norm_bias = msg.pop("input norm bias") + post_norm_weight = msg.pop("post norm weight") + if md.norm_has_bias: + post_norm_bias = msg.pop("post norm bias") + if md.linear_bias: + dense_bias = msg.pop("dense bias") + mlp_l1_bias = msg.pop("mlp l1 bias") + + if args.add_qkv_bias: + qkv_bias = torch.chunk( + msg.pop("qkv bias"), args.target_tensor_parallel_size, dim=0) + if args.add_dense_bias: + dense_bias = msg.pop("dense bias") + + qkv_org = msg.pop("qkv weight") + qkv_weight = torch.chunk( + qkv_org, args.target_tensor_parallel_size, dim=0) + + # Split up the parallel tensors + dense_weight = torch.chunk( + msg.pop("dense weight"), args.target_tensor_parallel_size, dim=1) + mlp_l1_weight = torch.chunk( + msg.pop("mlp l1 weight"), args.target_tensor_parallel_size, dim=1) + + # Special handling for swiglu + if md.swiglu: + mlp_l0_weight_W = torch.chunk( + msg.pop("mlp l0 weight W"), args.target_tensor_parallel_size, dim=0) + mlp_l0_weight_V = torch.chunk( + msg.pop("mlp l0 weight V"), args.target_tensor_parallel_size, dim=0) + mlp_l0_weight = [] + for weights in zip(mlp_l0_weight_W, mlp_l0_weight_V): + mlp_l0_weight.append(torch.cat(weights, dim=0)) + else: + mlp_l0_weight = torch.chunk( + msg.pop("mlp l0 weight"), args.target_tensor_parallel_size, dim=0) + + if md.linear_bias: + qkv_bias = torch.chunk( + msg.pop("qkv bias"), args.target_tensor_parallel_size, dim=0) + if md.swiglu: + mlp_l0_bias_W = torch.chunk( + msg.pop("mlp l0 bias W"), args.target_tensor_parallel_size, dim=0) + mlp_l0_bias_V = torch.chunk( + msg.pop("mlp 
l0 bias V"), args.target_tensor_parallel_size, dim=0) + mlp_l0_weight = [] + for weights in zip(mlp_l0_weight_W, mlp_l0_weight_V): + mlp_l0_weight.append(torch.cat(weights, dim=0)) + else: + mlp_l0_bias = torch.chunk( + msg.pop("mlp l0 bias"), args.target_tensor_parallel_size, dim=0) + + # Save them to the model + for tp_rank in range(args.target_tensor_parallel_size): + layer_encoder = models[tp_rank].language_model.encoder.layers[layer] + layer_encoder.input_norm.weight.data.copy_(input_norm_weight) + if md.norm_has_bias: + layer_encoder.input_norm.bias.data.copy_(input_norm_bias) + layer_encoder.self_attention.query_key_value.weight.data.copy_( + qkv_weight[tp_rank]) + layer_encoder.self_attention.dense.weight.data.copy_(dense_weight[tp_rank]) + layer_encoder.post_attention_norm.weight.data.copy_(post_norm_weight) + if md.norm_has_bias: + layer_encoder.post_attention_norm.bias.data.copy_(post_norm_bias) + layer_encoder.mlp.dense_h_to_4h.weight.data.copy_(mlp_l0_weight[tp_rank]) + layer_encoder.mlp.dense_4h_to_h.weight.data.copy_(mlp_l1_weight[tp_rank]) + if md.linear_bias: + layer_encoder.self_attention.query_key_value.bias.data.copy_( + qkv_bias[tp_rank]) + layer_encoder.self_attention.dense.bias.data.copy_(dense_bias) + layer_encoder.mlp.dense_h_to_4h.bias.data.copy_(mlp_l0_bias[tp_rank]) + layer_encoder.mlp.dense_4h_to_h.bias.data.copy_(mlp_l1_bias) + if args.add_qkv_bias: + layer_encoder.self_attention.query_key_value.bias.data.copy_( + qkv_bias[tp_rank]) + if args.add_dense_bias: + layer_encoder.self_attention.dense.bias.data.copy_(dense_bias) + + total_layer_num = total_layer_num + 1 + check_message(msg) + + if post_process: + msg = queue_get("final norm") + final_norm_weight = msg.pop("weight") + if md.norm_has_bias: + final_norm_bias = msg.pop("bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].language_model.encoder.final_norm.weight.data.copy_( + final_norm_weight) + if md.norm_has_bias: + models[tp_rank].language_model.encoder.final_norm.bias.data.copy_( + final_norm_bias) + if pp_rank != 0 and not md.output_layer: + # Copy word embeddings to final pipeline rank + models[tp_rank].word_embeddings.weight.data.copy_( + out_word_embed[tp_rank]) + del final_norm_weight + if md.norm_has_bias: + del final_norm_bias + check_message(msg) + + if md.output_layer: + msg = queue_get("output layer") + if not hasattr(models[0].language_model, 'output_layer'): + raise AttributeError( + "ERROR: got an output layer, but model does not have one") + output_layer_weight = torch.chunk( + msg.pop("weight"), args.target_tensor_parallel_size, dim=0) + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].language_model.output_layer.weight.data.copy_( + output_layer_weight[tp_rank]) + del output_layer_weight + check_message(msg) + + msg = queue_get() + if msg != "done" and msg["name"] == "pooler": + if not hasattr(models[0].language_model, 'pooler'): + raise AttributeError( + "ERROR: got a pooler, but model does not have one") + print("received pooler") + pooler_weight = msg.pop("weight") + pooler_bias = msg.pop("bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].language_model.pooler.dense.weight.data.copy_( + pooler_weight) + models[tp_rank].language_model.pooler.dense.bias.data.copy_( + pooler_bias) + del pooler_weight + del pooler_bias + check_message(msg) + msg = queue_get() + + if msg != "done" and msg["name"] == "lm head": + if not hasattr(models[0], 'lm_head'): + raise RuntimeError("ERROR: got an lm head, but 
model does not have one") + print("received lm head") + lm_head_dense_weight = msg.pop("dense weight") + lm_head_dense_bias = msg.pop("dense bias") + lm_head_norm_weight = msg.pop("norm weight") + if md.norm_has_bias: + lm_head_norm_bias = msg.pop("norm bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].lm_head.dense.weight.data.copy_( + lm_head_dense_weight) + models[tp_rank].lm_head.dense.bias.data.copy_( + lm_head_dense_bias) + models[tp_rank].lm_head.norm.weight.data.copy_( + lm_head_norm_weight) + if md.norm_has_bias: + models[tp_rank].lm_head.norm.bias.data.copy_( + lm_head_norm_bias) + check_message(msg) + msg = queue_get() + + if msg != "done" and msg["name"] == "binary head": + if not hasattr(models[0], 'binary_head'): + raise RuntimeError("ERROR: got a binary head, but model does not have one") + print("received binary head") + binary_head_weight = msg.pop("weight") + binary_head_bias = msg.pop("bias") + for tp_rank in range(args.target_tensor_parallel_size): + models[tp_rank].binary_head.weight.data.copy_( + binary_head_weight) + models[tp_rank].binary_head.bias.data.copy_( + binary_head_bias) + check_message(msg) + msg = queue_get() + + if msg != "done": + print("ERROR: got some more data but was expecting to be done") + + for tp_rank in range(args.target_tensor_parallel_size): + mpu.set_tensor_model_parallel_rank(tp_rank) + save_huggingface_llama(args, models[tp_rank], md) diff --git a/debug/weight_convert/load_utils.py b/debug/weight_convert/load_utils.py new file mode 100644 index 0000000000..a041d61cff --- /dev/null +++ b/debug/weight_convert/load_utils.py @@ -0,0 +1,371 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import types +import argparse +import importlib +import torch +import torch.multiprocessing as mp + + +def add_arguments(parser): + group = parser.add_argument_group(title='Megatron loader') + + group.add_argument('--true-vocab-size', type=int, default=None, + help='original size of vocab, if specified will trim padding from embedding table.') + group.add_argument('--vocab-file', type=str, default=None, + help='Path to the vocab file. 
If specified will use this to get vocab size and ' + 'trim padding from the embedding table.') + parser.add_argument('--add-qkv-bias', action='store_true', + help='Add bias for attention qkv', default=False, + ) + parser.add_argument('--add-dense-bias', action='store_true', + help='Add bias for attention dense', default=False, + ) + parser.add_argument('--embed-layernorm', action='store_true', + help='Add embed layernorm for word embedding', default=False, + ) + parser.add_argument('--params-dtype', type=str, + help='Set weight dtype', default='fp16', + ) + + +def _load_checkpoint(queue, args): + + # Search in directory above this + sys.path.append(os.path.abspath( + os.path.join(os.path.dirname(__file__), + os.path.pardir))) + try: + from pretrain_gpt import model_provider + from ascendspeed import megatron_adaptor + from megatron.arguments import validate_args + from modellink.utils import parse_args + from megatron.global_vars import set_args, set_global_variables + from megatron.checkpointing import load_args_from_checkpoint + from megatron.checkpointing import load_checkpoint as load_checkpoint_mg + from megatron.model import module + from megatron.core import mpu + from megatron.core.enums import ModelType + except ModuleNotFoundError: + print("Unable to import Megatron, please specify the path to Megatron using --megatron-path. Exiting.") + queue.put("exit") + if args.input_model_dir: + print(f">>>{args.input_model_dir}") + else: + print("NO") + # We want all arguments to come from us + sys.argv = ['script.py', + '--no-masked-softmax-fusion', + '--no-bias-gelu-fusion', + '--no-bias-dropout-fusion', + '--no-async-tensor-model-parallel-allreduce', + '--use-cpu-initialization', + '--micro-batch-size', '1', + '--no-load-optim', + '--no-load-rng', + '--no-save-optim', + '--no-save-rng', + '--no-initialization', + '--load', args.input_model_dir + ] + margs = parse_args() + margs.embed_layernorm = args.embed_layernorm + margs, checkpoint_args = load_args_from_checkpoint(margs) + margs.add_qkv_bias = args.add_qkv_bias + margs.add_dense_bias = args.add_dense_bias + margs.fp16 = True + if args.add_dense_bias: + margs.skip_bias_add = False + + # Arguments do sanity checks on the world size, but we don't care, + # so trick it into thinking we are plenty of processes + margs.world_size = margs.tensor_model_parallel_size * \ + margs.pipeline_model_parallel_size + + margs = validate_args(margs) + + def check_for_arg(arg_name, default=None): + if getattr(margs, arg_name, None) is None: + if default is not None: + setattr(margs, arg_name, default) + else: + print( + f"Checkpoint does not specify the argument {arg_name}. 
Exiting.") + print(f"Arguments: {margs}") + queue.put("exit") + + check_for_arg('tensor_model_parallel_size') + check_for_arg('pipeline_model_parallel_size') + check_for_arg('num_layers') + check_for_arg('hidden_size') + check_for_arg('seq_length') + check_for_arg('num_attention_heads') + check_for_arg('max_position_embeddings') + check_for_arg('position_embedding_type') + check_for_arg('tokenizer_type') + check_for_arg('iteration') + check_for_arg('bert_binary_head') + check_for_arg('disable_bias_linear', False) + check_for_arg('params_dtype') + check_for_arg('swiglu', False) + + margs.model_type = ModelType.encoder_or_decoder + # supress warning about torch.distributed not being initialized + module.MegatronModule.embedding_warning_printed = True + + consumed_train_samples = None + consumed_valid_samples = None + + def get_models(count, dtype): + nonlocal consumed_train_samples + nonlocal consumed_valid_samples + model_array_len = margs.virtual_pipeline_model_parallel_size + if model_array_len is None: + model_array_len = 1 + models = [[] for _ in range(model_array_len)] + pre_process = mpu.is_pipeline_first_stage() + post_process = mpu.is_pipeline_last_stage() + for rank in range(count): + mpu.set_tensor_model_parallel_rank(rank) + if margs.virtual_pipeline_model_parallel_size is not None: + model_ = [] + for i in range(margs.virtual_pipeline_model_parallel_size): + mpu.set_virtual_pipeline_model_parallel_rank(i) + # Set pre_process and post_process only after virtual rank is set. + pre_process = mpu.is_pipeline_first_stage() + post_process = mpu.is_pipeline_last_stage() + this_model = model_provider( + pre_process=pre_process, + post_process=post_process + ).to(dtype) + model_.append(this_model) + else: + pre_process = mpu.is_pipeline_first_stage() + post_process = mpu.is_pipeline_last_stage() + model_rank = 0 + model_ = [model_provider(pre_process, post_process).to(dtype)] + margs.consumed_train_samples = 0 + margs.consumed_valid_samples = 0 + load_checkpoint_mg(model_, None, None) + + if consumed_train_samples is not None: + if margs.consumed_train_samples != consumed_train_samples: + return None + else: + consumed_train_samples = margs.consumed_train_samples + if consumed_valid_samples is not None: + if margs.consumed_valid_samples != consumed_valid_samples: + return None + else: + consumed_valid_samples = margs.consumed_valid_samples + for vp_rank in range(model_array_len): + models[vp_rank].append(model_[vp_rank]) + return models + + set_global_variables(margs, build_tokenizer=False) + mpu.set_tensor_model_parallel_world_size(margs.tensor_model_parallel_size) + mpu.set_pipeline_model_parallel_world_size( + margs.pipeline_model_parallel_size) + mpu.set_virtual_pipeline_model_parallel_world_size( + margs.virtual_pipeline_model_parallel_size) + + # Get true (non-padded) vocab size + if args.true_vocab_size is not None: + true_vocab_size = args.true_vocab_size + elif args.vocab_file is not None: + vb_file = open(args.vocab_file) + vocab = json.load(vb_file) + true_vocab_size = len(vocab) + if args.true_vocab_size is not None and true_vocab_size != args.true_vocab_size: + print( + "Both --true-vocab-size and --vocab-file specified and the vocab size does not match, aborting.") + queue.put("exit") + vb_file.close() + else: + true_vocab_size = None + + # short aliases + tp_size = margs.tensor_model_parallel_size + pp_size = margs.pipeline_model_parallel_size + vp_size = margs.virtual_pipeline_model_parallel_size + if vp_size is None: + vp_size = 1 + + # Layernorm has bias; RMSNorm does 
not. + if hasattr(checkpoint_args, 'normalization'): + norm_has_bias = checkpoint_args.normalization == "LayerNorm" + else: + # older models only supported LayerNorm + norm_has_bias = True + + # metadata + md = types.SimpleNamespace() + md.model_type = 'GPT' + md.num_layers = margs.num_layers + md.hidden_size = margs.hidden_size + md.seq_length = margs.seq_length + md.num_attention_heads = margs.num_attention_heads + md.max_position_embeddings = margs.max_position_embeddings + md.tokenizer_type = margs.tokenizer_type + md.iteration = margs.iteration + md.params_dtype = margs.params_dtype + md.bert_binary_head = margs.bert_binary_head + md.output_layer = margs.untie_embeddings_and_output_weights + md.position_embedding_type = margs.position_embedding_type + md.linear_bias = margs.add_bias_linear + md.norm_has_bias = norm_has_bias + md.swiglu = margs.swiglu + md.previous_tensor_parallel_size = margs.tensor_model_parallel_size + md.previous_pipeline_parallel_size = margs.pipeline_model_parallel_size + md.true_vocab_size = true_vocab_size + md.make_vocab_size_divisible_by = margs.make_vocab_size_divisible_by + md.checkpoint_args = checkpoint_args + md.embed_layernorm = margs.embed_layernorm + + # Get first pipe stage + mpu.set_pipeline_model_parallel_rank(0) + all_models = [get_models(tp_size, md.params_dtype)] + models = all_models[0][0] + + md.consumed_train_samples = consumed_train_samples + md.consumed_valid_samples = consumed_valid_samples + queue.put(md) + + def queue_put(name, msg): + print(f"sending {name}") + msg["name"] = name + queue.put(msg) + + # Send embeddings + message_word_embedding = [] + for tp_rank in range(tp_size): + message_word_embedding.append(models[tp_rank].language_model.embedding.word_embeddings.weight.data) + message = {"word embeddings": torch.cat(message_word_embedding, dim=0)} + if md.position_embedding_type == 'learned_absolute': + message["position embeddings"] = models[0].language_model.embedding.position_embeddings.weight.data + if md.embed_layernorm: + message["word embeddings norm_w"] = models[0].language_model.embedding.word_embeddings.norm.weight.data + message["word embeddings norm_b"] = models[0].language_model.embedding.word_embeddings.norm.bias.data + queue_put("embeddings", message) + + total_layer_num = 0 + for vp_rank in range(vp_size): + mpu.set_virtual_pipeline_model_parallel_rank(vp_rank) + for pp_rank in range(pp_size): + if pp_rank > 0: + mpu.set_pipeline_model_parallel_rank(pp_rank) + if vp_rank == 0: + all_models.append(get_models(tp_size, md.params_dtype)) + models = all_models[pp_rank][vp_rank] + for layer_num, _ in enumerate(models[0].language_model.encoder.layers): + message = {} + + # Get non-parallel tensors from tp_rank 0 + layer = models[0].language_model.encoder.layers[layer_num] + message["input norm weight"] = layer.input_norm.weight.data + if norm_has_bias: + message["input norm bias"] = layer.input_norm.bias.data + message["post norm weight"] = layer.post_attention_norm.weight.data + if norm_has_bias: + message["post norm bias"] = layer.post_attention_norm.bias.data + if md.linear_bias: + message["dense bias"] = layer.self_attention.dense.bias.data + message["mlp l1 bias"] = layer.mlp.dense_4h_to_h.bias.data + if args.add_dense_bias: + message["dense bias"] = layer.self_attention.dense.bias.data + + # Grab all parallel tensors for this layer + qkv_weight = [] + qkv_bias = [] + dense_weight = [] + mlp_l0_weight = [] + mlp_l0_bias = [] + mlp_l1_weight = [] + for tp_rank, model in enumerate(models): + layer = 
model.language_model.encoder.layers[layer_num] + qkv_weight.append( + layer.self_attention.query_key_value.weight.data) + dense_weight.append(layer.self_attention.dense.weight.data) + mlp_l0_weight.append(layer.mlp.dense_h_to_4h.weight.data) + mlp_l1_weight.append(layer.mlp.dense_4h_to_h.weight.data) + if md.linear_bias: + qkv_bias.append( + layer.self_attention.query_key_value.bias.data) + mlp_l0_bias.append(layer.mlp.dense_h_to_4h.bias.data) + if args.add_qkv_bias: + qkv_bias.append( + layer.self_attention.query_key_value.bias.data) + + # Handle gated linear units + if md.swiglu: + # concat all the first halves ('W's) and all the second halves ('V's) + for tp_rank in range(tp_size): + mlp_l0_weight[tp_rank] = torch.chunk( + mlp_l0_weight[tp_rank], 2, dim=0) + message["mlp l0 weight W"] = torch.cat( + [w[0] for w in mlp_l0_weight], dim=0) + message["mlp l0 weight V"] = torch.cat( + [w[1] for w in mlp_l0_weight], dim=0) + else: + message["mlp l0 weight"] = torch.cat(mlp_l0_weight, dim=0) + + # simple concat of the rest + message["qkv weight"] = torch.cat(qkv_weight, dim=0) + message["dense weight"] = torch.cat(dense_weight, dim=1) + message["mlp l1 weight"] = torch.cat(mlp_l1_weight, dim=1) + if md.linear_bias: + message["qkv bias"] = torch.cat(qkv_bias, dim=0) + if md.swiglu: + for tp_rank in range(tp_size): + mlp_l0_bias[tp_rank] = torch.chunk( + mlp_l0_bias[tp_rank], 2, dim=0) + message["mlp l0 bias W"] = torch.cat( + [b[0] for b in mlp_l0_bias], dim=0) + message["mlp l0 bias V"] = torch.cat( + [b[1] for b in mlp_l0_bias], dim=0) + else: + message["mlp l0 bias"] = torch.cat(mlp_l0_bias, dim=0) + if args.add_qkv_bias: + message["qkv bias"] = torch.cat(qkv_bias, dim=0) + + queue_put(f"transformer layer {total_layer_num}", message) + + total_layer_num = total_layer_num + 1 + + # Send final norm from tp_rank 0 + message = { + "weight": models[0].language_model.encoder.final_norm.weight.data, + } + if norm_has_bias: + message["bias"] = models[0].language_model.encoder.final_norm.bias.data + queue_put("final norm", message) + + if md.output_layer: + message_weight = [] + for tp_rank in range(tp_size): + message_weight.append(models[tp_rank].language_model.output_layer.weight.data) + message = {"weight": torch.cat(message_weight, dim=0)} + queue_put("output layer", message) + + queue.put("done") + + +def load_checkpoint(queue, args): + try: + _load_checkpoint(queue, args) + except: + queue.put("exit") + raise -- Gitee From d0bebbe66a494edcd6fade2fb6c8076c4c7a0b65 Mon Sep 17 00:00:00 2001 From: jiangchangting1 Date: Tue, 2 Apr 2024 07:57:06 +0000 Subject: [PATCH 18/28] update debug/accuracy_tools/api_accuracy_checker/test/resources/forward.json. 
Signed-off-by: jiangchangting1 --- .../api_accuracy_checker/test/resources/forward.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/resources/forward.json b/debug/accuracy_tools/api_accuracy_checker/test/resources/forward.json index 5f54e077bf..f938f35246 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/resources/forward.json +++ b/debug/accuracy_tools/api_accuracy_checker/test/resources/forward.json @@ -1,3 +1,3 @@ { - "Functional*silu*0": {"args": [{"type": "torch.Tensor", "dtype": "torch.float32", "shape": [2, 2560, 24, 24], "Max": 5.7421875, "Min": -5.125, "requires_grad": true}], "kwargs" :{"inplace": {"type": "bool", "value": false}}} + "Functional*silu*0": {"args": [{"type": "torch.Tensor", "dtype": "torch.float32", "shape": [2, 2560, 24, 24], "Max": 5.7421875, "Max_origin": 5.7421875, "Min": -5.125, "Min_origin": -5.125, "requires_grad": true}], "kwargs" :{"inplace": {"type": "bool", "value": false}}} } \ No newline at end of file -- Gitee From 091804d1746c14a71af3c93df79f97a6cf0499ba Mon Sep 17 00:00:00 2001 From: jiangchangting1 Date: Tue, 2 Apr 2024 07:58:06 +0000 Subject: [PATCH 19/28] update debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py. Signed-off-by: jiangchangting1 --- .../test/ut/dump/test_api_info.py | 47 ++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py index 8951d5523a..ca2787b1f1 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py @@ -2,6 +2,7 @@ import os import shutil import unittest import torch +import numpy as np from api_accuracy_checker.dump.api_info import APIInfo, ForwardAPIInfo, BackwardAPIInfo, transfer_types, \ get_tensor_extremum, get_type_name, is_builtin_class, analyze_device_in_kwargs, analyze_dtype_in_kwargs from api_accuracy_checker.common.config import msCheckerConfig @@ -55,10 +56,52 @@ class TestAPIInfo(unittest.TestCase): def test_get_tensor_extremum(self): data = torch.tensor([1, 2, 3]) - result_max = get_tensor_extremum(data, 'max') - result_min = get_tensor_extremum(data, 'min') + result_max, result_max_origin = get_tensor_extremum(data, 'max') + result_min, result_min_origin = get_tensor_extremum(data, 'min') self.assertEqual(result_max, 3) self.assertEqual(result_min, 1) + self.assertEqual(result_max_origin, 3) + self.assertEqual(result_min_origin, 1) + + data = torch.tensor([1, float("inf"), 2, 3]) + result_max, result_max_origin = get_tensor_extremum(data, 'max') + result_min, result_min_origin = get_tensor_extremum(data, 'min') + self.assertEqual(result_max, 3) + self.assertEqual(result_min, 1) + self.assertEqual(result_max_origin, float("inf")) + self.assertEqual(result_min_origin, 1) + + data = torch.tensor([1, float("-inf"), 2, 3]) + result_max, result_max_origin = get_tensor_extremum(data, 'max') + result_min, result_min_origin = get_tensor_extremum(data, 'min') + self.assertEqual(result_max, 3) + self.assertEqual(result_min, 1) + self.assertEqual(result_max_origin, 3) + self.assertEqual(result_min_origin, float("-inf")) + + data = torch.tensor([1, float("inf"), float("nan"), 3]) + result_max, result_max_origin = get_tensor_extremum(data, 'max') + result_min, result_min_origin = get_tensor_extremum(data, 'min') + self.assertEqual(result_max, 3) + 
self.assertEqual(result_min, 1) + self.assertEqual(np.isnan(result_max_origin), True) + self.assertEqual(np.isnan(result_min_origin), True) + + data = torch.tensor([float("inf"), float("nan")]) + result_max, result_max_origin = get_tensor_extremum(data, 'max') + result_min, result_min_origin = get_tensor_extremum(data, 'min') + self.assertEqual(result_max, float("inf")) + self.assertEqual(result_min, float("inf")) + self.assertEqual(np.isnan(result_max_origin), True) + self.assertEqual(np.isnan(result_min_origin), True) + + data = torch.tensor([float("nan"), float("nan")]) + result_max, result_max_origin = get_tensor_extremum(data, 'max') + result_min, result_min_origin = get_tensor_extremum(data, 'min') + self.assertEqual(np.isnan(result_max), True) + self.assertEqual(np.isnan(result_min), True) + self.assertEqual(np.isnan(result_max_origin), True) + self.assertEqual(np.isnan(result_min_origin), True) def test_get_type_name(self): name = "" -- Gitee From 72451589dfb00b978bb451fe870a7d93073c40f0 Mon Sep 17 00:00:00 2001 From: jiangchangting1 Date: Tue, 2 Apr 2024 07:59:07 +0000 Subject: [PATCH 20/28] update debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py. Signed-off-by: jiangchangting1 --- .../test/ut/run_ut/test_data_generate.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py index 3f79ecf17b..b98f84d516 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py @@ -73,9 +73,12 @@ class TestDataGenerateMethods(unittest.TestCase): def test_gen_common_tensor(self): info = api_info_dict.get('args')[0] low, high = info.get('Min'), info.get('Max') + low_origin, high_origin = info.get('Min_origin'), info.get('Max_origin') + low_info = [low, low_origin] + high_info = [high, high_origin] data_dtype = info.get('dtype') shape = tuple(info.get('shape')) - data = gen_common_tensor(low, high, shape, data_dtype, None) + data = gen_common_tensor(low_info, high_info, shape, data_dtype, None) max_diff = abs(data.max() - max_value) min_diff = abs(data.min() - min_value) self.assertEqual(data.dtype, torch.float32) -- Gitee From 27a298ede36bebef5d513168a4a0e9bc1cbdafb7 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 16:14:10 +0800 Subject: [PATCH 21/28] fix --- .../accuracy_tools/api_accuracy_checker/run_ut/data_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index e32927527f..b082f6d593 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -165,7 +165,7 @@ def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): if tensor.nelement() == 0: return tensor tmp_tensor = tensor.reshape(-1) - if math.isnan(high_origin): + if high_origin and math.isnan(high_origin): if tmp_tensor.numel() <= 2: tmp_tensor[0] = float('nan') tmp_tensor[-1] = high -- Gitee From d9ed15d43b8007105e1fa9c916825e83aa69a04c Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 16:19:12 +0800 Subject: [PATCH 22/28] fix --- .../test/ut/dump/test_api_info.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 
deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py index ca2787b1f1..b779fdee4a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py @@ -84,24 +84,24 @@ class TestAPIInfo(unittest.TestCase): result_min, result_min_origin = get_tensor_extremum(data, 'min') self.assertEqual(result_max, 3) self.assertEqual(result_min, 1) - self.assertEqual(np.isnan(result_max_origin), True) - self.assertEqual(np.isnan(result_min_origin), True) + self.assertEqual(bool(np.isnan(result_max_origin)), True) + self.assertEqual(bool(np.isnan(result_min_origin)), True) data = torch.tensor([float("inf"), float("nan")]) result_max, result_max_origin = get_tensor_extremum(data, 'max') result_min, result_min_origin = get_tensor_extremum(data, 'min') self.assertEqual(result_max, float("inf")) self.assertEqual(result_min, float("inf")) - self.assertEqual(np.isnan(result_max_origin), True) - self.assertEqual(np.isnan(result_min_origin), True) + self.assertEqual(bool(np.isnan(result_max_origin)), True) + self.assertEqual(bool(np.isnan(result_min_origin)), True) data = torch.tensor([float("nan"), float("nan")]) result_max, result_max_origin = get_tensor_extremum(data, 'max') result_min, result_min_origin = get_tensor_extremum(data, 'min') - self.assertEqual(np.isnan(result_max), True) - self.assertEqual(np.isnan(result_min), True) - self.assertEqual(np.isnan(result_max_origin), True) - self.assertEqual(np.isnan(result_min_origin), True) + self.assertEqual(bool(np.isnan(result_max)), True) + self.assertEqual(bool(np.isnan(result_min)), True) + self.assertEqual(bool(np.isnan(result_max_origin)), True) + self.assertEqual(bool(np.isnan(result_min_origin)), True) def test_get_type_name(self): name = "" -- Gitee From 29f46390676691a37aa5264215f29a50fabb3d03 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 17:38:55 +0800 Subject: [PATCH 23/28] fix --- .../test/ut/dump/test_api_info.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py index b779fdee4a..2c03d56e72 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py @@ -84,24 +84,24 @@ class TestAPIInfo(unittest.TestCase): result_min, result_min_origin = get_tensor_extremum(data, 'min') self.assertEqual(result_max, 3) self.assertEqual(result_min, 1) - self.assertEqual(bool(np.isnan(result_max_origin)), True) - self.assertEqual(bool(np.isnan(result_min_origin)), True) + self.assertTrue(np.isnan(result_max_origin)) + self.assertTrue(np.isnan(result_min_origin)) data = torch.tensor([float("inf"), float("nan")]) result_max, result_max_origin = get_tensor_extremum(data, 'max') result_min, result_min_origin = get_tensor_extremum(data, 'min') self.assertEqual(result_max, float("inf")) self.assertEqual(result_min, float("inf")) - self.assertEqual(bool(np.isnan(result_max_origin)), True) - self.assertEqual(bool(np.isnan(result_min_origin)), True) + self.assertTrue(np.isnan(result_max_origin)) + self.assertTrue(np.isnan(result_min_origin)) data = torch.tensor([float("nan"), float("nan")]) result_max, result_max_origin = get_tensor_extremum(data, 'max') result_min, 
result_min_origin = get_tensor_extremum(data, 'min') - self.assertEqual(bool(np.isnan(result_max)), True) - self.assertEqual(bool(np.isnan(result_min)), True) - self.assertEqual(bool(np.isnan(result_max_origin)), True) - self.assertEqual(bool(np.isnan(result_min_origin)), True) + self.assertTrue(np.isnan(result_max)) + self.assertTrue(np.isnan(result_min)) + self.assertTrue(np.isnan(result_max_origin)) + self.assertTrue(np.isnan(result_min_origin)) def test_get_type_name(self): name = "" -- Gitee From 7bec9dbc20be4ff351cb4c0d34f1d2fd25b30a77 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 2 Apr 2024 19:02:31 +0800 Subject: [PATCH 24/28] fix --- .../api_accuracy_checker/run_ut/data_generate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index b082f6d593..8c52d62040 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -136,9 +136,9 @@ def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): ori_dtype = Const.CONVERT.get(convert_type)[0] if ori_dtype == data_dtype: data_dtype = Const.CONVERT.get(convert_type)[1] + low, low_origin = low_info[0], low_info[1] + high, high_origin = high_info[0], high_info[1] if data_dtype in FLOAT_TYPE: - low, low_origin = low_info[0], low_info[1] - high, high_origin = high_info[0], high_info[1] if math.isnan(high): tensor = torch._C._VariableFunctionsClass.full(shape, float('nan'), dtype=eval(data_dtype)) return tensor -- Gitee From 4d083603d742585bcff82804b43973fc942c321c Mon Sep 17 00:00:00 2001 From: gitee Date: Wed, 3 Apr 2024 09:16:08 +0800 Subject: [PATCH 25/28] fix --- .../accuracy_tools/api_accuracy_checker/run_ut/data_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 8c52d62040..9ff8f5ce01 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -155,7 +155,7 @@ def gen_common_tensor(low_info, high_info, shape, data_dtype, convert_type): scale = high_scale - low_scale rand01 = torch.rand(shape, dtype=eval(data_dtype)) - tensor = rand01 * scale + low + tensor = rand01 * scale + low_scale elif 'int' in data_dtype or 'long' in data_dtype: low, high = int(low), int(high) tensor = torch.randint(low, high + 1, shape, dtype=eval(data_dtype)) -- Gitee From b8559ff2185e02c9d53a26520b9775fc3eecc4d4 Mon Sep 17 00:00:00 2001 From: gitee Date: Wed, 3 Apr 2024 09:18:40 +0800 Subject: [PATCH 26/28] fix --- debug/accuracy_tools/api_accuracy_checker/compare/compare.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index 64d5dde312..cc7c2c83b7 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -168,7 +168,8 @@ class Comparator: else: bwd_success_status, bwd_compare_alg_results = compare_func(api_name, bench_grad, npu_grad) self.record_results(full_api_name, fwd_success_status, bwd_success_status if bwd_compare_alg_results is not None else CompareConst.SPACE, fwd_compare_alg_results, 
bwd_compare_alg_results) - return fwd_success_status == CompareConst.PASS, bwd_success_status == CompareConst.PASS or bwd_success_status == CompareConst.SPACE + return fwd_success_status == CompareConst.PASS, bwd_success_status == CompareConst.PASS \ + or bwd_success_status == CompareConst.SPACE def _compare_core_wrapper(self, api_name, bench_output, device_output): detailed_result_total = [] -- Gitee From 9d42abcf108de019323d89df0c643986ea62801d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E5=AE=81=E6=82=A6?= Date: Wed, 3 Apr 2024 02:35:59 +0000 Subject: [PATCH 27/28] update OWNERS. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 马宁悦 --- OWNERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/OWNERS b/OWNERS index d609a18f96..6ad393ee68 100644 --- a/OWNERS +++ b/OWNERS @@ -11,6 +11,7 @@ approvers: - ly-qianxiao - blian - kun_8 +- binghamhuang reviewers: - leo920320 - wo-wenjie @@ -39,4 +40,5 @@ reviewers: - machj - zhengweifeng6 - gong-siwei -- uniteone \ No newline at end of file +- uniteone +- binghamhuang \ No newline at end of file -- Gitee From 37cb1b6c13bcc5bdd95a2f0178bcdf3c5c628235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E8=8B=A5=E7=A6=B9?= Date: Wed, 3 Apr 2024 17:19:49 +0800 Subject: [PATCH 28/28] =?UTF-8?q?ULP=E8=AF=AF=E5=B7=AE=E6=9A=82=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_accuracy_checker/compare/algorithm.py | 18 ++++++++++++++++++ .../compare/api_precision_compare.py | 14 +++++++++++++- .../compare/api_precision_standard.yaml | 5 +++++ .../api_accuracy_checker/compare/compare.py | 14 ++++++++++++-- .../compare/compare_column.py | 13 ++++++++++--- .../compare/compare_utils.py | 8 +++++++- 6 files changed, 65 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py index 9dd204f5bc..f024a0b12d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py @@ -188,3 +188,21 @@ def check_norm_value(normal_value_mask, rel_err, rtol): err_mask = np.logical_and(err_mask, normal_value_mask) err_cnt = np.sum(err_mask) return 0 if np.sum(normal_value_mask) == 0 else err_cnt / np.sum(normal_value_mask) + +def get_ULP_parameter(dtype): + if dtype == torch.float32: + return -126, 23 + if dtype == torch.float16: + return -14, 10 + if dtype == torch.bfloat16: + return -126, 7 + +def get_ULP_err(bench_output, device_output, dtype): + min_eb, exponent = get_ULP_parameter(dtype) + abs_bench = np.abs(bench_output) + eb = np.where(abs_bench == 0, 0, np.floor(np.log2(abs_bench))) + eb = np.maximum(eb, min_eb) + + ulp_err = (device_output.astype(np.float64) - bench_output).astype(np.float64) * np.exp2(-eb + exponent) + ulp_err = np.abs(ulp_err) + return ulp_err \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_compare.py index 8c98130ab6..bb96fc56fa 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_compare.py @@ -42,6 +42,10 @@ benchmark_algorithms_thresholds = { 'eb' : { 'error_threshold' : 2, 'warning_threshold' : 1 + }, + 'ULP' : { + 'error_threshold' : 2, + 'warning_threshold' : 1 } } @@ -76,11 
+80,13 @@ class BenchmarkStandard: self.max_rel_err_ratio = 1 self.mean_rel_err_ratio = 1 self.eb_ratio = 1 + self.ULP_err_ratio = 1 self.small_value_err_status = CompareConst.PASS self.rmse_status = CompareConst.PASS self.max_rel_err_status = CompareConst.PASS self.mean_rel_err_status = CompareConst.PASS self.eb_status = CompareConst.PASS + self.ULP_err_status = CompareConst.PASS self.check_result_list = [] self.final_result = CompareConst.PASS @@ -97,6 +103,8 @@ class BenchmarkStandard: self.check_result_list.append(self.max_rel_err_status) self.mean_rel_err_status = self._get_status(self.mean_rel_err_ratio, 'mean_rel_err') self.check_result_list.append(self.mean_rel_err_status) + self.ULP_err_status = self._get_status(self.ULP_err_ratio, 'ULP') + self.check_result_list.append(self.ULP_err_status) self.eb_status = self._get_status(self.eb_ratio, 'eb') if CompareConst.ERROR in self.check_result_list: self.final_result = CompareConst.ERROR @@ -115,11 +123,13 @@ class BenchmarkStandard: self.gpu_precision.get(ApiPrecisionCompareColumn.MEAN_REL_ERR)) self.eb_ratio = self._calc_ratio(self.npu_precision.get(ApiPrecisionCompareColumn.EB), self.gpu_precision.get(ApiPrecisionCompareColumn.EB)) + self.ULP_err_ratio = self._calc_ratio(self.npu_precision.get(ApiPrecisionCompareColumn.ULP_ERR_RATIO), + self.gpu_precision.get(ApiPrecisionCompareColumn.ULP_ERR_RATIO)) def to_column_value(self): return [self.small_value_err_ratio, self.small_value_err_status, self.rmse_ratio, self.rmse_status, self.max_rel_err_ratio, self.max_rel_err_status, self.mean_rel_err_ratio, - self.mean_rel_err_status, self.eb_ratio, self.eb_status] + self.mean_rel_err_status, self.eb_ratio, self.eb_status, self.ULP_err_ratio, self.ULP_err_status] @staticmethod def _get_status(ratio, algorithm): @@ -334,6 +344,8 @@ def record_benchmark_compare_result(compare_column, bs): compare_column.mean_rel_err_status = bs.mean_rel_err_status compare_column.eb_ratio = bs.eb_ratio compare_column.eb_status = bs.eb_status + compare_column.ULP_err_ratio = bs.ULP_err_ratio + compare_column.ULP_err_ratio_status = bs.ULP_err_status compare_column.compare_result = bs.final_result compare_column.compare_algorithm = "标杆比对法" message = '' diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_standard.yaml b/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_standard.yaml index ceccf65a46..f7b22def26 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_standard.yaml +++ b/debug/accuracy_tools/api_accuracy_checker/compare/api_precision_standard.yaml @@ -105,3 +105,8 @@ BinaryCompareStandard: - tril_ - triu - triu_ + + +ULPStandard: + - matmul + - __matmul__ \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index cc7c2c83b7..2b82ca5b01 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -7,12 +7,13 @@ from rich.table import Table from rich.console import Console from api_accuracy_checker.common.utils import get_json_contents, write_csv from api_accuracy_checker.compare.compare_utils import CompareConst, check_dtype_comparable, DETAIL_TEST_ROWS, \ - precision_configs, BENCHMARK_COMPARE_SUPPORT_LIST, AbsoluteStandardApi, BinaryStandardApi, apis_threshold + precision_configs, BENCHMARK_COMPARE_SUPPORT_LIST, AbsoluteStandardApi, BinaryStandardApi, ULPStandardApi, \ + apis_threshold from 
api_accuracy_checker.compare.compare_column import CompareColumn from api_accuracy_checker.compare.algorithm import get_rmse, get_error_balance, get_max_rel_err, get_mean_rel_err, \ get_rel_err, get_abs_err, get_max_abs_err, get_rel_err_ratio, cosine_sim, get_rel_err_origin, \ get_small_value_err_ratio, get_finite_and_infinite_mask, get_small_value_mask, check_inf_nan_value, \ - check_small_value, check_norm_value, get_abs_bench_with_eps + check_small_value, check_norm_value, get_abs_bench_with_eps, get_ULP_err from api_accuracy_checker.common.config import msCheckerConfig from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen @@ -271,6 +272,15 @@ class Comparator: if api_name in BinaryStandardApi: err_rate, _, _ = self._compare_bool_tensor(bench_output, device_output) compare_column.error_rate = err_rate + elif api_name in ULPStandardApi: + ulp_err = get_ULP_err(bench_output, device_output, dtype) + compare_column.Max_ULP_error = np.max(ulp_err) + compare_column.Min_ULP_error = np.min(ulp_err) + compare_column.Mean_ULP_error = np.mean(ulp_err) + if dtype == torch.float32: + compare_column.ULP_error_ratio = float(np.sum(ulp_err > 32) / bench_output.size) + else: + compare_column.ULP_error_ratio = float(np.sum(ulp_err > 1) / bench_output.size) elif api_name in AbsoluteStandardApi: small_value_threshold, small_value_atol, rtol = self._get_absolute_threshold_attribute( api_name, str(dtype)) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare_column.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare_column.py index 961fce6811..7913f60730 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare_column.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare_column.py @@ -20,12 +20,17 @@ class CompareColumn: self.inf_nan_error_ratio = CompareConst.SPACE self.rel_err_ratio = CompareConst.SPACE self.abs_err_ratio = CompareConst.SPACE + self.Max_ULP_error = CompareConst.SPACE + self.Min_ULP_error = CompareConst.SPACE + self.Mean_ULP_error = CompareConst.SPACE + self.ULP_error_ratio = CompareConst.SPACE def to_column_value(self, is_pass, message): return [self.bench_type, self.npu_type, self.shape, self.cosine_sim, self.max_abs_err, self.rel_err_hundredth, self.rel_err_thousandth, self.rel_err_ten_thousandth, self.error_rate, self.EB, self.RMSE, self.small_value_err_ratio, self.Max_rel_error, self.Mean_rel_error, self.inf_nan_error_ratio, - self.rel_err_ratio, self.abs_err_ratio, is_pass, message] + self.rel_err_ratio, self.abs_err_ratio, self.Max_ULP_error, self.Min_ULP_error, self.Mean_ULP_error, + self.ULP_error_ratio, is_pass, message] class ApiPrecisionOutputColumn: @@ -49,6 +54,8 @@ class ApiPrecisionOutputColumn: self.abs_err_ratio_status = CompareConst.SPACE self.error_rate = CompareConst.SPACE self.error_rate_status = CompareConst.SPACE + self.ULP_err_ratio = CompareConst.SPACE + self.ULP_err_ratio_status = CompareConst.SPACE self.compare_result = CompareConst.SPACE self.compare_algorithm = CompareConst.SPACE self.compare_message = CompareConst.SPACE @@ -58,6 +65,6 @@ class ApiPrecisionOutputColumn: self.rmse_status, self.max_rel_err_ratio, self.max_rel_err_status, self.mean_rel_err_ratio, self.mean_rel_err_status, self.eb_ratio, self.eb_status, self.inf_nan_error_ratio, self.inf_nan_error_ratio_status, self.rel_err_ratio, self.rel_err_ratio_status, self.abs_err_ratio, - self.abs_err_ratio_status, self.error_rate, self.error_rate_status, self.compare_result, - self.compare_algorithm, self.compare_message] + 
self.abs_err_ratio_status, self.error_rate, self.error_rate_status, self.ULP_err_ratio, + self.ULP_err_ratio_status, self.compare_result, self.compare_algorithm, self.compare_message] \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py index d711265cc7..8411707f6f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py @@ -21,6 +21,7 @@ with FileOpen(standard_yaml_path, 'r') as f: Apis = yaml.safe_load(f) AbsoluteStandardApi = Apis.get('AbsoluteThreshStandard') BinaryStandardApi = Apis.get('BinaryCompareStandard') + ULPStandardApi = Apis.get('ULPStandard') threshold_yaml_path = os.path.join(cur_path, "api_precision_threshold.yaml") @@ -118,6 +119,9 @@ class ApiPrecisionCompareColumn: REL_ERR_RATIO_STATUS = '相对误差判定结果' ABS_ERR_RATIO = '绝对误差错误率' ABS_ERR_RATIO_STATUS = '绝对误差判定结果' + MEAN_ULP_ERR = 'ULP误差平均值' + ULP_ERR_RATIO = 'ULP误差大于阈值占比比值' + ULP_ERR_RATIO_STATUS = 'ULP误差判定结果' FINAL_RESULT = '比对结果' ALGORITHM = '比对算法' FORWWARD_STATUS = 'Forward Test Success' @@ -130,7 +134,8 @@ class ApiPrecisionCompareColumn: ApiPrecisionCompareColumn.SMALL_VALUE_ERROR_RATE, ApiPrecisionCompareColumn.RMSE, ApiPrecisionCompareColumn.MAX_REL_ERR, ApiPrecisionCompareColumn.MEAN_REL_ERR, ApiPrecisionCompareColumn.EB, ApiPrecisionCompareColumn.ERROR_RATE, ApiPrecisionCompareColumn.INF_NAN_ERROR_RATIO, - ApiPrecisionCompareColumn.REL_ERR_RATIO, ApiPrecisionCompareColumn.ABS_ERR_RATIO] + ApiPrecisionCompareColumn.REL_ERR_RATIO, ApiPrecisionCompareColumn.ABS_ERR_RATIO, + ApiPrecisionCompareColumn.MEAN_REL_ERR, ApiPrecisionCompareColumn.ULP_ERR_RATIO] @staticmethod def get_detail_csv_title(): @@ -144,6 +149,7 @@ class ApiPrecisionCompareColumn: ApiPrecisionCompareColumn.REL_ERR_RATIO, ApiPrecisionCompareColumn.REL_ERR_RATIO_STATUS, ApiPrecisionCompareColumn.ABS_ERR_RATIO, ApiPrecisionCompareColumn.ABS_ERR_RATIO_STATUS, ApiPrecisionCompareColumn.ERROR_RATE, ApiPrecisionCompareColumn.ERROR_RATE_STATUS, + ApiPrecisionCompareColumn.ULP_ERR_RATIO, ApiPrecisionCompareColumn.ULP_ERR_RATIO_STATUS, ApiPrecisionCompareColumn.FINAL_RESULT, ApiPrecisionCompareColumn.ALGORITHM, ApiPrecisionCompareColumn.MESSAGE] @staticmethod -- Gitee
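
The random-tensor fix in the data_generate.py patch above (shifting by `low_scale` instead of `low`) is easiest to see in isolation. The sketch below is illustrative only and not the repository code: `uniform_in_range` and its arguments are made-up names, and it assumes the intent is that `torch.rand` samples in [0, 1) be mapped onto the converted dtype's [low_scale, high_scale) interval.

```python
import torch

# Illustrative only: names here are not the repository's. The point is that
# uniform samples in [0, 1) must be shifted by the same lower bound that was
# used to build the scale, i.e. low_scale, not the pre-conversion low.
def uniform_in_range(shape, low_scale, high_scale, dtype=torch.float32):
    rand01 = torch.rand(shape, dtype=dtype)           # samples in [0, 1)
    return rand01 * (high_scale - low_scale) + low_scale

t = uniform_in_range((1000,), low_scale=-2.0, high_scale=3.0)
assert float(t.min()) >= -2.0 and float(t.max()) < 3.0   # covers [-2, 3)
```

Scaling by `high_scale - low_scale` but shifting by the original `low` would offset every sample by `low - low_scale`, which is exactly the bias the patch removes.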
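
The ULP comparison introduced in the last patch can likewise be exercised on its own. The following is a minimal NumPy-only sketch of the same metric, not the api_accuracy_checker implementation: the (minimum normal exponent, mantissa bits) pairs mirror `get_ULP_parameter`, and the 32-ULP (float32) / 1-ULP (float16, bfloat16) thresholds mirror the ratio computed in `compare.py`; all other names are illustrative.

```python
import numpy as np

# (minimum normal exponent, mantissa bits); bfloat16 shares float32's exponent
# range but keeps only 7 mantissa bits.
ULP_PARAMS = {
    "float32": (-126, 23),
    "float16": (-14, 10),
    "bfloat16": (-126, 7),
}

def ulp_error(bench, device, min_eb, mantissa_bits):
    """Return |device - bench| expressed in ULPs of the benchmark values."""
    bench = bench.astype(np.float64)
    device = device.astype(np.float64)
    abs_bench = np.abs(bench)
    safe = np.where(abs_bench == 0.0, 1.0, abs_bench)          # avoid log2(0)
    eb = np.where(abs_bench == 0.0, 0.0, np.floor(np.log2(safe)))
    eb = np.maximum(eb, min_eb)                                 # clamp subnormals/zero
    # One ULP at exponent eb is 2**(eb - mantissa_bits), so multiplying the
    # absolute error by 2**(mantissa_bits - eb) rescales it into ULP units.
    return np.abs((device - bench) * np.exp2(mantissa_bits - eb))

rng = np.random.default_rng(0)
bench = rng.standard_normal(1000).astype(np.float32)
# Perturb by ~1e-7 relative, i.e. on the order of one float32 ULP.
device = (bench.astype(np.float64) * (1 + 1e-7)).astype(np.float32)
err = ulp_error(bench, device, *ULP_PARAMS["float32"])
print("max ULP error:", err.max())
print("ratio above 32 ULP:", float(np.sum(err > 32)) / err.size)   # float32 threshold
```

Clamping the exponent to the minimum normal exponent keeps zero and subnormal benchmark values from inflating the error, which is why the clamp is applied before the rescaling in both this sketch and the patched `get_ULP_err`.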