diff --git a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/compare_loss.py b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/compare_loss.py index 980822bc2bfe6099ba87bd87a7d9bc0c768bc3d2..c8d61ebc009dd869795d36a25544adc210c671e8 100644 --- a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/compare_loss.py +++ b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/compare_loss.py @@ -18,6 +18,7 @@ import random import transformers import torch import torch_npu +import torch_aie import numpy as np import argparse import time @@ -31,10 +32,9 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--device', default=0, type=int, required=False, help='npu device id') - parser.add_argument('--tokenized_data_path', default='data/tokenized_eval/', type=str, required=False, help='tokenized语料存放位置') - parser.add_argument('--batch_size', default=4, type=int, + parser.add_argument('--batch_size', default=1, type=int, required=False, help='batch size') parser.add_argument('--log_step', default=100, type=int, required=False, help='多少步汇报一次') @@ -52,7 +52,6 @@ def main(): log_step = args.log_step stride = args.stride num_pieces = args.num_pieces - output_dir = args.output_dir n_ctx = args.n_ctx torch.npu.set_device(device_id) @@ -98,7 +97,8 @@ def main(): start = time.time() output = aie_model(inputs_npu) torch.npu.synchronize() - modeltime.append(time.time() - start) + end = time.time() + modeltime.append(end - start) lm_logits = output.cpu() # get loss shift_logits = lm_logits[..., :-1, :].contiguous().float() @@ -113,15 +113,16 @@ def main(): total_steps += 1 if total_steps % log_step == 0: - print('[INFO] Step {} of piece {}, ppl {}'.format( + print('[INFO] Step {} of piece {}, ppl {}, step time {}.'.format( (step + 1), piece_num, - torch.exp(loss))) + torch.exp(loss), + end - start)) piece_num += 1 - print("PPL = {}.".format(np.exp(total_loss.detach().numpy() / total_steps))) print("BatchSize = {}, QPS = {}.".format(batch_size, batch_size * len(modeltime) / sum(modeltime))) + 
print("PPL = {}.".format(np.exp(total_loss.detach().numpy() / total_steps))) if __name__ == '__main__': diff --git a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/pre_data.py b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/pre_data.py index f5efc312deac22933201fb6b3367d50f887f00f5..e8b01620a431d19f9b1d671473caa3d4edf734de 100644 --- a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/pre_data.py +++ b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/pre_data.py @@ -1,4 +1,4 @@ -# Copyright 2021 Huawei Technologies Co., Ltd +# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/readme.md b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/readme.md index ae80aa2ccd860114a6e7659ffd660eccd59cf5cc..8a83fa73b94b8804199c41980ce72160c6e82536 100644 --- a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/readme.md +++ b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/readme.md @@ -31,7 +31,6 @@ GPT-2 模型只使用了多个Masked Self-Attention和Feed Forward Neural Networ ``` url=https://github.com/Morizeyao/GPT2-Chinese commit_id=bbb44651be8361faef35d2a857451d231b5ebe14 - model_name=ACL_PyTorch/built-in/nlp/GPT2_for_Pytorch ``` > 说明:所有脚本都在GPT2的仓下运行 @@ -62,11 +61,12 @@ GPT-2 模型只使用了多个Masked Self-Attention和Feed Forward Neural Networ | 配套 | 版本 | 环境准备指导 | | ------------------------------------------------------------ | ------- | ------------------------------------------------------------ | - | 固件与驱动 | 23.0.RC3 | - | CANN | 7.0.RC1 | - | - | Python | 3.9.0 | - | - | PyTorch | 2.0.1 | - | - + | 固件与驱动 | 23.0.RC3 | - | + | CANN | 7.0.RC1 | - | + | Python | 3.9.0 | - | + | PyTorch | 2.0.1 | - | + | AscendIE | 6.3.RC2 | - | + | Torch AIE | 6.3.RC2 | - | # 快速上手 @@ -81,15 +81,32 @@ GPT-2 模型只使用了多个Masked Self-Attention和Feed Forward Neural Networ git reset --hard bbb44651be8361faef35d2a857451d231b5ebe14 ``` -2. 安装依赖。 +2. 
获取模型checkpoint文件和配置文件 + + 在模型根目录GPT2-Chinese下创建model文件夹。 + + 从[这里](https://pan.baidu.com/s/16x0hfBCekWju75xPeyyRfA#list/path=%2F)下载配置文件,提取码`n3s8`,并把`pytorch_model.bin`放到`model`文件夹下,vocab.txt和config.json文件放到模型根目录GPT2-Chinese下。 + +3. 将bin2pth.py、pre_data.py、compare_loss.py、requirement.txt拷贝到源码根目录下,并安装依赖 ``` pip3 install -r requirement.txt ``` -3. 获取配置文件 - - 从[这里](https://pan.baidu.com/s/16x0hfBCekWju75xPeyyRfA#list/path=%2F)下载配置文件,提取码`n3s8`,并把`pytorch_model.bin`放到`model`下面 +4. 修改源码使模型仅返回lm_logits + 查看transformers的安装路径: + ``` + pip3 show transformers + ``` + 根据Location位置,修改源码第549行: + ``` + vim ${Location}/transformers/modeling_gpt2.py + ``` + + 改为: + ``` + return lm_logits.to(torch.half) + ``` ## 准备数据集 @@ -115,30 +132,33 @@ GPT-2 模型只使用了多个Masked Self-Attention和Feed Forward Neural Networ 使用torch aie将模型权重文件pytorch_model.bin转换为.pt文件。 ``` + export ASCENDIE_FASTER_MODE=1 python3 bin2pth.py --batch_size=1 ``` - 如果环境为第一次运行,可尝试使用aoe进行调优,参考如下: + + 其中设置ASCENDIE_FASTER_MODE=1,是为了使用FastGelu算子,提升性能。 + + 如果环境为第一次运行该模型或者实测性能与下方表格数据差距较大,可尝试使用aoe进行调优,参考如下: ``` python3 bin2pth.py --batch_size=1 --optimization_level=1 python3 bin2pth.py --batch_size=1 --optimization_level=2 - ``` + ``` 2. 
开始推理验证。 ``` + export TORCH_AIE_NPU_CACHE_MAX_SIZE=8 python3 compare_loss.py --batch_size=1 ``` # 模型推理性能&精度 -调用ACL接口推理计算,性能参考下列数据。 - | 芯片型号 | Batch Size | 数据集 | 精度指标(Loss)| 性能 | | :------: | :--------: | :----: | :--: | :--: | -| 310P3 | 1 | wiki_zh_2019 | 16.5 | 149 | -| 310P3 | 4 | wiki_zh_2019 | 16.5 | 188 | -| 310P3 | 8 | wiki_zh_2019 | 16.5 | 189 | -| 310P3 | 16 | wiki_zh_2019 | 16.5 | 189 | -| 310P3 | 32 | wiki_zh_2019 | 16.5 | 185 | -| 310P3 | 64 | wiki_zh_2019 | 16.5 | 181 | +| 310P3 | 1 | wiki_zh_2019 | 15.6 | 125 | +| 310P3 | 4 | wiki_zh_2019 | 15.6 | 128 | +| 310P3 | 8 | wiki_zh_2019 | 15.7 | 116 | +| 310P3 | 16 | wiki_zh_2019 | 15.7 | 123 | +| 310P3 | 32 | wiki_zh_2019 | 15.7 | 119 | +| 310P3 | 64 | wiki_zh_2019 | 15.8 | 115 | > 注:衡量精度的指标为验证集平均交叉熵损失(Cross-Entropy Loss),数值越低越好。 \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/requirement.txt b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/requirement.txt index 92089891c1920a5fa9b3157d714d7f143e164cbd..707991807157c71fba2349f43c34dd6780811e86 100644 --- a/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/requirement.txt +++ b/AscendIE/TorchAIE/built-in/nlp/GPT2_Chinese/requirement.txt @@ -1,2 +1,3 @@ -transformers==2.5.1 -numpy==1.21.6 \ No newline at end of file +transformers==2.1.1 +numpy==1.21.6 +tqdm