From eea9073d1216984d23731dce5222bb2abc1b5621 Mon Sep 17 00:00:00 2001 From: eason_hw <94364678@qq.com> Date: Mon, 11 Apr 2022 18:19:22 +0800 Subject: [PATCH 1/4] my first commit --- .../contrib/nlp/Transformerxl_large/LICENSE | 204 +++++ .../contrib/nlp/Transformerxl_large/README.md | 68 ++ .../nlp/Transformerxl_large/eval_onnx.py | 226 +++++ .../nlp/Transformerxl_large/fix_int64.py | 85 ++ .../nlp/Transformerxl_large/fusion_switch.cfg | 2 + .../nlp/Transformerxl_large/getdata.sh | 18 + .../Transformerxl_large/modelzoo_levle.txt | 0 .../nlp/Transformerxl_large/perf_gpu.sh | 6 + .../nlp/Transformerxl_large/requirements.txt | 0 .../nlp/Transformerxl_large/sample.patch | 833 ++++++++++++++++++ .../nlp/Transformerxl_large/test/env.sh | 7 + .../Transformerxl_large/test/eval_acc_perf.sh | 1 + .../nlp/Transformerxl_large/test/pth2om.sh | 5 + .../transformerxl_large_postprocess.py | 37 + .../transformerxl_large_preprocess.py | 71 ++ .../transformerxl_large_pth2onnx.py | 79 ++ 16 files changed, 1642 insertions(+) create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/LICENSE create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/fusion_switch.cfg create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/getdata.sh create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/modelzoo_levle.txt create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/perf_gpu.sh create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/sample.patch create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/test/env.sh create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/test/eval_acc_perf.sh create mode 100644 
ACL_PyTorch/contrib/nlp/Transformerxl_large/test/pth2om.sh create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py create mode 100644 ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_pth2onnx.py diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/LICENSE b/ACL_PyTorch/contrib/nlp/Transformerxl_large/LICENSE new file mode 100644 index 0000000000..2f452296d7 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/LICENSE @@ -0,0 +1,204 @@ +Copyright 2018-2019 Open-MMLab. All rights reserved. +Copyright 2022 Huawei Technologies Co., Ltd + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md b/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md new file mode 100644 index 0000000000..cfc18258ff --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md @@ -0,0 +1,68 @@ +## 环境准备 +1. 安装必要的依赖 +``` +pip3.7 install -r requirements.txt +``` +2. 获取,修改与安装开源模型代码 +``` +git clone https://github.com/kimiyoung/transformer-xl.git # 下载原模型 +git clone -b TransformerXL_large https://gitee.com/eason-hw/ModelZoo-PyTorch.git # 下载开源仓代码 +cp -r ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/. transformer-xl/pytorch/ # 开源仓代码拷入原模型代码中 +cd transformer-xl/pytorch/ # 切换至工作目录 +patch -p1 < sample.patch # 如有提示"File to patch:" 需输入对应patch的文件名 +``` +3.获取权重文件 +[model.pt]() + +4.获取数据集 +``` +bash getdata.sh # enwik8数据集在data/enwik8下,包含处理好的train.txt,valid.txt和test.txt +``` +5.获取安装msame工具 +(如下为设置环境变量的示例,请将两个环境变量的/usr/local/Ascend/ascend-toolkit/latest替换为Ascend 的ACLlib安装包的实际安装路径。) +``` +git clone https://gitee.com/ascend/tools.git +bash test/env.sh +export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest +export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub +cd tools/msame/ +./build.sh g++ tools/msame/out # 指定编译后的保存地址,最好给完整目录,没有权限的话需要chmod更改build.sh的权限为777 +``` + +## 离线推理 +310上执行时使用npu-smi info 查看设备状态,确保device空闲 +1. pth转om +``` +# 需要两个参数,一个是pth的路径,一个是batch_size +bash test/pth2om.sh ${pth路径} ${batch_size} +``` +2. 
数据集前处理 +``` +python3 transformerxl_large_preprocess.py --batch_size=1 --data=/data/enwik8 +``` +3.使用msame推理 +``` +# 切换至编译后的目录,--model给出om文件完整目录,--input给出输入的bin文件目录,--output给出om推理保存位置 +cd out +./msame --model ${om文件路径} --input ${ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/bin_data,ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/bin_target} --output ${ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/tools/msame/out/} --outfmt TXT +``` +4. 数据集后处理 +``` +# 切换回至工作目录 +cd ${ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/} +bash test/eval_acc_perf.sh ${om_out_path} ${target_bin} # om_out_path路径,target_bin路径 +``` +## 评测结果: +性能 +| Batch Size | A310 Throughput/Card | T4 Throughput/Card| +| ----------- | -------------------- | ---------------- | +| 1 | 6.46 | 44.1 | +| 4 | 23.67 | 49.8 | +| 8 | 41.83 | 51.9 | +| 16 | 54.98 | 49.3 | + +精度 +| A310 | T4 | +| --------- | -------- | +| bpc=5.37 | bpc=5.39 | + diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py new file mode 100644 index 0000000000..89feb646aa --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py @@ -0,0 +1,226 @@ +# coding: utf-8 +import argparse +import time +import math +import os, sys + +import torch +import onnx +import onnxruntime +from data_utils import get_lm_corpus +from mem_transformer import MemTransformerLM +from utils.exp_utils import get_logger + +class ONNXModel(): + def __init__(self, onnx_path): + """ + :param onnx_path: + """ + self.onnx_session = onnxruntime.InferenceSession(onnx_path) + self.input_name = self.get_input_name(self.onnx_session) + self.output_name = self.get_output_name(self.onnx_session) + print("input_name:{}".format(self.input_name)) + print("output_name:{}".format(self.output_name)) + + def get_output_name(self, onnx_session): + """ + output_name = onnx_session.get_outputs()[0].name + :param onnx_session: + :return: + """ + 
output_name = [] + for node in onnx_session.get_outputs(): + output_name.append(node.name) + return output_name + + def get_input_name(self, onnx_session): + """ + input_name = onnx_session.get_inputs()[0].name + :param onnx_session: + :return: + """ + input_name = [] + for node in onnx_session.get_inputs(): + input_name.append(node.name) + return input_name + + def get_input_feed(self, input_name, data, target, mems): + """ + input_feed={self.input_name: image_tensor} + :param input_name: + :param image_tensor: + :return: + """ + input_feed = {} + + if mems == None or mems == []: + + for i, name in enumerate(input_name): + if i == 0: + input_feed[name] = data.numpy() + else: + input_feed[name] = target.numpy() + else: + print('mems in not None') + + return input_feed + + def forward(self, data, target, mems): + ''' + image_tensor = image.transpose(2, 0, 1) + image_tensor = image_tensor[np.newaxis, :] + onnx_session.run([output_name], {input_name: x}) + :param image_tensor: + :return: + ''' + # 输入数据的类型必须与模型一致,以下三种写法都是可以的 + # scores, boxes = self.onnx_session.run(None, {self.input_name: image_tensor}) + # scores, boxes = self.onnx_session.run(self.output_name, input_feed={self.input_name: image_tensor}) + input_feed = self.get_input_feed(self.input_name, data, target, mems) + #print(input_feed['data'].shape, input_feed['target'].shape) + ret = self.onnx_session.run(self.output_name, input_feed=input_feed) + return ret + + +parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model') +parser.add_argument('--data', type=str, default='../data/enwik8', + help='location of the data corpus') +parser.add_argument('--dataset', type=str, default='enwik8', + choices=['wt103', 'lm1b', 'enwik8', 'text8'], + help='dataset name') +parser.add_argument('--split', type=str, default='all', + choices=['all', 'valid', 'test'], + help='which split to evaluate') +parser.add_argument('--batch_size', type=int, default=1, + help='batch size') 
+parser.add_argument('--tgt_len', type=int, default=128, + help='number of tokens to predict') +parser.add_argument('--ext_len', type=int, default=0, + help='length of the extended context') +parser.add_argument('--mem_len', type=int, default=128, + help='length of the retained previous heads') +parser.add_argument('--clamp_len', type=int, default=-1, + help='max positional embedding index') +parser.add_argument('--cuda', action='store_true', default=False, + help='use CUDA') +parser.add_argument('--work_dir', type=str, default='./LM-TFM-enwik8/20211216-100033', + help='path to the work_dir') +parser.add_argument('--no_log', action='store_true', + help='do not log the eval result') +parser.add_argument('--same_length', action='store_true', + help='set same length attention with masking') +args = parser.parse_args() +assert args.ext_len >= 0, 'extended context length must be non-negative' + +device = torch.device("cuda" if args.cuda else "cpu") + +# Get logger +logging = get_logger(os.path.join(args.work_dir, 'log.txt'), + log_=not args.no_log) + +# Load dataset +corpus = get_lm_corpus(args.data, args.dataset) +ntokens = len(corpus.vocab) + +va_iter = corpus.get_iterator('valid', args.batch_size, args.tgt_len, + device=device, ext_len=args.ext_len) +te_iter = corpus.get_iterator('test', args.batch_size, args.tgt_len, + device=device, ext_len=args.ext_len) + +# Load the best saved model. 
+# with open(os.path.join(args.work_dir, 'model.pt'), 'rb') as f: +# model = torch.load(f, map_location=device) +# model.backward_compatible() +# model = model.to(device) + +logging('Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.format( + args.batch_size, args.tgt_len, args.ext_len, args.mem_len, args.clamp_len)) + +# model.reset_length(args.tgt_len, args.ext_len, args.mem_len) +# if args.clamp_len > 0: +# model.clamp_len = args.clamp_len +# if args.same_length: +# model.same_length = True + +############################################################################### +# Evaluation code +############################################################################### +def evaluate(eval_iter): + # Turn on evaluation mode which disables dropout. + model.eval() + total_len, total_loss = 0, 0. + start_time = time.time() + with torch.no_grad(): + mems = tuple() + for idx, (data, target, seq_len) in enumerate(eval_iter): + ret = model(data, target, *mems) + loss, mems = ret[0], ret[1:] + loss = loss.mean() + total_loss += seq_len * loss.item() + total_len += seq_len + if idx <= 10: + print('batch {} ==========>loss_model: {}'.format(idx,loss)) + else: + break + total_time = time.time() - start_time + logging('Time : {:.2f}s, {:.2f}ms/segment'.format( + total_time, 1000 * total_time / (idx+1))) + return total_loss / total_len + +def evaluate_onnx(eval_iter): + onnx_path = 'model.onnx' + onnx_model = ONNXModel(onnx_path) + total_len, total_loss = 0, 0. 
+ start_time = time.time() + + #mems = list() + #for i in range(25): + #mems.append(torch.zeros(args.mem_len, args.batch_size, 1024).to(device)) + mems = None + with torch.no_grad(): + for idx, (data, target, seq_len) in enumerate(eval_iter): + ts = time.perf_counter() + ret = onnx_model.forward(data,target, mems) + loss, mems = ret[0], ret[1:] + loss = loss.mean() + total_loss += seq_len * loss.item() + total_len += seq_len + # if idx <= 10: + print('batch {} use time {:.2f}ms ==========>loss_onnx: {:.4f}'.format(idx,(time.perf_counter()-ts)*1000,loss)) + # else: + # break + total_time = time.time() - start_time + logging('Time : {:.2f}s, {:.2f}ms/segment'.format( + total_time, 1000 * total_time / (idx+1))) + return total_loss / total_len + + +# Run on test data. +if args.split == 'all': + test_loss = evaluate(te_iter) + valid_loss = evaluate(va_iter) +elif args.split == 'valid': + valid_loss = evaluate_onnx(va_iter) + test_loss = None +elif args.split == 'test': + test_loss = evaluate_onnx(te_iter) + valid_loss = None + +def format_log(loss, split): + if args.dataset in ['enwik8', 'text8']: + log_str = '| {0} loss {1:5.2f} | {0} bpc {2:9.5f} '.format( + split, loss, loss / math.log(2)) + else: + log_str = '| {0} loss {1:5.2f} | {0} ppl {2:9.3f} '.format( + split, loss, math.exp(loss)) + return log_str + +log_str = '' +if valid_loss is not None: + log_str += format_log(valid_loss, 'valid') +if test_loss is not None: + log_str += format_log(test_loss, 'test') + +logging('=' * 100) +logging(log_str) +logging('=' * 100) diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py new file mode 100644 index 0000000000..a1e2bf8f20 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py @@ -0,0 +1,85 @@ +import sys +import numpy as np +from magiconnx import OnnxGraph + + +INT64 = 7 +INT32 = 6 +MAXINT32 = 2147483647 +MININT32 = -2147483648 + + +def insert_cast_node(graph, 
before_node, node_name, dtype=6): + cast_node = graph.add_node( + node_name, + 'Cast', + {'to': dtype} + ) + graph.insert_node(before_node, cast_node, mode='after') + + +def insert_cast_after_shape(graph): + shape_nodes = graph.get_nodes("Shape") + for node in shape_nodes: + node_name = node.name + insert_name = 'expand_after_{}'.format(node_name) + insert_cast_node(graph, node_name, insert_name) + + +def transfer_constantofshape(graph): + constant_nodes = graph.get_nodes("ConstantOfShape") + for node in constant_nodes: + try: + if node.attrs['value'].data_type == INT64: + node.attrs['value'].data_type = INT32 + except: + if node.attrs['value'].t.data_type == INT64: + node.attrs['value'].t.data_type = INT32 + + +def value_to_int32(node): + node_value = node.value.copy() + if (node_value > MAXINT32).any(): + node_value[node_value>MAXINT32] = MAXINT32 + if (node_value < MININT32).any(): + node_value[node_value onnx_gpu_perf.log 2>&1 & diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt b/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/sample.patch b/ACL_PyTorch/contrib/nlp/Transformerxl_large/sample.patch new file mode 100644 index 0000000000..f2ba45a9c6 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/sample.patch @@ -0,0 +1,833 @@ +diff --git a/pytorch/data_utils.py b/pytorch/data_utils.py +index df762a7..1642018 100644 +--- a/pytorch/data_utils.py ++++ b/pytorch/data_utils.py +@@ -1,12 +1,11 @@ + import os, sys + import glob +- +-from collections import Counter, OrderedDict + import numpy as np + import torch + + from utils.vocabulary import Vocab + ++ + class LMOrderedIterator(object): + def __init__(self, data, bsz, bptt, device='cpu', ext_len=None): + """ +@@ -95,8 +94,6 @@ class LMShuffledIterator(object): + n_retain = 0 + + while True: +- # data : [n_retain+bptt x bsz] +- # target : [bptt x bsz] + 
data[n_retain:].fill_(-1) + target.fill_(-1) + +@@ -214,7 +211,7 @@ class Corpus(object): + self.valid = self.vocab.encode_file( + os.path.join(path, 'valid.txt'), ordered=False, add_double_eos=True) + self.test = self.vocab.encode_file( +- os.path.join(path, 'test.txt'), ordered=False, add_double_eos=True) ++ os.path.join(path, 'test.py.txt'), ordered=False, add_double_eos=True) + + def get_iterator(self, split, *args, **kwargs): + if split == 'train': +@@ -223,7 +220,7 @@ class Corpus(object): + elif self.dataset == 'lm1b': + kwargs['shuffle'] = True + data_iter = LMMultiFileIterator(self.train, self.vocab, *args, **kwargs) +- elif split in ['valid', 'test']: ++ elif split in ['valid', 'test', 'onnx']: + data = self.valid if split == 'valid' else self.test + if self.dataset in ['ptb', 'wt2', 'wt103', 'enwik8', 'text8']: + data_iter = LMOrderedIterator(data, *args, **kwargs) +@@ -259,12 +256,13 @@ def get_lm_corpus(datadir, dataset): + + return corpus + ++ + if __name__ == '__main__': + import argparse +- parser = argparse.ArgumentParser(description='unit test') +- parser.add_argument('--datadir', type=str, default='../data/text8', ++ parser = argparse.ArgumentParser(description='unit test.py') ++ parser.add_argument('--datadir', type=str, default='../data/enwik8', + help='location of the data corpus') +- parser.add_argument('--dataset', type=str, default='text8', ++ parser.add_argument('--dataset', type=str, default='enwik8', + choices=['ptb', 'wt2', 'wt103', 'lm1b', 'enwik8', 'text8'], + help='dataset name') + args = parser.parse_args() +diff --git a/pytorch/mem_transformer.py b/pytorch/mem_transformer.py +index 45147df..baf1bc0 100644 +--- a/pytorch/mem_transformer.py ++++ b/pytorch/mem_transformer.py +@@ -9,22 +9,126 @@ import torch.nn as nn + import torch.nn.functional as F + + sys.path.append('utils') +-from proj_adaptive_softmax import ProjectedAdaptiveLogSoftmax +-from log_uniform_sampler import LogUniformSampler, sample_logits ++from 
utils.proj_adaptive_softmax import ProjectedAdaptiveLogSoftmax ++from utils.log_uniform_sampler import LogUniformSampler, sample_logits ++import pdb ++import traceback ++pdb.set_trace = lambda:1 ++ ++ ++def einsum1(eq, opand): ++ """ ++ ibnd,jbnd->ijbn ++ """ ++ tmp = torch.einsum(eq, opand) ++ if not torch.onnx.is_in_onnx_export(): ++ return tmp ++ try: ++ x0, y0 = opand ++ assert len(x0.shape) == 4 ++ assert x0.shape[1:] == y0.shape[1:], "bad shape {}, {}".format(x0.shape, y0.shape) ++ i, b, n, d = x0.shape ++ j, _, _, _ = y0.shape ++ x = x0.clone() ++ y = y0.clone() ++ x = x.reshape(i, b * n, d).permute(1, 0, 2) ++ y = y.reshape(j, b * n, d).permute(1, 2, 0) ++ z = torch.bmm(x, y) ++ z = z.permute(1, 2, 0).reshape(i, j, b, n) ++ assert tmp.equal(z) ++ return z ++ except Exception as e: ++ #print('str(e):\t\t', str(e)) ++ #print('repr(e):\t', repr(e)) ++ #print('traceback.print_exc():', traceback.print_exc()) ++ #print('traceback.format_exc():\n%s' % traceback.format_exc()) ++ pdb.set_trace() ++ ++ ++def einsum2(eq, opand): ++ """ ++ ibnd,jnd->ijbn ++ """ ++ tmp = torch.einsum(eq, opand) ++ if not torch.onnx.is_in_onnx_export(): ++ return tmp ++ try: ++ x0, y0 = opand ++ assert len(y0.shape) == 3 ++ i, b, n, d = x0.shape ++ j, _, _ = y0.shape ++ x = x0.clone() ++ y = y0.clone() ++ y = y.unsqueeze(1).expand(j, b, n, d) ++ x = x.reshape(i, b * n, d).permute(1, 0, 2) ++ y = y.reshape(j, b * n, d).permute(1, 2, 0) ++ z = torch.bmm(x, y) ++ z = z.permute(1, 2, 0).reshape(i, j, b, n) ++ assert tmp.equal(z) ++ return z ++ except: ++ pdb.set_trace() ++ ++ ++def einsum3(eq, opand): ++ """ ++ ijbn,jbnd->ibnd ++ """ ++ tmp = torch.einsum(eq, opand) ++ if not torch.onnx.is_in_onnx_export(): ++ return tmp ++ try: ++ x0, y0 = opand ++ assert len(x0.shape) == 4 ++ assert x0.shape[2:] == y0.shape[1:3], "bad shape {}, {}".format(x0.shape, y0.shape) ++ i, j, b, n = x0.shape ++ j, _, _, d = y0.shape ++ x = x0.clone() ++ y = y0.clone() ++ x = x.reshape(i, j, b * n).permute(2, 0, 1) 
++ y = y.reshape(j, b * n, d).permute(1, 0, 2) ++ z = torch.bmm(x, y) ++ z = z.permute(1, 0, 2).reshape(i, b, n, d) ++ assert tmp.equal(z) ++ return z ++ except: ++ pdb.set_trace() ++ ++ ++def triu_onnx(x, diagonal=0): ++ assert len(x.shape) == 2 ++ m, l = x.shape ++ mask = torch.arange(l, device=x.device).expand(m, l) ++ arange = torch.arange(m, device=x.device) ++ arange = arange.unsqueeze(-1) ++ if diagonal: ++ arange = arange + diagonal ++ mask = mask >= arange ++ return x.masked_fill(mask==0, 0) ++ ++ ++def tril_onnx(x, diagonal=0): ++ return x - triu_onnx(x, diagonal) ++ ++ ++torch.triu = triu_onnx ++torch.tril = tril_onnx ++ ++ ++ + + class PositionalEmbedding(nn.Module): + def __init__(self, demb): + super(PositionalEmbedding, self).__init__() +- ++ + self.demb = demb +- + inv_freq = 1 / (10000 ** (torch.arange(0.0, demb, 2.0) / demb)) + self.register_buffer('inv_freq', inv_freq) + + def forward(self, pos_seq, bsz=None): ++ # print(torch.cuda.synchronize(), "打点1") + sinusoid_inp = torch.ger(pos_seq, self.inv_freq) + pos_emb = torch.cat([sinusoid_inp.sin(), sinusoid_inp.cos()], dim=-1) +- + if bsz is not None: + return pos_emb[:,None,:].expand(-1, bsz, -1) + else: +@@ -110,20 +214,20 @@ class MultiHeadAttn(nn.Module): + head_v = head_v.view(c.size(0), c.size(1), self.n_head, self.d_head) + + # [qlen x klen x bsz x n_head] +- attn_score = torch.einsum('ibnd,jbnd->ijbn', (head_q, head_k)) ++ attn_score = einsum1('ibnd,jbnd->ijbn', (head_q, head_k)) + attn_score.mul_(self.scale) + if attn_mask is not None and attn_mask.any().item(): + if attn_mask.dim() == 2: +- attn_score.masked_fill_(attn_mask[None,:,:,None], -float('inf')) ++ attn_score.masked_fill_(attn_mask[None,:,:,None].bool(), -float('inf')) + elif attn_mask.dim() == 3: +- attn_score.masked_fill_(attn_mask[:,:,:,None], -float('inf')) ++ attn_score.masked_fill_(attn_mask[:,:,:,None].bool(), -float('inf')) + + # [qlen x klen x bsz x n_head] + attn_prob = F.softmax(attn_score, dim=1) + attn_prob = 
self.dropatt(attn_prob) + + # [qlen x klen x bsz x n_head] + [klen x bsz x n_head x d_head] -> [qlen x bsz x n_head x d_head] +- attn_vec = torch.einsum('ijbn,jbnd->ibnd', (attn_prob, head_v)) ++ attn_vec = einsum3('ijbn,jbnd->ibnd', (attn_prob, head_v)) + attn_vec = attn_vec.contiguous().view( + attn_vec.size(0), attn_vec.size(1), self.n_head * self.d_head) + +@@ -198,7 +302,8 @@ class RelMultiHeadAttn(nn.Module): + + x_padded = x_padded.view(x.size(1) + 1, x.size(0), *x.size()[2:]) + +- x = x_padded[1:].view_as(x) ++ #x = x_padded[1:].view_as(x) ++ x = x_padded[1:].view(x.shape) + + if zero_triu: + ones = torch.ones((x.size(0), x.size(1))) +@@ -212,12 +317,13 @@ class RelMultiHeadAttn(nn.Module): + class RelPartialLearnableMultiHeadAttn(RelMultiHeadAttn): + def __init__(self, *args, **kwargs): + super(RelPartialLearnableMultiHeadAttn, self).__init__(*args, **kwargs) +- ++ + self.r_net = nn.Linear(self.d_model, self.n_head * self.d_head, bias=False) + + def forward(self, w, r, r_w_bias, r_r_bias, attn_mask=None, mems=None): + qlen, rlen, bsz = w.size(0), r.size(0), w.size(1) +- ++ ++ pdb.set_trace() + if mems is not None: + cat = torch.cat([mems, w], 0) + if self.pre_lnorm: +@@ -247,31 +353,35 @@ class RelPartialLearnableMultiHeadAttn(RelMultiHeadAttn): + + #### compute attention score + rw_head_q = w_head_q + r_w_bias # qlen x bsz x n_head x d_head +- AC = torch.einsum('ibnd,jbnd->ijbn', (rw_head_q, w_head_k)) # qlen x klen x bsz x n_head ++ AC = einsum1('ibnd,jbnd->ijbn', (rw_head_q, w_head_k)) # qlen x klen x bsz x n_head + + rr_head_q = w_head_q + r_r_bias +- BD = torch.einsum('ibnd,jnd->ijbn', (rr_head_q, r_head_k)) # qlen x klen x bsz x n_head ++ BD = einsum2('ibnd,jnd->ijbn', (rr_head_q, r_head_k)) # qlen x klen x bsz x n_head + BD = self._rel_shift(BD) + + # [qlen x klen x bsz x n_head] + attn_score = AC + BD + attn_score.mul_(self.scale) + ++ + #### compute attention probability ++ 
##############################################################################################################################33 ++ # edit this for Warning ++ + if attn_mask is not None and attn_mask.any().item(): + if attn_mask.dim() == 2: +- attn_score = attn_score.float().masked_fill( +- attn_mask[None,:,:,None], -float('inf')).type_as(attn_score) ++ attn_score = attn_score.float().masked_fill(attn_mask[None,:,:,None].bool(), -float('inf')).type_as(attn_score) + elif attn_mask.dim() == 3: +- attn_score = attn_score.float().masked_fill( +- attn_mask[:,:,:,None], -float('inf')).type_as(attn_score) ++ attn_score = attn_score.float().masked_fill(attn_mask[:,:,:,None].bool(), -float('inf')).type_as(attn_score) ++ ++ ################################################################################################################################ + + # [qlen x klen x bsz x n_head] + attn_prob = F.softmax(attn_score, dim=1) + attn_prob = self.dropatt(attn_prob) + + #### compute attention vector +- attn_vec = torch.einsum('ijbn,jbnd->ibnd', (attn_prob, w_head_v)) ++ attn_vec = einsum3('ijbn,jbnd->ibnd', (attn_prob, w_head_v)) + + # [qlen x bsz x n_head x d_head] + attn_vec = attn_vec.contiguous().view( +@@ -335,8 +445,8 @@ class RelLearnableMultiHeadAttn(RelMultiHeadAttn): + #### compute attention score + rw_head_q = w_head_q + r_w_bias[None] # qlen x bsz x n_head x d_head + +- AC = torch.einsum('ibnd,jbnd->ijbn', (rw_head_q, w_head_k)) # qlen x klen x bsz x n_head +- B_ = torch.einsum('ibnd,jnd->ijbn', (w_head_q, r_emb)) # qlen x klen x bsz x n_head ++ AC = einsum1('ibnd,jbnd->ijbn', (rw_head_q, w_head_k)) # qlen x klen x bsz x n_head ++ B_ = einsum2('ibnd,jnd->ijbn', (w_head_q, r_emb)) # qlen x klen x bsz x n_head + D_ = r_bias[None, :, None] # 1 x klen x 1 x n_head + BD = self._rel_shift(B_ + D_) + +@@ -347,16 +457,16 @@ class RelLearnableMultiHeadAttn(RelMultiHeadAttn): + #### compute attention probability + if attn_mask is not None and attn_mask.any().item(): + if 
attn_mask.dim() == 2: +- attn_score.masked_fill_(attn_mask[None,:,:,None], -float('inf')) ++ attn_score.masked_fill_(attn_mask[None,:,:,None].bool(), -float('inf')) + elif attn_mask.dim() == 3: +- attn_score.masked_fill_(attn_mask[:,:,:,None], -float('inf')) ++ attn_score.masked_fill_(attn_mask[:,:,:,None].bool(), -float('inf')) + + # [qlen x klen x bsz x n_head] + attn_prob = F.softmax(attn_score, dim=1) + attn_prob = self.dropatt(attn_prob) + + #### compute attention vector +- attn_vec = torch.einsum('ijbn,jbnd->ibnd', (attn_prob, w_head_v)) ++ attn_vec = einsum3('ijbn,jbnd->ibnd', (attn_prob, w_head_v)) + + # [qlen x bsz x n_head x d_head] + attn_vec = attn_vec.contiguous().view( +@@ -384,7 +494,7 @@ class DecoderLayer(nn.Module): + pre_lnorm=kwargs.get('pre_lnorm')) + + def forward(self, dec_inp, dec_attn_mask=None, mems=None): +- ++ + output = self.dec_attn(dec_inp, attn_mask=dec_attn_mask, + mems=mems) + output = self.pos_ff(output) +@@ -402,7 +512,7 @@ class RelLearnableDecoderLayer(nn.Module): + pre_lnorm=kwargs.get('pre_lnorm')) + + def forward(self, dec_inp, r_emb, r_w_bias, r_bias, dec_attn_mask=None, mems=None): +- ++ + output = self.dec_attn(dec_inp, r_emb, r_w_bias, r_bias, + attn_mask=dec_attn_mask, + mems=mems) +@@ -415,13 +525,14 @@ class RelPartialLearnableDecoderLayer(nn.Module): + **kwargs): + super(RelPartialLearnableDecoderLayer, self).__init__() + ++ + self.dec_attn = RelPartialLearnableMultiHeadAttn(n_head, d_model, + d_head, dropout, **kwargs) ++ + self.pos_ff = PositionwiseFF(d_model, d_inner, dropout, + pre_lnorm=kwargs.get('pre_lnorm')) +- ++ + def forward(self, dec_inp, r, r_w_bias, r_r_bias, dec_attn_mask=None, mems=None): +- + output = self.dec_attn(dec_inp, r, r_w_bias, r_r_bias, + attn_mask=dec_attn_mask, + mems=mems) +@@ -434,7 +545,7 @@ class AdaptiveEmbedding(nn.Module): + def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, + sample_softmax=False): + super(AdaptiveEmbedding, self).__init__() +- ++ + self.n_token = 
n_token + self.d_embed = d_embed + +@@ -462,6 +573,7 @@ class AdaptiveEmbedding(nn.Module): + self.emb_projs.append(nn.Parameter(torch.Tensor(d_proj, d_emb_i))) + + def forward(self, inp): ++ + if self.div_val == 1: + embed = self.emb_layers[0](inp) + if self.d_proj != self.d_embed: +@@ -492,7 +604,8 @@ class AdaptiveEmbedding(nn.Module): + + return embed + +-class MemTransformerLM(nn.Module): ++ ++class MemTransformerLM(nn.Module): # 打点11,model入口 + def __init__(self, n_token, n_layer, n_head, d_model, d_head, d_inner, + dropout, dropatt, tie_weight=True, d_embed=None, + div_val=1, tie_projs=[False], pre_lnorm=False, +@@ -509,8 +622,10 @@ class MemTransformerLM(nn.Module): + self.n_head = n_head + self.d_head = d_head + ++ + self.word_emb = AdaptiveEmbedding(n_token, d_embed, d_model, cutoffs, + div_val=div_val) ++ + + self.drop = nn.Dropout(dropout) + +@@ -524,7 +639,7 @@ class MemTransformerLM(nn.Module): + self.attn_type = attn_type + + self.layers = nn.ModuleList() +- if attn_type == 0: # the default attention ++ if attn_type == 0: # the default attention # 执行 + for i in range(n_layer): + self.layers.append( + RelPartialLearnableDecoderLayer( +@@ -532,7 +647,8 @@ class MemTransformerLM(nn.Module): + tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len, + dropatt=dropatt, pre_lnorm=pre_lnorm) + ) +- elif attn_type == 1: # learnable embeddings ++ ++ elif attn_type == 1: # learnable embeddings # 未执行 + for i in range(n_layer): + self.layers.append( + RelLearnableDecoderLayer( +@@ -540,7 +656,7 @@ class MemTransformerLM(nn.Module): + tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len, + dropatt=dropatt, pre_lnorm=pre_lnorm) + ) +- elif attn_type in [2, 3]: # absolute embeddings ++ elif attn_type in [2, 3]: # absolute embeddings # 未执行 + for i in range(n_layer): + self.layers.append( + DecoderLayer( +@@ -550,7 +666,7 @@ class MemTransformerLM(nn.Module): + + self.sample_softmax = sample_softmax + # use sampled softmax +- if sample_softmax > 0: ++ if sample_softmax > 0: # 
不执行 + self.out_layer = nn.Linear(d_model, n_token) + if tie_weight: + self.out_layer.weight = self.word_emb.weight +@@ -558,10 +674,10 @@ class MemTransformerLM(nn.Module): + self.sampler = LogUniformSampler(n_token, sample_softmax) + + # use adaptive softmax (including standard softmax) +- else: ++ else: # 执行 + self.crit = ProjectedAdaptiveLogSoftmax(n_token, d_embed, d_model, + cutoffs, div_val=div_val) +- ++ + if tie_weight: + for i in range(len(self.crit.out_layers)): + self.crit.out_layers[i].weight = self.word_emb.emb_layers[i].weight +@@ -582,8 +698,8 @@ class MemTransformerLM(nn.Module): + self.sample_softmax = -1 + + def _create_params(self): +- if self.attn_type == 0: # default attention +- self.pos_emb = PositionalEmbedding(self.d_model) ++ if self.attn_type == 0: # default attention # 执行 ++ self.pos_emb = PositionalEmbedding(self.d_model) + self.r_w_bias = nn.Parameter(torch.Tensor(self.n_head, self.d_head)) + self.r_r_bias = nn.Parameter(torch.Tensor(self.n_head, self.d_head)) + elif self.attn_type == 1: # learnable +@@ -598,8 +714,8 @@ class MemTransformerLM(nn.Module): + elif self.attn_type == 3: # absolute deeper SA + self.r_emb = nn.Parameter(torch.Tensor( + self.n_layer, self.max_klen, self.n_head, self.d_head)) +- +- def reset_length(self, tgt_len, ext_len, mem_len): ++ ++ def reset_length(self, tgt_len, ext_len, mem_len): + self.tgt_len = tgt_len + self.mem_len = mem_len + self.ext_len = ext_len +@@ -619,7 +735,7 @@ class MemTransformerLM(nn.Module): + def _update_mems(self, hids, mems, qlen, mlen): + # does not deal with None + if mems is None: return None +- ++ + # mems is not None + assert len(hids) == len(mems), 'len(hids) != len(mems)' + +@@ -633,7 +749,6 @@ class MemTransformerLM(nn.Module): + end_idx = mlen + max(0, qlen - 0 - self.ext_len) + beg_idx = max(0, end_idx - self.mem_len) + for i in range(len(hids)): +- + cat = torch.cat([mems[i], hids[i]], dim=0) + new_mems.append(cat[beg_idx:end_idx].detach()) + +@@ -641,9 +756,7 @@ class 
MemTransformerLM(nn.Module): + + def _forward(self, dec_inp, mems=None): + qlen, bsz = dec_inp.size() +- + word_emb = self.word_emb(dec_inp) +- + mlen = mems[0].size(0) if mems is not None else 0 + klen = mlen + qlen + if self.same_length: +@@ -663,19 +776,21 @@ class MemTransformerLM(nn.Module): + if self.attn_type == 0: # default + pos_seq = torch.arange(klen-1, -1, -1.0, device=word_emb.device, + dtype=word_emb.dtype) ++ + if self.clamp_len > 0: + pos_seq.clamp_(max=self.clamp_len) + pos_emb = self.pos_emb(pos_seq) + + core_out = self.drop(word_emb) + pos_emb = self.drop(pos_emb) +- +- hids.append(core_out) +- for i, layer in enumerate(self.layers): ++ ++ hids.append(core_out) ++ for i, layer in enumerate(self.layers): + mems_i = None if mems is None else mems[i] + core_out = layer(core_out, pos_emb, self.r_w_bias, + self.r_r_bias, dec_attn_mask=dec_attn_mask, mems=mems_i) + hids.append(core_out) ++ + elif self.attn_type == 1: # learnable + core_out = self.drop(word_emb) + hids.append(core_out) +@@ -727,7 +842,7 @@ class MemTransformerLM(nn.Module): + core_out = layer(core_out, dec_attn_mask=dec_attn_mask, + mems=mems_i) + hids.append(core_out) +- ++ pdb.set_trace() + core_out = self.drop(core_out) + + new_mems = self._update_mems(hids, mems, mlen, qlen) +@@ -735,13 +850,18 @@ class MemTransformerLM(nn.Module): + return core_out, new_mems + + def forward(self, data, target, *mems): ++ + # nn.DataParallel does not allow size(0) tensors to be broadcasted. + # So, have to initialize size(0) mems inside the model forward. + # Moreover, have to return new_mems to allow nn.DataParallel to piece + # them together. 
++ + if not mems: mems = self.init_mems() +- +- tgt_len = target.size(0) ++ ++ if torch.onnx.is_in_onnx_export(): ++ tgt_len = target.size(0).numpy() ++ else: ++ tgt_len = target.size(0) + hidden, new_mems = self._forward(data, mems=mems) + + pred_hid = hidden[-tgt_len:] +@@ -751,8 +871,11 @@ class MemTransformerLM(nn.Module): + self.out_layer.bias, target, pred_hid, self.sampler) + loss = -F.log_softmax(logit, -1)[:, :, 0] + else: +- loss = self.crit(pred_hid.view(-1, pred_hid.size(-1)), target.view(-1)) +- loss = loss.view(tgt_len, -1) ++ if torch.onnx.is_in_onnx_export(): ++ loss = self.crit(pred_hid.reshape(-1, pred_hid.size(-1)), target.reshape(-1)) ++ else: ++ loss = self.crit(pred_hid.reshape(-1, pred_hid.size(-1)), target.reshape(-1)) ++ loss = loss.reshape(tgt_len, -1) + + if new_mems is None: + return [loss] +@@ -762,7 +885,7 @@ class MemTransformerLM(nn.Module): + if __name__ == '__main__': + import argparse + +- parser = argparse.ArgumentParser(description='unit test') ++ parser = argparse.ArgumentParser(description='unit test.py') + + parser.add_argument('--n_layer', type=int, default=4, help='') + parser.add_argument('--n_rel_layer', type=int, default=4, help='') +@@ -774,7 +897,7 @@ if __name__ == '__main__': + parser.add_argument('--dropout', type=float, default=0.0, help='') + parser.add_argument('--cuda', action='store_true', help='') + parser.add_argument('--seed', type=int, default=1111, help='') +- parser.add_argument('--multi_gpu', action='store_true', help='') ++ parser.add_argument('--multi_gpu', action='store_true', help='') + + args = parser.parse_args() + +@@ -801,12 +924,12 @@ if __name__ == '__main__': + d_embed=d_embed, div_val=div_val, + tie_projs=tie_projs, pre_lnorm=True, + tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len, +- cutoffs=cutoffs, attn_type=0).to(device) +- +- print(sum(p.numel() for p in model.parameters())) ++ cutoffs=cutoffs, attn_type=0) ++ ++ #print(sum(p.numel() for p in model.parameters())) + + mems = tuple() + 
for idx, (inp, tgt, seqlen) in enumerate(diter): +- print('batch {}'.format(idx)) ++ #print('batch {}'.format(idx)) + out = model(inp, tgt, *mems) + mems = out[1:] +diff --git a/pytorch/utils/adaptive_softmax.py b/pytorch/utils/adaptive_softmax.py +index 68ae016..68c59f8 100644 +--- a/pytorch/utils/adaptive_softmax.py ++++ b/pytorch/utils/adaptive_softmax.py +@@ -67,6 +67,10 @@ class AdaptiveLogSoftmax(nn.Module): + head_logprob_i = head_logprob.index_select(0, indices_i) + + if i == 0: ++ ++ print(f'target_i[:,None]: {target_i[:, None]}') ++ print(f'target_i[:,None].shape: {target_i[:, None].shape}') ++ + logprob_i = head_logprob_i.gather(1, target_i[:,None]).squeeze(1) + else: + weight_i = weight[l_idx:h_idx] +@@ -77,6 +81,8 @@ class AdaptiveLogSoftmax(nn.Module): + tail_logit_i = F.linear(hidden_i, weight_i, bias=bias_i) + tail_logprob_i = F.log_softmax(tail_logit_i, dim=1) + ++ print(f'target_i[:,None]: {target_i[:, None]}') ++ print(f'target_i[:,None].shape: {target_i[:, None].shape}') + logprob_i = head_logprob_i[:, -i] \ + + tail_logprob_i.gather(1, target_i[:,None]).squeeze(1) + +diff --git a/pytorch/utils/data_parallel.py b/pytorch/utils/data_parallel.py +index d7e1811..dd4041e 100644 +--- a/pytorch/utils/data_parallel.py ++++ b/pytorch/utils/data_parallel.py +@@ -68,6 +68,12 @@ class BalancedDataParallel(DataParallel): + if self.gpu0_bsz == 0: + replicas = replicas[1:] + outputs = self.parallel_apply(replicas, device_ids, inputs, kwargs) ++ ++ print(f'outputs: {outputs}') ++ print(f'type(outputs): {type(outputs)}') ++ print(f'len(outputs): {len(outputs)}') ++ print(f'self.output_device: {self.output_device}') ++ + return self.gather(outputs, self.output_device) + + def parallel_apply(self, replicas, device_ids, inputs, kwargs): +diff --git a/pytorch/utils/exp_utils.py b/pytorch/utils/exp_utils.py +index e44f7c2..568c3e6 100644 +--- a/pytorch/utils/exp_utils.py ++++ b/pytorch/utils/exp_utils.py +@@ -1,8 +1,6 @@ + import functools +-import os, shutil +- 
+-import numpy as np +- ++import os ++import shutil + import torch + + +@@ -10,31 +8,18 @@ def logging(s, log_path, print_=True, log_=True): + if print_: + print(s) + if log_: +- with open(log_path, 'a+') as f_log: ++ with open(log_path, 'a+') as f_log: + f_log.write(s + '\n') + +-def get_logger(log_path, **kwargs): +- return functools.partial(logging, log_path=log_path, **kwargs) + +-def create_exp_dir(dir_path, scripts_to_save=None, debug=False): +- if debug: +- print('Debug Mode : no experiment dir created') +- return functools.partial(logging, log_path=None, log_=False) +- +- if not os.path.exists(dir_path): +- os.makedirs(dir_path) ++def get_logger(log_path, **kwargs): ++ return functools.partial(logging, log_path=log_path, **kwargs) + ++def create_exp_dir(dir_path, scripts_to_save=None, debug=False): + print('Experiment dir : {}'.format(dir_path)) +- if scripts_to_save is not None: +- script_path = os.path.join(dir_path, 'scripts') +- if not os.path.exists(script_path): +- os.makedirs(script_path) +- for script in scripts_to_save: +- dst_file = os.path.join(dir_path, 'scripts', os.path.basename(script)) +- shutil.copyfile(script, dst_file) ++ return get_logger('log.txt') + +- return get_logger(log_path=os.path.join(dir_path, 'log.txt')) + + def save_checkpoint(model, optimizer, path, epoch): +- torch.save(model, os.path.join(path, 'model_{}.pt'.format(epoch))) +- torch.save(optimizer.state_dict(), os.path.join(path, 'optimizer_{}.pt'.format(epoch))) ++ torch.save(model, 'model_{}.pt'.format(epoch)) ++ torch.save(optimizer.state_dict(), 'optimizer_{}.pt'.format(epoch)) +diff --git a/pytorch/utils/log_uniform_sampler.py b/pytorch/utils/log_uniform_sampler.py +index 503f635..e1a631c 100644 +--- a/pytorch/utils/log_uniform_sampler.py ++++ b/pytorch/utils/log_uniform_sampler.py +@@ -78,41 +78,6 @@ def sample_logits(embedding, bias, labels, inputs, sampler): + return logits + + +-# class LogUniformSampler(object): +-# def __init__(self, range_max, unique=False): 
+-# """ +-# Reference : https://github.com/tensorflow/tensorflow/blob/r1.10/tensorflow/python/ops/candidate_sampling_ops.py +-# `P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)` +-# """ +-# self.range_max = range_max +-# log_indices = torch.arange(1., range_max+2., 1.).log_() +-# self.dist = (log_indices[1:] - log_indices[:-1]) / log_indices[-1] +- +-# self.unique = unique +- +-# if self.unique: +-# self.exclude_mask = torch.ByteTensor(range_max).fill_(0) +- +-# def sample(self, n_sample, labels): +-# pos_sample, new_labels = labels.unique(return_inverse=True) +-# n_pos_sample = pos_sample.size(0) +-# n_neg_sample = n_sample - n_pos_sample +- +-# if self.unique: +-# self.exclude_mask.index_fill_(0, pos_sample, 1) +-# sample_dist = self.dist.clone().masked_fill_(self.exclude_mask, 0) +-# self.exclude_mask.index_fill_(0, pos_sample, 0) +-# else: +-# sample_dist = self.dist +- +-# neg_sample = torch.multinomial(sample_dist, n_neg_sample) +- +-# sample = torch.cat([pos_sample, neg_sample]) +-# sample_prob = self.dist[sample] +- +-# return new_labels, sample, sample_prob +- +- + if __name__ == '__main__': + S, B = 3, 4 + n_vocab = 10000 +@@ -121,20 +86,7 @@ if __name__ == '__main__': + + labels = torch.LongTensor(S, B).random_(0, n_vocab) + +- # sampler = LogUniformSampler(n_vocab, unique=False) +- # new_labels, sample, sample_prob = sampler.sample(n_sample, labels) +- + sampler = LogUniformSampler(n_vocab, unique=True) +- # true_probs, samp_probs, neg_samples = sampler.sample(n_sample, labels) +- +- # print('true_probs', true_probs.numpy().tolist()) +- # print('samp_probs', samp_probs.numpy().tolist()) +- # print('neg_samples', neg_samples.numpy().tolist()) +- +- # print('sum', torch.sum(sampler.dist).item()) +- +- # assert torch.all(torch.sort(sample.unique())[0].eq(torch.sort(sample)[0])).item() +- + embedding = nn.Embedding(n_vocab, H) + bias = torch.zeros(n_vocab) + inputs = torch.Tensor(S, B, H).normal_() +diff --git 
a/pytorch/utils/proj_adaptive_softmax.py b/pytorch/utils/proj_adaptive_softmax.py +index a0fbfeb..941dc46 100644 +--- a/pytorch/utils/proj_adaptive_softmax.py ++++ b/pytorch/utils/proj_adaptive_softmax.py +@@ -9,11 +9,11 @@ import torch.nn.functional as F + CUDA_MAJOR = int(torch.version.cuda.split('.')[0]) + CUDA_MINOR = int(torch.version.cuda.split('.')[1]) + ++ + class ProjectedAdaptiveLogSoftmax(nn.Module): + def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, + keep_order=False): + super(ProjectedAdaptiveLogSoftmax, self).__init__() +- + self.n_token = n_token + self.d_embed = d_embed + self.d_proj = d_proj +@@ -83,8 +83,10 @@ class ProjectedAdaptiveLogSoftmax(nn.Module): + if self.n_clusters == 0: + logit = self._compute_logit(hidden, self.out_layers[0].weight, + self.out_layers[0].bias, self.out_projs[0]) ++ + nll = -F.log_softmax(logit, dim=-1) \ + .gather(1, target.unsqueeze(1)).squeeze(1) ++ + else: + # construct weights and biases + weights, biases = [], [] +diff --git a/pytorch/utils/vocabulary.py b/pytorch/utils/vocabulary.py +index b6b8249..2cb2091 100644 +--- a/pytorch/utils/vocabulary.py ++++ b/pytorch/utils/vocabulary.py +@@ -1,8 +1,8 @@ + import os + from collections import Counter, OrderedDict +- + import torch + ++ + class Vocab(object): + def __init__(self, special=[], min_freq=0, max_size=None, lower_case=True, + delimiter=None, vocab_file=None): +@@ -14,6 +14,7 @@ class Vocab(object): + self.delimiter = delimiter + self.vocab_file = vocab_file + ++ + def tokenize(self, line, add_eos=False, add_double_eos=False): + line = line.strip() + # convert to lower case +@@ -33,10 +34,13 @@ class Vocab(object): + else: + return symbols + ++ ++ + def count_file(self, path, verbose=False, add_eos=False): + if verbose: print('counting file {} ...'.format(path)) + assert os.path.exists(path) + ++ # if not verbose + sents = [] + with open(path, 'r', encoding='utf-8') as f: + for idx, line in enumerate(f): +@@ -48,6 +52,7 @@ class Vocab(object): 
+ + return sents + ++ + def count_sents(self, sents, verbose=False): + """ + sents : a list of sentences, each a list of tokenized symbols diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/env.sh b/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/env.sh new file mode 100644 index 0000000000..afed56f732 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/env.sh @@ -0,0 +1,7 @@ +#! /bin/bash +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=${install_path} diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/eval_acc_perf.sh b/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/eval_acc_perf.sh new file mode 100644 index 0000000000..05a88a19f9 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/eval_acc_perf.sh @@ -0,0 +1 @@ +python3 transformerxl_large_postprocess.py --om_out_path=${1} --target_path=${2} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/pth2om.sh b/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/pth2om.sh new file mode 100644 index 0000000000..7463c2e95c --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/test/pth2om.sh @@ -0,0 +1,5 @@ +python3 transformerxl_large_pth2onnx.py --work_dir=${1} --batch_size=${2} +python3 -m onnxsim model_bs${2}.onnx model_bs${2}_sim.onnx --input-shape "data:128,${2}" "target:128,${2}" +python3 fix_int64.py model_bs${2}_sim.onnx model_bs${2}_sim_fix.onnx +source env.sh +atc --framework=5 --model=model_bs${2}_sim_fix.onnx --output=model_bs${2} --input_format=ND --input_shape="data:128,${2};target:128,${2}" --log=debug --soc_version=Ascend310 
--fusion_switch_file=fusion_switch.cfg \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py new file mode 100644 index 0000000000..35ae494cc7 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py @@ -0,0 +1,37 @@ +import math +import os, sys +import numpy as np +import torch +import argparse + +parser = argparse.ArgumentParser('Set data_bin and target_bin path', add_help=False) +parser.add_argument('--om_out_path', default="/home/huangwei/eval_only_hw/tools/msame/out/2022126_19_0_54_936547/", type=str) +parser.add_argument('--target_path', default="/home/huangwei/eval_only_hw/bin_target/", type=str) +args = parser.parse_args() +device = torch.device("cpu") +output_dir = args.om_out_path +target_dir = args.target_path +filenames = os.listdir(output_dir) +i = 0 +total_len, total_loss = 0, 0. +for file in filenames: + idx = file.split('_')[1] + target_filename = target_dir + 'data_' + str(idx) +'.bin' + target = np.fromfile(target_filename, dtype=np.int64) + with open(output_dir + file) as f: + line = f.readlines()[0].split() + line_f = list(map(float, line)) + ret = torch.from_numpy(np.array(line_f)) + seq_len = len(ret) + loss = ret.mean() + total_loss += seq_len * loss.item() + total_len += seq_len + i += 1 + print('\rHave done {} batches'.format(i),end='') + +print("\nloss = {:.2f} | bpc {:.4f} ".format(total_loss / total_len, loss / math.log(2))) +print('completed!') + + + + diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py new file mode 100644 index 0000000000..cdc9ab6938 --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py @@ -0,0 +1,71 @@ +import argparse +import time +import math +import os +import sys 
+import torch +from data_utils import get_lm_corpus +from utils.exp_utils import get_logger +import pdb +import numpy as np +from tqdm import tqdm + +parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model') +parser.add_argument('--data', type=str, default='../data/enwik8', + help='location of the data corpus') +parser.add_argument('--dataset', type=str, default='enwik8', + choices=['wt103', 'lm1b', 'enwik8', 'text8'], + help='dataset name') +parser.add_argument('--split', type=str, default='onnx', + choices=['all', 'valid', 'test','onnx'], + help='which split to evaluate') +parser.add_argument('--batch_size', type=int, default=1, + help='batch size') +parser.add_argument('--tgt_len', type=int, default=128, + help='number of tokens to predict') +parser.add_argument('--ext_len', type=int, default=0, + help='length of the extended context') +parser.add_argument('--mem_len', type=int, default=0, + help='length of the retained previous heads') +parser.add_argument('--clamp_len', type=int, default=-1, + help='max positional embedding index') +parser.add_argument('--pre_data_save_path', type=str, default='./bin_data', + help='location of the bin data') +parser.add_argument('--pre_target_save_path', type=str, default='./bin_target', +help='location of the bin data') + +args = parser.parse_args() + +assert args.ext_len >= 0, 'extended context length must be non-negative' + +if not os.path.exists(args.pre_data_save_path): + os.makedirs(args.pre_data_save_path) +if not os.path.exists(args.pre_target_save_path): + os.makedirs(args.pre_target_save_path) + +# Load dataset +corpus = get_lm_corpus(args.data, args.dataset) +ntokens = len(corpus.vocab) + +valid_iter = corpus.get_iterator('valid', args.batch_size, args.tgt_len, + device='cpu', ext_len=args.ext_len) +test_iter = corpus.get_iterator('test', args.batch_size, args.tgt_len, + device='cpu', ext_len=args.ext_len) + +f_info_file = open("bin_file.info", "wt") + +for idx, (data, target, seq_len) in 
enumerate(valid_iter): + if idx < valid_iter.n_batch-1: + data_seq = np.asarray(data, dtype=np.int64) + target_seq = np.asarray(target, dtype=np.int64) + data_bin_file_path = os.path.join(args.pre_data_save_path, 'data_'+ str(idx) + ".bin") + target_bin_file_path = os.path.join(args.pre_target_save_path, 'data_'+ str(idx) + ".bin") + data_seq.tofile(data_bin_file_path) + target_seq.tofile(target_bin_file_path) + f_info_file.write(str(idx) + ' ' + args.pre_data_save_path + '/data_' + str(idx) + ".bin" + '\n') + f_info_file.write(str(idx) + ' ' + args.pre_target_save_path + '/data_' + str(idx) + ".bin" + '\n') + print('\rhave done {} batches'.format(str(idx+1)), end='') + else: + break +print('\nCompleted!') +f_info_file.close() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_pth2onnx.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_pth2onnx.py new file mode 100644 index 0000000000..a4b6bd073a --- /dev/null +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_pth2onnx.py @@ -0,0 +1,79 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding: utf-8 +import argparse +import time +import math +import os +import sys +import torch + +parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model') +parser.add_argument('--data', type=str, default='../data/enwik8', + help='location of the data corpus') +parser.add_argument('--dataset', type=str, default='enwik8', + choices=['wt103', 'lm1b', 'enwik8', 'text8'], + help='dataset name') +parser.add_argument('--batch_size', type=int, default=16, + help='batch size') +parser.add_argument('--tgt_len', type=int, default=128, + help='number of tokens to predict') +parser.add_argument('--ext_len', type=int, default=0, + help='length of the extended context') +parser.add_argument('--mem_len', type=int, default=0, + help='length of the retained previous heads') +parser.add_argument('--clamp_len', type=int, default=-1, + help='max positional embedding index') +parser.add_argument('--cuda', default=False, action='store_true', + help='use CUDA') +parser.add_argument('--work_dir', type=str, + help='path to the work_dir') +parser.add_argument('--same_length', action='store_true', default=True, + help='set same length attention with masking') + + +args = parser.parse_args() +assert args.ext_len >= 0, 'extended context length must be non-negative' +device = torch.device("cuda" if args.cuda else "cpu") + +# Load the best saved model. 
+with open(os.path.join(args.work_dir, 'model.pt'), 'rb') as f: + model = torch.load(f, map_location=device) +model.backward_compatible() +model = model.to(device) + +print('Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.format( + args.batch_size, args.tgt_len, args.ext_len, args.mem_len, args.clamp_len)) + +model.reset_length(args.tgt_len, args.ext_len, args.mem_len) +if args.clamp_len > 0: + model.clamp_len = args.clamp_len +if args.same_length: + model.same_length = True + +# export onnx model +data = torch.ones(args.tgt_len, args.batch_size, dtype=torch.int64).to(device) +target = torch.ones(args.tgt_len, args.batch_size, dtype=torch.int64).to(device) +model.eval() +mems = tuple() +ret = model(data, target, *mems) +loss, mems = ret[0], ret[1:] +loss = loss.mean() +print('*'*100) +onnx_name = "model_bs" + str(args.batch_size) + ".onnx" +torch.onnx.export(model, (data, target, *mems), onnx_name, input_names=['data', 'target'], output_names=['output'], + do_constant_folding=True, keep_initializers_as_inputs=True, opset_version=12, verbose=True) +print("export onnx model success") +sys.exit() -- Gitee From 0717bdb98a50d2d95621c75c06d97775948e67d3 Mon Sep 17 00:00:00 2001 From: eason_hw <94364678@qq.com> Date: Tue, 12 Apr 2022 19:34:04 +0800 Subject: [PATCH 2/4] add copyright --- .../nlp/Transformerxl_large/eval_onnx.py | 27 ++++++++++--------- .../nlp/Transformerxl_large/fix_int64.py | 15 +++++++++++ .../Transformerxl_large/modelzoo_levle.txt | 3 +++ .../nlp/Transformerxl_large/requirements.txt | 6 +++++ .../transformerxl_large_postprocess.py | 19 +++++++++++-- .../transformerxl_large_preprocess.py | 21 ++++++++++++--- 6 files changed, 73 insertions(+), 18 deletions(-) diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py index 89feb646aa..12d4b8f357 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py +++ 
b/ACL_PyTorch/contrib/nlp/Transformerxl_large/eval_onnx.py @@ -1,3 +1,17 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # coding: utf-8 import argparse import time @@ -73,11 +87,9 @@ class ONNXModel(): :param image_tensor: :return: ''' - # 输入数据的类型必须与模型一致,以下三种写法都是可以的 # scores, boxes = self.onnx_session.run(None, {self.input_name: image_tensor}) # scores, boxes = self.onnx_session.run(self.output_name, input_feed={self.input_name: image_tensor}) input_feed = self.get_input_feed(self.input_name, data, target, mems) - #print(input_feed['data'].shape, input_feed['target'].shape) ret = self.onnx_session.run(self.output_name, input_feed=input_feed) return ret @@ -127,20 +139,9 @@ va_iter = corpus.get_iterator('valid', args.batch_size, args.tgt_len, te_iter = corpus.get_iterator('test', args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len) -# Load the best saved model. 
-# with open(os.path.join(args.work_dir, 'model.pt'), 'rb') as f: -# model = torch.load(f, map_location=device) -# model.backward_compatible() -# model = model.to(device) - logging('Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.format( args.batch_size, args.tgt_len, args.ext_len, args.mem_len, args.clamp_len)) -# model.reset_length(args.tgt_len, args.ext_len, args.mem_len) -# if args.clamp_len > 0: -# model.clamp_len = args.clamp_len -# if args.same_length: -# model.same_length = True ############################################################################### # Evaluation code diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py index a1e2bf8f20..26ee9d514d 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/fix_int64.py @@ -1,3 +1,18 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding: utf-8 import sys import numpy as np from magiconnx import OnnxGraph diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/modelzoo_levle.txt b/ACL_PyTorch/contrib/nlp/Transformerxl_large/modelzoo_levle.txt index e69de29bb2..70801afc42 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/modelzoo_levle.txt +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/modelzoo_levle.txt @@ -0,0 +1,3 @@ +FuncStatus:OK +PrecisionStatus:OK +PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt b/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt index e69de29bb2..558579624a 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt @@ -0,0 +1,6 @@ +onnx +onnxruntime +onnx-simplifier +tqdm +argparse +numpy \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py index 35ae494cc7..af8b92b7e7 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_postprocess.py @@ -1,3 +1,18 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding: utf-8 import math import os, sys import numpy as np @@ -16,7 +31,7 @@ i = 0 total_len, total_loss = 0, 0. for file in filenames: idx = file.split('_')[1] - target_filename = target_dir + 'data_' + str(idx) +'.bin' + target_filename = target_dir + 'data_' + str(idx) + '.bin' target = np.fromfile(target_filename, dtype=np.int64) with open(output_dir + file) as f: line = f.readlines()[0].split() @@ -27,7 +42,7 @@ for file in filenames: total_loss += seq_len * loss.item() total_len += seq_len i += 1 - print('\rHave done {} batches'.format(i),end='') + print('\rHave done {} batches'.format(i), end='') print("\nloss = {:.2f} | bpc {:.4f} ".format(total_loss / total_len, loss / math.log(2))) print('completed!') diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py index cdc9ab6938..2b24b91679 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py @@ -1,3 +1,18 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding: utf-8 import argparse import time import math @@ -32,7 +47,7 @@ parser.add_argument('--clamp_len', type=int, default=-1, parser.add_argument('--pre_data_save_path', type=str, default='./bin_data', help='location of the bin data') parser.add_argument('--pre_target_save_path', type=str, default='./bin_target', -help='location of the bin data') + help='location of the bin data') args = parser.parse_args() @@ -58,8 +73,8 @@ for idx, (data, target, seq_len) in enumerate(valid_iter): if idx < valid_iter.n_batch-1: data_seq = np.asarray(data, dtype=np.int64) target_seq = np.asarray(target, dtype=np.int64) - data_bin_file_path = os.path.join(args.pre_data_save_path, 'data_'+ str(idx) + ".bin") - target_bin_file_path = os.path.join(args.pre_target_save_path, 'data_'+ str(idx) + ".bin") + data_bin_file_path = os.path.join(args.pre_data_save_path, 'data_' + str(idx) + ".bin") + target_bin_file_path = os.path.join(args.pre_target_save_path, 'data_' + str(idx) + ".bin") data_seq.tofile(data_bin_file_path) target_seq.tofile(target_bin_file_path) f_info_file.write(str(idx) + ' ' + args.pre_data_save_path + '/data_' + str(idx) + ".bin" + '\n') -- Gitee From 2682121b046bc23ec017d2c4c8a554c6e158435f Mon Sep 17 00:00:00 2001 From: eason_hw <94364678@qq.com> Date: Tue, 12 Apr 2022 19:38:04 +0800 Subject: [PATCH 3/4] change readme --- .../contrib/nlp/Transformerxl_large/README.md | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md b/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md index cfc18258ff..ce37c2724c 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md @@ -6,6 +6,7 @@ pip3.7 install -r requirements.txt 2. 
获取,修改与安装开源模型代码 ``` git clone https://github.com/kimiyoung/transformer-xl.git # 下载原模型 +git checkout 44781ed21dbaec88b280f74d9ae2877f52b492a5 git clone -b TransformerXL_large https://gitee.com/eason-hw/ModelZoo-PyTorch.git # 下载开源仓代码 cp -r ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/. transformer-xl/pytorch/ # 开源仓代码拷入原模型代码中 cd transformer-xl/pytorch/ # 切换至工作目录 @@ -18,15 +19,11 @@ patch -p1 < sample.patch # 如有提示"File to ``` bash getdata.sh # enwik8数据集在data/enwik8下,包含处理好的train.txt,valid.txt和test.txt ``` -5.获取安装msame工具 -(如下为设置环境变量的示例,请将两个环境变量的/usr/local/Ascend/ascend-toolkit/latest替换为Ascend 的ACLlib安装包的实际安装路径。) +5.获取安装magiconnx工具 ``` -git clone https://gitee.com/ascend/tools.git -bash test/env.sh -export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest -export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub -cd tools/msame/ -./build.sh g++ tools/msame/out # 指定编译后的保存地址,最好给完整目录,没有权限的话需要chmod更改build.sh的权限为777 +git clone https://gitee.com/Ronnie_zheng/MagicONNX.git +cd MagicONNX && git checkout 8d62ae9dde478f35bece4b3d04eef573448411c9 +pip install . && cd ``` ## 离线推理 @@ -43,13 +40,13 @@ python3 transformerxl_large_preprocess.py --batch_size=1 --data=/data/enwik8 3.使用msame推理 ``` # 切换至编译后的目录,--model给出om文件完整目录,--input给出输入的bin文件目录,--output给出om推理保存位置 -cd out -./msame --model ${om文件路径} --input ${ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/bin_data,ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/bin_target} --output ${ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/tools/msame/out/} --outfmt TXT +cd tools/msame/out +./msame --model ${om文件路径} --input ${transformer-xl/pytorch/bin_data,transformer-xl/pytorch/bin_target} --output ${transformer-xl/pytorch/tools/msame/out/} --outfmt TXT ``` 4. 
数据集后处理 ``` # 切换回至工作目录 -cd ${ModelZoo-PyTorch/ACL_PyTorch/contrib/nlp/TransformerXL_large/} +cd ${transformer-xl/pytorch} bash test/eval_acc_perf.sh ${om_out_path} ${target_bin} # om_out_path路径,target_bin路径 ``` ## 评测结果: @@ -61,7 +58,7 @@ bash test/eval_acc_perf.sh ${om_out_path} ${target_bin} # om_out_path路径 | 8 | 41.83 | 51.9 | | 16 | 54.98 | 49.3 | -精度 +精度(bs=16) | A310 | T4 | | --------- | -------- | | bpc=5.37 | bpc=5.39 | -- Gitee From 149f306c58ef550cad24a9c0046ff4ea1ec54289 Mon Sep 17 00:00:00 2001 From: eason_hw <94364678@qq.com> Date: Thu, 14 Apr 2022 11:34:57 +0800 Subject: [PATCH 4/4] change requirements.txt --- .../contrib/nlp/Transformerxl_large/README.md | 4 ++-- .../contrib/nlp/Transformerxl_large/requirements.txt | 12 ++++++------ .../transformerxl_large_preprocess.py | 3 +-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md b/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md index ce37c2724c..bd6d1bae26 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/README.md @@ -13,7 +13,7 @@ cd transformer-xl/pytorch/ # 切换至工作目录 patch -p1 < sample.patch # 如有提示"File to patch:" 需输入对应patch的文件名 ``` 3.获取权重文件 -[model.pt]() +[model.pt](https://pan.baidu.com/s/18r4I6HC00HdMXqvPBuYJng) 提取码:so0i 4.获取数据集 ``` @@ -23,7 +23,7 @@ bash getdata.sh # enwik8数据集在data/enwik8下,包含处理好的train.tx ``` git clone https://gitee.com/Ronnie_zheng/MagicONNX.git cd MagicONNX && git checkout 8d62ae9dde478f35bece4b3d04eef573448411c9 -pip install . && cd +pip install . && cd .. 
``` ## 离线推理 diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt b/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt index 558579624a..05b4d4e1e6 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/requirements.txt @@ -1,6 +1,6 @@ -onnx -onnxruntime -onnx-simplifier -tqdm -argparse -numpy \ No newline at end of file +onnx==1.10.2 +onnxruntime==1.10.0 +onnx-simplifier==0.3.6 +tqdm==4.62.3 +numpy==1.21.5 +torch==1.9.0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py index 2b24b91679..85138521dc 100644 --- a/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py +++ b/ACL_PyTorch/contrib/nlp/Transformerxl_large/transformerxl_large_preprocess.py @@ -21,9 +21,8 @@ import sys import torch from data_utils import get_lm_corpus from utils.exp_utils import get_logger -import pdb import numpy as np -from tqdm import tqdm + parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model') parser.add_argument('--data', type=str, default='../data/enwik8', -- Gitee