diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9c8f3ea0871e0bfe81da0fa6e7c1d7d156dc380e --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f30dacb7fb64a7ac8bd257effeedc573943d691c --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md @@ -0,0 +1,180 @@ +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) +- [训练结果](#训练结果.md) +- [高级参考](#高级参考.md) +

基本信息

+ +**发布者(Publisher):Huawei** + +**应用领域(Application Domain):Natural Language Processing** + +**版本(Version):1.2** + +**修改时间(Modified) :2021.4.6** + +**框架(Framework):TensorFlow 1.15.0** + +**模型格式(Model Format):ckpt** + +**精度(Precision):Mixed** + +**处理器(Processor):昇腾910** + +**应用级别(Categories):Official** + +**描述(Description):基于TensorFlow框架实现Google提出的语言模型Transformer,对不同语言的文字进行翻译的训练代码** + +

概述

+ + Transformer是Google提出的语言模型,抛弃了传统的CNN和RNN,整个网络结构完全是由Attention机制组成。 +- 参考论文: + + https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + +- 参考实现: + + https://github.com/Kyubyong/transformer + + +## 默认配置 + +- 训练数据集预处理(以WMT 2014 English-German训练集为例,仅作为用户参考示例): + + - 文本输入格式:bpe + +- 测试数据集预处理(以WMT 2014 English-German验证集为例,仅作为用户参考示例) + + - 文本输入格式:bpe + + + +## 支持特性 + +| 特性列表 | 是否支持 | +|-------|------| +| 分布式训练 | 否 | +| 混合精度 | 是 | +| 并行数据 | 是 | + +## 混合精度训练 + +昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。 + +## 开启混合精度 + +脚本已默认开启混合精度,设置precision_mode参数的脚本参考如下。 + + ``` + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(str(args.precision_mode)) + ``` + +

训练环境准备

+ +1. 硬件环境准备请参见各硬件产品文档"[驱动和固件安装升级指南]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)"。需要在硬件设备上安装与CANN版本配套的固件与驱动。 +2. 宿主机上需要安装Docker并登录[Ascend Hub中心](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm)获取镜像。 + + 当前模型支持的镜像列表如[表1](#zh-cn_topic_0000001074498056_table1519011227314)所示。 + + **表 1** 镜像列表 + + + + + + + + + + + + +

镜像名称

+

镜像版本

+

配套CANN版本

+
+

20.2.0

+

20.2

+
+ + +

快速上手

+
+- 数据集准备
+1. 模型训练使用WMT 2014 English-German数据集,数据集请自行获取。
+
+## 模型训练
+
+- 单击“立即下载”,并选择合适的下载方式下载源码包。
+
+- 启动训练之前,首先要配置程序运行相关环境变量。
+
+  环境变量配置信息参见:
+
+     [Ascend 910训练平台环境变量设置](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+
+- 单卡训练
+
+  1. 配置训练参数。
+
+     首先在脚本test/train_full_1p.sh中,配置data_url、train_url参数,分别代表训练数据路径跟输出数据路径,请用户根据实际路径配置,或者在启动训练的命令行中以参数形式下发。
+
+     ```
+     data_path="../data"
+     ```
+
+  2. 启动训练。
+
+     启动单卡训练 (脚本为Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh)
+
+     ```
+     bash train_full_1p.sh --data_path=../data --output_path=../out
+     ```
+
+

训练结果

+ +- 精度结果比对 + +|精度指标项|GPU实测|NPU实测| +|---|---|---| +|loss|2.871|2.751| + + +

高级参考

+ +## 脚本和示例代码 + +``` +├── train.py //网络训练与测试代码 +├── README.md //代码说明文档 +├── data_load.py //数据处理代码 +├── hparams.py //参数解析代码 +├── model.py //模型定义代码 +├── modules.py //模型模块代码 +├── preproNew.py //文本数据转bpe代码 +├── utils.py //精度计算代码 +├── requirements.txt //训练python依赖列表 +├── test +│ ├──train_performance_1p.sh //单卡训练验证性能启动脚本 +│ ├──train_full_1p.sh //单卡全量训练启动脚本 + +``` + +## 脚本参数 + +``` +--data_path 数据集路径,默认:path/dataset +--output_path 训练过程中输出数据路径,默认:path/output +--batch_size 每个NPU的batch size,默认:128 +``` + +## 训练过程 + +1. 通过“模型训练”中的训练指令启动单卡卡训练。 + +2. 参考脚本的模型存储路径为./output/log。 + + diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py new file mode 100644 index 0000000000000000000000000000000000000000..2784e15947b24641180ef216de8748320ba1e0a8 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py @@ -0,0 +1,181 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -*- coding: utf-8 -*-
#/usr/bin/python3
'''
Feb. 2019 by kyubyong park.
kbpark.linguist@gmail.com.
https://www.github.com/kyubyong/transformer

Note.
if safe, entities on the source side have the prefix 1, and the target side 2, for convenience.
For example, fpath1, fpath2 means source file path and target file path, respectively.
'''
import tensorflow as tf
from utils import calc_num_batches


def load_vocab(vocab_fpath):
    '''Loads vocabulary file and returns idx<->token maps.

    vocab_fpath: string. vocabulary file path. One token per line; only the
        first whitespace-separated field of each line is used.
    Note that the first four entries of the vocabulary file are reserved
    special tokens (indices 0..3).

    Returns
        two dictionaries: token->index and index->token.
    '''
    # FIX: the original did `open(vocab_fpath, 'r').read()`, leaking the file
    # handle; use a context manager and decode the BPE vocabulary as UTF-8
    # explicitly instead of relying on the locale-dependent default encoding.
    with open(vocab_fpath, 'r', encoding='utf-8') as f:
        vocab = [line.split()[0] for line in f.read().splitlines()]
    token2idx = {token: idx for idx, token in enumerate(vocab)}
    idx2token = {idx: token for idx, token in enumerate(vocab)}
    return token2idx, idx2token

def load_data(fpath1, fpath2, maxlen1, maxlen2):
    '''Loads source and target data and filters out too lengthy samples.
    fpath1: source file path. string.
    fpath2: target file path. string.
    maxlen1: source sent maximum length. scalar.
    maxlen2: target sent maximum length. scalar.

    Returns
        sents1: list of source sents
        sents2: list of target sents
    '''
    sents1, sents2 = [], []
    # FIX: explicit UTF-8 decoding (data files are UTF-8 BPE text).
    with open(fpath1, 'r', encoding='utf-8') as f1, open(fpath2, 'r', encoding='utf-8') as f2:
        for sent1, sent2 in zip(f1, f2):
            # +1 accounts for the end-of-sentence token appended in encode().
            if len(sent1.split()) + 1 > maxlen1: continue
            if len(sent2.split()) + 1 > maxlen2: continue
            sents1.append(sent1.strip())
            sents2.append(sent2.strip())
    return sents1, sents2


def encode(inp, type, dict):
    '''Converts string to number. Used for `generator_fn`.
    inp: 1d byte array (a UTF-8 encoded sentence).
    type: "x" (source side) or "y" (target side)
    dict: token2idx dictionary

    NOTE: the parameter names `type` and `dict` shadow Python builtins; they
    are kept unchanged so existing callers (positional or keyword) still work.
    NOTE(review): the special-token string literals below are empty strings in
    this copy of the file; upstream uses "</s>", "<s>" and "<unk>" — they look
    garbled in transcription, confirm against the original repository.

    Returns
        list of token ids.
    '''
    inp_str = inp.decode("utf-8")
    # Source side gets a trailing end token; target side is wrapped in
    # begin/end tokens.
    if type=="x": tokens = inp_str.split() + [""]
    else: tokens = [""] + inp_str.split() + [""]

    # Unknown tokens fall back to the reserved unknown-token id.
    x = [dict.get(t, dict[""]) for t in tokens]
    return x

def generator_fn(sents1, sents2, vocab_fpath):
    '''Generates training / evaluation data.
    sents1: list of source sents
    sents2: list of target sents
    vocab_fpath: string. vocabulary file path.

    yields
        xs: tuple of
            x: list of source token ids in a sent
            x_seqlen: int. sequence length of x
            sent1: str. raw source (=input) sentence
        labels: tuple of
            decoder_input: list of encoded decoder inputs
            y: list of target token ids in a sent
            y_seqlen: int. sequence length of y
            sent2: str. target sentence
    '''
    token2idx, _ = load_vocab(vocab_fpath)
    for sent1, sent2 in zip(sents1, sents2):
        x = encode(sent1, "x", token2idx)
        y = encode(sent2, "y", token2idx)
        # Teacher forcing: decoder input is y shifted right, label is y
        # shifted left.
        decoder_input, y = y[:-1], y[1:]

        x_seqlen, y_seqlen = len(x), len(y)
        yield (x, x_seqlen, sent1), (decoder_input, y, y_seqlen, sent2)

def input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=False):
    '''Batchify data.
    sents1: list of source sents
    sents2: list of target sents
    vocab_fpath: string. vocabulary file path.
    batch_size: scalar
    shuffle: boolean

    Returns
        xs: tuple of
            x: int32 tensor. (N, T1)
            x_seqlens: int32 tensor. (N,)
            sents1: str tensor. (N,)
        ys: tuple of
            decoder_input: int32 tensor. (N, T2)
            y: int32 tensor. (N, T2)
            y_seqlen: int32 tensor. (N, )
            sents2: str tensor. (N,)
    '''
    shapes = (([None], (), ()),
              ([None], [None], (), ()))
    types = ((tf.int32, tf.int32, tf.string),
             (tf.int32, tf.int32, tf.int32, tf.string))
    paddings = ((0, 0, ''),
                (0, 0, 0, ''))

    dataset = tf.data.Dataset.from_generator(
        generator_fn,
        output_shapes=shapes,
        output_types=types,
        args=(sents1, sents2, vocab_fpath))  # <- arguments for generator_fn. converted to np string arrays

    if shuffle:  # for training
        dataset = dataset.shuffle(128*batch_size)

    dataset = dataset.repeat()  # iterate forever
    # dataset = dataset.padded_batch(batch_size, shapes, paddings).prefetch(1)
    # Pad every batch to a fixed length of 100 so all graph shapes are static
    # (needed by the NPU port). NOTE(review): 100 must match the --maxlen1 /
    # --maxlen2 defaults in hparams.py — confirm if those are ever changed.
    shapes = (([100], (), ()),
              ([100], [100], (), ()))
    dataset = dataset.padded_batch(batch_size, shapes, paddings, drop_remainder=True)

    return dataset

def get_batch(fpath1, fpath2, maxlen1, maxlen2, vocab_fpath, batch_size, shuffle=False):
    '''Gets training / evaluation mini-batches.
    fpath1: source file path. string.
    fpath2: target file path. string.
    maxlen1: source sent maximum length. scalar.
    maxlen2: target sent maximum length. scalar.
    vocab_fpath: string. vocabulary file path.
    batch_size: scalar
    shuffle: boolean

    Returns
        batches: a tf.data.Dataset of padded mini-batches
        num_batches: number of mini-batches per epoch
        num_samples: number of retained sentence pairs
    '''
    sents1, sents2 = load_data(fpath1, fpath2, maxlen1, maxlen2)
    batches = input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=shuffle)
    num_batches = calc_num_batches(len(sents1), batch_size)
    return batches, num_batches, len(sents1)
# ============================================================================
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os


# Prefix prepended to every relative data/log path; empty by default.
ABSPATH = ''


class Hparams:
    """Command-line hyper-parameter definitions for the Transformer model.

    The parser is built once at class-definition time; use
    ``Hparams.parser.parse_args()`` (or ``parse_known_args()``) to obtain
    the populated namespace.
    """
    parser = argparse.ArgumentParser()
    _arg = parser.add_argument

    # ModelArts-style I/O locations.
    _arg("--data_url", type=str, default="./dataset")
    _arg("--train_url", type=str, default="./output")
    _arg('--num_gpus', default='')

    # Preprocessing.
    _arg('--vocab_size', default=37000, type=int)

    # Training corpora (BPE-segmented IWSLT 2016 de-en).
    _arg('--train1', default=ABSPATH + 'iwslt2016/segmented/train.de.bpe',
         help="german training segmented data")
    _arg('--train2', default=ABSPATH + 'iwslt2016/segmented/train.en.bpe',
         help="english training segmented data")
    _arg('--eval1', default=ABSPATH + 'iwslt2016/segmented/eval.de.bpe',
         help="german evaluation segmented data")
    _arg('--eval2', default=ABSPATH + 'iwslt2016/segmented/eval.en.bpe',
         help="english evaluation segmented data")
    _arg('--eval3', default=ABSPATH + 'iwslt2016/prepro/eval.en',
         help="english evaluation unsegmented data")

    # Vocabulary.
    _arg('--vocab', default=ABSPATH + 'iwslt2016/segmented/bpe.vocab',
         help="vocabulary file path")

    # Training scheme.
    _arg('--batch_size', default=128, type=int)
    _arg('--eval_batch_size', default=128, type=int)
    _arg('--lr', default=0.0003, type=float, help="learning rate")
    _arg('--warmup_steps', default=4000, type=int)
    _arg('--logdir', default=ABSPATH + "log/1", help="log directory")
    _arg('--num_epochs', default=1, type=int)  # 20 for a full run
    _arg('--evaldir', default=ABSPATH + "eval/1", help="evaluation dir")

    # Model architecture (defaults follow the base Transformer).
    _arg('--d_model', default=512, type=int,
         help="hidden dimension of encoder/decoder")
    _arg('--d_ff', default=2048, type=int,
         help="hidden dimension of feedforward layer")
    _arg('--num_blocks', default=6, type=int,
         help="number of encoder/decoder blocks")
    _arg('--num_heads', default=8, type=int,
         help="number of attention heads")
    _arg('--maxlen1', default=100, type=int,
         help="maximum length of a source sequence")
    _arg('--maxlen2', default=100, type=int,
         help="maximum length of a target sequence")
    _arg('--dropout_rate', default=0.1, type=float)  # paper value is 0.1 (0.3 was also tried)
    _arg('--smoothing', default=0.1, type=float,
         help="label smoothing rate")

    # Testing.
    _arg('--test1', default=ABSPATH + 'iwslt2016/segmented/test.de.bpe',
         help="german test segmented data")
    _arg('--test2', default=ABSPATH + 'iwslt2016/prepro/test.en',
         help="english test data")
    _arg('--ckpt', default=ABSPATH + "log/1",
         help="checkpoint file path")
    _arg('--test_batch_size', default=128, type=int)
    _arg('--testdir', default=ABSPATH + "test/1", help="test result dir")

    del _arg
b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py @@ -0,0 +1,250 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +# /usr/bin/python3 +''' +Feb. 2019 by kyubyong park. +kbpark.linguist@gmail.com. 
+https://www.github.com/kyubyong/transformer + +Transformer network +''' +import tensorflow as tf + +from data_load import load_vocab +from modules import get_token_embeddings, ff, positional_encoding, multihead_attention, label_smoothing, noam_scheme +from utils import convert_idx_to_token_tensor +from tqdm import tqdm +import logging +from npu_bridge.estimator.npu import npu_convert_dropout +from npu_bridge.npu_init import * + + +logging.basicConfig(level=logging.INFO) + +class Transformer: + ''' + xs: tuple of + x: int32 tensor. (N, T1) + x_seqlens: int32 tensor. (N,) + sents1: str tensor. (N,) + ys: tuple of + decoder_input: int32 tensor. (N, T2) + y: int32 tensor. (N, T2) + y_seqlen: int32 tensor. (N, ) + sents2: str tensor. (N,) + training: boolean. + ''' + def __init__(self, hp): + self.hp = hp + self.token2idx, self.idx2token = load_vocab(hp.data_url+hp.vocab) + self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model, zero_pad=True) + + def encode(self, xs, training=True): + ''' + Returns + memory: encoder outputs. 
(N, T1, d_model) + ''' + with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE): + x, seqlens, sents1 = xs + # src_masks + src_masks = tf.math.equal(x, 0) # (N, T1) + # embedding + enc = tf.nn.embedding_lookup(self.embeddings, x) # (N, T1, d_model) + enc *= self.hp.d_model**0.5 # scale + + enc += positional_encoding(enc, self.hp.maxlen1) + #enc = tf.layers.dropout(enc, self.hp.dropout_rate, training=training) + #enc = npu_convert_dropout(enc, self.hp.dropout_rate, training=training) + + ## Blocks + for i in range(self.hp.num_blocks): + with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE): + # self-attention + enc = multihead_attention(queries=enc, + keys=enc, + values=enc, + key_masks=src_masks, + num_heads=self.hp.num_heads, + dropout_rate=self.hp.dropout_rate, + training=training, + causality=False) + # feed forward + enc = ff(enc, num_units=[self.hp.d_ff, self.hp.d_model]) + memory = enc + return memory, sents1, src_masks + + def decode(self, ys, memory, src_masks, training=True): + ''' + memory: encoder outputs. (N, T1, d_model) + src_masks: (N, T1) + + Returns + logits: (N, T2, V). float32. + y_hat: (N, T2). int32 + y: (N, T2). int32 + sents2: (N,). string. 
+ ''' + with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE): + decoder_inputs, y, seqlens, sents2 = ys + + # tgt_masks + tgt_masks = tf.math.equal(decoder_inputs, 0) # (N, T2) + + # embedding + dec = tf.nn.embedding_lookup(self.embeddings, decoder_inputs) # (N, T2, d_model) + dec *= self.hp.d_model ** 0.5 # scale + + dec += positional_encoding(dec, self.hp.maxlen2) + #dec = tf.layers.dropout(dec, self.hp.dropout_rate, training=training) + #dec = npu_convert_dropout(dec, self.hp.dropout_rate, training=training) + + # Blocks + for i in range(self.hp.num_blocks): + with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE): + # Masked self-attention (Note that causality is True at this time) + dec = multihead_attention(queries=dec, + keys=dec, + values=dec, + key_masks=tgt_masks, + num_heads=self.hp.num_heads, + dropout_rate=self.hp.dropout_rate, + training=training, + causality=True, + scope="self_attention") + + # Vanilla attention + dec = multihead_attention(queries=dec, + keys=memory, + values=memory, + key_masks=src_masks, + num_heads=self.hp.num_heads, + dropout_rate=self.hp.dropout_rate, + training=training, + causality=False, + scope="vanilla_attention") + ### Feed Forward + dec = ff(dec, num_units=[self.hp.d_ff, self.hp.d_model]) + + # Final linear projection (embedding weights are shared) + weights = tf.transpose(self.embeddings) # (d_model, vocab_size) + logits = tf.einsum('ntd,dk->ntk', dec, weights) # (N, T2, vocab_size) + y_hat = tf.to_int32(tf.argmax(logits, axis=-1)) + + return logits, y_hat, y, sents2 + + def train(self, xs, ys): + ''' + Returns + loss: scalar. + train_op: training operation + global_step: scalar. 
+ summaries: training summary node + ''' + # forward + memory, sents1, src_masks = self.encode(xs) + logits, preds, y, sents2 = self.decode(ys, memory, src_masks) + + # train scheme + y_ = label_smoothing(tf.one_hot(y, depth=self.hp.vocab_size)) + ce = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_) + nonpadding = tf.to_float(tf.not_equal(y, self.token2idx[""])) # 0: + loss = tf.reduce_sum(ce * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7) + + global_step = tf.train.get_or_create_global_step() + lr = noam_scheme(self.hp.lr, global_step, self.hp.warmup_steps) + optimizer = tf.train.AdamOptimizer(lr) + ##开启lossscale需要关闭 + #train_op = optimizer.minimize(loss, global_step=global_step) + + #lossscale + loss_scale_opt = optimizer + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, decr_ratio=0.5) + train_op = NPULossScaleOptimizer(loss_scale_opt, loss_scale_manager) + # + train_op= train_op.minimize(loss, global_step=global_step) + + + # self.refine_optim = tf.train.AdamOptimizer(learning_rate=self.refine_lr).minimize(self.rec_loss, + # var_list=refine_var) + # #### + # self.refine_optim = tf.train.AdamOptimizer(learning_rate=self.refine_lr) + # + # loss_scale_opt = self.refine_optim + # loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000, + # decr_every_n_nan_or_inf=2, decr_ratio=0.5) + # self.refine_optim = NPULossScaleOptimizer(loss_scale_opt, loss_scale_manager) + # + # self.refine_optim = self.refine_optim.minimize(self.rec_loss, var_list=refine_var) + + # tf.summary.scalar('lr', lr) + # tf.summary.scalar("loss", loss) + # tf.summary.scalar("global_step", global_step) + # + # summaries = tf.summary.merge_all() + + return loss, train_op, global_step + + #return loss, train_op, global_step + + def eval(self, xs, ys): + '''Predicts autoregressively + At inference, input ys is ignored. 
+        Returns
+        y_hat: (N, T2)
+        '''
+        decoder_inputs, y, y_seqlen, sents2 = ys
+
+        decoder_inputs = tf.ones((tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<s>"]
+        ys = (decoder_inputs, y, y_seqlen, sents2)
+
+        memory, sents1, src_masks = self.encode(xs, False)
+
+        logging.info("Inference graph is being built. Please be patient.")
+        for _ in tqdm(range(self.hp.maxlen2)):
+            logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
+            if tf.reduce_sum(y_hat, 1) == self.token2idx["<pad>"]: break
+
+            _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
+            ys = (_decoder_inputs, y, y_seqlen, sents2)
+
+        # monitor a random sample
+        #n = tf.random_uniform((), 0, tf.shape(y_hat)[0]-1, tf.int32)
+        # sent1 = sents1[n]
+        # pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
+        # sent2 = sents2[n]
+
+        # tf.summary.text("sent1", sent1)
+        # tf.summary.text("pred", pred)
+        # tf.summary.text("sent2", sent2)
+        # summaries = tf.summary.merge_all()
+
+        return y_hat
+
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py
new file mode 100644
index 0000000000000000000000000000000000000000..1245d57140da14a640cd6dd7d9d43100b8617b67
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import sys + +# 解析输入参数data_url +parser = argparse.ArgumentParser() +parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0") +parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/") +config = parser.parse_args() + +print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0])) +code_dir = sys.path[0] +os.chdir(code_dir) +print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd())) + +print("[CANN-Modelzoo] before train - list my run files:") +os.system("ls -al /usr/local/Ascend/ascend-toolkit/") + +print("[CANN-Modelzoo] before train - list my dataset files:") +os.system("ls -al %s" % config.data_url) + +print("[CANN-Modelzoo] start run train shell") +# 设置sh文件格式为linux可执行 +os.system("dos2unix ./test/*") + +# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定 +# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS +os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url)) + +print("[CANN-Modelzoo] finish run train shell") + +# 将当前执行目录所有文件拷贝到obs的output进行备份 +print("[CANN-Modelzoo] after train - list my output files:") +os.system("cp -r %s %s " % (code_dir, config.train_url)) +os.system("ls -al %s" % config.train_url) 
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py new file mode 100644 index 0000000000000000000000000000000000000000..e2d23455d4cdec2d46fc273177a247905c751b73 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py @@ -0,0 +1,63 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import sys + +# 解析输入参数data_url +parser = argparse.ArgumentParser() +parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0") +parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/") +config = parser.parse_args() + +print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0])) +code_dir = sys.path[0] +os.chdir(code_dir) +print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd())) + +print("[CANN-Modelzoo] before train - list my run files:") +os.system("ls -al /usr/local/Ascend/ascend-toolkit/") + +print("[CANN-Modelzoo] before train - list my dataset files:") +os.system("ls -al %s" % config.data_url) + +print("[CANN-Modelzoo] start run train shell") +# 设置sh文件格式为linux可执行 +os.system("dos2unix ./test/*") + +# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定 +# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS +os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url)) + +print("[CANN-Modelzoo] finish run train shell") + +# 将当前执行目录所有文件拷贝到obs的output进行备份 +print("[CANN-Modelzoo] after train - list my output files:") +os.system("cp -r %s %s " % (code_dir, config.train_url)) +os.system("ls -al %s" % config.train_url) diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..55a9add9fa74832ca908108d73946cd76281a9cd --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt @@ -0,0 +1,3 @@ +FuncStatus:OK +PerfStatus:OK +PrecisionStatus:POK \ No newline at end of file diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py new file mode 100644 index 
0000000000000000000000000000000000000000..77f4290aefe9e7e465b1302cd302f4998354fcc3 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py @@ -0,0 +1,337 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +#/usr/bin/python3 +''' +Feb. 2019 by kyubyong park. +kbpark.linguist@gmail.com. +https://www.github.com/kyubyong/transformer. + +Building blocks for Transformer +''' + +import numpy as np +import tensorflow as tf +from npu_bridge.estimator.npu import npu_convert_dropout + +def ln(inputs, epsilon = 1e-8, scope="ln"): + '''Applies layer normalization. See https://arxiv.org/abs/1607.06450. + inputs: A tensor with 2 or more dimensions, where the first dimension has `batch_size`. 
+ epsilon: A floating number. A very small number for preventing ZeroDivision Error. + scope: Optional scope for `variable_scope`. + + Returns: + A tensor with the same shape and data dtype as `inputs`. + ''' + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): + inputs_shape = inputs.get_shape() + params_shape = inputs_shape[-1:] + + mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True) + beta= tf.get_variable("beta", params_shape, initializer=tf.zeros_initializer()) + gamma = tf.get_variable("gamma", params_shape, initializer=tf.ones_initializer()) + normalized = (inputs - mean) / ( (variance + epsilon) ** (.5) ) + outputs = gamma * normalized + beta + + return outputs + +def get_token_embeddings(vocab_size, num_units, zero_pad=True): + '''Constructs token embedding matrix. + Note that the column of index 0's are set to zeros. + vocab_size: scalar. V. + num_units: embedding dimensionalty. E. + zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero + To apply query/key masks easily, zero pad is turned on. + + Returns + weight variable: (V, E) + ''' + with tf.variable_scope("shared_weight_matrix"): + embeddings = tf.get_variable('weight_mat', + dtype=tf.float32, + shape=(vocab_size, num_units), + initializer=tf.contrib.layers.xavier_initializer()) + if zero_pad: + embeddings = tf.concat((tf.zeros(shape=[1, num_units]), + embeddings[1:, :]), 0) + return embeddings + +def scaled_dot_product_attention(Q, K, V, key_masks, + causality=False, dropout_rate=0., + training=True, + scope="scaled_dot_product_attention"): + '''See 3.2.1. + Q: Packed queries. 3d tensor. [N, T_q, d_k]. + K: Packed keys. 3d tensor. [N, T_k, d_k]. + V: Packed values. 3d tensor. [N, T_k, d_v]. + key_masks: A 2d tensor with shape of [N, key_seqlen] + causality: If True, applies masking for future blinding + dropout_rate: A floating point number of [0, 1]. + training: boolean for controlling droput + scope: Optional scope for `variable_scope`. 
+ ''' + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): + d_k = Q.get_shape().as_list()[-1] + + # dot product + outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1])) # (N, T_q, T_k) + + # scale + outputs /= d_k ** 0.5 + + # key masking + outputs = mask(outputs, key_masks=key_masks, type="key") + + # causality or future blinding masking + if causality: + outputs = mask(outputs, type="future") + + # softmax + outputs = tf.nn.softmax(outputs) + attention = tf.transpose(outputs, [0, 2, 1]) + # tf.summary.image("attention", tf.expand_dims(attention[:1], -1)) + + # # query masking + # outputs = mask(outputs, Q, K, type="query") + + # dropout + #outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training) + #outputs = npu_convert_dropout(outputs, rate=dropout_rate, training=training) + #outputs = npu_convert_dropout(outputs, rate=dropout_rate) + + # weighted sum (context vectors) + outputs = tf.matmul(outputs, V) # (N, T_q, d_v) + + return outputs + + +def mask(inputs, key_masks=None, type=None): + """Masks paddings on keys or queries to inputs + inputs: 3d tensor. (h*N, T_q, T_k) + key_masks: 3d tensor. (N, 1, T_k) + type: string. 
"key" | "future" + + e.g., + >> inputs = tf.zeros([2, 2, 3], dtype=tf.float32) + >> key_masks = tf.constant([[0., 0., 1.], + [0., 1., 1.]]) + >> mask(inputs, key_masks=key_masks, type="key") + array([[[ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09], + [ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09]], + + [[ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09], + [ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09]], + + [[ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09], + [ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09]], + + [[ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09], + [ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09]]], dtype=float32) + """ + padding_num = -2 ** 32 + 1 + if type in ("k", "key", "keys"): + key_masks = tf.to_float(key_masks) + key_masks = tf.tile(key_masks, [tf.shape(inputs)[0] // tf.shape(key_masks)[0], 1]) # (h*N, seqlen) + key_masks = tf.expand_dims(key_masks, 1) # (h*N, 1, seqlen) + outputs = inputs + key_masks * padding_num + # elif type in ("q", "query", "queries"): + # # Generate masks + # masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1)) # (N, T_q) + # masks = tf.expand_dims(masks, -1) # (N, T_q, 1) + # masks = tf.tile(masks, [1, 1, tf.shape(keys)[1]]) # (N, T_q, T_k) + # + # # Apply masks to inputs + # outputs = inputs*masks + elif type in ("f", "future", "right"): + diag_vals = tf.ones_like(inputs[0, :, :]) # (T_q, T_k) + tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() # (T_q, T_k) + future_masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(inputs)[0], 1, 1]) # (N, T_q, T_k) + + paddings = tf.ones_like(future_masks) * padding_num + outputs = tf.where(tf.equal(future_masks, 0), paddings, inputs) + else: + print("Check if you entered type correctly!") + + return outputs + + +def multihead_attention(queries, keys, values, key_masks, + num_heads=8, + dropout_rate=0, + training=True, + causality=False, + scope="multihead_attention"): + '''Applies multihead attention. 
See 3.2.2 + queries: A 3d tensor with shape of [N, T_q, d_model]. + keys: A 3d tensor with shape of [N, T_k, d_model]. + values: A 3d tensor with shape of [N, T_k, d_model]. + key_masks: A 2d tensor with shape of [N, key_seqlen] + num_heads: An int. Number of heads. + dropout_rate: A floating point number. + training: Boolean. Controller of mechanism for dropout. + causality: Boolean. If true, units that reference the future are masked. + scope: Optional scope for `variable_scope`. + + Returns + A 3d tensor with shape of (N, T_q, C) + ''' + d_model = queries.get_shape().as_list()[-1] + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): + # Linear projections + Q = tf.layers.dense(queries, d_model, use_bias=True) # (N, T_q, d_model) + K = tf.layers.dense(keys, d_model, use_bias=True) # (N, T_k, d_model) + V = tf.layers.dense(values, d_model, use_bias=True) # (N, T_k, d_model) + + # Split and concat + Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, d_model/h) + K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h) + V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h) + + # Attention + outputs = scaled_dot_product_attention(Q_, K_, V_, key_masks, causality, dropout_rate, training) + + # Restore shape + outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2 ) # (N, T_q, d_model) + + # Residual connection + outputs += queries + + # Normalize + outputs = ln(outputs) + + return outputs + +def ff(inputs, num_units, scope="positionwise_feedforward"): + '''position-wise feed forward net. See 3.3 + + inputs: A 3d tensor with shape of [N, T, C]. + num_units: A list of two integers. + scope: Optional scope for `variable_scope`. 
+ + Returns: + A 3d tensor with the same shape and dtype as inputs + ''' + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): + # Inner layer + outputs = tf.layers.dense(inputs, num_units[0], activation=tf.nn.relu) + + # Outer layer + outputs = tf.layers.dense(outputs, num_units[1]) + + # Residual connection + outputs += inputs + + # Normalize + outputs = ln(outputs) + + return outputs + +def label_smoothing(inputs, epsilon=0.1): + '''Applies label smoothing. See 5.4 and https://arxiv.org/abs/1512.00567. + inputs: 3d tensor. [N, T, V], where V is the number of vocabulary. + epsilon: Smoothing rate. + + For example, + + ``` + import tensorflow as tf + inputs = tf.convert_to_tensor([[[0, 0, 1], + [0, 1, 0], + [1, 0, 0]], + + [[1, 0, 0], + [1, 0, 0], + [0, 1, 0]]], tf.float32) + + outputs = label_smoothing(inputs) + + with tf.Session() as sess: + print(sess.run([outputs])) + + >> + [array([[[ 0.03333334, 0.03333334, 0.93333334], + [ 0.03333334, 0.93333334, 0.03333334], + [ 0.93333334, 0.03333334, 0.03333334]], + + [[ 0.93333334, 0.03333334, 0.03333334], + [ 0.93333334, 0.03333334, 0.03333334], + [ 0.03333334, 0.93333334, 0.03333334]]], dtype=float32)] + ``` + ''' + V = inputs.get_shape().as_list()[-1] # number of channels + return ((1-epsilon) * inputs) + (epsilon / V) + +def positional_encoding(inputs, + maxlen, + masking=True, + scope="positional_encoding"): + '''Sinusoidal Positional_Encoding. See 3.5 + inputs: 3d tensor. (N, T, E) + maxlen: scalar. Must be >= T + masking: Boolean. If True, padding positions are set to zeros. + scope: Optional scope for `variable_scope`. + + returns + 3d tensor that has the same shape as inputs. 
+ ''' + + E = inputs.get_shape().as_list()[-1] # static + N, T = tf.shape(inputs)[0], tf.shape(inputs)[1] # dynamic + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): + # position indices + position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1]) # (N, T) + + # First part of the PE function: sin and cos argument + position_enc = np.array([ + [pos / np.power(10000, (i-i%2)/E) for i in range(E)] + for pos in range(maxlen)]) + + # Second part, apply the cosine to even columns and sin to odds. + position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i + position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1 + position_enc = tf.convert_to_tensor(position_enc, tf.float32) # (maxlen, E) + + # lookup + outputs = tf.nn.embedding_lookup(position_enc, position_ind) + + # masks + if masking: + outputs = tf.where(tf.equal(inputs, 0), inputs, outputs) + + return tf.to_float(outputs) + +def noam_scheme(init_lr, global_step, warmup_steps=4000.): + '''Noam scheme learning rate decay + init_lr: initial learning rate. scalar. + global_step: scalar. + warmup_steps: scalar. During warmup_steps, learning rate increases + until it reaches init_lr. + ''' + step = tf.cast(global_step + 1, dtype=tf.float32) + return init_lr * warmup_steps ** 0.5 * tf.minimum(step * warmup_steps ** -1.5, step ** -0.5) \ No newline at end of file diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl new file mode 100644 index 0000000000000000000000000000000000000000..809879142776e1df62d689e7675648101d2f9a91 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl @@ -0,0 +1,165 @@ +#!/usr/bin/perl -w + +use strict; + +if (!scalar(@ARGV)) { + print STDERR "Syntax: multi-bleu.perl [-length_analysis bucket] [ref-stem] < [system-output] +If one reference translation: ref-stem is filename +If multiple reference translations: ref-stem[0,1,2,...] 
is filename\n";
+}
+
+my $length_analysis;
+if ($ARGV[0] eq '-length_analysis') {
+    shift @ARGV;
+    $length_analysis = shift @ARGV;
+}
+
+my @CORRECT_BUCKET;
+my @TOTAL_BUCKET;
+my @COUNT_LENGTH;
+my $max_bucket=0;
+
+my $stem = $ARGV[0];
+my @REF;
+my $ref=0;
+while(-e "$stem$ref") {
+    &add_to_ref("$stem$ref",\@REF);
+    $ref++;
+}
+&add_to_ref($stem,\@REF) if -e $stem;
+die("did not find any reference translations at $stem") unless scalar @REF;
+
+sub add_to_ref {
+    my ($file,$REF) = @_;
+    my $s=0;
+    open(REF,$file);
+    while(<REF>) {
+        chop;
+        push @{$$REF[$s++]}, $_;
+    }
+    close(REF);
+}
+
+my(@CORRECT,@TOTAL,$length_translation,$length_reference);
+my $s=0;
+while(<STDIN>) {
+    chop;
+    my @WORD = split;
+    my %REF_NGRAM = ();
+    my $length_translation_this_sentence = scalar(@WORD);
+    my ($closest_diff,$closest_length) = (9999,9999);
+    my $bucket;
+    foreach my $reference (@{$REF[$s]}) {
+#      print "$s $_ <=> $reference\n";
+        my @WORD = split(/ /,$reference);
+        my $length = scalar(@WORD);
+        if ($length_analysis) {
+            $bucket = int($length/$length_analysis);
+            $max_bucket=$bucket if ($bucket>$max_bucket);
+        }
+        if (abs($length_translation_this_sentence-$length) < $closest_diff) {
+            $closest_diff = abs($length_translation_this_sentence-$length);
+            $closest_length = $length;
+#            print "$i: closest diff = abs($length_translation_this_sentence-$length)
\n"; + } + for(my $n=1;$n<=4;$n++) { + my %REF_NGRAM_N = (); + for(my $start=0;$start<=$#WORD-($n-1);$start++) { + my $ngram = "$n"; + for(my $w=0;$w<$n;$w++) { + $ngram .= " ".$WORD[$start+$w]; + } + $REF_NGRAM_N{$ngram}++; + } + foreach my $ngram (keys %REF_NGRAM_N) { + if (!defined($REF_NGRAM{$ngram}) || + $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) { + $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram}; +# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}
\n"; + } + } + } + } + if ($bucket) { + $COUNT_LENGTH[$bucket]++; + } + $length_translation += $length_translation_this_sentence; + $length_reference += $closest_length; + for(my $n=1;$n<=4;$n++) { + my %T_NGRAM = (); + for(my $start=0;$start<=$#WORD-($n-1);$start++) { + my $ngram = "$n"; + for(my $w=0;$w<$n;$w++) { + $ngram .= " ".$WORD[$start+$w]; + } + $T_NGRAM{$ngram}++; + } + foreach my $ngram (keys %T_NGRAM) { + $ngram =~ /^(\d+) /; + my $n = $1; +# print "$i e $ngram $T_NGRAM{$ngram}
\n"; + $TOTAL[$n] += $T_NGRAM{$ngram}; + if ($bucket) { + $TOTAL_BUCKET[$bucket][$n] += $T_NGRAM{$ngram}; + } + if (defined($REF_NGRAM{$ngram})) { + if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) { + if ($bucket) { + $CORRECT_BUCKET[$bucket][$n] += $T_NGRAM{$ngram}; + } + $CORRECT[$n] += $T_NGRAM{$ngram}; +# print "$i e correct1 $T_NGRAM{$ngram}
\n"; + } + else { + if ($bucket) { + $CORRECT_BUCKET[$bucket][$n] += $REF_NGRAM{$ngram}; + } + $CORRECT[$n] += $REF_NGRAM{$ngram}; +# print "$i e correct2 $REF_NGRAM{$ngram}
\n"; + } + } + } + } + $s++; +} +my $brevity_penalty = 1; +if ($length_translation<$length_reference) { + $brevity_penalty = exp(1-$length_reference/$length_translation); +} +my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) + + my_log( $CORRECT[2]/$TOTAL[2] ) + + my_log( $CORRECT[3]/$TOTAL[3] ) + + my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4); + +printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f)\n", + 100*$bleu, + 100*$CORRECT[1]/$TOTAL[1], + 100*$CORRECT[2]/$TOTAL[2], + 100*$CORRECT[3]/$TOTAL[3], + 100*$CORRECT[4]/$TOTAL[4], + $brevity_penalty, + $length_translation / $length_reference; + +if ($length_analysis) { + print "\nLENGTH ANALYSIS:\n"; + for(my $b=int(1/$length_analysis); $b<=$max_bucket; $b++) { + my $range=$b; + if ($length_analysis != 1) { + $range=($b*$length_analysis+1)."-".(($b+1)*$length_analysis); + } + print "$range";; + if ($TOTAL_BUCKET[$b] && $TOTAL_BUCKET[$b][4] && $CORRECT_BUCKET[$b][4]) { + printf "\t%d\t%.2f", $COUNT_LENGTH[$b], + 100*$brevity_penalty * exp((my_log( $CORRECT_BUCKET[$b][1]/$TOTAL_BUCKET[$b][1] ) + + my_log( $CORRECT_BUCKET[$b][2]/$TOTAL_BUCKET[$b][2] ) + + my_log( $CORRECT_BUCKET[$b][3]/$TOTAL_BUCKET[$b][3] ) + + my_log( $CORRECT_BUCKET[$b][4]/$TOTAL_BUCKET[$b][4] ) ) / 4); + } + print "\n"; + } +} + +sub my_log { + return -9999999999 unless $_[0]; + return log($_[0]); +} diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..1cbd6f15c3c2e58345e5ad28ecd8fff3672545d6 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +code_dir=$1 +work_dir=$2 +dataset_path=$3 +output_path=$4 + +#############训练前输入目录文件确认######################### +echo "[CANN-ZhongZhi] before train - list my run files[/usr/local/Ascend/ascend-toolkit]:" +ls -al /usr/local/Ascend/ascend-toolkit 
+echo "" + +echo "[CANN-ZhongZhi] before train - list my code files[${code_dir}]:" +ls -al ${code_dir} +echo "" + +echo "[CANN-ZhongZhi] before train - list my work files[${work_dir}]:" +ls -al ${work_dir} +echo "" + +echo "[CANN-ZhongZhi] before train - list my dataset files[${dataset_path}]:" +ls -al ${dataset_path} +echo "" + +echo "[CANN-ZhongZhi] before train - list my output files[${output_path}]:" +ls -al ${output_path} +echo "" + +######环境变量修改###### +###如果需要修改环境变量的,在此处修改 +###搭配最大内存使用 +#echo "GE_USE_STATIC_MEMORY ${GE_USE_STATIC_MEMORY}" +#echo $GE_USE_STATIC_MEMORY +#echo "GE_USE_STATIC_MEMORY" +#export GE_USE_STATIC_MEMORY=1 +#echo "GE_USE_STATIC_MEMORY ${GE_USE_STATIC_MEMORY}" +#echo $GE_USE_STATIC_MEMORY +#echo "GE_USE_STATIC_MEMORY" + + +##接口老哥提示打开 +echo "ENABLE_FORCE_V2_CONTROL ${GE_USE_STATIC_MEMORY}" +export ENABLE_FORCE_V2_CONTROL=1 +echo "ENABLE_FORCE_V2_CONTROL ${GE_USE_STATIC_MEMORY}" +#设置日志级别为info +#export ASCEND_GLOBAL_LOG_LEVEL=1 +#设置日志打屏到屏幕 +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 +#export TF_CPP_MIN_LOG_LEVEL=0 +env > ${output_path}/my_env.log + +######训练执行###### +###此处每个网络执行命令不同,需要修改 +python3.7 ${code_dir}/train621V.py --data_url=${dataset_path} --train_url=${output_path} +if [ $? 
-eq 0 ]; +then + echo "[CANN-ZhongZhi] train return success" +else + echo "[CANN-ZhongZhi] train return failed" +fi + +######训练后把需要备份的内容保存到output_path###### +###此处每个网络不同,视情况添加cp +cp -r ${work_dir} ${output_path} + +######训练后输出目录文件确认###### +echo "[CANN-ZhongZhi] after train - list my work files[${work_dir}]:" +ls -al ${work_dir} +echo "" + +echo "[CANN-ZhongZhi] after train - list my output files[${output_path}]:" +ls -al ${output_path} +echo "" diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json new file mode 100644 index 0000000000000000000000000000000000000000..d729df649d913c80f1a942bc7b75378829a3ddd4 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json @@ -0,0 +1,5 @@ +{ + "black-list": { + "to-add": ["Assign","MatMulV2","Cast","Mul","ReduceSumD","SoftmaxV2","BatchMatMul"] + } +} \ No newline at end of file diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py new file mode 100644 index 0000000000000000000000000000000000000000..b2a893aed66a82a6b726bc1e97dd9efc8a4db626 --- /dev/null +++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py @@ -0,0 +1,153 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +#/usr/bin/python3 +''' +Feb. 2019 by kyubyong park. +kbpark.linguist@gmail.com. +https://www.github.com/kyubyong/transformer. + +Preprocess the iwslt 2016 datasets. +''' + +import os +import errno +import sentencepiece as spm +import re +from hparams import Hparams +import logging +###因为换了数据集,他的vocabsize可能不一样了 +logging.basicConfig(level=logging.INFO) + +def prepro(hp): + """Load raw data -> Preprocessing -> Segmenting with sentencepice + hp: hyperparams. argparse. + """ + logging.info("# Check if raw files exist") + train1 = "iwslt2016/prepro/train.de" + train2 = "iwslt2016/prepro/train.en" + eval1 = "iwslt2016/prepro/eval.de" + eval2 = "iwslt2016/prepro/eval.en" + test1 = "iwslt2016/prepro/test.de" + test2 = "iwslt2016/prepro/test.en" + for f in (train1, train2, eval1, eval2, test1, test2): + if not os.path.isfile(f): + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), f) + + logging.info("# Preprocessing") + # train + _prepro = lambda x: [line.strip() for line in open(x, 'r', encoding='UTF-8').read().split("\n") \ + if not line.startswith("<")] + prepro_train1, prepro_train2 = _prepro(train1), _prepro(train2) + assert len(prepro_train1)==len(prepro_train2), "Check if train source and target files match." 
#!/bin/bash
# Full-accuracy 1P training launcher (Ascend NPU test harness).

##########################################################
######### Boilerplate section — do not modify ############
##########################################################
# Directory this shell script lives in.
cur_path=`echo $(cd $(dirname $0);pwd)`

# Non-zero when the invoked script name contains "performance" (drives CaseName).
perf_flag=`echo $0 | grep performance | wc -l`

# Network name = parent directory of the test/ folder.
Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`

export RANK_SIZE=1
export RANK_ID=0
export JOB_ID=10087

# Path parameters.
data_path=""
output_path=""

# Help message.
if [[ $1 == --help || $1 == -h ]];then
    # FIX: original read `echo"usage:..."` (no space after echo), which bash
    # parses as the unknown command `echousage:...`; also the usage line named
    # the performance script instead of this one.
    echo "usage:./train_full_1p.sh "
    echo " "
    echo "parameter explain:
    --data_path              # dataset of training
    --output_path            # output of training
    --train_steps            # max_step for training
    --train_epochs           # max_epoch for training
    --batch_size             # batch size
    -h/--help                show help message
    "
    exit 1
fi

# Parse command-line arguments.
for para in $*
do
    if [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    elif [[ $para == --output_path* ]];then
        output_path=`echo ${para#*=}`
    elif [[ $para == --train_steps* ]];then
        train_steps=`echo ${para#*=}`
    elif [[ $para == --train_epochs* ]];then
        train_epochs=`echo ${para#*=}`
    elif [[ $para == --batch_size* ]];then
        batch_size=`echo ${para#*=}`
    fi
done

# data_path is mandatory.
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi

# Default output path when --output_path was not given.
if [[ $output_path == "" ]];then
    output_path="./test/output/${ASCEND_DEVICE_ID}"
fi

CaseName=""
function get_casename()
{
    if [ x"${perf_flag}" = x1 ];
    then
        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
    else
        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
    fi
}

# Move to the code directory and reset the per-device output directory.
cd ${cur_path}/../
rm -rf ./test/output/${ASCEND_DEVICE_ID}
mkdir -p ./test/output/${ASCEND_DEVICE_ID}

# End-to-end timing starts here.
start_time=$(date +%s)

# FIX: assign the default only when --batch_size was not supplied; the
# original `batch_size=128` unconditionally overwrote the parsed argument.
batch_size=${batch_size:-128}

# On-screen log file; keep the name as ${print_log}.
print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"

python3.7 ./train.py --data_url=${data_path} --train_url=${output_path} 1>${print_log} 2>&1

# Performance metrics: mean sec/step over the last 10 reports, then FPS.
StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`

# Accuracy metric.
train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
# Extract every loss report into a per-case loss file.
grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt

# Resolve the final case name (${CaseName}).
get_casename

# Rename the loss file to the case-specific name.
if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
then
    mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
fi

# End-to-end duration.
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

echo "------------------ Final result ------------------"
echo "Final Performance images/sec : $FPS"
echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"
echo "Final Train Accuracy : ${train_accuracy}"

# FIX: read the renamed loss file; the original still pointed at
# my_output_loss.txt, which was moved away just above (the performance
# variant already reads ${CaseName}_loss.txt).
ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt`)

# Key results collected into ${CaseName}.log.
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
#!/bin/bash
# Performance 1P training launcher (Ascend NPU test harness).

##########################################################
######### Boilerplate section — do not modify ############
##########################################################
# Directory this shell script lives in.
cur_path=`echo $(cd $(dirname $0);pwd)`

# Non-zero when the invoked script name contains "performance" (drives CaseName).
perf_flag=`echo $0 | grep performance | wc -l`

# Network name = parent directory of the test/ folder.
Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`

export RANK_SIZE=1
export RANK_ID=0
export JOB_ID=10087

# Path parameters.
data_path=""
output_path=""

# Help message.
if [[ $1 == --help || $1 == -h ]];then
    # FIX: original read `echo"usage:..."` (no space after echo), which bash
    # parses as the unknown command `echousage:...`.
    echo "usage:./train_performance_1P.sh "
    echo " "
    echo "parameter explain:
    --data_path              # dataset of training
    --output_path            # output of training
    --train_steps            # max_step for training
    --train_epochs           # max_epoch for training
    --batch_size             # batch size
    -h/--help                show help message
    "
    exit 1
fi

# Parse command-line arguments.
for para in $*
do
    if [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    elif [[ $para == --output_path* ]];then
        output_path=`echo ${para#*=}`
    elif [[ $para == --train_steps* ]];then
        train_steps=`echo ${para#*=}`
    elif [[ $para == --train_epochs* ]];then
        train_epochs=`echo ${para#*=}`
    elif [[ $para == --batch_size* ]];then
        batch_size=`echo ${para#*=}`
    fi
done

# data_path is mandatory.
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi

# Default output path when --output_path was not given.
if [[ $output_path == "" ]];then
    output_path="./test/output/${ASCEND_DEVICE_ID}"
fi

CaseName=""
function get_casename()
{
    if [ x"${perf_flag}" = x1 ];
    then
        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
    else
        CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
    fi
}

# Move to the code directory and reset the per-device output directory.
cd ${cur_path}/../
rm -rf ./test/output/${ASCEND_DEVICE_ID}
mkdir -p ./test/output/${ASCEND_DEVICE_ID}

# End-to-end timing starts here.
start_time=$(date +%s)

# FIX: assign defaults only when the corresponding flag was not supplied;
# the original unconditionally overwrote the values parsed above.
train_epochs=${train_epochs:-1}
train_steps=${train_steps:-34632}
batch_size=${batch_size:-128}

# On-screen log file; keep the name as ${print_log}.
print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
python3.7 ./train.py --data_url=${data_path} --train_url=${output_path} --steps=${train_steps} 1>${print_log} 2>&1


# Performance metrics: mean sec/step over the last 10 reports, then FPS.
StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`

# Accuracy metric.
train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
# Extract every loss report into a per-case loss file.
grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt


# Resolve the final case name (${CaseName}).
get_casename

# Rename the loss file to the case-specific name.
if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
then
    mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
fi

# End-to-end duration.
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

echo "------------------ Final result ------------------"
echo "Final Performance images/sec : $FPS"
echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"
echo "Final Train Accuracy : ${train_accuracy}"

# Last-iteration loss value from the renamed loss file.
ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt`)

# Key results collected into ${CaseName}.log.
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
import tensorflow as tf
from model import Transformer
from tqdm import tqdm
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu, calc_bleu2
import os
from hparams import Hparams
import math
import logging
import time
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from npu_bridge.npu_init import *


logging.basicConfig(level=logging.INFO)


logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
print("data_url\n")
print(hp.data_url)
print("train_url\n")
print(hp.train_url)
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
# Training stream: shuffled, sentences clipped to (maxlen1, maxlen2).
train_batches, num_train_batches, num_train_samples = get_batch(hp.data_url + hp.train1, hp.data_url + hp.train2,
                                                                hp.maxlen1, hp.maxlen2,
                                                                hp.data_url + hp.vocab, hp.batch_size,
                                                                shuffle=True)
# Evaluation stream: same length limits as training, no shuffling.
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.data_url + hp.eval1, hp.data_url + hp.eval2,
                                                             hp.maxlen1, hp.maxlen2,
                                                             hp.data_url + hp.vocab, hp.batch_size,
                                                             shuffle=False)

# Reinitializable iterator shared by the train and eval datasets.
# FIX: renamed from `iter`, which shadowed the Python builtin.
batch_iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = batch_iter.get_next()

train_init_op = batch_iter.make_initializer(train_batches)
eval_init_op = batch_iter.make_initializer(eval_batches)

logging.info("# Load model")
m = Transformer(hp)
loss, train_op, global_step = m.train(xs, ys)
y_hat = m.eval(xs, ys)

logging.info("# Session")
saver = tf.train.Saver(max_to_keep=hp.num_epochs)

#### NPU-related switches and scratch paths.
if not os.path.exists(hp.train_url + "/tmp/profiling"): os.makedirs(hp.train_url + "/tmp/profiling")
proPath = hp.train_url + "/tmp/profiling"    # NOTE(review): computed but not used below
dumpPath = hp.train_url + "/tmp/overflow"
blackPath = hp.data_url + "/ops_info.json"       # NOTE(review): unused
fusionPath = hp.data_url + "/fusion_switch.cfg"  # NOTE(review): unused
switchPath = hp.data_url + "/switch_config.txt"  # NOTE(review): unused — the session config below hard-codes "./switch_config.txt"
if not os.path.exists(dumpPath): os.makedirs(dumpPath)

config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
# Mixed-precision dtype overrides read from the local switch_config.txt.
custom_op.parameter_map["customize_dtypes"].s = tf.compat.as_bytes("./switch_config.txt")
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # must be explicitly disabled on NPU
config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF  # must be explicitly disabled on NPU

with tf.Session(config=config) as sess:
    # Resume from the latest checkpoint when one exists, otherwise start fresh.
    ckpt = tf.train.latest_checkpoint(hp.train_url + hp.logdir)
    if ckpt is None:
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())
        if not os.path.exists(hp.train_url + hp.logdir): os.makedirs(hp.train_url + hp.logdir)
        save_variable_specs(os.path.join(hp.train_url + hp.logdir, "specs"))
    else:
        saver.restore(sess, ckpt)

    sess.run(train_init_op)
    total_steps = hp.num_epochs * num_train_batches
    print("Step Info: ", hp.num_epochs, num_train_batches)

    _gs = sess.run(global_step)

    for i in tqdm(range(_gs, total_steps + 1)):
        _, _gs = sess.run([train_op, global_step])
        epoch = math.ceil(i / num_train_batches)

        # NOTE(review): this separate sess.run(loss) pulls one extra batch from
        # the iterator every step (the loss is computed on a fresh batch, not
        # the one just trained on) — confirm this is intentional.
        _loss = sess.run(loss)  # train loss
        logging.info("loss: {}".format(_loss))

        if i and i % num_train_batches == 0:
            logging.info("epoch {} is done".format(epoch))
            _loss = sess.run(loss)  # train loss
            logging.info("loss: {}".format(_loss))

            logging.info("# test evaluation")
            # NOTE(review): eval_init_op is never run, so the hypotheses below
            # are decoded from the *training* stream — TODO confirm.

            logging.info("# get hypotheses")
            hypotheses = get_hypotheses(num_eval_batches, num_eval_samples, sess, y_hat, m.idx2token)

            logging.info("# write results")
            model_output = "iwslt2016_E%02dL%.2f" % (epoch, _loss)
            if not os.path.exists(hp.train_url + hp.evaldir): os.makedirs(hp.train_url + hp.evaldir)
            translation = os.path.join(hp.train_url + hp.evaldir, model_output)

            with open(translation, 'w') as fout:
                fout.write("\n".join(hypotheses))

            logging.info("# calc bleu score and append it to translation")
            calc_bleu2(hp.data_url + hp.eval3, translation, hp.data_url)

            logging.info("# save models")
            ckpt_name = os.path.join(hp.train_url + hp.logdir, model_output)
            saver.save(sess, ckpt_name, global_step=_gs)
            logging.info("after training of {} epochs, {} has been saved.".format(epoch, ckpt_name))

            logging.info("# fall back to train mode")
            sess.run(train_init_op)

logging.info("Done")
def calc_num_batches(total_num, batch_size):
    '''Return how many batches cover ``total_num`` samples.

    A final, partially filled batch is counted, i.e. this is ceiling
    division of total_num by batch_size.
    '''
    # -(-a // b) is ceiling division expressed with floor division only.
    return -(-total_num // batch_size)

def convert_idx_to_token_tensor(inputs, idx2token):
    '''Convert a 1-D int32 index tensor into a scalar string tensor.

    inputs: 1-D int32 tensor of vocabulary indices.
    idx2token: dict mapping index -> token string.

    Returns a string tensor whose value is the space-joined tokens.
    '''
    def lookup(indices):
        # Executed as a plain Python function inside the graph via tf.py_func.
        tokens = [idx2token[idx] for idx in indices]
        return " ".join(tokens)

    return tf.py_func(lookup, [inputs], tf.string)
def postprocess(hypotheses, idx2token):
    '''Turn batches of predicted index sequences into readable sentences.

    hypotheses: list of encoded predictions (lists of vocabulary indices)
    idx2token: dict mapping index -> subword token

    Returns
      list of decoded, detokenized sentences.
    '''
    _hypotheses = []
    for h in hypotheses:
        sent = "".join(idx2token[idx] for idx in h)
        # Everything after the end-of-sentence marker is padding.
        # NOTE(review): the "</s>" literal was reconstructed (the literal was
        # garbled in the source; upstream kyubyong/transformer uses "</s>") —
        # confirm it matches the EOS token in data_load.py.
        sent = sent.split("</s>")[0].strip()
        sent = sent.replace("▁", " ")  # strip sentencepiece BPE markers
        _hypotheses.append(sent.strip())
    return _hypotheses

def save_hparams(hparams, path):
    '''Saves hparams to path
    hparams: argsparse object.
    path: output directory.

    Writes
      hparams as a JSON dictionary to <path>/hparams.
    '''
    if not os.path.exists(path): os.makedirs(path)
    hp = json.dumps(vars(hparams))
    with open(os.path.join(path, "hparams"), 'w') as fout:
        fout.write(hp)

def load_hparams(parser, path):
    '''Loads saved hparams and overrides the parser's defaults.

    parser: argparse.ArgumentParser
    path: directory (or a file inside the directory) where "hparams" was saved

    FIX: the original did ``parser.f = v`` inside the loop, which only set a
    literal attribute named "f" on the parser object and never overrode any
    flag; set_defaults() makes the stored values take effect.
    '''
    if not os.path.isdir(path):
        path = os.path.dirname(path)
    d = open(os.path.join(path, "hparams"), 'r').read()
    flag2val = json.loads(d)
    parser.set_defaults(**flag2val)

def save_variable_specs(fpath):
    '''Saves information about variables such as
    their name, shape, and total parameter number
    fpath: string. output file path

    Writes
      a text file named fpath listing every global variable and the total
      parameter count.
    '''
    def _get_size(shp):
        '''Return the element count of TensorShape ``shp``.'''
        size = 1
        for d in range(len(shp)):
            size *= shp[d]
        return size

    params, num_params = [], 0
    for v in tf.global_variables():
        params.append("{}==={}".format(v.name, v.shape))
        num_params += _get_size(v.shape)
    print("num_params: ", num_params)
    with open(fpath, 'w') as fout:
        fout.write("num_params: {}\n".format(num_params))
        fout.write("\n".join(params))
    logging.info("Variables info has been saved.")

def get_hypotheses(num_batches, num_samples, sess, tensor, dict):
    '''Gets hypotheses.
    num_batches: scalar.
    num_samples: scalar.
    sess: tensorflow sess object
    tensor: target tensor to fetch
    dict: idx2token dictionary

    Returns
      hypotheses: list of sents, truncated to num_samples entries.
    '''
    hypotheses = []
    for i in range(num_batches):
        # Each fetch yields one batch of predicted index sequences.
        h = sess.run(tensor)
        hypotheses.extend(h.tolist())
    # FIX: dropped the per-batch debug print(i) that cluttered the log.
    hypotheses = postprocess(hypotheses, dict)
    return hypotheses[:num_samples]
def _append_bleu_report(cmd, translation):
    '''Run a multi-bleu.perl command, append its report to ``translation``
    and rename the file so the BLEU score appears in its name.

    cmd: full shell command whose stdout was redirected to the file "temp"
    translation: model output file path
    '''
    os.system(cmd)
    bleu_score_report = open("temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        new_translation = translation + "B{}".format(score)
        os.system("mv {} {}".format(translation, new_translation))
        # FIX: the original then called os.remove(translation), but `mv` has
        # already moved the file away, so that call always raised
        # FileNotFoundError (silently swallowed by a bare except).
    except IndexError:
        # No "BLEU = ..." line in the report (e.g. perl or the script missing);
        # leave the un-renamed translation in place, as before.
        pass
    os.remove("temp")

def calc_bleu(ref, translation):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path

    Returns
      translation that the bleu score is appended to'''
    _append_bleu_report("perl multi-bleu.perl {} < {} > {}".format(ref, translation, "temp"), translation)

def calc_bleu2(ref, translation, data_url):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path
    data_url: accepted for interface compatibility but unused (the script is
              resolved relative to the working directory)

    Returns
      translation that the bleu score is appended to'''
    _append_bleu_report("perl " + "./multi-bleu.perl {} < {} > {}".format(ref, translation, "temp"), translation)
def calc_bleu3(ref, translation,data_url):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path
    data_url: directory that contains multi-bleu.perl

    Returns
      translation that the bleu score is appended to'''
    # NOTE(review): hard-coded scratch path — this only works on the original
    # author's machine (/home/test_user05); confirm before reusing.
    get_bleu_score = "perl "+data_url+"multi-bleu.perl {} < {} > {}".format(ref, translation, "/home/test_user05/transformerAll/temp")
    os.system(get_bleu_score)
    bleu_score_report = open("/home/test_user05/transformerAll/temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        # Extract the numeric score from a "BLEU = <score>, ..." report line
        # and embed it in the result file's name.
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        new_translation = translation + "B{}".format(score)
        os.system("mv {} {}".format(translation, new_translation))
        # NOTE(review): `mv` already moved the file, so this remove always
        # raises FileNotFoundError, silently swallowed by the bare except.
        os.remove(translation)

    except: pass
    #os.remove("temp")