diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..9c8f3ea0871e0bfe81da0fa6e7c1d7d156dc380e
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f30dacb7fb64a7ac8bd257effeedc573943d691c
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md
@@ -0,0 +1,180 @@
+- [基本信息](#基本信息.md)
+- [概述](#概述.md)
+- [训练环境准备](#训练环境准备.md)
+- [快速上手](#快速上手.md)
+- [训练结果](#训练结果.md)
+- [高级参考](#高级参考.md)
+
+基本信息
+
+**发布者(Publisher):Huawei**
+
+**应用领域(Application Domain):Natural Language Processing**
+
+**版本(Version):1.2**
+
+**修改时间(Modified) :2021.4.6**
+
+**框架(Framework):TensorFlow 1.15.0**
+
+**模型格式(Model Format):ckpt**
+
+**精度(Precision):Mixed**
+
+**处理器(Processor):昇腾910**
+
+**应用级别(Categories):Official**
+
+**描述(Description):基于TensorFlow框架实现Google提出的语言模型Transformer,对不同语言的文字进行翻译的训练代码**
+
+概述
+
+ Transformer是Google提出的语言模型,抛弃了传统的CNN和RNN,整个网络结构完全是由Attention机制组成。
+- 参考论文:
+
+ https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf
+
+- 参考实现:
+
+ https://github.com/Kyubyong/transformer
+
+
+## 默认配置
+
+- 训练数据集预处理(以WMT 2014 English-German训练集为例,仅作为用户参考示例):
+
+ - 文本输入格式:bpe
+
+- 测试数据集预处理(以WMT 2014 English-German验证集为例,仅作为用户参考示例)
+
+ - 文本输入格式:bpe
+
+
+
+## 支持特性
+
+| 特性列表 | 是否支持 |
+|-------|------|
+| 分布式训练 | 否 |
+| 混合精度 | 是 |
+| 并行数据 | 是 |
+
+## 混合精度训练
+
+昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。
+
+## 开启混合精度
+
+脚本已默认开启混合精度,设置precision_mode参数的脚本参考如下。
+
+ ```
+ custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
+ custom_op.name = 'NpuOptimizer'
+ custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(str(args.precision_mode))
+ ```
+
+训练环境准备
+
+1. 硬件环境准备请参见各硬件产品文档"[驱动和固件安装升级指南]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)"。需要在硬件设备上安装与CANN版本配套的固件与驱动。
+2. 宿主机上需要安装Docker并登录[Ascend Hub中心](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm)获取镜像。
+
+ 当前模型支持的镜像列表如[表1](#zh-cn_topic_0000001074498056_table1519011227314)所示。
+
+ **表 1** 镜像列表
+
+
+ 镜像名称
+ |
+ 镜像版本
+ |
+ 配套CANN版本
+ |
+
+
+
+ |
+ 20.2.0
+ |
+ 20.2
+ |
+
+
+
+
+
+快速上手
+
+- 数据集准备
+1. 模型训练使用WMT 2014 English-German数据集,数据集请自行获取。
+
+## 模型训练
+
+- 单击“立即下载”,并选择合适的下载方式下载源码包。
+
+- 启动训练之前,首先要配置程序运行相关环境变量。
+
+ 环境变量配置信息参见:
+
+ [Ascend 910训练平台环境变量设置](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+
+- 单卡训练
+
+ 1. 配置训练参数。
+
+ 首先在脚本test/train_full_1p.sh中,配置data_url、train_url参数,分别代表训练数据路径跟输出数据路径,请用户根据实际路径配置,或者在启动训练的命令行中以参数形式下发。
+
+ ```
+ data_path="../data"
+ ```
+
+ 2. 启动训练。
+
+ 启动单卡训练 (脚本为Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh)
+
+ ```
+ bash train_full_1p.sh --data_path=../data --output_path=../out
+ ```
+
+训练结果
+
+- 精度结果比对
+
+|精度指标项|GPU实测|NPU实测|
+|---|---|---|
+|loss|2.871|2.751|
+
+
+高级参考
+
+## 脚本和示例代码
+
+```
+├── train.py //网络训练与测试代码
+├── README.md //代码说明文档
+├── data_load.py //数据处理代码
+├── hparams.py //参数解析代码
+├── model.py //模型定义代码
+├── modules.py //模型模块代码
+├── preproNew.py //文本数据转bpe代码
+├── utils.py //精度计算代码
+├── requirements.txt //训练python依赖列表
+├── test
+│ ├──train_performance_1p.sh //单卡训练验证性能启动脚本
+│ ├──train_full_1p.sh //单卡全量训练启动脚本
+
+```
+
+## 脚本参数
+
+```
+--data_path 数据集路径,默认:path/dataset
+--output_path 训练过程中输出数据路径,默认:path/output
+--batch_size 每个NPU的batch size,默认:128
+```
+
+## 训练过程
+
+1. 通过“模型训练”中的训练指令启动单卡训练。
+
+2. 参考脚本的模型存储路径为./output/log。
+
+
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py
new file mode 100644
index 0000000000000000000000000000000000000000..2784e15947b24641180ef216de8748320ba1e0a8
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py
@@ -0,0 +1,181 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+#/usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer
+
+Note.
+if safe, entities on the source side have the prefix 1, and the target side 2, for convenience.
+For example, fpath1, fpath2 means source file path and target file path, respectively.
+'''
+import tensorflow as tf
+from utils import calc_num_batches
+
+def load_vocab(vocab_fpath):
+ '''Loads vocabulary file and returns idx<->token maps
+ vocab_fpath: string. vocabulary file path.
+ Note that these are reserved
+ 0: <pad>, 1: <unk>, 2: <s>, 3: </s>
+
+ Returns
+ two dictionaries.
+ '''
+ vocab = [line.split()[0] for line in open(vocab_fpath, 'r').read().splitlines()]
+ token2idx = {token: idx for idx, token in enumerate(vocab)}
+ idx2token = {idx: token for idx, token in enumerate(vocab)}
+ return token2idx, idx2token
+
+def load_data(fpath1, fpath2, maxlen1, maxlen2):
+ '''Loads source and target data and filters out too lengthy samples.
+ fpath1: source file path. string.
+ fpath2: target file path. string.
+ maxlen1: source sent maximum length. scalar.
+ maxlen2: target sent maximum length. scalar.
+
+ Returns
+ sents1: list of source sents
+ sents2: list of target sents
+ '''
+ sents1, sents2 = [], []
+ with open(fpath1, 'r') as f1, open(fpath2, 'r') as f2:
+ for sent1, sent2 in zip(f1, f2):
+ if len(sent1.split()) + 1 > maxlen1: continue # 1: </s>
+ if len(sent2.split()) + 1 > maxlen2: continue # 1: </s>
+ sents1.append(sent1.strip())
+ sents2.append(sent2.strip())
+ return sents1, sents2
+
+
+def encode(inp, type, dict):
+ '''Converts string to number. Used for `generator_fn`.
+ inp: 1d byte array.
+ type: "x" (source side) or "y" (target side)
+ dict: token2idx dictionary
+
+ Returns
+ list of numbers
+ '''
+ inp_str = inp.decode("utf-8")
+ if type=="x": tokens = inp_str.split() + [""]
+ else: tokens = [""] + inp_str.split() + [""]
+
+ x = [dict.get(t, dict[""]) for t in tokens]
+ return x
+
+def generator_fn(sents1, sents2, vocab_fpath):
+ '''Generates training / evaluation data
+ sents1: list of source sents
+ sents2: list of target sents
+ vocab_fpath: string. vocabulary file path.
+
+ yields
+ xs: tuple of
+ x: list of source token ids in a sent
+ x_seqlen: int. sequence length of x
+ sent1: str. raw source (=input) sentence
+ labels: tuple of
+ decoder_input: decoder_input: list of encoded decoder inputs
+ y: list of target token ids in a sent
+ y_seqlen: int. sequence length of y
+ sent2: str. target sentence
+ '''
+ token2idx, _ = load_vocab(vocab_fpath)
+ for sent1, sent2 in zip(sents1, sents2):
+ x = encode(sent1, "x", token2idx)
+ y = encode(sent2, "y", token2idx)
+ decoder_input, y = y[:-1], y[1:]
+
+ x_seqlen, y_seqlen = len(x), len(y)
+ yield (x, x_seqlen, sent1), (decoder_input, y, y_seqlen, sent2)
+
+def input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=False):
+ '''Batchify data
+ sents1: list of source sents
+ sents2: list of target sents
+ vocab_fpath: string. vocabulary file path.
+ batch_size: scalar
+ shuffle: boolean
+
+ Returns
+ xs: tuple of
+ x: int32 tensor. (N, T1)
+ x_seqlens: int32 tensor. (N,)
+ sents1: str tensor. (N,)
+ ys: tuple of
+ decoder_input: int32 tensor. (N, T2)
+ y: int32 tensor. (N, T2)
+ y_seqlen: int32 tensor. (N, )
+ sents2: str tensor. (N,)
+ '''
+ shapes = (([None], (), ()),
+ ([None], [None], (), ()))
+ types = ((tf.int32, tf.int32, tf.string),
+ (tf.int32, tf.int32, tf.int32, tf.string))
+ paddings = ((0, 0, ''),
+ (0, 0, 0, ''))
+
+ dataset = tf.data.Dataset.from_generator(
+ generator_fn,
+ output_shapes=shapes,
+ output_types=types,
+ args=(sents1, sents2, vocab_fpath)) # <- arguments for generator_fn. converted to np string arrays
+
+ if shuffle: # for training
+ dataset = dataset.shuffle(128*batch_size)
+
+ dataset = dataset.repeat() # iterate forever
+ # dataset = dataset.padded_batch(batch_size, shapes, paddings).prefetch(1)
+ shapes = (([100], (), ()),
+ ([100], [100], (), ()))
+ dataset = dataset.padded_batch(batch_size, shapes, paddings, drop_remainder=True)
+
+ return dataset
+
+def get_batch(fpath1, fpath2, maxlen1, maxlen2, vocab_fpath, batch_size, shuffle=False):
+ '''Gets training / evaluation mini-batches
+ fpath1: source file path. string.
+ fpath2: target file path. string.
+ maxlen1: source sent maximum length. scalar.
+ maxlen2: target sent maximum length. scalar.
+ vocab_fpath: string. vocabulary file path.
+ batch_size: scalar
+ shuffle: boolean
+
+ Returns
+ batches
+ num_batches: number of mini-batches
+ num_samples
+ '''
+ sents1, sents2 = load_data(fpath1, fpath2, maxlen1, maxlen2)
+ batches = input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=shuffle)
+ num_batches = calc_num_batches(len(sents1), batch_size)
+ return batches, num_batches, len(sents1)
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/fusion_switch.cfg b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/fusion_switch.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9260c45253f20249ce9bae172ab885c8ad583098
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/fusion_switch.cfg
@@ -0,0 +1,10 @@
+{
+ "Switch":{
+ "GraphFusion":{
+ "ALL":"off"
+ },
+ "UBFusion":{
+ "ALL":"off"
+ }
+ }
+}
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/hparams.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/hparams.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2b3fedab15cd24d280fd0b7ff68a24b2c8a76dd
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/hparams.py
@@ -0,0 +1,95 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+
+ABSPATH=''
+class Hparams:
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--data_url", type=str, default="./dataset")
+ parser.add_argument("--train_url", type=str, default="./output")
+ parser.add_argument('--num_gpus', default='')
+ # prepro
+ parser.add_argument('--vocab_size', default=37000, type=int)#37000
+
+ # train
+ ## files
+ parser.add_argument('--train1', default=ABSPATH+'iwslt2016/segmented/train.de.bpe',
+ help="german training segmented data")
+ parser.add_argument('--train2', default=ABSPATH+'iwslt2016/segmented/train.en.bpe',
+ help="english training segmented data")
+ parser.add_argument('--eval1', default=ABSPATH+'iwslt2016/segmented/eval.de.bpe',
+ help="german evaluation segmented data")
+ parser.add_argument('--eval2', default=ABSPATH+'iwslt2016/segmented/eval.en.bpe',
+ help="english evaluation segmented data")
+ parser.add_argument('--eval3', default=ABSPATH+'iwslt2016/prepro/eval.en',
+ help="english evaluation unsegmented data")
+
+ ## vocabulary
+ parser.add_argument('--vocab', default=ABSPATH+'iwslt2016/segmented/bpe.vocab',
+ help="vocabulary file path")
+
+ # training scheme
+ parser.add_argument('--batch_size', default=128, type=int)###128
+ parser.add_argument('--eval_batch_size', default=128, type=int)###128
+
+ parser.add_argument('--lr', default=0.0003, type=float, help="learning rate")
+ parser.add_argument('--warmup_steps', default=4000, type=int)####4000
+ parser.add_argument('--logdir', default=ABSPATH+"log/1", help="log directory")
+ parser.add_argument('--num_epochs', default=1, type=int)###20
+ parser.add_argument('--evaldir', default=ABSPATH+"eval/1", help="evaluation dir")
+
+ # model
+ parser.add_argument('--d_model', default=512, type=int,###512
+ help="hidden dimension of encoder/decoder")
+ parser.add_argument('--d_ff', default=2048, type=int,###2048
+ help="hidden dimension of feedforward layer")
+ parser.add_argument('--num_blocks', default=6, type=int,#6
+ help="number of encoder/decoder blocks")
+ parser.add_argument('--num_heads', default=8, type=int,###8
+ help="number of attention heads")
+ parser.add_argument('--maxlen1', default=100, type=int,###100
+ help="maximum length of a source sequence")
+ parser.add_argument('--maxlen2', default=100, type=int,###100
+ help="maximum length of a target sequence")
+ parser.add_argument('--dropout_rate', default=0.1, type=float)#####0.3 论文是0.1
+ parser.add_argument('--smoothing', default=0.1, type=float,
+ help="label smoothing rate")
+
+ # test
+ parser.add_argument('--test1', default=ABSPATH+'iwslt2016/segmented/test.de.bpe',
+ help="german test segmented data")
+ parser.add_argument('--test2', default=ABSPATH+'iwslt2016/prepro/test.en',
+ help="english test data")
+ parser.add_argument('--ckpt', default=ABSPATH+"log/1",
+ help="checkpoint file path")
+ parser.add_argument('--test_batch_size', default=128, type=int)###128
+ parser.add_argument('--testdir', default=ABSPATH+"test/1", help="test result dir")
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fe80d8bde9b23345c4e722b46737aa5d3ad43d4
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py
@@ -0,0 +1,250 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+# /usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer
+
+Transformer network
+'''
+import tensorflow as tf
+
+from data_load import load_vocab
+from modules import get_token_embeddings, ff, positional_encoding, multihead_attention, label_smoothing, noam_scheme
+from utils import convert_idx_to_token_tensor
+from tqdm import tqdm
+import logging
+from npu_bridge.estimator.npu import npu_convert_dropout
+from npu_bridge.npu_init import *
+
+
+logging.basicConfig(level=logging.INFO)
+
+class Transformer:
+ '''
+ xs: tuple of
+ x: int32 tensor. (N, T1)
+ x_seqlens: int32 tensor. (N,)
+ sents1: str tensor. (N,)
+ ys: tuple of
+ decoder_input: int32 tensor. (N, T2)
+ y: int32 tensor. (N, T2)
+ y_seqlen: int32 tensor. (N, )
+ sents2: str tensor. (N,)
+ training: boolean.
+ '''
+ def __init__(self, hp):
+ self.hp = hp
+ self.token2idx, self.idx2token = load_vocab(hp.data_url+hp.vocab)
+ self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model, zero_pad=True)
+
+ def encode(self, xs, training=True):
+ '''
+ Returns
+ memory: encoder outputs. (N, T1, d_model)
+ '''
+ with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
+ x, seqlens, sents1 = xs
+ # src_masks
+ src_masks = tf.math.equal(x, 0) # (N, T1)
+ # embedding
+ enc = tf.nn.embedding_lookup(self.embeddings, x) # (N, T1, d_model)
+ enc *= self.hp.d_model**0.5 # scale
+
+ enc += positional_encoding(enc, self.hp.maxlen1)
+ #enc = tf.layers.dropout(enc, self.hp.dropout_rate, training=training)
+ #enc = npu_convert_dropout(enc, self.hp.dropout_rate, training=training)
+
+ ## Blocks
+ for i in range(self.hp.num_blocks):
+ with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE):
+ # self-attention
+ enc = multihead_attention(queries=enc,
+ keys=enc,
+ values=enc,
+ key_masks=src_masks,
+ num_heads=self.hp.num_heads,
+ dropout_rate=self.hp.dropout_rate,
+ training=training,
+ causality=False)
+ # feed forward
+ enc = ff(enc, num_units=[self.hp.d_ff, self.hp.d_model])
+ memory = enc
+ return memory, sents1, src_masks
+
+ def decode(self, ys, memory, src_masks, training=True):
+ '''
+ memory: encoder outputs. (N, T1, d_model)
+ src_masks: (N, T1)
+
+ Returns
+ logits: (N, T2, V). float32.
+ y_hat: (N, T2). int32
+ y: (N, T2). int32
+ sents2: (N,). string.
+ '''
+ with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
+ decoder_inputs, y, seqlens, sents2 = ys
+
+ # tgt_masks
+ tgt_masks = tf.math.equal(decoder_inputs, 0) # (N, T2)
+
+ # embedding
+ dec = tf.nn.embedding_lookup(self.embeddings, decoder_inputs) # (N, T2, d_model)
+ dec *= self.hp.d_model ** 0.5 # scale
+
+ dec += positional_encoding(dec, self.hp.maxlen2)
+ #dec = tf.layers.dropout(dec, self.hp.dropout_rate, training=training)
+ #dec = npu_convert_dropout(dec, self.hp.dropout_rate, training=training)
+
+ # Blocks
+ for i in range(self.hp.num_blocks):
+ with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE):
+ # Masked self-attention (Note that causality is True at this time)
+ dec = multihead_attention(queries=dec,
+ keys=dec,
+ values=dec,
+ key_masks=tgt_masks,
+ num_heads=self.hp.num_heads,
+ dropout_rate=self.hp.dropout_rate,
+ training=training,
+ causality=True,
+ scope="self_attention")
+
+ # Vanilla attention
+ dec = multihead_attention(queries=dec,
+ keys=memory,
+ values=memory,
+ key_masks=src_masks,
+ num_heads=self.hp.num_heads,
+ dropout_rate=self.hp.dropout_rate,
+ training=training,
+ causality=False,
+ scope="vanilla_attention")
+ ### Feed Forward
+ dec = ff(dec, num_units=[self.hp.d_ff, self.hp.d_model])
+
+ # Final linear projection (embedding weights are shared)
+ weights = tf.transpose(self.embeddings) # (d_model, vocab_size)
+ logits = tf.einsum('ntd,dk->ntk', dec, weights) # (N, T2, vocab_size)
+ y_hat = tf.to_int32(tf.argmax(logits, axis=-1))
+
+ return logits, y_hat, y, sents2
+
+ def train(self, xs, ys):
+ '''
+ Returns
+ loss: scalar.
+ train_op: training operation
+ global_step: scalar.
+ summaries: training summary node
+ '''
+ # forward
+ memory, sents1, src_masks = self.encode(xs)
+ logits, preds, y, sents2 = self.decode(ys, memory, src_masks)
+
+ # train scheme
+ y_ = label_smoothing(tf.one_hot(y, depth=self.hp.vocab_size))
+ ce = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_)
+ nonpadding = tf.to_float(tf.not_equal(y, self.token2idx[""])) # 0:
+ loss = tf.reduce_sum(ce * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)
+
+ global_step = tf.train.get_or_create_global_step()
+ lr = noam_scheme(self.hp.lr, global_step, self.hp.warmup_steps)
+ optimizer = tf.train.AdamOptimizer(lr)
+ ##开启lossscale需要关闭
+ #train_op = optimizer.minimize(loss, global_step=global_step)
+
+ #lossscale
+ loss_scale_opt = optimizer
+ loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+ decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+ train_op = NPULossScaleOptimizer(loss_scale_opt, loss_scale_manager)
+ #
+ train_op= train_op.minimize(loss, global_step=global_step)
+
+
+ # self.refine_optim = tf.train.AdamOptimizer(learning_rate=self.refine_lr).minimize(self.rec_loss,
+ # var_list=refine_var)
+ # ####
+ # self.refine_optim = tf.train.AdamOptimizer(learning_rate=self.refine_lr)
+ #
+ # loss_scale_opt = self.refine_optim
+ # loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+ # decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+ # self.refine_optim = NPULossScaleOptimizer(loss_scale_opt, loss_scale_manager)
+ #
+ # self.refine_optim = self.refine_optim.minimize(self.rec_loss, var_list=refine_var)
+
+ # tf.summary.scalar('lr', lr)
+ # tf.summary.scalar("loss", loss)
+ # tf.summary.scalar("global_step", global_step)
+ #
+ # summaries = tf.summary.merge_all()
+
+ return loss, train_op, global_step
+
+ #return loss, train_op, global_step
+
+ def eval(self, xs, ys):
+ '''Predicts autoregressively
+ At inference, input ys is ignored.
+ Returns
+ y_hat: (N, T2)
+ '''
+ decoder_inputs, y, y_seqlen, sents2 = ys
+
+ decoder_inputs = tf.ones((tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx[""]
+ ys = (decoder_inputs, y, y_seqlen, sents2)
+
+ memory, sents1, src_masks = self.encode(xs, False)
+
+ logging.info("Inference graph is being built. Please be patient.")
+ for _ in tqdm(range(self.hp.maxlen2)):
+ logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
+ if tf.reduce_sum(y_hat, 1) == self.token2idx[""]: break
+
+ _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
+ ys = (_decoder_inputs, y, y_seqlen, sents2)
+
+ # monitor a random sample
+ #n = tf.random_uniform((), 0, tf.shape(y_hat)[0]-1, tf.int32)
+ # sent1 = sents1[n]
+ # pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
+ # sent2 = sents2[n]
+
+ # tf.summary.text("sent1", sent1)
+ # tf.summary.text("pred", pred)
+ # tf.summary.text("sent2", sent2)
+ # summaries = tf.summary.merge_all()
+
+ return y_hat
+
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py
new file mode 100644
index 0000000000000000000000000000000000000000..1245d57140da14a640cd6dd7d9d43100b8617b67
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# 解析输入参数data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# 设置sh文件格式为linux可执行
+os.system("dos2unix ./test/*")
+
+# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定
+# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS
+os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# 将当前执行目录所有文件拷贝到obs的output进行备份
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2d23455d4cdec2d46fc273177a247905c751b73
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# 解析输入参数data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# 设置sh文件格式为linux可执行
+os.system("dos2unix ./test/*")
+
+# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定
+# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS
+os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# 将当前执行目录所有文件拷贝到obs的output进行备份
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..55a9add9fa74832ca908108d73946cd76281a9cd
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,3 @@
+FuncStatus:OK
+PerfStatus:OK
+PrecisionStatus:POK
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..77f4290aefe9e7e465b1302cd302f4998354fcc3
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py
@@ -0,0 +1,337 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+#/usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer.
+
+Building blocks for Transformer
+'''
+
+import numpy as np
+import tensorflow as tf
+from npu_bridge.estimator.npu import npu_convert_dropout
+
+def ln(inputs, epsilon = 1e-8, scope="ln"):
+ '''Applies layer normalization. See https://arxiv.org/abs/1607.06450.
+ inputs: A tensor with 2 or more dimensions, where the first dimension has `batch_size`.
+ epsilon: A floating number. A very small number for preventing ZeroDivision Error.
+ scope: Optional scope for `variable_scope`.
+
+ Returns:
+ A tensor with the same shape and data dtype as `inputs`.
+ '''
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ inputs_shape = inputs.get_shape()
+ params_shape = inputs_shape[-1:]
+
+ mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
+ beta= tf.get_variable("beta", params_shape, initializer=tf.zeros_initializer())
+ gamma = tf.get_variable("gamma", params_shape, initializer=tf.ones_initializer())
+ normalized = (inputs - mean) / ( (variance + epsilon) ** (.5) )
+ outputs = gamma * normalized + beta
+
+ return outputs
+
+def get_token_embeddings(vocab_size, num_units, zero_pad=True):
+ '''Constructs token embedding matrix.
+ Note that the column of index 0's are set to zeros.
+ vocab_size: scalar. V.
+ num_units: embedding dimensionalty. E.
+ zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero
+ To apply query/key masks easily, zero pad is turned on.
+
+ Returns
+ weight variable: (V, E)
+ '''
+ with tf.variable_scope("shared_weight_matrix"):
+ embeddings = tf.get_variable('weight_mat',
+ dtype=tf.float32,
+ shape=(vocab_size, num_units),
+ initializer=tf.contrib.layers.xavier_initializer())
+ if zero_pad:
+ embeddings = tf.concat((tf.zeros(shape=[1, num_units]),
+ embeddings[1:, :]), 0)
+ return embeddings
+
+def scaled_dot_product_attention(Q, K, V, key_masks,
+ causality=False, dropout_rate=0.,
+ training=True,
+ scope="scaled_dot_product_attention"):
+ '''See 3.2.1.
+ Q: Packed queries. 3d tensor. [N, T_q, d_k].
+ K: Packed keys. 3d tensor. [N, T_k, d_k].
+ V: Packed values. 3d tensor. [N, T_k, d_v].
+ key_masks: A 2d tensor with shape of [N, key_seqlen]
+ causality: If True, applies masking for future blinding
+ dropout_rate: A floating point number of [0, 1].
+ training: boolean for controlling droput
+ scope: Optional scope for `variable_scope`.
+ '''
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ d_k = Q.get_shape().as_list()[-1]
+
+ # dot product
+ outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1])) # (N, T_q, T_k)
+
+ # scale
+ outputs /= d_k ** 0.5
+
+ # key masking
+ outputs = mask(outputs, key_masks=key_masks, type="key")
+
+ # causality or future blinding masking
+ if causality:
+ outputs = mask(outputs, type="future")
+
+ # softmax
+ outputs = tf.nn.softmax(outputs)
+ attention = tf.transpose(outputs, [0, 2, 1])
+ # tf.summary.image("attention", tf.expand_dims(attention[:1], -1))
+
+ # # query masking
+ # outputs = mask(outputs, Q, K, type="query")
+
+ # dropout
+ #outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)
+ #outputs = npu_convert_dropout(outputs, rate=dropout_rate, training=training)
+ #outputs = npu_convert_dropout(outputs, rate=dropout_rate)
+
+ # weighted sum (context vectors)
+ outputs = tf.matmul(outputs, V) # (N, T_q, d_v)
+
+ return outputs
+
+
+def mask(inputs, key_masks=None, type=None):
+ """Masks paddings on keys or queries to inputs
+ inputs: 3d tensor. (h*N, T_q, T_k)
+ key_masks: 3d tensor. (N, 1, T_k)
+ type: string. "key" | "future"
+
+ e.g.,
+ >> inputs = tf.zeros([2, 2, 3], dtype=tf.float32)
+ >> key_masks = tf.constant([[0., 0., 1.],
+ [0., 1., 1.]])
+ >> mask(inputs, key_masks=key_masks, type="key")
+ array([[[ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09],
+ [ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09]],
+
+ [[ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09],
+ [ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09]],
+
+ [[ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09],
+ [ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09]],
+
+ [[ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09],
+ [ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09]]], dtype=float32)
+ """
+ padding_num = -2 ** 32 + 1
+ if type in ("k", "key", "keys"):
+ key_masks = tf.to_float(key_masks)
+ key_masks = tf.tile(key_masks, [tf.shape(inputs)[0] // tf.shape(key_masks)[0], 1]) # (h*N, seqlen)
+ key_masks = tf.expand_dims(key_masks, 1) # (h*N, 1, seqlen)
+ outputs = inputs + key_masks * padding_num
+ # elif type in ("q", "query", "queries"):
+ # # Generate masks
+ # masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1)) # (N, T_q)
+ # masks = tf.expand_dims(masks, -1) # (N, T_q, 1)
+ # masks = tf.tile(masks, [1, 1, tf.shape(keys)[1]]) # (N, T_q, T_k)
+ #
+ # # Apply masks to inputs
+ # outputs = inputs*masks
+ elif type in ("f", "future", "right"):
+ diag_vals = tf.ones_like(inputs[0, :, :]) # (T_q, T_k)
+ tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() # (T_q, T_k)
+ future_masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(inputs)[0], 1, 1]) # (N, T_q, T_k)
+
+ paddings = tf.ones_like(future_masks) * padding_num
+ outputs = tf.where(tf.equal(future_masks, 0), paddings, inputs)
+ else:
+ print("Check if you entered type correctly!")
+
+ return outputs
+
+
+def multihead_attention(queries, keys, values, key_masks,
+ num_heads=8,
+ dropout_rate=0,
+ training=True,
+ causality=False,
+ scope="multihead_attention"):
+ '''Applies multihead attention. See 3.2.2
+ queries: A 3d tensor with shape of [N, T_q, d_model].
+ keys: A 3d tensor with shape of [N, T_k, d_model].
+ values: A 3d tensor with shape of [N, T_k, d_model].
+ key_masks: A 2d tensor with shape of [N, key_seqlen]
+ num_heads: An int. Number of heads.
+ dropout_rate: A floating point number.
+ training: Boolean. Controller of mechanism for dropout.
+ causality: Boolean. If true, units that reference the future are masked.
+ scope: Optional scope for `variable_scope`.
+
+ Returns
+ A 3d tensor with shape of (N, T_q, C)
+ '''
+ d_model = queries.get_shape().as_list()[-1]
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ # Linear projections
+ Q = tf.layers.dense(queries, d_model, use_bias=True) # (N, T_q, d_model)
+ K = tf.layers.dense(keys, d_model, use_bias=True) # (N, T_k, d_model)
+ V = tf.layers.dense(values, d_model, use_bias=True) # (N, T_k, d_model)
+
+ # Split and concat
+ Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, d_model/h)
+ K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h)
+ V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h)
+
+ # Attention
+ outputs = scaled_dot_product_attention(Q_, K_, V_, key_masks, causality, dropout_rate, training)
+
+ # Restore shape
+ outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2 ) # (N, T_q, d_model)
+
+ # Residual connection
+ outputs += queries
+
+ # Normalize
+ outputs = ln(outputs)
+
+ return outputs
+
+def ff(inputs, num_units, scope="positionwise_feedforward"):
+ '''position-wise feed forward net. See 3.3
+
+ inputs: A 3d tensor with shape of [N, T, C].
+ num_units: A list of two integers.
+ scope: Optional scope for `variable_scope`.
+
+ Returns:
+ A 3d tensor with the same shape and dtype as inputs
+ '''
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ # Inner layer
+ outputs = tf.layers.dense(inputs, num_units[0], activation=tf.nn.relu)
+
+ # Outer layer
+ outputs = tf.layers.dense(outputs, num_units[1])
+
+ # Residual connection
+ outputs += inputs
+
+ # Normalize
+ outputs = ln(outputs)
+
+ return outputs
+
+def label_smoothing(inputs, epsilon=0.1):
+ '''Applies label smoothing. See 5.4 and https://arxiv.org/abs/1512.00567.
+ inputs: 3d tensor. [N, T, V], where V is the number of vocabulary.
+ epsilon: Smoothing rate.
+
+ For example,
+
+ ```
+ import tensorflow as tf
+ inputs = tf.convert_to_tensor([[[0, 0, 1],
+ [0, 1, 0],
+ [1, 0, 0]],
+
+ [[1, 0, 0],
+ [1, 0, 0],
+ [0, 1, 0]]], tf.float32)
+
+ outputs = label_smoothing(inputs)
+
+ with tf.Session() as sess:
+ print(sess.run([outputs]))
+
+ >>
+ [array([[[ 0.03333334, 0.03333334, 0.93333334],
+ [ 0.03333334, 0.93333334, 0.03333334],
+ [ 0.93333334, 0.03333334, 0.03333334]],
+
+ [[ 0.93333334, 0.03333334, 0.03333334],
+ [ 0.93333334, 0.03333334, 0.03333334],
+ [ 0.03333334, 0.93333334, 0.03333334]]], dtype=float32)]
+ ```
+ '''
+ V = inputs.get_shape().as_list()[-1] # number of channels
+ return ((1-epsilon) * inputs) + (epsilon / V)
+
+def positional_encoding(inputs,
+ maxlen,
+ masking=True,
+ scope="positional_encoding"):
+ '''Sinusoidal Positional_Encoding. See 3.5
+ inputs: 3d tensor. (N, T, E)
+ maxlen: scalar. Must be >= T
+ masking: Boolean. If True, padding positions are set to zeros.
+ scope: Optional scope for `variable_scope`.
+
+ returns
+ 3d tensor that has the same shape as inputs.
+ '''
+
+ E = inputs.get_shape().as_list()[-1] # static
+ N, T = tf.shape(inputs)[0], tf.shape(inputs)[1] # dynamic
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ # position indices
+ position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1]) # (N, T)
+
+ # First part of the PE function: sin and cos argument
+ position_enc = np.array([
+ [pos / np.power(10000, (i-i%2)/E) for i in range(E)]
+ for pos in range(maxlen)])
+
+ # Second part, apply the cosine to even columns and sin to odds.
+ position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i
+ position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1
+ position_enc = tf.convert_to_tensor(position_enc, tf.float32) # (maxlen, E)
+
+ # lookup
+ outputs = tf.nn.embedding_lookup(position_enc, position_ind)
+
+ # masks
+ if masking:
+ outputs = tf.where(tf.equal(inputs, 0), inputs, outputs)
+
+ return tf.to_float(outputs)
+
+def noam_scheme(init_lr, global_step, warmup_steps=4000.):
+ '''Noam scheme learning rate decay
+ init_lr: initial learning rate. scalar.
+ global_step: scalar.
+ warmup_steps: scalar. During warmup_steps, learning rate increases
+ until it reaches init_lr.
+ '''
+ step = tf.cast(global_step + 1, dtype=tf.float32)
+ return init_lr * warmup_steps ** 0.5 * tf.minimum(step * warmup_steps ** -1.5, step ** -0.5)
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl
new file mode 100644
index 0000000000000000000000000000000000000000..809879142776e1df62d689e7675648101d2f9a91
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl
@@ -0,0 +1,165 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+if (!scalar(@ARGV)) {
+ print STDERR "Syntax: multi-bleu.perl [-length_analysis bucket] [ref-stem] < [system-output]
+If one reference translation: ref-stem is filename
+If multiple reference translations: ref-stem[0,1,2,...] is filename\n";
+}
+
+my $length_analysis;
+if ($ARGV[0] eq '-length_analysis') {
+ shift @ARGV;
+ $length_analysis = shift @ARGV;
+}
+
+my @CORRECT_BUCKET;
+my @TOTAL_BUCKET;
+my @COUNT_LENGTH;
+my $max_bucket=0;
+
+my $stem = $ARGV[0];
+my @REF;
+my $ref=0;
+while(-e "$stem$ref") {
+ &add_to_ref("$stem$ref",\@REF);
+ $ref++;
+}
+&add_to_ref($stem,\@REF) if -e $stem;
+die("did not find any reference translations at $stem") unless scalar @REF;
+
+sub add_to_ref {
+ my ($file,$REF) = @_;
+ my $s=0;
+ open(REF,$file);
+ while(<REF>) {
+ chop;
+ push @{$$REF[$s++]}, $_;
+ }
+ close(REF);
+}
+
+my(@CORRECT,@TOTAL,$length_translation,$length_reference);
+my $s=0;
+while(<STDIN>) {
+ chop;
+ my @WORD = split;
+ my %REF_NGRAM = ();
+ my $length_translation_this_sentence = scalar(@WORD);
+ my ($closest_diff,$closest_length) = (9999,9999);
+ my $bucket;
+ foreach my $reference (@{$REF[$s]}) {
+# print "$s $_ <=> $reference\n";
+ my @WORD = split(/ /,$reference);
+ my $length = scalar(@WORD);
+ if ($length_analysis) {
+ $bucket = int($length/$length_analysis);
+ $max_bucket=$bucket if ($bucket>$max_bucket);
+ }
+ if (abs($length_translation_this_sentence-$length) < $closest_diff) {
+ $closest_diff = abs($length_translation_this_sentence-$length);
+ $closest_length = $length;
+# print "$i: closest diff = abs($length_translation_this_sentence-$length)<BR>\n";
+ }
+ for(my $n=1;$n<=4;$n++) {
+ my %REF_NGRAM_N = ();
+ for(my $start=0;$start<=$#WORD-($n-1);$start++) {
+ my $ngram = "$n";
+ for(my $w=0;$w<$n;$w++) {
+ $ngram .= " ".$WORD[$start+$w];
+ }
+ $REF_NGRAM_N{$ngram}++;
+ }
+ foreach my $ngram (keys %REF_NGRAM_N) {
+ if (!defined($REF_NGRAM{$ngram}) ||
+ $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
+ $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
+# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
+ }
+ }
+ }
+ }
+ if ($bucket) {
+ $COUNT_LENGTH[$bucket]++;
+ }
+ $length_translation += $length_translation_this_sentence;
+ $length_reference += $closest_length;
+ for(my $n=1;$n<=4;$n++) {
+ my %T_NGRAM = ();
+ for(my $start=0;$start<=$#WORD-($n-1);$start++) {
+ my $ngram = "$n";
+ for(my $w=0;$w<$n;$w++) {
+ $ngram .= " ".$WORD[$start+$w];
+ }
+ $T_NGRAM{$ngram}++;
+ }
+ foreach my $ngram (keys %T_NGRAM) {
+ $ngram =~ /^(\d+) /;
+ my $n = $1;
+# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
+ $TOTAL[$n] += $T_NGRAM{$ngram};
+ if ($bucket) {
+ $TOTAL_BUCKET[$bucket][$n] += $T_NGRAM{$ngram};
+ }
+ if (defined($REF_NGRAM{$ngram})) {
+ if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
+ if ($bucket) {
+ $CORRECT_BUCKET[$bucket][$n] += $T_NGRAM{$ngram};
+ }
+ $CORRECT[$n] += $T_NGRAM{$ngram};
+# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
+ }
+ else {
+ if ($bucket) {
+ $CORRECT_BUCKET[$bucket][$n] += $REF_NGRAM{$ngram};
+ }
+ $CORRECT[$n] += $REF_NGRAM{$ngram};
+# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
+ }
+ }
+ }
+ }
+ $s++;
+}
+my $brevity_penalty = 1;
+if ($length_translation<$length_reference) {
+ $brevity_penalty = exp(1-$length_reference/$length_translation);
+}
+my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
+ my_log( $CORRECT[2]/$TOTAL[2] ) +
+ my_log( $CORRECT[3]/$TOTAL[3] ) +
+ my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
+
+printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f)\n",
+ 100*$bleu,
+ 100*$CORRECT[1]/$TOTAL[1],
+ 100*$CORRECT[2]/$TOTAL[2],
+ 100*$CORRECT[3]/$TOTAL[3],
+ 100*$CORRECT[4]/$TOTAL[4],
+ $brevity_penalty,
+ $length_translation / $length_reference;
+
+if ($length_analysis) {
+ print "\nLENGTH ANALYSIS:\n";
+ for(my $b=int(1/$length_analysis); $b<=$max_bucket; $b++) {
+ my $range=$b;
+ if ($length_analysis != 1) {
+ $range=($b*$length_analysis+1)."-".(($b+1)*$length_analysis);
+ }
+ print "$range";;
+ if ($TOTAL_BUCKET[$b] && $TOTAL_BUCKET[$b][4] && $CORRECT_BUCKET[$b][4]) {
+ printf "\t%d\t%.2f", $COUNT_LENGTH[$b],
+ 100*$brevity_penalty * exp((my_log( $CORRECT_BUCKET[$b][1]/$TOTAL_BUCKET[$b][1] ) +
+ my_log( $CORRECT_BUCKET[$b][2]/$TOTAL_BUCKET[$b][2] ) +
+ my_log( $CORRECT_BUCKET[$b][3]/$TOTAL_BUCKET[$b][3] ) +
+ my_log( $CORRECT_BUCKET[$b][4]/$TOTAL_BUCKET[$b][4] ) ) / 4);
+ }
+ print "\n";
+ }
+}
+
+sub my_log {
+ return -9999999999 unless $_[0];
+ return log($_[0]);
+}
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1cbd6f15c3c2e58345e5ad28ecd8fff3672545d6
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+code_dir=$1
+work_dir=$2
+dataset_path=$3
+output_path=$4
+
+#############训练前输入目录文件确认#########################
+echo "[CANN-ZhongZhi] before train - list my run files[/usr/local/Ascend/ascend-toolkit]:"
+ls -al /usr/local/Ascend/ascend-toolkit
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my code files[${code_dir}]:"
+ls -al ${code_dir}
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my work files[${work_dir}]:"
+ls -al ${work_dir}
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my dataset files[${dataset_path}]:"
+ls -al ${dataset_path}
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my output files[${output_path}]:"
+ls -al ${output_path}
+echo ""
+
+######环境变量修改######
+###如果需要修改环境变量的,在此处修改
+###搭配最大内存使用
+#echo "GE_USE_STATIC_MEMORY ${GE_USE_STATIC_MEMORY}"
+#echo $GE_USE_STATIC_MEMORY
+#echo "GE_USE_STATIC_MEMORY"
+#export GE_USE_STATIC_MEMORY=1
+#echo "GE_USE_STATIC_MEMORY ${GE_USE_STATIC_MEMORY}"
+#echo $GE_USE_STATIC_MEMORY
+#echo "GE_USE_STATIC_MEMORY"
+
+
+##接口老哥提示打开
+echo "ENABLE_FORCE_V2_CONTROL ${GE_USE_STATIC_MEMORY}"
+export ENABLE_FORCE_V2_CONTROL=1
+echo "ENABLE_FORCE_V2_CONTROL ${GE_USE_STATIC_MEMORY}"
+#设置日志级别为info
+#export ASCEND_GLOBAL_LOG_LEVEL=1
+#设置日志打屏到屏幕
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+#export TF_CPP_MIN_LOG_LEVEL=0
+env > ${output_path}/my_env.log
+
+######训练执行######
+###此处每个网络执行命令不同,需要修改
+python3.7 ${code_dir}/train621V.py --data_url=${dataset_path} --train_url=${output_path}
+if [ $? -eq 0 ];
+then
+ echo "[CANN-ZhongZhi] train return success"
+else
+ echo "[CANN-ZhongZhi] train return failed"
+fi
+
+######训练后把需要备份的内容保存到output_path######
+###此处每个网络不同,视情况添加cp
+cp -r ${work_dir} ${output_path}
+
+######训练后输出目录文件确认######
+echo "[CANN-ZhongZhi] after train - list my work files[${work_dir}]:"
+ls -al ${work_dir}
+echo ""
+
+echo "[CANN-ZhongZhi] after train - list my output files[${output_path}]:"
+ls -al ${output_path}
+echo ""
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json
new file mode 100644
index 0000000000000000000000000000000000000000..d729df649d913c80f1a942bc7b75378829a3ddd4
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json
@@ -0,0 +1,5 @@
+{
+ "black-list": {
+ "to-add": ["Assign","MatMulV2","Cast","Mul","ReduceSumD","SoftmaxV2","BatchMatMul"]
+ }
+}
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2a893aed66a82a6b726bc1e97dd9efc8a4db626
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py
@@ -0,0 +1,153 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+#/usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer.
+
+Preprocess the iwslt 2016 datasets.
+'''
+
+import os
+import errno
+import sentencepiece as spm
+import re
+from hparams import Hparams
+import logging
+###因为换了数据集,他的vocabsize可能不一样了
+logging.basicConfig(level=logging.INFO)
+
+def prepro(hp):
+ """Load raw data -> Preprocessing -> Segmenting with sentencepice
+ hp: hyperparams. argparse.
+ """
+ logging.info("# Check if raw files exist")
+ train1 = "iwslt2016/prepro/train.de"
+ train2 = "iwslt2016/prepro/train.en"
+ eval1 = "iwslt2016/prepro/eval.de"
+ eval2 = "iwslt2016/prepro/eval.en"
+ test1 = "iwslt2016/prepro/test.de"
+ test2 = "iwslt2016/prepro/test.en"
+ for f in (train1, train2, eval1, eval2, test1, test2):
+ if not os.path.isfile(f):
+ raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), f)
+
+ logging.info("# Preprocessing")
+ # train
+ _prepro = lambda x: [line.strip() for line in open(x, 'r', encoding='UTF-8').read().split("\n") \
+ if not line.startswith("<")]
+ prepro_train1, prepro_train2 = _prepro(train1), _prepro(train2)
+ assert len(prepro_train1)==len(prepro_train2), "Check if train source and target files match."
+
+ # eval
+ # _prepro = lambda x: [re.sub("<[^>]+>", "", line).strip() \
+ # for line in open(x, 'r', encoding='UTF-8').read().split("\n") \
+ # if line.startswith("<seg id")]
+tensorflow>=1.12.0
+numpy>=1.15.4
+sentencepiece==0.1.8
+tqdm>=4.28.1
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/switch_config.txt b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/switch_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ef3971b4770d1cb6b1799a2ea00032ba992d3579
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/switch_config.txt
@@ -0,0 +1,3 @@
+OpType::MatMulV2:InputDtype:float16,float16,float32,OutputDtype:float32
+OpType::BatchMatMul:InputDtype:float16,float16,OutputDtype:float32
+OpType::BatchMatMulV2:InputDtype:float16,float16,OutputDtype:float32
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0f0a73eb88acfbdae65dcf01203900ea0fe404c7
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+##########################################################
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+##########################################################
+# shell脚本所在路径
+cur_path=`echo $(cd $(dirname $0);pwd)`
+
+# 判断当前shell是否是performance
+perf_flag=`echo $0 | grep performance | wc -l`
+
+# 当前执行网络的名称
+Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`
+
+export RANK_SIZE=1
+export RANK_ID=0
+export JOB_ID=10087
+
+# 路径参数初始化
+data_path=""
+output_path=""
+
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage:./train_performance_1P.sh <args>"
+ echo " "
+ echo "parameter explain:
+ --data_path # dataset of training
+ --output_path # output of training
+ --train_steps # max_step for training
+ --train_epochs # max_epoch for training
+ --batch_size # batch size
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# 参数校验,不需要修改
+for para in $*
+do
+ if [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ elif [[ $para == --output_path* ]];then
+ output_path=`echo ${para#*=}`
+ elif [[ $para == --train_steps* ]];then
+ train_steps=`echo ${para#*=}`
+ elif [[ $para == --train_epochs* ]];then
+ train_epochs=`echo ${para#*=}`
+ elif [[ $para == --batch_size* ]];then
+ batch_size=`echo ${para#*=}`
+ fi
+done
+
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be config"
+ exit 1
+fi
+
+# 校验是否传入output_path,不需要修改
+if [[ $output_path == "" ]];then
+ output_path="./test/output/${ASCEND_DEVICE_ID}"
+fi
+
+CaseName=""
+function get_casename()
+{
+ if [ x"${perf_flag}" = x1 ];
+ then
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+ else
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
+ fi
+}
+
+# 跳转到code目录
+cd ${cur_path}/../
+rm -rf ./test/output/${ASCEND_DEVICE_ID}
+mkdir -p ./test/output/${ASCEND_DEVICE_ID}
+
+# 训练开始时间记录,不需要修改
+start_time=$(date +%s)
+##########################################################
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+##########################################################
+
+#=========================================================
+#=========================================================
+#========训练执行命令,需要根据您的网络进行修改==============
+#=========================================================
+#=========================================================
+# 基础参数,需要模型审视修改
+# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取
+# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取
+# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值
+batch_size=128
+
+# 设置打屏日志文件名,请保留,文件名为${print_log}
+print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
+
+python3.7 ./train.py --data_url=${data_path} --train_url=${output_path} 1>${print_log} 2>&1
+
+# 性能相关数据计算
+StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`
+
+# 精度相关数据计算
+train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
+# 提取所有loss打印信息
+grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+###########################################################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+###########################################################
+
+# 获取最终的casename,请保留,case文件名为${CaseName}
+get_casename
+
+# 重命名loss文件
+if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
+then
+ mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
+fi
+
+# 训练端到端耗时
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+echo "------------------ Final result ------------------"
+# 输出性能FPS/单step耗时/端到端耗时
+echo "Final Performance images/sec : $FPS"
+echo "Final Performance sec/step : $StepTime"
+echo "E2E Training Duration sec : $e2e_time"
+
+# 输出训练精度
+echo "Final Train Accuracy : ${train_accuracy}"
+
+# 最后一个迭代loss值,不需要修改
+ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt`)
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e86eb215ba0c2d1f89981809bd0e925605d830bd
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_performance_1p.sh
@@ -0,0 +1,158 @@
#!/bin/bash

##########################################################
######### Lines 3 to 90 are framework boilerplate ########
######### — please do NOT modify them.            ########
##########################################################
# Directory containing this shell script.
cur_path=`echo $(cd $(dirname $0);pwd)`

# Whether the current script is a performance run (name contains "performance").
perf_flag=`echo $0 | grep performance | wc -l`

# Name of the network being trained (parent directory name of test/).
Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`

export RANK_SIZE=1
export RANK_ID=0
export JOB_ID=10087

# Path parameter initialization.
data_path=""
output_path=""

# Help message.
if [[ $1 == --help || $1 == -h ]];then
    # FIX: the original `echo"usage:..."` had no space after `echo`, so the
    # shell tried to execute a command literally named `echo"usage:..."`.
    echo "usage:./train_performance_1P.sh "
    echo " "
    echo "parameter explain:
 --data_path # dataset of training
 --output_path # output of training
 --train_steps # max_step for training
 --train_epochs # max_epoch for training
 --batch_size # batch size
 -h/--help show help message
 "
    exit 1
fi
+
# Argument parsing (framework-mandated; do not modify).
for para in $*
do
    if [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    elif [[ $para == --output_path* ]];then
        output_path=`echo ${para#*=}`
    elif [[ $para == --train_steps* ]];then
        train_steps=`echo ${para#*=}`
    elif [[ $para == --train_epochs* ]];then
        train_epochs=`echo ${para#*=}`
    elif [[ $para == --batch_size* ]];then
        batch_size=`echo ${para#*=}`
    fi
done

# data_path is mandatory (framework-mandated check; do not modify).
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi

# Default output_path when not supplied (framework-mandated; do not modify).
if [[ $output_path == "" ]];then
    output_path="./test/output/${ASCEND_DEVICE_ID}"
fi
+
# Case name has the form <network>_bs<batch>_<ranks>p_{perf|acc}; the suffix
# depends on whether this is a performance or an accuracy script.
CaseName=""
function get_casename()
{
    local suffix="acc"
    if [ x"${perf_flag}" = x1 ];
    then
        suffix="perf"
    fi
    CaseName="${Network}_bs${batch_size}_${RANK_SIZE}p_${suffix}"
}
+
# Move to the code directory (parent of test/).
cd ${cur_path}/../
rm -rf ./test/output/${ASCEND_DEVICE_ID}
mkdir -p ./test/output/${ASCEND_DEVICE_ID}

# Record training start time (framework-mandated; do not modify).
start_time=$(date +%s)
##########################################################
######### Lines 3 to 90 are framework boilerplate ########
######### — please do NOT modify them.            ########
##########################################################

#=========================================================
#=========================================================
#======= Training command — model-specific section =======
#=========================================================
#=========================================================
# Base parameters (model-specific):
# - training data lives under ${data_path}
# - training output goes to ${output_path}
# - batch_size must be kept and set to the real value
# NOTE(review): train_epochs is set but never passed to train.py — confirm
# the run is bounded by --steps alone.
train_epochs=1
train_steps=34632
batch_size=128

print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
python3.7 ./train.py --data_url=${data_path} --train_url=${output_path} --steps=${train_steps} 1>${print_log} 2>&1


# Performance metrics: average sec/step over the last 10 matching log lines.
# NOTE(review): if no "sec/step :" lines exist in the log, StepTime is empty
# and the FPS awk below fails — confirm train.py always emits this pattern.
StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`

# Accuracy metric.
train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
# Extract all loss values into a file.
# NOTE(review): train.py logs "loss: ..." (no space before the colon) — confirm
# the pattern "loss :" actually matches, otherwise this file is empty.
grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+
###########################################################
######### Everything below is framework boilerplate #######
######### — please do NOT modify.                   #######
###########################################################

# Compute the final case name; the result file is named ${CaseName}.log.
get_casename

# Rename the raw loss dump to the case-specific loss file.
if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
then
    mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
fi

# End-to-end training duration.
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

echo "------------------ Final result ------------------"
# Report FPS / per-step time / end-to-end time.
echo "Final Performance images/sec : $FPS"
echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"

# Report training accuracy.
echo "Final Train Accuracy : ${train_accuracy}"

# Loss of the last iteration (reads the renamed loss file).
ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt`)

# Append key results to ${CaseName}.log.
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/train.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..c85005aa76f104aa44137acd2d396d5187cdc201
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/train.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# -*- coding: utf-8 -*-
#/usr/bin/python3
'''
Feb. 2019 by kyubyong park.
kbpark.linguist@gmail.com.
https://www.github.com/kyubyong/transformer
'''
import tensorflow as tf
from model import Transformer
from tqdm import tqdm
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu, calc_bleu2
import os
from hparams import Hparams
import math
import logging
import time
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from npu_bridge.npu_init import *

#import precision_tool.tf_config as npu_tf_config


logging.basicConfig(level=logging.INFO)


logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
# hp.data_url=os.path.dirname(os.path.realpath(__file__))+'/'
print("data_url\n")
print(hp.data_url)
print("train_url\n")
print(hp.train_url)
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
# Training batches: shuffled, capped at (maxlen1, maxlen2).
train_batches, num_train_batches, num_train_samples = get_batch(hp.data_url+ hp.train1, hp.data_url+hp.train2,
                                                                hp.maxlen1, hp.maxlen2,
                                                                hp.data_url+hp.vocab, hp.batch_size,
                                                                shuffle=True)
# eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.data_url+hp.eval1, hp.data_url+hp.eval2,
#                                                              100000, 100000,
#                                                              hp.data_url+hp.vocab, hp.batch_size,
#                                                              shuffle=False)
# Eval batches use the same max lengths as training (the commented variant
# above used effectively unlimited lengths).
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.data_url+hp.eval1, hp.data_url+hp.eval2,
                                                             hp.maxlen1, hp.maxlen2,
                                                             hp.data_url+hp.vocab, hp.batch_size,
                                                             shuffle=False)

# create a iterator of the correct shape and type, shared between the train
# and eval datasets via reinitialization.  NOTE(review): `iter` shadows the
# builtin of the same name.
iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = iter.get_next()

train_init_op = iter.make_initializer(train_batches)
eval_init_op = iter.make_initializer(eval_batches)

logging.info("# Load model")
m = Transformer(hp)
loss, train_op, global_step = m.train(xs, ys)
y_hat = m.eval(xs, ys)
# y_hat = m.infer(xs, ys)

logging.info("# Session")
saver = tf.train.Saver(max_to_keep=hp.num_epochs)

#### Paths for profiling / overflow-dump related settings.
if not os.path.exists(hp.train_url + "/tmp/profiling"): os.makedirs(hp.train_url + "/tmp/profiling")
proPath=hp.train_url + "/tmp/profiling"
dumpPath=hp.train_url + "/tmp/overflow"
blackPath=hp.data_url+"/ops_info.json"
fusionPath=hp.data_url+"/fusion_switch.cfg"
switchPath=hp.data_url+"/switch_config.txt"
if not os.path.exists(dumpPath): os.makedirs(dumpPath)

config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
# NOTE(review): this reads "./switch_config.txt" from the working directory,
# while `switchPath` (built from data_url above) is never used — confirm which
# file is intended.
custom_op.parameter_map["customize_dtypes"].s = tf.compat.as_bytes("./switch_config.txt")
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # must be explicitly disabled
config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF  # must be explicitly disabled

with tf.Session(config=config) as sess:
    # Resume from the latest checkpoint if one exists; otherwise initialize.
    ckpt = tf.train.latest_checkpoint(hp.train_url+hp.logdir)
    if ckpt is None:
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())
        if not os.path.exists(hp.train_url + hp.logdir): os.makedirs(hp.train_url + hp.logdir)
        save_variable_specs(os.path.join(hp.train_url+hp.logdir, "specs"))
    else:
        saver.restore(sess, ckpt)

    sess.run(train_init_op)
    total_steps = hp.num_epochs * num_train_batches
    print("Step Info: ", hp.num_epochs, num_train_batches)

    _gs = sess.run(global_step)

    for i in tqdm(range(_gs, total_steps+1)):
        _, _gs = sess.run([train_op, global_step])
        epoch = math.ceil(i / num_train_batches)

        # NOTE(review): this extra fetch runs the graph again and pulls a new
        # batch from the iterator just to log a loss value — confirm the extra
        # data consumption per step is intended.
        _loss = sess.run(loss)  # train loss
        logging.info("loss: {}".format(_loss))

        if i and i % num_train_batches == 0:
            # Epoch boundary: evaluate, write hypotheses + BLEU, checkpoint.
            logging.info("epoch {} is done".format(epoch))
            _loss = sess.run(loss)  # train loss
            logging.info("loss: {}".format(_loss))

            logging.info("# test evaluation")
            # NOTE(review): eval_init_op below is commented out, so the
            # hypotheses are generated from the training pipeline — confirm
            # this is intended.
            # _ = sess.run([eval_init_op])

            logging.info("# get hypotheses")
            hypotheses = get_hypotheses(num_eval_batches, num_eval_samples, sess, y_hat, m.idx2token)

            logging.info("# write results")
            model_output = "iwslt2016_E%02dL%.2f" % (epoch, _loss)
            if not os.path.exists(hp.train_url+hp.evaldir): os.makedirs(hp.train_url+hp.evaldir)
            translation = os.path.join(hp.train_url + hp.evaldir, model_output)
            # if not os.path.exists(hp.train_url + hp.evaldir): os.makedirs(hp.train_url + hp.evaldir)
            # translation = os.path.join(hp.train_url+hp.evaldir, model_output)

            #######
            with open(translation, 'w') as fout:
                fout.write("\n".join(hypotheses))

            logging.info("# calc bleu score and append it to translation")
            calc_bleu2(hp.data_url + hp.eval3, translation,hp.data_url)

            logging.info("# save models")
            ckpt_name = os.path.join(hp.train_url+hp.logdir, model_output)
            # if not os.path.exists(hp.train_url + hp.logdir): os.makedirs(hp.train_url + hp.logdir)
            # ckpt_name = os.path.join(hp.train_url + hp.logdir, model_output)
            ########
            saver.save(sess, ckpt_name, global_step=_gs)
            logging.info("after training of {} epochs, {} has been saved.".format(epoch, ckpt_name))

            logging.info("# fall back to train mode")
            sess.run(train_init_op)

logging.info("Done")
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/utils.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb1fdfd125e2013d27da4efc91773f9f9120375d
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/utils.py
@@ -0,0 +1,272 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+# /usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer.
+
+Utility functions
+'''
+
+import tensorflow as tf
+# from tensorflow.python import pywrap_tensorflow
+# import numpy as np
+import json
+import os, re
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
def calc_num_batches(total_num, batch_size):
    '''Calculates the number of batches.
    total_num: total sample number
    batch_size

    Returns
    number of batches, allowing for remainders.'''
    # Ceiling division via negation: equivalent to
    # total_num // batch_size + (1 if there is a remainder else 0).
    return -(-total_num // batch_size)
+
def convert_idx_to_token_tensor(inputs, idx2token):
    '''Converts int32 tensor to string tensor.
    inputs: 1d int32 tensor. indices.
    idx2token: dictionary

    Returns
    1d string tensor.
    '''
    def _join_tokens(indices):
        # Runs eagerly inside tf.py_func: map each index through the
        # vocabulary and join the tokens with single spaces.
        return " ".join(idx2token[idx] for idx in indices)

    return tf.py_func(_join_tokens, [inputs], tf.string)
+
+# # def pad(x, maxlen):
+# # '''Pads x, list of sequences, and make it as a numpy array.
+# # x: list of sequences. e.g., [[2, 3, 4], [5, 6, 7, 8, 9], ...]
+# # maxlen: scalar
+# #
+# # Returns
+# # numpy int32 array of (len(x), maxlen)
+# # '''
+# # padded = []
+# # for seq in x:
+# # seq += [0] * (maxlen - len(seq))
+# # padded.append(seq)
+# #
+# # arry = np.array(padded, np.int32)
+# # assert arry.shape == (len(x), maxlen), "Failed to make an array"
+#
+# return arry
+
def postprocess(hypotheses, idx2token):
    '''Processes translation outputs.
    hypotheses: list of encoded predictions
    idx2token: dictionary

    Returns
    processed hypotheses
    '''
    _hypotheses = []
    for h in hypotheses:
        sent = "".join(idx2token[idx] for idx in h)
        # FIX: the original called sent.split("") — an empty separator, which
        # raises ValueError on every call.  Cut the sentence at the
        # end-of-sentence marker "</s>" as in the reference implementation
        # (the marker text was evidently lost from this copy).
        sent = sent.split("</s>")[0].strip()
        sent = sent.replace("▁", " ")  # remove bpe symbols
        _hypotheses.append(sent.strip())
    return _hypotheses
+
def save_hparams(hparams, path):
    '''Saves hparams to path
    hparams: argsparse object.
    path: output directory.

    Writes
    hparams as literal dictionary to path.
    '''
    # Create the output directory on demand, then persist the hparams
    # namespace as a JSON dictionary in a file named "hparams".
    os.makedirs(path, exist_ok=True)
    serialized = json.dumps(vars(hparams))
    with open(os.path.join(path, "hparams"), 'w') as fout:
        fout.write(serialized)
+
def load_hparams(parser, path):
    '''Loads hparams and overrides parser
    parser: argsparse object to receive the saved values.
    path: directory containing an "hparams" file, or a file inside it.
    '''
    if not os.path.isdir(path):
        path = os.path.dirname(path)
    # Close the file deterministically (the original leaked the handle).
    with open(os.path.join(path, "hparams"), 'r') as fin:
        flag2val = json.loads(fin.read())
    for flag, val in flag2val.items():
        # FIX: the original did `parser.f = v`, which set a literal attribute
        # named "f" on every iteration instead of restoring each flag.
        setattr(parser, flag, val)
+
def save_variable_specs(fpath):
    '''Saves information about variables such as
    their name, shape, and total parameter number
    fpath: string. output file path

    Writes
    a text file named fpath.
    '''
    def _num_elements(shape):
        '''Product of all dimensions of a TensorShape.'''
        count = 1
        for dim in shape:
            count *= dim
        return count

    specs = []
    num_params = 0
    for var in tf.global_variables():
        specs.append("{}==={}".format(var.name, var.shape))
        num_params += _num_elements(var.shape)
    print("num_params: ", num_params)
    with open(fpath, 'w') as fout:
        fout.write("num_params: {}\n".format(num_params))
        fout.write("\n".join(specs))
    logging.info("Variables info has been saved.")
+
def get_hypotheses(num_batches, num_samples, sess, tensor, dict):
    '''Gets hypotheses.
    num_batches: scalar.
    num_samples: scalar.
    sess: tensorflow sess object
    tensor: target tensor to fetch
    dict: idx2token dictionary

    Returns
    hypotheses: list of sents
    '''
    decoded = []
    for step in range(num_batches):
        # Fetch one decoded batch and accumulate its rows.
        batch_out = sess.run(tensor)
        decoded.extend(batch_out.tolist())
        print(step)
    # Convert index sequences to text and keep only the first
    # num_samples entries.
    return postprocess(decoded, dict)[:num_samples]
+
def calc_bleu(ref, translation):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path

    Returns
    translation that the bleu score is appended to'''
    # Run the standard multi-bleu perl scorer; its report lands in "temp".
    get_bleu_score = "perl multi-bleu.perl {} < {} > {}".format(ref, translation, "temp")
    os.system(get_bleu_score)
    bleu_score_report = open("temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        # FIX: rename in-process.  The original ran `os.system("mv ...")` and
        # then os.remove(translation), which always raised FileNotFoundError
        # (the file had already been moved) and was swallowed by a bare except.
        os.replace(translation, translation + "B{}".format(score))
    except (IndexError, OSError):
        # No BLEU line in the report (e.g. scorer missing) or the rename
        # failed — keep the un-renamed translation, as before.
        pass
    os.remove("temp")
+
def calc_bleu2(ref, translation,data_url):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path
    data_url: unused; kept for caller compatibility.

    Returns
    translation that the bleu score is appended to'''
    # Run the multi-bleu perl scorer from the working directory; its report
    # lands in "temp".
    get_bleu_score = "perl "+"./multi-bleu.perl {} < {} > {}".format(ref, translation, "temp")
    os.system(get_bleu_score)
    bleu_score_report = open("temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        # FIX: rename in-process.  The original ran `os.system("mv ...")` and
        # then os.remove(translation), which always raised FileNotFoundError
        # (the file had already been moved) and was swallowed by a bare except.
        os.replace(translation, translation + "B{}".format(score))
    except (IndexError, OSError):
        # No BLEU line in the report (e.g. scorer missing) or the rename
        # failed — keep the un-renamed translation, as before.
        pass
    os.remove("temp")
+
def calc_bleu3(ref, translation,data_url):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path
    data_url: directory containing multi-bleu.perl

    Returns
    translation that the bleu score is appended to'''
    # NOTE(review): the temp-report path below is a hard-coded developer home
    # directory, so this variant only works on that one machine — confirm it
    # is still used anywhere before relying on it.
    get_bleu_score = "perl "+data_url+"multi-bleu.perl {} < {} > {}".format(ref, translation, "/home/test_user05/transformerAll/temp")
    os.system(get_bleu_score)
    bleu_score_report = open("/home/test_user05/transformerAll/temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        new_translation = translation + "B{}".format(score)
        os.system("mv {} {}".format(translation, new_translation))
        # NOTE(review): `translation` was just moved away, so this remove
        # always raises FileNotFoundError, silently swallowed by the bare
        # except below.
        os.remove(translation)

    except: pass
    #os.remove("temp")
+# def calc_bleu5242(ref, translation):
+# '''Calculates bleu score and appends the report to translation
+# ref: reference file path
+# translation: model output file path
+#
+# Returns
+# translation that the bleu score is appended to'''
+# get_bleu_score = "perl "+os.path.dirname(os.path.realpath(__file__))+"/"+"multi-bleu.perl {} < {} > {}".format(ref, translation, "temp")
+# os.system(get_bleu_score)
+# bleu_score_report = open("temp", "r").read()
+# with open(translation, "a") as fout:
+# fout.write("\n{}".format(bleu_score_report))
+# try:
+# score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
+# new_translation = translation + "B{}".format(score)
+# os.system("mv {} {}".format(translation, new_translation))
+# os.remove(translation)
+#
+# except: pass
+# os.remove("temp")
+# def get_inference_variables(ckpt, filter):
+# reader = pywrap_tensorflow.NewCheckpointReader(ckpt)
+# var_to_shape_map = reader.get_variable_to_shape_map()
+# vars = [v for v in sorted(var_to_shape_map) if filter not in v]
+# return vars
+
+
+
+