diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..9c8f3ea0871e0bfe81da0fa6e7c1d7d156dc380e
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f30dacb7fb64a7ac8bd257effeedc573943d691c
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/README.md
@@ -0,0 +1,180 @@
+- [基本信息](#基本信息.md)
+- [概述](#概述.md)
+- [训练环境准备](#训练环境准备.md)
+- [快速上手](#快速上手.md)
+- [训练结果](#训练结果.md)
+- [高级参考](#高级参考.md)
+
+基本信息
+
+**发布者(Publisher):Huawei**
+
+**应用领域(Application Domain):Natural Language Processing**
+
+**版本(Version):1.2**
+
+**修改时间(Modified) :2021.4.6**
+
+**框架(Framework):TensorFlow 1.15.0**
+
+**模型格式(Model Format):ckpt**
+
+**精度(Precision):Mixed**
+
+**处理器(Processor):昇腾910**
+
+**应用级别(Categories):Official**
+
+**描述(Description):基于TensorFlow框架实现Google提出的语言模型Transformer,对不同语言的文字进行翻译的训练代码**
+
+概述
+
+ Transformer是Google提出的语言模型,抛弃了传统的CNN和RNN,整个网络结构完全是由Attention机制组成。
+- 参考论文:
+
+ https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf
+
+- 参考实现:
+
+ https://github.com/Kyubyong/transformer
+
+
+## 默认配置
+
+- 训练数据集预处理(以WMT 2014 English-German训练集为例,仅作为用户参考示例):
+
+ - 文本输入格式:bpe
+
+- 测试数据集预处理(以WMT 2014 English-German验证集为例,仅作为用户参考示例)
+
+ - 文本输入格式:bpe
+
+
+
+## 支持特性
+
+| 特性列表 | 是否支持 |
+|-------|------|
+| 分布式训练 | 否 |
+| 混合精度 | 是 |
+| 并行数据 | 是 |
+
+## 混合精度训练
+
+昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。
+
+## 开启混合精度
+
+脚本已默认开启混合精度,设置precision_mode参数的脚本参考如下。
+
+ ```
+ custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
+ custom_op.name = 'NpuOptimizer'
+ custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(str(args.precision_mode))
+ ```
+
+训练环境准备
+
+1. 硬件环境准备请参见各硬件产品文档"[驱动和固件安装升级指南]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)"。需要在硬件设备上安装与CANN版本配套的固件与驱动。
+2. 宿主机上需要安装Docker并登录[Ascend Hub中心](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm)获取镜像。
+
+ 当前模型支持的镜像列表如[表1](#zh-cn_topic_0000001074498056_table1519011227314)所示。
+
+ **表 1** 镜像列表
+
+
+ 镜像名称
+ |
+ 镜像版本
+ |
+ 配套CANN版本
+ |
+
+
+
+ |
+ 20.2.0
+ |
+ 20.2
+ |
+
+
+
+
+
+快速上手
+
+- 数据集准备
+1. 模型训练使用WMT 2014 English-German数据集,数据集请自行获取。
+
+## 模型训练
+
+- 单击“立即下载”,并选择合适的下载方式下载源码包。
+
+- 启动训练之前,首先要配置程序运行相关环境变量。
+
+ 环境变量配置信息参见:
+
+ [Ascend 910训练平台环境变量设置](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+
+- 单卡训练
+
+ 1. 配置训练参数。
+
+ 首先在脚本test/train_full_1p.sh中,配置data_url、train_url参数,分别代表训练数据路径跟输出数据路径,请用户根据实际路径配置,或者在启动训练的命令行中以参数形式下发。
+
+ ```
+ data_path="../data"
+ ```
+
+ 2. 启动训练。
+
+ 启动单卡训练 (脚本为Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh)
+
+ ```
+ bash train_full_1p.sh --data_path=../data --output_path=../out
+ ```
+
+训练结果
+
+- 精度结果比对
+
+|精度指标项|GPU实测|NPU实测|
+|---|---|---|
+|loss|2.871|2.751|
+
+
+高级参考
+
+## 脚本和示例代码
+
+```
+├── train.py //网络训练与测试代码
+├── README.md //代码说明文档
+├── data_load.py //数据处理代码
+├── hparams.py //参数解析代码
+├── model.py //模型定义代码
+├── modules.py //模型模块代码
+├── preproNew.py //文本数据转bpe代码
+├── utils.py //精度计算代码
+├── requirements.txt //训练python依赖列表
+├── test
+│ ├──train_performance_1p.sh //单卡训练验证性能启动脚本
+│ ├──train_full_1p.sh //单卡全量训练启动脚本
+
+```
+
+## 脚本参数
+
+```
+--data_path 数据集路径,默认:path/dataset
+--output_path 训练过程中输出数据路径,默认:path/output
+--batch_size 每个NPU的batch size,默认:128
+```
+
+## 训练过程
+
+1. 通过“模型训练”中的训练指令启动单卡训练。
+
+2. 参考脚本的模型存储路径为./output/log。
+
+
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py
new file mode 100644
index 0000000000000000000000000000000000000000..2784e15947b24641180ef216de8748320ba1e0a8
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/data_load.py
@@ -0,0 +1,181 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+#/usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer
+
+Note.
+if safe, entities on the source side have the prefix 1, and the target side 2, for convenience.
+For example, fpath1, fpath2 means source file path and target file path, respectively.
+'''
+import tensorflow as tf
+from utils import calc_num_batches
+
+def load_vocab(vocab_fpath):
+ '''Loads vocabulary file and returns idx<->token maps
+ vocab_fpath: string. vocabulary file path.
+ Note that these are reserved
+ 0: <pad>, 1: <unk>, 2: <s>, 3: </s>
+
+ Returns
+ two dictionaries.
+ '''
+ vocab = [line.split()[0] for line in open(vocab_fpath, 'r').read().splitlines()]
+ token2idx = {token: idx for idx, token in enumerate(vocab)}
+ idx2token = {idx: token for idx, token in enumerate(vocab)}
+ return token2idx, idx2token
+
+def load_data(fpath1, fpath2, maxlen1, maxlen2):
+ '''Loads source and target data and filters out too lengthy samples.
+ fpath1: source file path. string.
+ fpath2: target file path. string.
+ maxlen1: source sent maximum length. scalar.
+ maxlen2: target sent maximum length. scalar.
+
+ Returns
+ sents1: list of source sents
+ sents2: list of target sents
+ '''
+ sents1, sents2 = [], []
+ with open(fpath1, 'r') as f1, open(fpath2, 'r') as f2:
+ for sent1, sent2 in zip(f1, f2):
+ if len(sent1.split()) + 1 > maxlen1: continue # 1: </s>
+ if len(sent2.split()) + 1 > maxlen2: continue # 1: </s>
+ sents1.append(sent1.strip())
+ sents2.append(sent2.strip())
+ return sents1, sents2
+
+
+def encode(inp, type, dict):
+ '''Converts string to number. Used for `generator_fn`.
+ inp: 1d byte array.
+ type: "x" (source side) or "y" (target side)
+ dict: token2idx dictionary
+
+ Returns
+ list of numbers
+ '''
+ inp_str = inp.decode("utf-8")
+ if type=="x": tokens = inp_str.split() + [""]
+ else: tokens = [""] + inp_str.split() + [""]
+
+ x = [dict.get(t, dict[""]) for t in tokens]
+ return x
+
+def generator_fn(sents1, sents2, vocab_fpath):
+ '''Generates training / evaluation data
+ sents1: list of source sents
+ sents2: list of target sents
+ vocab_fpath: string. vocabulary file path.
+
+ yields
+ xs: tuple of
+ x: list of source token ids in a sent
+ x_seqlen: int. sequence length of x
+ sent1: str. raw source (=input) sentence
+ labels: tuple of
+ decoder_input: decoder_input: list of encoded decoder inputs
+ y: list of target token ids in a sent
+ y_seqlen: int. sequence length of y
+ sent2: str. target sentence
+ '''
+ token2idx, _ = load_vocab(vocab_fpath)
+ for sent1, sent2 in zip(sents1, sents2):
+ x = encode(sent1, "x", token2idx)
+ y = encode(sent2, "y", token2idx)
+ decoder_input, y = y[:-1], y[1:]
+
+ x_seqlen, y_seqlen = len(x), len(y)
+ yield (x, x_seqlen, sent1), (decoder_input, y, y_seqlen, sent2)
+
+def input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=False):
+ '''Batchify data
+ sents1: list of source sents
+ sents2: list of target sents
+ vocab_fpath: string. vocabulary file path.
+ batch_size: scalar
+ shuffle: boolean
+
+ Returns
+ xs: tuple of
+ x: int32 tensor. (N, T1)
+ x_seqlens: int32 tensor. (N,)
+ sents1: str tensor. (N,)
+ ys: tuple of
+ decoder_input: int32 tensor. (N, T2)
+ y: int32 tensor. (N, T2)
+ y_seqlen: int32 tensor. (N, )
+ sents2: str tensor. (N,)
+ '''
+ shapes = (([None], (), ()),
+ ([None], [None], (), ()))
+ types = ((tf.int32, tf.int32, tf.string),
+ (tf.int32, tf.int32, tf.int32, tf.string))
+ paddings = ((0, 0, ''),
+ (0, 0, 0, ''))
+
+ dataset = tf.data.Dataset.from_generator(
+ generator_fn,
+ output_shapes=shapes,
+ output_types=types,
+ args=(sents1, sents2, vocab_fpath)) # <- arguments for generator_fn. converted to np string arrays
+
+ if shuffle: # for training
+ dataset = dataset.shuffle(128*batch_size)
+
+ dataset = dataset.repeat() # iterate forever
+ # dataset = dataset.padded_batch(batch_size, shapes, paddings).prefetch(1)
+ shapes = (([100], (), ()),
+ ([100], [100], (), ()))
+ dataset = dataset.padded_batch(batch_size, shapes, paddings, drop_remainder=True)
+
+ return dataset
+
+def get_batch(fpath1, fpath2, maxlen1, maxlen2, vocab_fpath, batch_size, shuffle=False):
+ '''Gets training / evaluation mini-batches
+ fpath1: source file path. string.
+ fpath2: target file path. string.
+ maxlen1: source sent maximum length. scalar.
+ maxlen2: target sent maximum length. scalar.
+ vocab_fpath: string. vocabulary file path.
+ batch_size: scalar
+ shuffle: boolean
+
+ Returns
+ batches
+ num_batches: number of mini-batches
+ num_samples
+ '''
+ sents1, sents2 = load_data(fpath1, fpath2, maxlen1, maxlen2)
+ batches = input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=shuffle)
+ num_batches = calc_num_batches(len(sents1), batch_size)
+ return batches, num_batches, len(sents1)
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/fusion_switch.cfg b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/fusion_switch.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9260c45253f20249ce9bae172ab885c8ad583098
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/fusion_switch.cfg
@@ -0,0 +1,10 @@
+{
+ "Switch":{
+ "GraphFusion":{
+ "ALL":"off"
+ },
+ "UBFusion":{
+ "ALL":"off"
+ }
+ }
+}
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/hparams.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/hparams.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2b3fedab15cd24d280fd0b7ff68a24b2c8a76dd
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/hparams.py
@@ -0,0 +1,95 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+
+ABSPATH=''
+class Hparams:
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--data_url", type=str, default="./dataset")
+ parser.add_argument("--train_url", type=str, default="./output")
+ parser.add_argument('--num_gpus', default='')
+ # prepro
+ parser.add_argument('--vocab_size', default=37000, type=int)#37000
+
+ # train
+ ## files
+ parser.add_argument('--train1', default=ABSPATH+'iwslt2016/segmented/train.de.bpe',
+ help="german training segmented data")
+ parser.add_argument('--train2', default=ABSPATH+'iwslt2016/segmented/train.en.bpe',
+ help="english training segmented data")
+ parser.add_argument('--eval1', default=ABSPATH+'iwslt2016/segmented/eval.de.bpe',
+ help="german evaluation segmented data")
+ parser.add_argument('--eval2', default=ABSPATH+'iwslt2016/segmented/eval.en.bpe',
+ help="english evaluation segmented data")
+ parser.add_argument('--eval3', default=ABSPATH+'iwslt2016/prepro/eval.en',
+ help="english evaluation unsegmented data")
+
+ ## vocabulary
+ parser.add_argument('--vocab', default=ABSPATH+'iwslt2016/segmented/bpe.vocab',
+ help="vocabulary file path")
+
+ # training scheme
+ parser.add_argument('--batch_size', default=128, type=int)###128
+ parser.add_argument('--eval_batch_size', default=128, type=int)###128
+
+ parser.add_argument('--lr', default=0.0003, type=float, help="learning rate")
+ parser.add_argument('--warmup_steps', default=4000, type=int)####4000
+ parser.add_argument('--logdir', default=ABSPATH+"log/1", help="log directory")
+ parser.add_argument('--num_epochs', default=1, type=int)###20
+ parser.add_argument('--evaldir', default=ABSPATH+"eval/1", help="evaluation dir")
+
+ # model
+ parser.add_argument('--d_model', default=512, type=int,###512
+ help="hidden dimension of encoder/decoder")
+ parser.add_argument('--d_ff', default=2048, type=int,###2048
+ help="hidden dimension of feedforward layer")
+ parser.add_argument('--num_blocks', default=6, type=int,#6
+ help="number of encoder/decoder blocks")
+ parser.add_argument('--num_heads', default=8, type=int,###8
+ help="number of attention heads")
+ parser.add_argument('--maxlen1', default=100, type=int,###100
+ help="maximum length of a source sequence")
+ parser.add_argument('--maxlen2', default=100, type=int,###100
+ help="maximum length of a target sequence")
+ parser.add_argument('--dropout_rate', default=0.1, type=float)#####0.3 论文是0.1
+ parser.add_argument('--smoothing', default=0.1, type=float,
+ help="label smoothing rate")
+
+ # test
+ parser.add_argument('--test1', default=ABSPATH+'iwslt2016/segmented/test.de.bpe',
+ help="german test segmented data")
+ parser.add_argument('--test2', default=ABSPATH+'iwslt2016/prepro/test.en',
+ help="english test data")
+ parser.add_argument('--ckpt', default=ABSPATH+"log/1",
+ help="checkpoint file path")
+ parser.add_argument('--test_batch_size', default=128, type=int)###128
+ parser.add_argument('--testdir', default=ABSPATH+"test/1", help="test result dir")
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fe80d8bde9b23345c4e722b46737aa5d3ad43d4
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/model.py
@@ -0,0 +1,250 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+# /usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer
+
+Transformer network
+'''
+import tensorflow as tf
+
+from data_load import load_vocab
+from modules import get_token_embeddings, ff, positional_encoding, multihead_attention, label_smoothing, noam_scheme
+from utils import convert_idx_to_token_tensor
+from tqdm import tqdm
+import logging
+from npu_bridge.estimator.npu import npu_convert_dropout
+from npu_bridge.npu_init import *
+
+
+logging.basicConfig(level=logging.INFO)
+
+class Transformer:
+ '''
+ xs: tuple of
+ x: int32 tensor. (N, T1)
+ x_seqlens: int32 tensor. (N,)
+ sents1: str tensor. (N,)
+ ys: tuple of
+ decoder_input: int32 tensor. (N, T2)
+ y: int32 tensor. (N, T2)
+ y_seqlen: int32 tensor. (N, )
+ sents2: str tensor. (N,)
+ training: boolean.
+ '''
+ def __init__(self, hp):
+ self.hp = hp
+ self.token2idx, self.idx2token = load_vocab(hp.data_url+hp.vocab)
+ self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model, zero_pad=True)
+
+ def encode(self, xs, training=True):
+ '''
+ Returns
+ memory: encoder outputs. (N, T1, d_model)
+ '''
+ with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
+ x, seqlens, sents1 = xs
+ # src_masks
+ src_masks = tf.math.equal(x, 0) # (N, T1)
+ # embedding
+ enc = tf.nn.embedding_lookup(self.embeddings, x) # (N, T1, d_model)
+ enc *= self.hp.d_model**0.5 # scale
+
+ enc += positional_encoding(enc, self.hp.maxlen1)
+ #enc = tf.layers.dropout(enc, self.hp.dropout_rate, training=training)
+ #enc = npu_convert_dropout(enc, self.hp.dropout_rate, training=training)
+
+ ## Blocks
+ for i in range(self.hp.num_blocks):
+ with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE):
+ # self-attention
+ enc = multihead_attention(queries=enc,
+ keys=enc,
+ values=enc,
+ key_masks=src_masks,
+ num_heads=self.hp.num_heads,
+ dropout_rate=self.hp.dropout_rate,
+ training=training,
+ causality=False)
+ # feed forward
+ enc = ff(enc, num_units=[self.hp.d_ff, self.hp.d_model])
+ memory = enc
+ return memory, sents1, src_masks
+
+ def decode(self, ys, memory, src_masks, training=True):
+ '''
+ memory: encoder outputs. (N, T1, d_model)
+ src_masks: (N, T1)
+
+ Returns
+ logits: (N, T2, V). float32.
+ y_hat: (N, T2). int32
+ y: (N, T2). int32
+ sents2: (N,). string.
+ '''
+ with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
+ decoder_inputs, y, seqlens, sents2 = ys
+
+ # tgt_masks
+ tgt_masks = tf.math.equal(decoder_inputs, 0) # (N, T2)
+
+ # embedding
+ dec = tf.nn.embedding_lookup(self.embeddings, decoder_inputs) # (N, T2, d_model)
+ dec *= self.hp.d_model ** 0.5 # scale
+
+ dec += positional_encoding(dec, self.hp.maxlen2)
+ #dec = tf.layers.dropout(dec, self.hp.dropout_rate, training=training)
+ #dec = npu_convert_dropout(dec, self.hp.dropout_rate, training=training)
+
+ # Blocks
+ for i in range(self.hp.num_blocks):
+ with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE):
+ # Masked self-attention (Note that causality is True at this time)
+ dec = multihead_attention(queries=dec,
+ keys=dec,
+ values=dec,
+ key_masks=tgt_masks,
+ num_heads=self.hp.num_heads,
+ dropout_rate=self.hp.dropout_rate,
+ training=training,
+ causality=True,
+ scope="self_attention")
+
+ # Vanilla attention
+ dec = multihead_attention(queries=dec,
+ keys=memory,
+ values=memory,
+ key_masks=src_masks,
+ num_heads=self.hp.num_heads,
+ dropout_rate=self.hp.dropout_rate,
+ training=training,
+ causality=False,
+ scope="vanilla_attention")
+ ### Feed Forward
+ dec = ff(dec, num_units=[self.hp.d_ff, self.hp.d_model])
+
+ # Final linear projection (embedding weights are shared)
+ weights = tf.transpose(self.embeddings) # (d_model, vocab_size)
+ logits = tf.einsum('ntd,dk->ntk', dec, weights) # (N, T2, vocab_size)
+ y_hat = tf.to_int32(tf.argmax(logits, axis=-1))
+
+ return logits, y_hat, y, sents2
+
+ def train(self, xs, ys):
+ '''
+ Returns
+ loss: scalar.
+ train_op: training operation
+ global_step: scalar.
+ summaries: training summary node
+ '''
+ # forward
+ memory, sents1, src_masks = self.encode(xs)
+ logits, preds, y, sents2 = self.decode(ys, memory, src_masks)
+
+ # train scheme
+ y_ = label_smoothing(tf.one_hot(y, depth=self.hp.vocab_size))
+ ce = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_)
+ nonpadding = tf.to_float(tf.not_equal(y, self.token2idx[""])) # 0:
+ loss = tf.reduce_sum(ce * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)
+
+ global_step = tf.train.get_or_create_global_step()
+ lr = noam_scheme(self.hp.lr, global_step, self.hp.warmup_steps)
+ optimizer = tf.train.AdamOptimizer(lr)
+ ##开启lossscale需要关闭
+ #train_op = optimizer.minimize(loss, global_step=global_step)
+
+ #lossscale
+ loss_scale_opt = optimizer
+ loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+ decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+ train_op = NPULossScaleOptimizer(loss_scale_opt, loss_scale_manager)
+ #
+ train_op= train_op.minimize(loss, global_step=global_step)
+
+
+ # self.refine_optim = tf.train.AdamOptimizer(learning_rate=self.refine_lr).minimize(self.rec_loss,
+ # var_list=refine_var)
+ # ####
+ # self.refine_optim = tf.train.AdamOptimizer(learning_rate=self.refine_lr)
+ #
+ # loss_scale_opt = self.refine_optim
+ # loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+ # decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+ # self.refine_optim = NPULossScaleOptimizer(loss_scale_opt, loss_scale_manager)
+ #
+ # self.refine_optim = self.refine_optim.minimize(self.rec_loss, var_list=refine_var)
+
+ # tf.summary.scalar('lr', lr)
+ # tf.summary.scalar("loss", loss)
+ # tf.summary.scalar("global_step", global_step)
+ #
+ # summaries = tf.summary.merge_all()
+
+ return loss, train_op, global_step
+
+ #return loss, train_op, global_step
+
+ def eval(self, xs, ys):
+ '''Predicts autoregressively
+ At inference, input ys is ignored.
+ Returns
+ y_hat: (N, T2)
+ '''
+ decoder_inputs, y, y_seqlen, sents2 = ys
+
+ decoder_inputs = tf.ones((tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx[""]
+ ys = (decoder_inputs, y, y_seqlen, sents2)
+
+ memory, sents1, src_masks = self.encode(xs, False)
+
+ logging.info("Inference graph is being built. Please be patient.")
+ for _ in tqdm(range(self.hp.maxlen2)):
+ logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
+ if tf.reduce_sum(y_hat, 1) == self.token2idx[""]: break
+
+ _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
+ ys = (_decoder_inputs, y, y_seqlen, sents2)
+
+ # monitor a random sample
+ #n = tf.random_uniform((), 0, tf.shape(y_hat)[0]-1, tf.int32)
+ # sent1 = sents1[n]
+ # pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
+ # sent2 = sents2[n]
+
+ # tf.summary.text("sent1", sent1)
+ # tf.summary.text("pred", pred)
+ # tf.summary.text("sent2", sent2)
+ # summaries = tf.summary.merge_all()
+
+ return y_hat
+
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py
new file mode 100644
index 0000000000000000000000000000000000000000..1245d57140da14a640cd6dd7d9d43100b8617b67
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_acc.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# 解析输入参数data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# 设置sh文件格式为linux可执行
+os.system("dos2unix ./test/*")
+
+# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定
+# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS
+os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# 将当前执行目录所有文件拷贝到obs的output进行备份
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2d23455d4cdec2d46fc273177a247905c751b73
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelarts_entry_perf.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# 解析输入参数data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# 设置sh文件格式为linux可执行
+os.system("dos2unix ./test/*")
+
+# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定
+# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS
+os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# 将当前执行目录所有文件拷贝到obs的output进行备份
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..55a9add9fa74832ca908108d73946cd76281a9cd
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,3 @@
+FuncStatus:OK
+PerfStatus:OK
+PrecisionStatus:POK
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..77f4290aefe9e7e465b1302cd302f4998354fcc3
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/modules.py
@@ -0,0 +1,337 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+#/usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer.
+
+Building blocks for Transformer
+'''
+
+import numpy as np
+import tensorflow as tf
+from npu_bridge.estimator.npu import npu_convert_dropout
+
+def ln(inputs, epsilon = 1e-8, scope="ln"):
+ '''Applies layer normalization. See https://arxiv.org/abs/1607.06450.
+ inputs: A tensor with 2 or more dimensions, where the first dimension has `batch_size`.
+ epsilon: A floating number. A very small number for preventing ZeroDivision Error.
+ scope: Optional scope for `variable_scope`.
+
+ Returns:
+ A tensor with the same shape and data dtype as `inputs`.
+ '''
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ inputs_shape = inputs.get_shape()
+ params_shape = inputs_shape[-1:]
+
+ mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
+ beta= tf.get_variable("beta", params_shape, initializer=tf.zeros_initializer())
+ gamma = tf.get_variable("gamma", params_shape, initializer=tf.ones_initializer())
+ normalized = (inputs - mean) / ( (variance + epsilon) ** (.5) )
+ outputs = gamma * normalized + beta
+
+ return outputs
+
+def get_token_embeddings(vocab_size, num_units, zero_pad=True):
+ '''Constructs token embedding matrix.
+ Note that the column of index 0's are set to zeros.
+ vocab_size: scalar. V.
+ num_units: embedding dimensionalty. E.
+ zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero
+ To apply query/key masks easily, zero pad is turned on.
+
+ Returns
+ weight variable: (V, E)
+ '''
+ with tf.variable_scope("shared_weight_matrix"):
+ embeddings = tf.get_variable('weight_mat',
+ dtype=tf.float32,
+ shape=(vocab_size, num_units),
+ initializer=tf.contrib.layers.xavier_initializer())
+ if zero_pad:
+ embeddings = tf.concat((tf.zeros(shape=[1, num_units]),
+ embeddings[1:, :]), 0)
+ return embeddings
+
+def scaled_dot_product_attention(Q, K, V, key_masks,
+ causality=False, dropout_rate=0.,
+ training=True,
+ scope="scaled_dot_product_attention"):
+ '''See 3.2.1.
+ Q: Packed queries. 3d tensor. [N, T_q, d_k].
+ K: Packed keys. 3d tensor. [N, T_k, d_k].
+ V: Packed values. 3d tensor. [N, T_k, d_v].
+ key_masks: A 2d tensor with shape of [N, key_seqlen]
+ causality: If True, applies masking for future blinding
+ dropout_rate: A floating point number of [0, 1].
+ training: boolean for controlling droput
+ scope: Optional scope for `variable_scope`.
+ '''
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ d_k = Q.get_shape().as_list()[-1]
+
+ # dot product
+ outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1])) # (N, T_q, T_k)
+
+ # scale
+ outputs /= d_k ** 0.5
+
+ # key masking
+ outputs = mask(outputs, key_masks=key_masks, type="key")
+
+ # causality or future blinding masking
+ if causality:
+ outputs = mask(outputs, type="future")
+
+ # softmax
+ outputs = tf.nn.softmax(outputs)
+ attention = tf.transpose(outputs, [0, 2, 1])
+ # tf.summary.image("attention", tf.expand_dims(attention[:1], -1))
+
+ # # query masking
+ # outputs = mask(outputs, Q, K, type="query")
+
+ # dropout
+ #outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)
+ #outputs = npu_convert_dropout(outputs, rate=dropout_rate, training=training)
+ #outputs = npu_convert_dropout(outputs, rate=dropout_rate)
+
+ # weighted sum (context vectors)
+ outputs = tf.matmul(outputs, V) # (N, T_q, d_v)
+
+ return outputs
+
+
+def mask(inputs, key_masks=None, type=None):
+ """Masks paddings on keys or queries to inputs
+ inputs: 3d tensor. (h*N, T_q, T_k)
+ key_masks: 3d tensor. (N, 1, T_k)
+ type: string. "key" | "future"
+
+ e.g.,
+ >> inputs = tf.zeros([2, 2, 3], dtype=tf.float32)
+ >> key_masks = tf.constant([[0., 0., 1.],
+ [0., 1., 1.]])
+ >> mask(inputs, key_masks=key_masks, type="key")
+ array([[[ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09],
+ [ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09]],
+
+ [[ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09],
+ [ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09]],
+
+ [[ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09],
+ [ 0.0000000e+00, 0.0000000e+00, -4.2949673e+09]],
+
+ [[ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09],
+ [ 0.0000000e+00, -4.2949673e+09, -4.2949673e+09]]], dtype=float32)
+ """
+ padding_num = -2 ** 32 + 1
+ if type in ("k", "key", "keys"):
+ key_masks = tf.to_float(key_masks)
+ key_masks = tf.tile(key_masks, [tf.shape(inputs)[0] // tf.shape(key_masks)[0], 1]) # (h*N, seqlen)
+ key_masks = tf.expand_dims(key_masks, 1) # (h*N, 1, seqlen)
+ outputs = inputs + key_masks * padding_num
+ # elif type in ("q", "query", "queries"):
+ # # Generate masks
+ # masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1)) # (N, T_q)
+ # masks = tf.expand_dims(masks, -1) # (N, T_q, 1)
+ # masks = tf.tile(masks, [1, 1, tf.shape(keys)[1]]) # (N, T_q, T_k)
+ #
+ # # Apply masks to inputs
+ # outputs = inputs*masks
+ elif type in ("f", "future", "right"):
+ diag_vals = tf.ones_like(inputs[0, :, :]) # (T_q, T_k)
+ tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() # (T_q, T_k)
+ future_masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(inputs)[0], 1, 1]) # (N, T_q, T_k)
+
+ paddings = tf.ones_like(future_masks) * padding_num
+ outputs = tf.where(tf.equal(future_masks, 0), paddings, inputs)
+ else:
+ print("Check if you entered type correctly!")
+
+ return outputs
+
+
+def multihead_attention(queries, keys, values, key_masks,
+ num_heads=8,
+ dropout_rate=0,
+ training=True,
+ causality=False,
+ scope="multihead_attention"):
+ '''Applies multihead attention. See 3.2.2
+ queries: A 3d tensor with shape of [N, T_q, d_model].
+ keys: A 3d tensor with shape of [N, T_k, d_model].
+ values: A 3d tensor with shape of [N, T_k, d_model].
+ key_masks: A 2d tensor with shape of [N, key_seqlen]
+ num_heads: An int. Number of heads.
+ dropout_rate: A floating point number.
+ training: Boolean. Controller of mechanism for dropout.
+ causality: Boolean. If true, units that reference the future are masked.
+ scope: Optional scope for `variable_scope`.
+
+ Returns
+ A 3d tensor with shape of (N, T_q, C)
+ '''
+ d_model = queries.get_shape().as_list()[-1]
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ # Linear projections
+ Q = tf.layers.dense(queries, d_model, use_bias=True) # (N, T_q, d_model)
+ K = tf.layers.dense(keys, d_model, use_bias=True) # (N, T_k, d_model)
+ V = tf.layers.dense(values, d_model, use_bias=True) # (N, T_k, d_model)
+
+ # Split and concat
+ Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, d_model/h)
+ K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h)
+ V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h)
+
+ # Attention
+ outputs = scaled_dot_product_attention(Q_, K_, V_, key_masks, causality, dropout_rate, training)
+
+ # Restore shape
+ outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2 ) # (N, T_q, d_model)
+
+ # Residual connection
+ outputs += queries
+
+ # Normalize
+ outputs = ln(outputs)
+
+ return outputs
+
+def ff(inputs, num_units, scope="positionwise_feedforward"):
+ '''position-wise feed forward net. See 3.3
+
+ inputs: A 3d tensor with shape of [N, T, C].
+ num_units: A list of two integers.
+ scope: Optional scope for `variable_scope`.
+
+ Returns:
+ A 3d tensor with the same shape and dtype as inputs
+ '''
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ # Inner layer
+ outputs = tf.layers.dense(inputs, num_units[0], activation=tf.nn.relu)
+
+ # Outer layer
+ outputs = tf.layers.dense(outputs, num_units[1])
+
+ # Residual connection
+ outputs += inputs
+
+ # Normalize
+ outputs = ln(outputs)
+
+ return outputs
+
+def label_smoothing(inputs, epsilon=0.1):
+ '''Applies label smoothing. See 5.4 and https://arxiv.org/abs/1512.00567.
+ inputs: 3d tensor. [N, T, V], where V is the number of vocabulary.
+ epsilon: Smoothing rate.
+
+ For example,
+
+ ```
+ import tensorflow as tf
+ inputs = tf.convert_to_tensor([[[0, 0, 1],
+ [0, 1, 0],
+ [1, 0, 0]],
+
+ [[1, 0, 0],
+ [1, 0, 0],
+ [0, 1, 0]]], tf.float32)
+
+ outputs = label_smoothing(inputs)
+
+ with tf.Session() as sess:
+ print(sess.run([outputs]))
+
+ >>
+ [array([[[ 0.03333334, 0.03333334, 0.93333334],
+ [ 0.03333334, 0.93333334, 0.03333334],
+ [ 0.93333334, 0.03333334, 0.03333334]],
+
+ [[ 0.93333334, 0.03333334, 0.03333334],
+ [ 0.93333334, 0.03333334, 0.03333334],
+ [ 0.03333334, 0.93333334, 0.03333334]]], dtype=float32)]
+ ```
+ '''
+ V = inputs.get_shape().as_list()[-1] # number of channels
+ return ((1-epsilon) * inputs) + (epsilon / V)
+
+def positional_encoding(inputs,
+ maxlen,
+ masking=True,
+ scope="positional_encoding"):
+ '''Sinusoidal Positional_Encoding. See 3.5
+ inputs: 3d tensor. (N, T, E)
+ maxlen: scalar. Must be >= T
+ masking: Boolean. If True, padding positions are set to zeros.
+ scope: Optional scope for `variable_scope`.
+
+ returns
+ 3d tensor that has the same shape as inputs.
+ '''
+
+ E = inputs.get_shape().as_list()[-1] # static
+ N, T = tf.shape(inputs)[0], tf.shape(inputs)[1] # dynamic
+ with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+ # position indices
+ position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1]) # (N, T)
+
+ # First part of the PE function: sin and cos argument
+ position_enc = np.array([
+ [pos / np.power(10000, (i-i%2)/E) for i in range(E)]
+ for pos in range(maxlen)])
+
+ # Second part, apply the cosine to even columns and sin to odds.
+ position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i
+ position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1
+ position_enc = tf.convert_to_tensor(position_enc, tf.float32) # (maxlen, E)
+
+ # lookup
+ outputs = tf.nn.embedding_lookup(position_enc, position_ind)
+
+ # masks
+ if masking:
+ outputs = tf.where(tf.equal(inputs, 0), inputs, outputs)
+
+ return tf.to_float(outputs)
+
+def noam_scheme(init_lr, global_step, warmup_steps=4000.):
+ '''Noam scheme learning rate decay
+ init_lr: initial learning rate. scalar.
+ global_step: scalar.
+ warmup_steps: scalar. During warmup_steps, learning rate increases
+ until it reaches init_lr.
+ '''
+ step = tf.cast(global_step + 1, dtype=tf.float32)
+ return init_lr * warmup_steps ** 0.5 * tf.minimum(step * warmup_steps ** -1.5, step ** -0.5)
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl
new file mode 100644
index 0000000000000000000000000000000000000000..809879142776e1df62d689e7675648101d2f9a91
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/multi-bleu.perl
@@ -0,0 +1,165 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+if (!scalar(@ARGV)) {
+ print STDERR "Syntax: multi-bleu.perl [-length_analysis bucket] [ref-stem] < [system-output]
+If one reference translation: ref-stem is filename
+If multiple reference translations: ref-stem[0,1,2,...] is filename\n";
+}
+
+my $length_analysis;
+if ($ARGV[0] eq '-length_analysis') {
+ shift @ARGV;
+ $length_analysis = shift @ARGV;
+}
+
+my @CORRECT_BUCKET;
+my @TOTAL_BUCKET;
+my @COUNT_LENGTH;
+my $max_bucket=0;
+
+my $stem = $ARGV[0];
+my @REF;
+my $ref=0;
+while(-e "$stem$ref") {
+ &add_to_ref("$stem$ref",\@REF);
+ $ref++;
+}
+&add_to_ref($stem,\@REF) if -e $stem;
+die("did not find any reference translations at $stem") unless scalar @REF;
+
+sub add_to_ref {
+ my ($file,$REF) = @_;
+ my $s=0;
+ open(REF,$file);
+ while(<REF>) {
+ chop;
+ push @{$$REF[$s++]}, $_;
+ }
+ close(REF);
+}
+
+my(@CORRECT,@TOTAL,$length_translation,$length_reference);
+my $s=0;
+while(<STDIN>) {
+ chop;
+ my @WORD = split;
+ my %REF_NGRAM = ();
+ my $length_translation_this_sentence = scalar(@WORD);
+ my ($closest_diff,$closest_length) = (9999,9999);
+ my $bucket;
+ foreach my $reference (@{$REF[$s]}) {
+# print "$s $_ <=> $reference\n";
+ my @WORD = split(/ /,$reference);
+ my $length = scalar(@WORD);
+ if ($length_analysis) {
+ $bucket = int($length/$length_analysis);
+ $max_bucket=$bucket if ($bucket>$max_bucket);
+ }
+ if (abs($length_translation_this_sentence-$length) < $closest_diff) {
+ $closest_diff = abs($length_translation_this_sentence-$length);
+ $closest_length = $length;
+# print "$i: closest diff = abs($length_translation_this_sentence-$length)<BR>\n";
+ }
+ for(my $n=1;$n<=4;$n++) {
+ my %REF_NGRAM_N = ();
+ for(my $start=0;$start<=$#WORD-($n-1);$start++) {
+ my $ngram = "$n";
+ for(my $w=0;$w<$n;$w++) {
+ $ngram .= " ".$WORD[$start+$w];
+ }
+ $REF_NGRAM_N{$ngram}++;
+ }
+ foreach my $ngram (keys %REF_NGRAM_N) {
+ if (!defined($REF_NGRAM{$ngram}) ||
+ $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
+ $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
+# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
+ }
+ }
+ }
+ }
+ if ($bucket) {
+ $COUNT_LENGTH[$bucket]++;
+ }
+ $length_translation += $length_translation_this_sentence;
+ $length_reference += $closest_length;
+ for(my $n=1;$n<=4;$n++) {
+ my %T_NGRAM = ();
+ for(my $start=0;$start<=$#WORD-($n-1);$start++) {
+ my $ngram = "$n";
+ for(my $w=0;$w<$n;$w++) {
+ $ngram .= " ".$WORD[$start+$w];
+ }
+ $T_NGRAM{$ngram}++;
+ }
+ foreach my $ngram (keys %T_NGRAM) {
+ $ngram =~ /^(\d+) /;
+ my $n = $1;
+# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
+ $TOTAL[$n] += $T_NGRAM{$ngram};
+ if ($bucket) {
+ $TOTAL_BUCKET[$bucket][$n] += $T_NGRAM{$ngram};
+ }
+ if (defined($REF_NGRAM{$ngram})) {
+ if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
+ if ($bucket) {
+ $CORRECT_BUCKET[$bucket][$n] += $T_NGRAM{$ngram};
+ }
+ $CORRECT[$n] += $T_NGRAM{$ngram};
+# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
+ }
+ else {
+ if ($bucket) {
+ $CORRECT_BUCKET[$bucket][$n] += $REF_NGRAM{$ngram};
+ }
+ $CORRECT[$n] += $REF_NGRAM{$ngram};
+# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
+ }
+ }
+ }
+ }
+ $s++;
+}
+my $brevity_penalty = 1;
+if ($length_translation<$length_reference) {
+ $brevity_penalty = exp(1-$length_reference/$length_translation);
+}
+my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
+ my_log( $CORRECT[2]/$TOTAL[2] ) +
+ my_log( $CORRECT[3]/$TOTAL[3] ) +
+ my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
+
+printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f)\n",
+ 100*$bleu,
+ 100*$CORRECT[1]/$TOTAL[1],
+ 100*$CORRECT[2]/$TOTAL[2],
+ 100*$CORRECT[3]/$TOTAL[3],
+ 100*$CORRECT[4]/$TOTAL[4],
+ $brevity_penalty,
+ $length_translation / $length_reference;
+
+if ($length_analysis) {
+ print "\nLENGTH ANALYSIS:\n";
+ for(my $b=int(1/$length_analysis); $b<=$max_bucket; $b++) {
+ my $range=$b;
+ if ($length_analysis != 1) {
+ $range=($b*$length_analysis+1)."-".(($b+1)*$length_analysis);
+ }
+ print "$range";;
+ if ($TOTAL_BUCKET[$b] && $TOTAL_BUCKET[$b][4] && $CORRECT_BUCKET[$b][4]) {
+ printf "\t%d\t%.2f", $COUNT_LENGTH[$b],
+ 100*$brevity_penalty * exp((my_log( $CORRECT_BUCKET[$b][1]/$TOTAL_BUCKET[$b][1] ) +
+ my_log( $CORRECT_BUCKET[$b][2]/$TOTAL_BUCKET[$b][2] ) +
+ my_log( $CORRECT_BUCKET[$b][3]/$TOTAL_BUCKET[$b][3] ) +
+ my_log( $CORRECT_BUCKET[$b][4]/$TOTAL_BUCKET[$b][4] ) ) / 4);
+ }
+ print "\n";
+ }
+}
+
+sub my_log {
+ return -9999999999 unless $_[0];
+ return log($_[0]);
+}
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1cbd6f15c3c2e58345e5ad28ecd8fff3672545d6
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/npu_train.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+code_dir=$1
+work_dir=$2
+dataset_path=$3
+output_path=$4
+
+#############训练前输入目录文件确认#########################
+echo "[CANN-ZhongZhi] before train - list my run files[/usr/local/Ascend/ascend-toolkit]:"
+ls -al /usr/local/Ascend/ascend-toolkit
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my code files[${code_dir}]:"
+ls -al ${code_dir}
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my work files[${work_dir}]:"
+ls -al ${work_dir}
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my dataset files[${dataset_path}]:"
+ls -al ${dataset_path}
+echo ""
+
+echo "[CANN-ZhongZhi] before train - list my output files[${output_path}]:"
+ls -al ${output_path}
+echo ""
+
+######环境变量修改######
+###如果需要修改环境变量的,在此处修改
+###搭配最大内存使用
+#echo "GE_USE_STATIC_MEMORY ${GE_USE_STATIC_MEMORY}"
+#echo $GE_USE_STATIC_MEMORY
+#echo "GE_USE_STATIC_MEMORY"
+#export GE_USE_STATIC_MEMORY=1
+#echo "GE_USE_STATIC_MEMORY ${GE_USE_STATIC_MEMORY}"
+#echo $GE_USE_STATIC_MEMORY
+#echo "GE_USE_STATIC_MEMORY"
+
+
+##接口老哥提示打开
+echo "ENABLE_FORCE_V2_CONTROL ${GE_USE_STATIC_MEMORY}"
+export ENABLE_FORCE_V2_CONTROL=1
+echo "ENABLE_FORCE_V2_CONTROL ${GE_USE_STATIC_MEMORY}"
+#设置日志级别为info
+#export ASCEND_GLOBAL_LOG_LEVEL=1
+#设置日志打屏到屏幕
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+#export TF_CPP_MIN_LOG_LEVEL=0
+env > ${output_path}/my_env.log
+
+######训练执行######
+###此处每个网络执行命令不同,需要修改
+python3.7 ${code_dir}/train621V.py --data_url=${dataset_path} --train_url=${output_path}
+if [ $? -eq 0 ];
+then
+ echo "[CANN-ZhongZhi] train return success"
+else
+ echo "[CANN-ZhongZhi] train return failed"
+fi
+
+######训练后把需要备份的内容保存到output_path######
+###此处每个网络不同,视情况添加cp
+cp -r ${work_dir} ${output_path}
+
+######训练后输出目录文件确认######
+echo "[CANN-ZhongZhi] after train - list my work files[${work_dir}]:"
+ls -al ${work_dir}
+echo ""
+
+echo "[CANN-ZhongZhi] after train - list my output files[${output_path}]:"
+ls -al ${output_path}
+echo ""
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json
new file mode 100644
index 0000000000000000000000000000000000000000..d729df649d913c80f1a942bc7b75378829a3ddd4
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/ops_info.json
@@ -0,0 +1,5 @@
+{
+ "black-list": {
+ "to-add": ["Assign","MatMulV2","Cast","Mul","ReduceSumD","SoftmaxV2","BatchMatMul"]
+ }
+}
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2a893aed66a82a6b726bc1e97dd9efc8a4db626
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/preproNew.py
@@ -0,0 +1,153 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+#/usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer.
+
+Preprocess the iwslt 2016 datasets.
+'''
+
+import os
+import errno
+import sentencepiece as spm
+import re
+from hparams import Hparams
+import logging
+###因为换了数据集,他的vocabsize可能不一样了
+logging.basicConfig(level=logging.INFO)
+
+def prepro(hp):
+ """Load raw data -> Preprocessing -> Segmenting with sentencepice
+ hp: hyperparams. argparse.
+ """
+ logging.info("# Check if raw files exist")
+ train1 = "iwslt2016/prepro/train.de"
+ train2 = "iwslt2016/prepro/train.en"
+ eval1 = "iwslt2016/prepro/eval.de"
+ eval2 = "iwslt2016/prepro/eval.en"
+ test1 = "iwslt2016/prepro/test.de"
+ test2 = "iwslt2016/prepro/test.en"
+ for f in (train1, train2, eval1, eval2, test1, test2):
+ if not os.path.isfile(f):
+ raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), f)
+
+ logging.info("# Preprocessing")
+ # train
+ _prepro = lambda x: [line.strip() for line in open(x, 'r', encoding='UTF-8').read().split("\n") \
+ if not line.startswith("<")]
+ prepro_train1, prepro_train2 = _prepro(train1), _prepro(train2)
+ assert len(prepro_train1)==len(prepro_train2), "Check if train source and target files match."
+
+ # eval
+ # _prepro = lambda x: [re.sub("<[^>]+>", "", line).strip() \
+ # for line in open(x, 'r', encoding='UTF-8').read().split("\n") \
+ # if line.startswith("<seg id")]
+tensorflow>=1.12.0
+numpy>=1.15.4
+sentencepiece==0.1.8
+tqdm>=4.28.1
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/switch_config.txt b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/switch_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ef3971b4770d1cb6b1799a2ea00032ba992d3579
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/switch_config.txt
@@ -0,0 +1,3 @@
+OpType::MatMulV2:InputDtype:float16,float16,float32,OutputDtype:float32
+OpType::BatchMatMul:InputDtype:float16,float16,OutputDtype:float32
+OpType::BatchMatMulV2:InputDtype:float16,float16,OutputDtype:float32
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0f0a73eb88acfbdae65dcf01203900ea0fe404c7
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_full_1p.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+##########################################################
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+##########################################################
+# shell脚本所在路径
+cur_path=`echo $(cd $(dirname $0);pwd)`
+
+# 判断当前shell是否是performance
+perf_flag=`echo $0 | grep performance | wc -l`
+
+# 当前执行网络的名称
+Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`
+
+export RANK_SIZE=1
+export RANK_ID=0
+export JOB_ID=10087
+
+# 路径参数初始化
+data_path=""
+output_path=""
+
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage:./train_performance_1P.sh <args>"
+ echo " "
+ echo "parameter explain:
+ --data_path # dataset of training
+ --output_path # output of training
+ --train_steps # max_step for training
+ --train_epochs # max_epoch for training
+ --batch_size # batch size
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# 参数校验,不需要修改
+for para in $*
+do
+ if [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ elif [[ $para == --output_path* ]];then
+ output_path=`echo ${para#*=}`
+ elif [[ $para == --train_steps* ]];then
+ train_steps=`echo ${para#*=}`
+ elif [[ $para == --train_epochs* ]];then
+ train_epochs=`echo ${para#*=}`
+ elif [[ $para == --batch_size* ]];then
+ batch_size=`echo ${para#*=}`
+ fi
+done
+
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be config"
+ exit 1
+fi
+
+# 校验是否传入output_path,不需要修改
+if [[ $output_path == "" ]];then
+ output_path="./test/output/${ASCEND_DEVICE_ID}"
+fi
+
+CaseName=""
+function get_casename()
+{
+ if [ x"${perf_flag}" = x1 ];
+ then
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+ else
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
+ fi
+}
+
+# 跳转到code目录
+cd ${cur_path}/../
+rm -rf ./test/output/${ASCEND_DEVICE_ID}
+mkdir -p ./test/output/${ASCEND_DEVICE_ID}
+
+# 训练开始时间记录,不需要修改
+start_time=$(date +%s)
+##########################################################
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+#########第3行 至 90行,请一定不要、不要、不要修改##########
+##########################################################
+
+#=========================================================
+#=========================================================
+#========训练执行命令,需要根据您的网络进行修改==============
+#=========================================================
+#=========================================================
+# 基础参数,需要模型审视修改
+# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取
+# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取
+# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值
+batch_size=128
+
+# 设置打屏日志文件名,请保留,文件名为${print_log}
+print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
+
+python3.7 ./train.py --data_url=${data_path} --train_url=${output_path} 1>${print_log} 2>&1
+
+# 性能相关数据计算
+StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`
+
+# 精度相关数据计算
+train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
+# 提取所有loss打印信息
+grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+###########################################################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+###########################################################
+
+# 获取最终的casename,请保留,case文件名为${CaseName}
+get_casename
+
+# 重命名loss文件
+if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
+then
+ mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
+fi
+
+# 训练端到端耗时
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+echo "------------------ Final result ------------------"
+# 输出性能FPS/单step耗时/端到端耗时
+echo "Final Performance images/sec : $FPS"
+echo "Final Performance sec/step : $StepTime"
+echo "E2E Training Duration sec : $e2e_time"
+
+# 输出训练精度
+echo "Final Train Accuracy : ${train_accuracy}"
+
+# 最后一个迭代loss值,不需要修改
+ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt`)
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e86eb215ba0c2d1f89981809bd0e925605d830bd
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/test/train_performance_1p.sh
@@ -0,0 +1,158 @@
#!/bin/bash

##########################################################
######### Lines 3 to 90 are framework boilerplate ########
######### — please do NOT modify them.            ########
##########################################################
# Directory containing this shell script.
cur_path=`echo $(cd $(dirname $0);pwd)`

# Whether the current script is a performance run (name contains "performance").
perf_flag=`echo $0 | grep performance | wc -l`

# Name of the network being trained (parent directory name of test/).
Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`

export RANK_SIZE=1
export RANK_ID=0
export JOB_ID=10087

# Path parameter initialization.
data_path=""
output_path=""

# Help message.
if [[ $1 == --help || $1 == -h ]];then
    # FIX: the original `echo"usage:..."` had no space after `echo`, so the
    # shell tried to execute a command literally named `echo"usage:..."`.
    echo "usage:./train_performance_1P.sh "
    echo " "
    echo "parameter explain:
 --data_path # dataset of training
 --output_path # output of training
 --train_steps # max_step for training
 --train_epochs # max_epoch for training
 --batch_size # batch size
 -h/--help show help message
 "
    exit 1
fi
+
# Argument parsing (framework-mandated; do not modify).
for para in $*
do
    if [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    elif [[ $para == --output_path* ]];then
        output_path=`echo ${para#*=}`
    elif [[ $para == --train_steps* ]];then
        train_steps=`echo ${para#*=}`
    elif [[ $para == --train_epochs* ]];then
        train_epochs=`echo ${para#*=}`
    elif [[ $para == --batch_size* ]];then
        batch_size=`echo ${para#*=}`
    fi
done

# data_path is mandatory (framework-mandated check; do not modify).
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi

# Default output_path when not supplied (framework-mandated; do not modify).
if [[ $output_path == "" ]];then
    output_path="./test/output/${ASCEND_DEVICE_ID}"
fi
+
# Case name has the form <network>_bs<batch>_<ranks>p_{perf|acc}; the suffix
# depends on whether this is a performance or an accuracy script.
CaseName=""
function get_casename()
{
    local suffix="acc"
    if [ x"${perf_flag}" = x1 ];
    then
        suffix="perf"
    fi
    CaseName="${Network}_bs${batch_size}_${RANK_SIZE}p_${suffix}"
}
+
# Move to the code directory (parent of test/).
cd ${cur_path}/../
rm -rf ./test/output/${ASCEND_DEVICE_ID}
mkdir -p ./test/output/${ASCEND_DEVICE_ID}

# Record training start time (framework-mandated; do not modify).
start_time=$(date +%s)
##########################################################
######### Lines 3 to 90 are framework boilerplate ########
######### — please do NOT modify them.            ########
##########################################################

#=========================================================
#=========================================================
#======= Training command — model-specific section =======
#=========================================================
#=========================================================
# Base parameters (model-specific):
# - training data lives under ${data_path}
# - training output goes to ${output_path}
# - batch_size must be kept and set to the real value
# NOTE(review): train_epochs is set but never passed to train.py — confirm
# the run is bounded by --steps alone.
train_epochs=1
train_steps=34632
batch_size=128

print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
python3.7 ./train.py --data_url=${data_path} --train_url=${output_path} --steps=${train_steps} 1>${print_log} 2>&1


# Performance metrics: average sec/step over the last 10 matching log lines.
# NOTE(review): if no "sec/step :" lines exist in the log, StepTime is empty
# and the FPS awk below fails — confirm train.py always emits this pattern.
StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`

# Accuracy metric.
train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'`
# Extract all loss values into a file.
# NOTE(review): train.py logs "loss: ..." (no space before the colon) — confirm
# the pattern "loss :" actually matches, otherwise this file is empty.
grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+
###########################################################
######### Everything below is framework boilerplate #######
######### — please do NOT modify.                   #######
###########################################################

# Compute the final case name; the result file is named ${CaseName}.log.
get_casename

# Rename the raw loss dump to the case-specific loss file.
if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
then
    mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
fi

# End-to-end training duration.
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

echo "------------------ Final result ------------------"
# Report FPS / per-step time / end-to-end time.
echo "Final Performance images/sec : $FPS"
echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"

# Report training accuracy.
echo "Final Train Accuracy : ${train_accuracy}"

# Loss of the last iteration (reads the renamed loss file).
ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt`)

# Append key results to ${CaseName}.log.
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/train.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..c85005aa76f104aa44137acd2d396d5187cdc201
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/train.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# -*- coding: utf-8 -*-
#/usr/bin/python3
'''
Feb. 2019 by kyubyong park.
kbpark.linguist@gmail.com.
https://www.github.com/kyubyong/transformer
'''
import tensorflow as tf
from model import Transformer
from tqdm import tqdm
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu, calc_bleu2
import os
from hparams import Hparams
import math
import logging
import time
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from npu_bridge.npu_init import *

#import precision_tool.tf_config as npu_tf_config


logging.basicConfig(level=logging.INFO)


logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
# hp.data_url=os.path.dirname(os.path.realpath(__file__))+'/'
print("data_url\n")
print(hp.data_url)
print("train_url\n")
print(hp.train_url)
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
# Training batches: shuffled, capped at (maxlen1, maxlen2).
train_batches, num_train_batches, num_train_samples = get_batch(hp.data_url+ hp.train1, hp.data_url+hp.train2,
                                                                hp.maxlen1, hp.maxlen2,
                                                                hp.data_url+hp.vocab, hp.batch_size,
                                                                shuffle=True)
# eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.data_url+hp.eval1, hp.data_url+hp.eval2,
#                                                              100000, 100000,
#                                                              hp.data_url+hp.vocab, hp.batch_size,
#                                                              shuffle=False)
# Eval batches use the same max lengths as training (the commented variant
# above used effectively unlimited lengths).
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.data_url+hp.eval1, hp.data_url+hp.eval2,
                                                             hp.maxlen1, hp.maxlen2,
                                                             hp.data_url+hp.vocab, hp.batch_size,
                                                             shuffle=False)

# create a iterator of the correct shape and type, shared between the train
# and eval datasets via reinitialization.  NOTE(review): `iter` shadows the
# builtin of the same name.
iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = iter.get_next()

train_init_op = iter.make_initializer(train_batches)
eval_init_op = iter.make_initializer(eval_batches)

logging.info("# Load model")
m = Transformer(hp)
loss, train_op, global_step = m.train(xs, ys)
y_hat = m.eval(xs, ys)
# y_hat = m.infer(xs, ys)

logging.info("# Session")
saver = tf.train.Saver(max_to_keep=hp.num_epochs)

#### Paths for profiling / overflow-dump related settings.
if not os.path.exists(hp.train_url + "/tmp/profiling"): os.makedirs(hp.train_url + "/tmp/profiling")
proPath=hp.train_url + "/tmp/profiling"
dumpPath=hp.train_url + "/tmp/overflow"
blackPath=hp.data_url+"/ops_info.json"
fusionPath=hp.data_url+"/fusion_switch.cfg"
switchPath=hp.data_url+"/switch_config.txt"
if not os.path.exists(dumpPath): os.makedirs(dumpPath)

config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
# NOTE(review): this reads "./switch_config.txt" from the working directory,
# while `switchPath` (built from data_url above) is never used — confirm which
# file is intended.
custom_op.parameter_map["customize_dtypes"].s = tf.compat.as_bytes("./switch_config.txt")
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # must be explicitly disabled
config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF  # must be explicitly disabled

with tf.Session(config=config) as sess:
    # Resume from the latest checkpoint if one exists; otherwise initialize.
    ckpt = tf.train.latest_checkpoint(hp.train_url+hp.logdir)
    if ckpt is None:
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())
        if not os.path.exists(hp.train_url + hp.logdir): os.makedirs(hp.train_url + hp.logdir)
        save_variable_specs(os.path.join(hp.train_url+hp.logdir, "specs"))
    else:
        saver.restore(sess, ckpt)

    sess.run(train_init_op)
    total_steps = hp.num_epochs * num_train_batches
    print("Step Info: ", hp.num_epochs, num_train_batches)

    _gs = sess.run(global_step)

    for i in tqdm(range(_gs, total_steps+1)):
        _, _gs = sess.run([train_op, global_step])
        epoch = math.ceil(i / num_train_batches)

        # NOTE(review): this extra fetch runs the graph again and pulls a new
        # batch from the iterator just to log a loss value — confirm the extra
        # data consumption per step is intended.
        _loss = sess.run(loss)  # train loss
        logging.info("loss: {}".format(_loss))

        if i and i % num_train_batches == 0:
            # Epoch boundary: evaluate, write hypotheses + BLEU, checkpoint.
            logging.info("epoch {} is done".format(epoch))
            _loss = sess.run(loss)  # train loss
            logging.info("loss: {}".format(_loss))

            logging.info("# test evaluation")
            # NOTE(review): eval_init_op below is commented out, so the
            # hypotheses are generated from the training pipeline — confirm
            # this is intended.
            # _ = sess.run([eval_init_op])

            logging.info("# get hypotheses")
            hypotheses = get_hypotheses(num_eval_batches, num_eval_samples, sess, y_hat, m.idx2token)

            logging.info("# write results")
            model_output = "iwslt2016_E%02dL%.2f" % (epoch, _loss)
            if not os.path.exists(hp.train_url+hp.evaldir): os.makedirs(hp.train_url+hp.evaldir)
            translation = os.path.join(hp.train_url + hp.evaldir, model_output)
            # if not os.path.exists(hp.train_url + hp.evaldir): os.makedirs(hp.train_url + hp.evaldir)
            # translation = os.path.join(hp.train_url+hp.evaldir, model_output)

            #######
            with open(translation, 'w') as fout:
                fout.write("\n".join(hypotheses))

            logging.info("# calc bleu score and append it to translation")
            calc_bleu2(hp.data_url + hp.eval3, translation,hp.data_url)

            logging.info("# save models")
            ckpt_name = os.path.join(hp.train_url+hp.logdir, model_output)
            # if not os.path.exists(hp.train_url + hp.logdir): os.makedirs(hp.train_url + hp.logdir)
            # ckpt_name = os.path.join(hp.train_url + hp.logdir, model_output)
            ########
            saver.save(sess, ckpt_name, global_step=_gs)
            logging.info("after training of {} epochs, {} has been saved.".format(epoch, ckpt_name))

            logging.info("# fall back to train mode")
            sess.run(train_init_op)

logging.info("Done")
diff --git a/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/utils.py b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb1fdfd125e2013d27da4efc91773f9f9120375d
--- /dev/null
+++ b/TensorFlow/contrib/nlp/Transformer_ID2361__for_TensorFlow/utils.py
@@ -0,0 +1,272 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- coding: utf-8 -*-
+# /usr/bin/python3
+'''
+Feb. 2019 by kyubyong park.
+kbpark.linguist@gmail.com.
+https://www.github.com/kyubyong/transformer.
+
+Utility functions
+'''
+
+import tensorflow as tf
+# from tensorflow.python import pywrap_tensorflow
+# import numpy as np
+import json
+import os, re
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
def calc_num_batches(total_num, batch_size):
    '''Calculates the number of batches.
    total_num: total sample number
    batch_size

    Returns
    number of batches, allowing for remainders.'''
    # Ceiling division via negation: equivalent to
    # total_num // batch_size + (1 if there is a remainder else 0).
    return -(-total_num // batch_size)
+
def convert_idx_to_token_tensor(inputs, idx2token):
    '''Converts int32 tensor to string tensor.
    inputs: 1d int32 tensor. indices.
    idx2token: dictionary

    Returns
    1d string tensor.
    '''
    def _join_tokens(indices):
        # Runs eagerly inside tf.py_func: map each index through the
        # vocabulary and join the tokens with single spaces.
        return " ".join(idx2token[idx] for idx in indices)

    return tf.py_func(_join_tokens, [inputs], tf.string)
+
+# # def pad(x, maxlen):
+# # '''Pads x, list of sequences, and make it as a numpy array.
+# # x: list of sequences. e.g., [[2, 3, 4], [5, 6, 7, 8, 9], ...]
+# # maxlen: scalar
+# #
+# # Returns
+# # numpy int32 array of (len(x), maxlen)
+# # '''
+# # padded = []
+# # for seq in x:
+# # seq += [0] * (maxlen - len(seq))
+# # padded.append(seq)
+# #
+# # arry = np.array(padded, np.int32)
+# # assert arry.shape == (len(x), maxlen), "Failed to make an array"
+#
+# return arry
+
def postprocess(hypotheses, idx2token):
    '''Processes translation outputs.
    hypotheses: list of encoded predictions
    idx2token: dictionary

    Returns
    processed hypotheses
    '''
    _hypotheses = []
    for h in hypotheses:
        sent = "".join(idx2token[idx] for idx in h)
        # FIX: the original called sent.split("") — an empty separator, which
        # raises ValueError on every call.  Cut the sentence at the
        # end-of-sentence marker "</s>" as in the reference implementation
        # (the marker text was evidently lost from this copy).
        sent = sent.split("</s>")[0].strip()
        sent = sent.replace("▁", " ")  # remove bpe symbols
        _hypotheses.append(sent.strip())
    return _hypotheses
+
def save_hparams(hparams, path):
    '''Saves hparams to path
    hparams: argsparse object.
    path: output directory.

    Writes
    hparams as literal dictionary to path.
    '''
    # Create the output directory on demand, then persist the hparams
    # namespace as a JSON dictionary in a file named "hparams".
    os.makedirs(path, exist_ok=True)
    serialized = json.dumps(vars(hparams))
    with open(os.path.join(path, "hparams"), 'w') as fout:
        fout.write(serialized)
+
def load_hparams(parser, path):
    '''Loads hparams and overrides parser
    parser: argsparse object to receive the saved values.
    path: directory containing an "hparams" file, or a file inside it.
    '''
    if not os.path.isdir(path):
        path = os.path.dirname(path)
    # Close the file deterministically (the original leaked the handle).
    with open(os.path.join(path, "hparams"), 'r') as fin:
        flag2val = json.loads(fin.read())
    for flag, val in flag2val.items():
        # FIX: the original did `parser.f = v`, which set a literal attribute
        # named "f" on every iteration instead of restoring each flag.
        setattr(parser, flag, val)
+
def save_variable_specs(fpath):
    '''Saves information about variables such as
    their name, shape, and total parameter number
    fpath: string. output file path

    Writes
    a text file named fpath.
    '''
    def _num_elements(shape):
        '''Product of all dimensions of a TensorShape.'''
        count = 1
        for dim in shape:
            count *= dim
        return count

    specs = []
    num_params = 0
    for var in tf.global_variables():
        specs.append("{}==={}".format(var.name, var.shape))
        num_params += _num_elements(var.shape)
    print("num_params: ", num_params)
    with open(fpath, 'w') as fout:
        fout.write("num_params: {}\n".format(num_params))
        fout.write("\n".join(specs))
    logging.info("Variables info has been saved.")
+
def get_hypotheses(num_batches, num_samples, sess, tensor, dict):
    '''Gets hypotheses.
    num_batches: scalar.
    num_samples: scalar.
    sess: tensorflow sess object
    tensor: target tensor to fetch
    dict: idx2token dictionary

    Returns
    hypotheses: list of sents
    '''
    decoded = []
    for step in range(num_batches):
        # Fetch one decoded batch and accumulate its rows.
        batch_out = sess.run(tensor)
        decoded.extend(batch_out.tolist())
        print(step)
    # Convert index sequences to text and keep only the first
    # num_samples entries.
    return postprocess(decoded, dict)[:num_samples]
+
def calc_bleu(ref, translation):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path

    Returns
    translation that the bleu score is appended to'''
    # Run the standard multi-bleu perl scorer; its report lands in "temp".
    get_bleu_score = "perl multi-bleu.perl {} < {} > {}".format(ref, translation, "temp")
    os.system(get_bleu_score)
    bleu_score_report = open("temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        # FIX: rename in-process.  The original ran `os.system("mv ...")` and
        # then os.remove(translation), which always raised FileNotFoundError
        # (the file had already been moved) and was swallowed by a bare except.
        os.replace(translation, translation + "B{}".format(score))
    except (IndexError, OSError):
        # No BLEU line in the report (e.g. scorer missing) or the rename
        # failed — keep the un-renamed translation, as before.
        pass
    os.remove("temp")
+
def calc_bleu2(ref, translation,data_url):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path
    data_url: unused; kept for caller compatibility.

    Returns
    translation that the bleu score is appended to'''
    # Run the multi-bleu perl scorer from the working directory; its report
    # lands in "temp".
    get_bleu_score = "perl "+"./multi-bleu.perl {} < {} > {}".format(ref, translation, "temp")
    os.system(get_bleu_score)
    bleu_score_report = open("temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        # FIX: rename in-process.  The original ran `os.system("mv ...")` and
        # then os.remove(translation), which always raised FileNotFoundError
        # (the file had already been moved) and was swallowed by a bare except.
        os.replace(translation, translation + "B{}".format(score))
    except (IndexError, OSError):
        # No BLEU line in the report (e.g. scorer missing) or the rename
        # failed — keep the un-renamed translation, as before.
        pass
    os.remove("temp")
+
def calc_bleu3(ref, translation,data_url):
    '''Calculates bleu score and appends the report to translation
    ref: reference file path
    translation: model output file path
    data_url: directory containing multi-bleu.perl

    Returns
    translation that the bleu score is appended to'''
    # NOTE(review): the temp-report path below is a hard-coded developer home
    # directory, so this variant only works on that one machine — confirm it
    # is still used anywhere before relying on it.
    get_bleu_score = "perl "+data_url+"multi-bleu.perl {} < {} > {}".format(ref, translation, "/home/test_user05/transformerAll/temp")
    os.system(get_bleu_score)
    bleu_score_report = open("/home/test_user05/transformerAll/temp", "r").read()
    with open(translation, "a") as fout:
        fout.write("\n{}".format(bleu_score_report))
    try:
        score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
        new_translation = translation + "B{}".format(score)
        os.system("mv {} {}".format(translation, new_translation))
        # NOTE(review): `translation` was just moved away, so this remove
        # always raises FileNotFoundError, silently swallowed by the bare
        # except below.
        os.remove(translation)

    except: pass
    #os.remove("temp")
+# def calc_bleu5242(ref, translation):
+# '''Calculates bleu score and appends the report to translation
+# ref: reference file path
+# translation: model output file path
+#
+# Returns
+# translation that the bleu score is appended to'''
+# get_bleu_score = "perl "+os.path.dirname(os.path.realpath(__file__))+"/"+"multi-bleu.perl {} < {} > {}".format(ref, translation, "temp")
+# os.system(get_bleu_score)
+# bleu_score_report = open("temp", "r").read()
+# with open(translation, "a") as fout:
+# fout.write("\n{}".format(bleu_score_report))
+# try:
+# score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]
+# new_translation = translation + "B{}".format(score)
+# os.system("mv {} {}".format(translation, new_translation))
+# os.remove(translation)
+#
+# except: pass
+# os.remove("temp")
+# def get_inference_variables(ckpt, filter):
+# reader = pywrap_tensorflow.NewCheckpointReader(ckpt)
+# var_to_shape_map = reader.get_variable_to_shape_map()
+# vars = [v for v in sorted(var_to_shape_map) if filter not in v]
+# return vars
+
+
+
+