From e670147f8837a09715cc1387867b71d245671d9e Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Sat, 2 Apr 2022 18:51:21 +0800
Subject: [PATCH 01/11] add Google and NV BERT models
---
.../CONTRIBUTING.md | 31 +
.../BertGoogle_Series_for_TensorFlow/LICENSE | 202 +
.../README.md | 246 +
.../__init__.py | 15 +
.../benchmark.sh | 49 +
.../configs/rank_table_8p.json | 52 +
.../create_pretraining_data.py | 469 +
.../evaluate-v1.1.py | 122 +
.../extract_features.py | 419 +
.../gpu_environment.py | 36 +
.../modeling.py | 988 +
.../modeling_test.py | 277 +
.../modelzoo_level.txt | 3 +
.../optimization.py | 200 +
.../optimization_test.py | 48 +
.../requirements.txt | 1 +
.../BertGoogle_Series_for_TensorFlow/run.sh | 46 +
.../run_classifier.py | 981 +
.../run_classifier_with_tfhub.py | 314 +
.../run_pretraining.py | 676 +
.../run_squad.py | 1333 +
.../sample_text.txt | 33 +
.../test/train_ID0495_Bert-Squad_full_1p.sh | 155 +
.../test/train_ID0495_Bert-Squad_full_8p.sh | 157 +
.../train_ID0495_Bert-Squad_performance_1p.sh | 149 +
.../train_ID0495_Bert-Squad_performance_8p.sh | 151 +
.../train_ID3082_BertLarge-Squad_full_1p.sh | 154 +
.../train_ID3082_BertLarge-Squad_full_8p.sh | 156 +
...n_ID3082_BertLarge-Squad_performance_1p.sh | 149 +
...n_ID3082_BertLarge-Squad_performance_8p.sh | 150 +
.../tokenization.py | 399 +
.../tokenization_test.py | 137 +
.../utils/create_glue_data.py | 512 +
.../utils/create_pretraining_data.py | 501 +
.../utils/create_squad_data.py | 561 +
.../utils/utils.py | 75 +
.../CONTRIBUTING.md | 31 +
.../BertNV_Series_for_TensorFlow/Dockerfile | 5 +
.../nlp/BertNV_Series_for_TensorFlow/LICENSE | 202 +
.../nlp/BertNV_Series_for_TensorFlow/NOTICE | 4 +
.../BertNV_Series_for_TensorFlow/README.md | 318 +
.../configs/8p.json | 15 +
.../configs/bert_base_config.json | 13 +
.../configs/bert_base_vocab.txt | 21128 ++++++++++++++++
.../configs/bert_large_config.json | 13 +
.../modelzoo_level.txt | 3 +
.../requirements.txt | 0
.../src/__init__.py | 15 +
.../src/create_pretraining_data.py | 457 +
.../src/extract_features.py | 419 +
.../src/fp16_utils.py | 35 +
.../src/fused_layer_norm.py | 141 +
.../src/gpu_environment.py | 36 +
.../src/modeling.py | 1031 +
.../src/optimization.py | 441 +
.../src/run_pretraining.py | 748 +
.../src/tf_metrics.py | 230 +
.../src/tokenization.py | 451 +
.../BertNV_Series_for_TensorFlow/src/utils.py | 76 +
.../test/train_ID0060_BertBase_full_1p.sh | 174 +
.../test/train_ID0060_BertBase_full_8p.sh | 186 +
.../train_ID0060_BertBase_performance_1p.sh | 174 +
.../train_ID0060_BertBase_performance_8p.sh | 185 +
.../train_ID3067_BertLarge-128_full_1p.sh | 171 +
.../train_ID3067_BertLarge-128_full_8p.sh | 183 +
...ain_ID3067_BertLarge-128_performance_1p.sh | 170 +
...ain_ID3067_BertLarge-128_performance_8p.sh | 182 +
.../train_ID3068_BertLarge-512_full_1p.sh | 171 +
.../train_ID3068_BertLarge-512_full_8p.sh | 183 +
...ain_ID3068_BertLarge-512_performance_1p.sh | 170 +
...ain_ID3068_BertLarge-512_performance_8p.sh | 182 +
.../test/train_ID3069_BertBase-512_full_1p.sh | 170 +
.../test/train_ID3069_BertBase-512_full_8p.sh | 182 +
...rain_ID3069_BertBase-512_performance_1p.sh | 169 +
...rain_ID3069_BertBase-512_performance_8p.sh | 181 +
75 files changed, 38592 insertions(+)
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/CONTRIBUTING.md
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/LICENSE
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/__init__.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/benchmark.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/configs/rank_table_8p.json
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/create_pretraining_data.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/evaluate-v1.1.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/extract_features.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/gpu_environment.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling_test.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modelzoo_level.txt
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization_test.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/requirements.txt
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier_with_tfhub.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_pretraining.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/sample_text.txt
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization_test.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_glue_data.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_pretraining_data.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_squad_data.py
create mode 100644 TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/utils.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/CONTRIBUTING.md
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/Dockerfile
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/LICENSE
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/NOTICE
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/8p.json
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_config.json
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_vocab.txt
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_large_config.json
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/modelzoo_level.txt
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/requirements.txt
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/__init__.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/create_pretraining_data.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/extract_features.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fp16_utils.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fused_layer_norm.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/gpu_environment.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tf_metrics.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tokenization.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/utils.py
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
create mode 100644 TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/CONTRIBUTING.md b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/CONTRIBUTING.md
new file mode 100644
index 000000000..124b4b32c
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/CONTRIBUTING.md
@@ -0,0 +1,31 @@
+# How to Contribute
+
+BERT needs to maintain permanent compatibility with the pre-trained model files,
+so we do not plan to make any major changes to this library (other than what was
+promised in the README). However, we can accept small patches related to
+re-factoring and documentation. To submit contributes, there are just a few
+small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+## Community Guidelines
+
+This project follows
+[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/LICENSE b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/LICENSE
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md
new file mode 100644
index 000000000..8dac5fc50
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md
@@ -0,0 +1,246 @@
+- [Basic Information](#basic-information)
+- [Overview](#overview)
+- [Training Environment Setup](#training-environment-setup)
+- [Quick Start](#quick-start)
+- [Transfer Learning Guide](#transfer-learning-guide)
+- [Advanced Reference](#advanced-reference)
+
+# Basic Information
+
+**Publisher: Huawei**
+
+**Application Domain: Natural Language Processing**
+
+**Version: 1.1**
+
+**Modified: 2021.7.17**
+
+**Size: 1.3G**
+
+**Framework: TensorFlow 1.15.0**
+
+**Model Format: ckpt**
+
+**Precision: Mixed**
+
+**Processor: Ascend 910**
+
+**Categories: Official**
+
+**Description: Fine-tuning code for the BERT network on the SQuAD v1.1 dataset, based on the TensorFlow framework**
+
+# Overview
+
+BERT is a method of pre-training language representations: a general-purpose "language understanding" model is trained on a large text corpus (such as Wikipedia) and then applied to downstream NLP tasks we care about (such as question answering). This project provides the recipe for fine-tuning on the SQuAD v1.1 dataset.
+
+- Reference paper:
+
+  [https://arxiv.org/abs/1810.04805](https://arxiv.org/abs/1810.04805).
+
+- Reference implementation:
+
+  https://github.com/google-research/bert
+
+- Implementation adapted for the Ascend AI Processor:
+
+  https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Bertsquad_ID0495_for_TensorFlow
+
+
+- To obtain the code at the corresponding commit_id via Git:
+
+  ```
+  git clone {repository_url}        # clone the repository
+  cd {repository_name}              # enter the repository directory of the model
+  git checkout {branch}             # switch to the corresponding branch
+  git reset --hard {commit_id}      # reset the code to the corresponding commit_id
+  cd {code_path}                    # enter the model code path; skip if the repository contains only this model
+  ```
+
+## Default Configuration
+
+- Training hyperparameters
+
+  - train_batch_size: 32
+  - learning_rate: 3e-5
+  - num_train_epochs: 2.0
+  - max_seq_length: 384
+  - doc_stride: 128
+
+
+## Supported Features
+
+| Feature              | Supported |
+|----------------------|-----------|
+| Distributed training | Yes       |
+| Mixed precision      | Yes       |
+| Data parallelism     | Yes       |
+
+## Mixed Precision Training
+
+The Ascend 910 AI Processor provides automatic mixed precision: following a built-in optimization policy, it automatically lowers selected float32 operators across the network to float16, improving system performance and reducing memory usage with only a small loss of accuracy.
+
+## Enabling Mixed Precision
+
+Mixed precision is enabled by default in the scripts; a reference snippet that sets the precision_mode parameter is shown below.
+
+ ```
+ run_config = NPURunConfig(
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ session_config=config,
+ precision_mode="allow_mix_precision",
+ keep_checkpoint_max=5)
+ ```
+
+
+# Training Environment Setup
+
+1. For hardware environment setup, see the product documentation "[Driver and Firmware Installation and Upgrade Guide](https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)". Firmware and drivers matching the CANN version must be installed on the hardware.
+2. Install Docker on the host and log in to the [Ascend Hub](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm) to obtain the image.
+
+    The images supported by this model are listed in [Table 1](#zh-cn_topic_0000001074498056_table1519011227314).
+
+    **Table 1** Image list
+
+    | Image Name            | Image Version | Compatible CANN Version |
+    |-----------------------|---------------|-------------------------|
+    | ascend-tensorflow-arm | 20.2.0        | 20.2                    |
+
+
+# Quick Start
+
+- Dataset and pre-trained model preparation
+1. Training uses the SQuAD v1.1 dataset; download it from the path given in the reference source code.
+2. The pre-trained model is BERT-Base, Uncased; download it from the path given in the reference source code. The downloaded folder should contain the pre-trained checkpoint, vocab.txt, and bert_config.json.
+3. Once downloaded, place the dataset and the pre-trained model under the model directory and point the training scripts at their paths; a sketch of the expected layout is shown below.
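+
+    The layout below is only an illustration (directory names such as bert_base_dir and squad_dir are placeholders; any location works as long as BERT_BASE_DIR and SQUAD_DIR in the scripts point to it):
+
+    ```
+    ├─bert_base_dir              # BERT-Base, Uncased pre-trained model
+    |  ├─bert_model.ckpt.*
+    |  ├─vocab.txt
+    |  └─bert_config.json
+    └─squad_dir                  # SQuAD v1.1 dataset
+       ├─train-v1.1.json
+       └─dev-v1.1.json
+    ```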
+
+## Model Training
+
+- Click "Download Now" and choose a suitable download method to obtain the source package.
+
+- Before launching training, configure the environment variables required by the program.
+
+    For details on the environment variable configuration, see:
+
+    [Ascend 910 training platform environment variable setup](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+
+    Configure these environment variables in scripts/run_*.sh.
+
+- Single-card (1p) training
+
+    To launch single-card training:
+
+    Edit BERT_BASE_DIR and SQUAD_DIR in scripts/run_1p.sh, where BERT_BASE_DIR is the path to the pre-trained model and SQUAD_DIR is the path to the SQuAD v1.1 dataset, then run:
+
+    ```
+    cd scripts
+    bash run_1p.sh
+    ```
+
+- 8-card (8p) training
+
+    To launch 8-card training:
+
+    Edit BERT_BASE_DIR and SQUAD_DIR in scripts/run_8p.sh, where BERT_BASE_DIR is the path to the pre-trained model and SQUAD_DIR is the path to the SQuAD v1.1 dataset, then run:
+
+    ```
+    cd scripts
+    bash run_8p.sh
+    ```
+
+
+
+# Advanced Reference
+
+## Scripts and Sample Code
+
+```
+└─Bertsquad_for_TensorFlow
+ ├─scripts
+ | ├─8p.json
+ | ├─docker_start.sh
+ | ├─run_1p.sh
+ | ├─run_8p.sh
+ | ├─test.sh
+ | ├─train_1p.sh
+ | └─train_8p.sh
+ ├─utils
+ | ├─create_glue_data.py
+ | ├─create_pretraining_data.py
+ | ├─create_squad_data.py
+ | └─utils.py
+ ├─CONTRIBUTING.md
+ ├─create_pretraining_data.py
+ ├─evaluate-v1.1.py
+ ├─extract_features.py
+ ├─gpu_environment.py
+ ├─LICENSE
+ ├─modeling.py
+ ├─modeling_test.py
+ ├─multilingual.md
+ ├─optimization.py
+ ├─optimization_test.py
+ ├─README.md
+ ├─run.sh
+ ├─run_classifier.py
+ ├─run_classifier_with_tfhub.py
+ ├─run_pretraining.py
+ ├─run_squad.py
+ ├─tokenization.py
+ └─tokenization_test.py
+```
+
+## Script Parameters
+
+```
+python3 run_squad.py \
+ --vocab_file=$BERT_BASE_DIR/vocab.txt \
+ --bert_config_file=$BERT_BASE_DIR/bert_config.json \
+ --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
+ --do_train=True \
+ --train_file=$SQUAD_DIR/train-v1.1.json \
+ --do_predict=True \
+ --predict_file=$SQUAD_DIR/dev-v1.1.json \
+ --train_batch_size=32 \
+ --learning_rate=3e-5 \
+ --num_train_epochs=2.0 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=./output
+```
+
+## Training Process
+
+1. Launch single-card or 8-card training with the commands given in "Model Training".
+
+2. Training logs and results are written to scripts/result/1p/train_*.log.
+
+
+
+## Inference/Validation
+
+```
+python3 evaluate-v1.1.py dataset/dev-v1.1.json ./scripts/result/1p/0/output/predictions.json
+
+##predict result for 1p:
+{"exact_match": 79.87701040681173, "f1": 87.42429097480438}
+
+##predict result for 8p:
+{"exact_match": 79.2620624408704, "f1": 86.8726952549086}
+```
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/__init__.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/__init__.py
new file mode 100644
index 000000000..effb57b1e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/__init__.py
@@ -0,0 +1,15 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/benchmark.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/benchmark.sh
new file mode 100644
index 000000000..1941f5f10
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/benchmark.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+currentDir=$(cd "$(dirname "$0")";pwd)
+export SET_RESULT_FILE=$currentDir/set_result.py
+export RESULT_FILE=$currentDir/result.txt
+
+function prepare() {
+ # download dataset
+
+ # verify dataset
+
+ # preprocess
+ return 0
+}
+
+function exec_train() {
+
+ # pytorch lenet5 sample
+ #python3.7 $currentDir/pytorch_lenet5_train.py
+
+ # tensorflow-1.15 wide&deep sample
+ #python3.7 $currentDir/tensorflow_1_15_wide_deep.py
+
+    # test sample
+
+    start_time=$(date +%s)
+    cd $currentDir/scripts
+    bash run_8p.sh
+    end_time=$(date +%s)
+    time_=$(( end_time - start_time ))
+    FPS=$(awk -v t="$time_" 'BEGIN {print 100 / t}')
+ train_accuracy=`grep "accuracy =" $currentDir/scripts/result/train_1.log|awk 'END {print$7}'`
+
+ python3.7 $currentDir/set_result.py training "accuracy" $train_accuracy
+ python3.7 $currentDir/set_result.py training "throughput_ratio" $FPS
+ python3.7 $currentDir/set_result.py training "result" "NOK"
+}
+
+function main() {
+
+ prepare
+
+ exec_train
+
+}
+
+main "$@"
+ret=$?
+exit $ret
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/configs/rank_table_8p.json b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/configs/rank_table_8p.json
new file mode 100644
index 000000000..cd9041f3e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/configs/rank_table_8p.json
@@ -0,0 +1,52 @@
+{
+ "server_count":"1",
+ "server_list":[
+ {
+ "server_id":"10.147.179.27",
+ "device":[
+ {
+ "device_id":"0",
+ "device_ip":"192.168.100.100",
+ "rank_id":"0"
+ },
+ {
+ "device_id":"1",
+ "device_ip":"192.168.101.100",
+ "rank_id":"1"
+ },
+ {
+ "device_id":"2",
+ "device_ip":"192.168.102.100",
+ "rank_id":"2"
+ },
+ {
+ "device_id":"3",
+ "device_ip":"192.168.103.100",
+ "rank_id":"3"
+ },
+ {
+ "device_id":"4",
+ "device_ip":"192.168.100.101",
+ "rank_id":"4"
+ },
+ {
+ "device_id":"5",
+ "device_ip":"192.168.101.101",
+ "rank_id":"5"
+ },
+ {
+ "device_id":"6",
+ "device_ip":"192.168.102.101",
+ "rank_id":"6"
+ },
+ {
+ "device_id":"7",
+ "device_ip":"192.168.103.101",
+ "rank_id":"7"
+ }
+ ]
+ }
+ ],
+ "status":"completed",
+ "version":"1.0"
+}
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/create_pretraining_data.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/create_pretraining_data.py
new file mode 100644
index 000000000..5340d96ae
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/create_pretraining_data.py
@@ -0,0 +1,469 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Create masked LM/next sentence masked_lm TF examples for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import random
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None,
+ "Input raw text file (or comma-separated list of files).")
+
+flags.DEFINE_string(
+ "output_file", None,
+ "Output TF example file (or comma-separated list of files).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_bool(
+ "do_whole_word_mask", False,
+ "Whether to use whole word masking rather than per-WordPiece masking.")
+
+flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.")
+
+flags.DEFINE_integer("max_predictions_per_seq", 20,
+ "Maximum number of masked LM predictions per sequence.")
+
+flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.")
+
+flags.DEFINE_integer(
+ "dupe_factor", 10,
+ "Number of times to duplicate the input data (with different masks).")
+
+flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.")
+
+flags.DEFINE_float(
+ "short_seq_prob", 0.1,
+ "Probability of creating sequences which are shorter than the "
+ "maximum length.")
+
+
+class TrainingInstance(object):
+ """A single training instance (sentence pair)."""
+
+ def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
+ is_random_next):
+ self.tokens = tokens
+ self.segment_ids = segment_ids
+ self.is_random_next = is_random_next
+ self.masked_lm_positions = masked_lm_positions
+ self.masked_lm_labels = masked_lm_labels
+
+ def __str__(self):
+ s = ""
+ s += "tokens: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.tokens]))
+ s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
+ s += "is_random_next: %s\n" % self.is_random_next
+ s += "masked_lm_positions: %s\n" % (" ".join(
+ [str(x) for x in self.masked_lm_positions]))
+ s += "masked_lm_labels: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.masked_lm_labels]))
+ s += "\n"
+ return s
+
+ def __repr__(self):
+ return self.__str__()
+
+
+def write_instance_to_example_files(instances, tokenizer, max_seq_length,
+ max_predictions_per_seq, output_files):
+ """Create TF example files from `TrainingInstance`s."""
+ writers = []
+ for output_file in output_files:
+ writers.append(tf.python_io.TFRecordWriter(output_file))
+
+ writer_index = 0
+
+ total_written = 0
+ for (inst_index, instance) in enumerate(instances):
+ input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
+ input_mask = [1] * len(input_ids)
+ segment_ids = list(instance.segment_ids)
+ assert len(input_ids) <= max_seq_length
+
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ masked_lm_positions = list(instance.masked_lm_positions)
+ masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
+ masked_lm_weights = [1.0] * len(masked_lm_ids)
+
+ while len(masked_lm_positions) < max_predictions_per_seq:
+ masked_lm_positions.append(0)
+ masked_lm_ids.append(0)
+ masked_lm_weights.append(0.0)
+
+ next_sentence_label = 1 if instance.is_random_next else 0
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(input_ids)
+ features["input_mask"] = create_int_feature(input_mask)
+ features["segment_ids"] = create_int_feature(segment_ids)
+ features["masked_lm_positions"] = create_int_feature(masked_lm_positions)
+ features["masked_lm_ids"] = create_int_feature(masked_lm_ids)
+ features["masked_lm_weights"] = create_float_feature(masked_lm_weights)
+ features["next_sentence_labels"] = create_int_feature([next_sentence_label])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+
+ writers[writer_index].write(tf_example.SerializeToString())
+ writer_index = (writer_index + 1) % len(writers)
+
+ total_written += 1
+
+ if inst_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in instance.tokens]))
+
+ for feature_name in features.keys():
+ feature = features[feature_name]
+ values = []
+ if feature.int64_list.value:
+ values = feature.int64_list.value
+ elif feature.float_list.value:
+ values = feature.float_list.value
+ tf.logging.info(
+ "%s: %s" % (feature_name, " ".join([str(x) for x in values])))
+
+ for writer in writers:
+ writer.close()
+
+ tf.logging.info("Wrote %d total instances", total_written)
+
+
+def create_int_feature(values):
+ feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+
+def create_float_feature(values):
+ feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
+ return feature
+
+
+def create_training_instances(input_files, tokenizer, max_seq_length,
+ dupe_factor, short_seq_prob, masked_lm_prob,
+ max_predictions_per_seq, rng):
+ """Create `TrainingInstance`s from raw text."""
+ all_documents = [[]]
+
+ # Input file format:
+ # (1) One sentence per line. These should ideally be actual sentences, not
+ # entire paragraphs or arbitrary spans of text. (Because we use the
+ # sentence boundaries for the "next sentence prediction" task).
+ # (2) Blank lines between documents. Document boundaries are needed so
+ # that the "next sentence prediction" task doesn't span between documents.
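+  #
+  # For example (illustrative), one input file containing two documents:
+  #   The cat sat on the mat.
+  #   It was looking out the window.
+  #
+  #   A second document starts after the blank line.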
+ for input_file in input_files:
+ with tf.gfile.GFile(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+
+ # Empty lines are used as document delimiters
+ if not line:
+ all_documents.append([])
+ tokens = tokenizer.tokenize(line)
+ if tokens:
+ all_documents[-1].append(tokens)
+
+ # Remove empty documents
+ all_documents = [x for x in all_documents if x]
+ rng.shuffle(all_documents)
+
+ vocab_words = list(tokenizer.vocab.keys())
+ instances = []
+ for _ in range(dupe_factor):
+ for document_index in range(len(all_documents)):
+ instances.extend(
+ create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
+
+ rng.shuffle(instances)
+ return instances
+
+
+def create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
+ """Creates `TrainingInstance`s for a single document."""
+ document = all_documents[document_index]
+
+ # Account for [CLS], [SEP], [SEP]
+ max_num_tokens = max_seq_length - 3
+
+ # We *usually* want to fill up the entire sequence since we are padding
+ # to `max_seq_length` anyways, so short sequences are generally wasted
+ # computation. However, we *sometimes*
+ # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+ # sequences to minimize the mismatch between pre-training and fine-tuning.
+ # The `target_seq_length` is just a rough target however, whereas
+ # `max_seq_length` is a hard limit.
+ target_seq_length = max_num_tokens
+ if rng.random() < short_seq_prob:
+ target_seq_length = rng.randint(2, max_num_tokens)
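+  # e.g. with max_seq_length=128, max_num_tokens is 125; about short_seq_prob
+  # (10%) of the time the target length is instead drawn uniformly from [2, 125].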
+
+ # We DON'T just concatenate all of the tokens from a document into a long
+ # sequence and choose an arbitrary split point because this would make the
+ # next sentence prediction task too easy. Instead, we split the input into
+ # segments "A" and "B" based on the actual "sentences" provided by the user
+ # input.
+ instances = []
+ current_chunk = []
+ current_length = 0
+ i = 0
+ while i < len(document):
+ segment = document[i]
+ current_chunk.append(segment)
+ current_length += len(segment)
+ if i == len(document) - 1 or current_length >= target_seq_length:
+ if current_chunk:
+ # `a_end` is how many segments from `current_chunk` go into the `A`
+ # (first) sentence.
+ a_end = 1
+ if len(current_chunk) >= 2:
+ a_end = rng.randint(1, len(current_chunk) - 1)
+
+ tokens_a = []
+ for j in range(a_end):
+ tokens_a.extend(current_chunk[j])
+
+ tokens_b = []
+ # Random next
+ is_random_next = False
+ if len(current_chunk) == 1 or rng.random() < 0.5:
+ is_random_next = True
+ target_b_length = target_seq_length - len(tokens_a)
+
+ # This should rarely go for more than one iteration for large
+ # corpora. However, just to be careful, we try to make sure that
+ # the random document is not the same as the document
+ # we're processing.
+ for _ in range(10):
+ random_document_index = rng.randint(0, len(all_documents) - 1)
+ if random_document_index != document_index:
+ break
+
+ random_document = all_documents[random_document_index]
+ random_start = rng.randint(0, len(random_document) - 1)
+ for j in range(random_start, len(random_document)):
+ tokens_b.extend(random_document[j])
+ if len(tokens_b) >= target_b_length:
+ break
+ # We didn't actually use these segments so we "put them back" so
+ # they don't go to waste.
+ num_unused_segments = len(current_chunk) - a_end
+ i -= num_unused_segments
+ # Actual next
+ else:
+ is_random_next = False
+ for j in range(a_end, len(current_chunk)):
+ tokens_b.extend(current_chunk[j])
+ truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
+
+ assert len(tokens_a) >= 1
+ assert len(tokens_b) >= 1
+
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ (tokens, masked_lm_positions,
+ masked_lm_labels) = create_masked_lm_predictions(
+ tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
+ instance = TrainingInstance(
+ tokens=tokens,
+ segment_ids=segment_ids,
+ is_random_next=is_random_next,
+ masked_lm_positions=masked_lm_positions,
+ masked_lm_labels=masked_lm_labels)
+ instances.append(instance)
+ current_chunk = []
+ current_length = 0
+ i += 1
+
+ return instances
+
+
+MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
+ ["index", "label"])
+
+
+def create_masked_lm_predictions(tokens, masked_lm_prob,
+ max_predictions_per_seq, vocab_words, rng):
+ """Creates the predictions for the masked LM objective."""
+
+ cand_indexes = []
+ for (i, token) in enumerate(tokens):
+ if token == "[CLS]" or token == "[SEP]":
+ continue
+    # Whole Word Masking means that we mask all of the WordPieces
+    # corresponding to an original word. When a word has been split into
+    # WordPieces, the first token does not have any marker and any subsequent
+    # tokens are prefixed with ##. So whenever we see a ## token, we
+    # append it to the previous set of word indexes.
+ #
+ # Note that Whole Word Masking does *not* change the training code
+ # at all -- we still predict each WordPiece independently, softmaxed
+ # over the entire vocabulary.
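+    #
+    # For example (illustrative): a word tokenized as
+    # ["phil", "##am", "##mon"] contributes a single candidate entry
+    # [i, i+1, i+2] to cand_indexes when whole word masking is enabled.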
+ if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and
+ token.startswith("##")):
+ cand_indexes[-1].append(i)
+ else:
+ cand_indexes.append([i])
+
+ rng.shuffle(cand_indexes)
+
+ output_tokens = list(tokens)
+
+ num_to_predict = min(max_predictions_per_seq,
+ max(1, int(round(len(tokens) * masked_lm_prob))))
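+  # e.g. with the default flags, a 128-token sequence yields
+  # min(20, max(1, round(128 * 0.15))) = min(20, 19) = 19 predictions.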
+
+ masked_lms = []
+ covered_indexes = set()
+ for index_set in cand_indexes:
+ if len(masked_lms) >= num_to_predict:
+ break
+ # If adding a whole-word mask would exceed the maximum number of
+ # predictions, then just skip this candidate.
+ if len(masked_lms) + len(index_set) > num_to_predict:
+ continue
+ is_any_index_covered = False
+ for index in index_set:
+ if index in covered_indexes:
+ is_any_index_covered = True
+ break
+ if is_any_index_covered:
+ continue
+ for index in index_set:
+ covered_indexes.add(index)
+
+ masked_token = None
+ # 80% of the time, replace with [MASK]
+ if rng.random() < 0.8:
+ masked_token = "[MASK]"
+ else:
+ # 10% of the time, keep original
+ if rng.random() < 0.5:
+ masked_token = tokens[index]
+ # 10% of the time, replace with random word
+ else:
+ masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+
+ output_tokens[index] = masked_token
+
+ masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+ assert len(masked_lms) <= num_to_predict
+ masked_lms = sorted(masked_lms, key=lambda x: x.index)
+
+ masked_lm_positions = []
+ masked_lm_labels = []
+ for p in masked_lms:
+ masked_lm_positions.append(p.index)
+ masked_lm_labels.append(p.label)
+
+ return (output_tokens, masked_lm_positions, masked_lm_labels)
+
+
+def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
+ """Truncates a pair of sequences to a maximum sequence length."""
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_num_tokens:
+ break
+
+ trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
+ assert len(trunc_tokens) >= 1
+
+ # We want to sometimes truncate from the front and sometimes from the
+ # back to add more randomness and avoid biases.
+ if rng.random() < 0.5:
+ del trunc_tokens[0]
+ else:
+ trunc_tokens.pop()
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ input_files = []
+ for input_pattern in FLAGS.input_file.split(","):
+ input_files.extend(tf.gfile.Glob(input_pattern))
+
+ tf.logging.info("*** Reading from input files ***")
+ for input_file in input_files:
+ tf.logging.info(" %s", input_file)
+
+ rng = random.Random(FLAGS.random_seed)
+ instances = create_training_instances(
+ input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
+ FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
+ rng)
+
+ output_files = FLAGS.output_file.split(",")
+ tf.logging.info("*** Writing to output files ***")
+ for output_file in output_files:
+ tf.logging.info(" %s", output_file)
+
+ write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
+ FLAGS.max_predictions_per_seq, output_files)
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("output_file")
+ flags.mark_flag_as_required("vocab_file")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/evaluate-v1.1.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/evaluate-v1.1.py
new file mode 100644
index 000000000..2cbed9611
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/evaluate-v1.1.py
@@ -0,0 +1,122 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Official evaluation script for v1.1 of the SQuAD dataset. """
+
+from __future__ import print_function
+from collections import Counter
+import string
+import re
+import argparse
+import json
+import sys
+
+
+def normalize_answer(s):
+ """Lower text and remove punctuation, articles and extra whitespace."""
+ def remove_articles(text):
+ return re.sub(r'\b(a|an|the)\b', ' ', text)
+
+ def white_space_fix(text):
+ return ' '.join(text.split())
+
+ def remove_punc(text):
+ exclude = set(string.punctuation)
+ return ''.join(ch for ch in text if ch not in exclude)
+
+ def lower(text):
+ return text.lower()
+
+ return white_space_fix(remove_articles(remove_punc(lower(s))))
+
+
+def f1_score(prediction, ground_truth):
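+    # Token-level F1. Worked example (illustrative): prediction "sat on mat"
+    # vs. ground truth "on the mat" -> normalized tokens [sat, on, mat] and
+    # [on, mat]; overlap = 2, precision = 2/3, recall = 2/2, f1 = 0.8.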
+ prediction_tokens = normalize_answer(prediction).split()
+ ground_truth_tokens = normalize_answer(ground_truth).split()
+ common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+ num_same = sum(common.values())
+ if num_same == 0:
+ return 0
+ precision = 1.0 * num_same / len(prediction_tokens)
+ recall = 1.0 * num_same / len(ground_truth_tokens)
+ f1 = (2 * precision * recall) / (precision + recall)
+ return f1
+
+
+def exact_match_score(prediction, ground_truth):
+ return (normalize_answer(prediction) == normalize_answer(ground_truth))
+
+
+def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
+ scores_for_ground_truths = []
+ for ground_truth in ground_truths:
+ score = metric_fn(prediction, ground_truth)
+ scores_for_ground_truths.append(score)
+ return max(scores_for_ground_truths)
+
+
+def evaluate(dataset, predictions):
+ f1 = exact_match = total = 0
+ for article in dataset:
+ for paragraph in article['paragraphs']:
+ for qa in paragraph['qas']:
+ total += 1
+ if qa['id'] not in predictions:
+ message = 'Unanswered question ' + qa['id'] + \
+ ' will receive score 0.'
+ print(message, file=sys.stderr)
+ continue
+ ground_truths = list(map(lambda x: x['text'], qa['answers']))
+ prediction = predictions[qa['id']]
+ exact_match += metric_max_over_ground_truths(
+ exact_match_score, prediction, ground_truths)
+ f1 += metric_max_over_ground_truths(
+ f1_score, prediction, ground_truths)
+
+ exact_match = 100.0 * exact_match / total
+ f1 = 100.0 * f1 / total
+
+ return {'exact_match': exact_match, 'f1': f1}
+
+
+if __name__ == '__main__':
+ expected_version = '1.1'
+ parser = argparse.ArgumentParser(
+ description='Evaluation for SQuAD ' + expected_version)
+ parser.add_argument('dataset_file', help='Dataset file')
+ parser.add_argument('prediction_file', help='Prediction File')
+ args = parser.parse_args()
+ with open(args.dataset_file) as dataset_file:
+ dataset_json = json.load(dataset_file)
+ if (dataset_json['version'] != expected_version):
+ print('Evaluation expects v-' + expected_version +
+ ', but got dataset with v-' + dataset_json['version'],
+ file=sys.stderr)
+ dataset = dataset_json['data']
+ with open(args.prediction_file) as prediction_file:
+ predictions = json.load(prediction_file)
+ print(json.dumps(evaluate(dataset, predictions)))
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/extract_features.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/extract_features.py
new file mode 100644
index 000000000..89c6d8d45
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/extract_features.py
@@ -0,0 +1,419 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extract pre-computed feature vectors from BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import codecs
+import collections
+import json
+import re
+
+import modeling
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None, "")
+
+flags.DEFINE_string("output_file", None, "")
+
+flags.DEFINE_string("layers", "-1,-2,-3,-4", "")
+
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer("batch_size", 32, "Batch size for predictions.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+flags.DEFINE_string("master", None,
+ "If using a TPU, the address of the master.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+flags.DEFINE_bool(
+ "use_one_hot_embeddings", False,
+ "If True, tf.one_hot will be used for embedding lookups, otherwise "
+ "tf.nn.embedding_lookup will be used. On TPUs, this should be True "
+ "since it is much faster.")
+
+
+class InputExample(object):
+
+ def __init__(self, unique_id, text_a, text_b):
+ self.unique_id = unique_id
+ self.text_a = text_a
+ self.text_b = text_b
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids):
+ self.unique_id = unique_id
+ self.tokens = tokens
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.input_type_ids = input_type_ids
+
+
+def input_fn_builder(features, seq_length):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ all_unique_ids = []
+ all_input_ids = []
+ all_input_mask = []
+ all_input_type_ids = []
+
+ for feature in features:
+ all_unique_ids.append(feature.unique_id)
+ all_input_ids.append(feature.input_ids)
+ all_input_mask.append(feature.input_mask)
+ all_input_type_ids.append(feature.input_type_ids)
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ num_examples = len(features)
+
+ # This is for demo purposes and does NOT scale to large data sets. We do
+ # not use Dataset.from_generator() because that uses tf.py_func which is
+ # not TPU compatible. The right way to load data is with TFRecordReader.
+ d = tf.data.Dataset.from_tensor_slices({
+ "unique_ids":
+ tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32),
+ "input_ids":
+ tf.constant(
+ all_input_ids, shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_mask":
+ tf.constant(
+ all_input_mask,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_type_ids":
+ tf.constant(
+ all_input_type_ids,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ })
+
+ d = d.batch(batch_size=batch_size, drop_remainder=True)
+ return d
+
+ return input_fn
+
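+# Illustrative usage: the input_fn closure returned above is typically handed
+# to an Estimator, e.g.
+#   estimator.predict(input_fn=input_fn_builder(features, FLAGS.max_seq_length))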
+
+def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ unique_ids = features["unique_ids"]
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ input_type_ids = features["input_type_ids"]
+
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=False,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=input_type_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ if mode != tf.estimator.ModeKeys.PREDICT:
+ raise ValueError("Only PREDICT modes are supported: %s" % (mode))
+
+ tvars = tf.trainable_variables()
+ scaffold_fn = None
+ (assignment_map,
+ initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
+ tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ all_layers = model.get_all_encoder_layers()
+
+ predictions = {
+ "unique_id": unique_ids,
+ }
+
+ for (i, layer_index) in enumerate(layer_indexes):
+ predictions["layer_output_%d" % i] = all_layers[layer_index]
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
+ return output_spec
+
+ return model_fn
+
+
+def convert_examples_to_features(examples, seq_length, tokenizer):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ features = []
+ for (ex_index, example) in enumerate(examples):
+ tokens_a = tokenizer.tokenize(example.text_a)
+
+ tokens_b = None
+ if example.text_b:
+ tokens_b = tokenizer.tokenize(example.text_b)
+
+ if tokens_b:
+ # Modifies `tokens_a` and `tokens_b` in place so that the total
+ # length is less than the specified length.
+ # Account for [CLS], [SEP], [SEP] with "- 3"
+ _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3)
+ else:
+ # Account for [CLS] and [SEP] with "- 2"
+ if len(tokens_a) > seq_length - 2:
+ tokens_a = tokens_a[0:(seq_length - 2)]
+
+ # The convention in BERT is:
+ # (a) For sequence pairs:
+ # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
+ # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
+ # (b) For single sequences:
+ # tokens: [CLS] the dog is hairy . [SEP]
+ # type_ids: 0 0 0 0 0 0 0
+ #
+ # Where "type_ids" are used to indicate whether this is the first
+ # sequence or the second sequence. The embedding vectors for `type=0` and
+ # `type=1` were learned during pre-training and are added to the wordpiece
+ # embedding vector (and position vector). This is not *strictly* necessary
+ # since the [SEP] token unambiguously separates the sequences, but it makes
+ # it easier for the model to learn the concept of sequences.
+ #
+ # For classification tasks, the first vector (corresponding to [CLS]) is
+    # used as the "sentence vector". Note that this only makes sense because
+ # the entire model is fine-tuned.
+ tokens = []
+ input_type_ids = []
+ tokens.append("[CLS]")
+ input_type_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ input_type_ids.append(0)
+ tokens.append("[SEP]")
+ input_type_ids.append(0)
+
+ if tokens_b:
+ for token in tokens_b:
+ tokens.append(token)
+ input_type_ids.append(1)
+ tokens.append("[SEP]")
+ input_type_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ input_type_ids.append(0)
+
+ assert len(input_ids) == seq_length
+ assert len(input_mask) == seq_length
+ assert len(input_type_ids) == seq_length
+
+ if ex_index < 5:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (example.unique_id))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids]))
+
+ features.append(
+ InputFeatures(
+ unique_id=example.unique_id,
+ tokens=tokens,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ input_type_ids=input_type_ids))
+ return features
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+ """Truncates a sequence pair in place to the maximum length."""
+
+ # This is a simple heuristic which will always truncate the longer sequence
+ # one token at a time. This makes more sense than truncating an equal percent
+ # of tokens from each, since if one sequence is very short then each token
+ # that's truncated likely contains more information than a longer sequence.
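+  # For example, with max_length=8 and starting lengths (6, 5), the loop
+  # pops one token at a time, preferring the longer side (ties pop
+  # `tokens_b`): (6, 5) -> (5, 5) -> (5, 4) -> (4, 4), stopping once
+  # 4 + 4 <= 8.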
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_length:
+ break
+ if len(tokens_a) > len(tokens_b):
+ tokens_a.pop()
+ else:
+ tokens_b.pop()
+
+
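+# `read_examples` accepts one example per line: either a single sentence, or
+# two sentences separated by " ||| ". Illustrative input lines:
+#   Who was Jim Henson ?
+#   Jim Henson was a puppeteer ||| He was born in Greenville .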
+def read_examples(input_file):
+ """Read a list of `InputExample`s from an input file."""
+ examples = []
+ unique_id = 0
+ with tf.gfile.GFile(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+ text_a = None
+ text_b = None
+ m = re.match(r"^(.*) \|\|\| (.*)$", line)
+ if m is None:
+ text_a = line
+ else:
+ text_a = m.group(1)
+ text_b = m.group(2)
+ examples.append(
+ InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
+ unique_id += 1
+ return examples
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ layer_indexes = [int(x) for x in FLAGS.layers.split(",")]
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ master=FLAGS.master,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ examples = read_examples(FLAGS.input_file)
+
+ features = convert_examples_to_features(
+ examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
+
+ unique_id_to_feature = {}
+ for feature in features:
+ unique_id_to_feature[feature.unique_id] = feature
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ layer_indexes=layer_indexes,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_one_hot_embeddings)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ predict_batch_size=FLAGS.batch_size)
+
+ input_fn = input_fn_builder(
+ features=features, seq_length=FLAGS.max_seq_length)
+
+ with codecs.getwriter("utf-8")(tf.gfile.Open(FLAGS.output_file,
+ "w")) as writer:
+ for result in estimator.predict(input_fn, yield_single_examples=True):
+ unique_id = int(result["unique_id"])
+ feature = unique_id_to_feature[unique_id]
+ output_json = collections.OrderedDict()
+ output_json["linex_index"] = unique_id
+ all_features = []
+ for (i, token) in enumerate(feature.tokens):
+ all_layers = []
+ for (j, layer_index) in enumerate(layer_indexes):
+ layer_output = result["layer_output_%d" % j]
+ layers = collections.OrderedDict()
+ layers["index"] = layer_index
+ layers["values"] = [
+ round(float(x), 6) for x in layer_output[i:(i + 1)].flat
+ ]
+ all_layers.append(layers)
+        token_features = collections.OrderedDict()
+        token_features["token"] = token
+        token_features["layers"] = all_layers
+        all_features.append(token_features)
+ output_json["features"] = all_features
+ writer.write(json.dumps(output_json) + "\n")
+
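+# Each line written above is a JSON object of the form (values illustrative):
+#   {"linex_index": 0,
+#    "features": [{"token": "[CLS]",
+#                  "layers": [{"index": -1, "values": [0.123456, ...]},
+#                             ...]},
+#                 ...]}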
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("init_checkpoint")
+ flags.mark_flag_as_required("output_file")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/gpu_environment.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/gpu_environment.py
new file mode 100644
index 000000000..948c3fa44
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/gpu_environment.py
@@ -0,0 +1,36 @@
+# coding=utf-8
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+import numpy as np
+
+def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
+ initializer=None, regularizer=None,
+ trainable=True,
+ *args, **kwargs):
+ """Custom variable getter that forces trainable variables to be stored in
+ float32 precision and then casts them to the training precision.
+ """
+ storage_dtype = tf.float32 if trainable else dtype
+ variable = getter(name, shape, dtype=storage_dtype,
+ initializer=initializer, regularizer=regularizer,
+ trainable=trainable,
+ *args, **kwargs)
+ if trainable and dtype != tf.float32:
+ variable = tf.cast(variable, dtype)
+ return variable
+
+def get_custom_getter(compute_type):
+ return float32_variable_storage_getter if compute_type == tf.float16 else None
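+
+# Illustrative usage sketch (the scope name and shapes here are assumed, not
+# taken from this repo): attaching the getter to a variable scope lets
+# float16 compute read float32 master weights through a cast.
+#
+#   with tf.variable_scope("model",
+#                          custom_getter=get_custom_getter(tf.float16)):
+#     w = tf.get_variable("w", shape=[16, 16], dtype=tf.float16)
+#     # `w` is stored in float32 but returned cast to float16.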
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling.py
new file mode 100644
index 000000000..c99f2d611
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling.py
@@ -0,0 +1,988 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The main BERT model and related functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import json
+import math
+import re
+import numpy as np
+import six
+import tensorflow as tf
+from npu_bridge.estimator.npu_unary_ops import npu_unary_ops
+from npu_bridge.estimator import npu_ops
+
+class BertConfig(object):
+ """Configuration for `BertModel`."""
+
+ def __init__(self,
+ vocab_size,
+ hidden_size=768,
+ num_hidden_layers=12,
+ num_attention_heads=12,
+ intermediate_size=3072,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ initializer_range=0.02):
+ """Constructs BertConfig.
+
+ Args:
+ vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
+ hidden_size: Size of the encoder layers and the pooler layer.
+ num_hidden_layers: Number of hidden layers in the Transformer encoder.
+ num_attention_heads: Number of attention heads for each attention layer in
+ the Transformer encoder.
+ intermediate_size: The size of the "intermediate" (i.e., feed-forward)
+ layer in the Transformer encoder.
+ hidden_act: The non-linear activation function (function or string) in the
+ encoder and pooler.
+ hidden_dropout_prob: The dropout probability for all fully connected
+ layers in the embeddings, encoder, and pooler.
+ attention_probs_dropout_prob: The dropout ratio for the attention
+ probabilities.
+ max_position_embeddings: The maximum sequence length that this model might
+ ever be used with. Typically set this to something large just in case
+ (e.g., 512 or 1024 or 2048).
+ type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+ `BertModel`.
+ initializer_range: The stdev of the truncated_normal_initializer for
+ initializing all weight matrices.
+ """
+ self.vocab_size = vocab_size
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.hidden_act = hidden_act
+ self.intermediate_size = intermediate_size
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.max_position_embeddings = max_position_embeddings
+ self.type_vocab_size = type_vocab_size
+ self.initializer_range = initializer_range
+
+ @classmethod
+ def from_dict(cls, json_object):
+ """Constructs a `BertConfig` from a Python dictionary of parameters."""
+ config = BertConfig(vocab_size=None)
+ for (key, value) in six.iteritems(json_object):
+ config.__dict__[key] = value
+ return config
+
+ @classmethod
+ def from_json_file(cls, json_file):
+ """Constructs a `BertConfig` from a json file of parameters."""
+ with tf.gfile.GFile(json_file, "r") as reader:
+ text = reader.read()
+ return cls.from_dict(json.loads(text))
+
+ def to_dict(self):
+ """Serializes this instance to a Python dictionary."""
+ output = copy.deepcopy(self.__dict__)
+ return output
+
+ def to_json_string(self):
+ """Serializes this instance to a JSON string."""
+ return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
+
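+# For reference, the JSON consumed by `from_json_file` carries exactly the
+# constructor's keyword arguments; e.g. a standard BERT-Base (uncased) file:
+#
+#   {
+#     "vocab_size": 30522, "hidden_size": 768, "num_hidden_layers": 12,
+#     "num_attention_heads": 12, "intermediate_size": 3072,
+#     "hidden_act": "gelu", "hidden_dropout_prob": 0.1,
+#     "attention_probs_dropout_prob": 0.1, "max_position_embeddings": 512,
+#     "type_vocab_size": 2, "initializer_range": 0.02
+#   }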
+
+class BertModel(object):
+ """BERT model ("Bidirectional Encoder Representations from Transformers").
+
+ Example usage:
+
+ ```python
+ # Already been converted into WordPiece token ids
+ input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
+ input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
+ token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])
+
+ config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
+ num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
+
+ model = modeling.BertModel(config=config, is_training=True,
+ input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)
+
+ label_embeddings = tf.get_variable(...)
+ pooled_output = model.get_pooled_output()
+ logits = tf.matmul(pooled_output, label_embeddings)
+ ...
+ ```
+ """
+
+ def __init__(self,
+ config,
+ is_training,
+ input_ids,
+ input_mask=None,
+ token_type_ids=None,
+ use_one_hot_embeddings=False,
+ scope=None):
+ """Constructor for BertModel.
+
+ Args:
+ config: `BertConfig` instance.
+      is_training: bool. True for a training model, False for an eval model.
+        Controls whether dropout will be applied.
+ input_ids: int32 Tensor of shape [batch_size, seq_length].
+ input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
+ token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
+      use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
+        embeddings or `tf.gather()` for the word embeddings.
+ scope: (optional) variable scope. Defaults to "bert".
+
+ Raises:
+ ValueError: The config is invalid or one of the input tensor shapes
+ is invalid.
+ """
+ config = copy.deepcopy(config)
+ if not is_training:
+ config.hidden_dropout_prob = 0.0
+ config.attention_probs_dropout_prob = 0.0
+
+ input_shape = get_shape_list(input_ids, expected_rank=2)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+
+ if input_mask is None:
+ input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32)
+
+ if token_type_ids is None:
+ token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)
+
+ with tf.variable_scope(scope, default_name="bert"):
+ with tf.variable_scope("embeddings"):
+ # Perform embedding lookup on the word ids.
+ (self.embedding_output, self.embedding_table) = embedding_lookup(
+ input_ids=input_ids,
+ vocab_size=config.vocab_size,
+ embedding_size=config.hidden_size,
+ initializer_range=config.initializer_range,
+ word_embedding_name="word_embeddings",
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ # Add positional embeddings and token type embeddings, then layer
+ # normalize and perform dropout.
+ self.embedding_output = embedding_postprocessor(
+ input_tensor=self.embedding_output,
+ use_token_type=True,
+ token_type_ids=token_type_ids,
+ token_type_vocab_size=config.type_vocab_size,
+ token_type_embedding_name="token_type_embeddings",
+ use_position_embeddings=True,
+ position_embedding_name="position_embeddings",
+ initializer_range=config.initializer_range,
+ max_position_embeddings=config.max_position_embeddings,
+ dropout_prob=config.hidden_dropout_prob)
+
+ with tf.variable_scope("encoder"):
+ # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
+ # mask of shape [batch_size, seq_length, seq_length] which is used
+ # for the attention scores.
+ attention_mask = create_attention_mask_from_input_mask(
+ input_ids, input_mask)
+
+ # Run the stacked transformer.
+ # `sequence_output` shape = [batch_size, seq_length, hidden_size].
+ self.all_encoder_layers = transformer_model(
+ input_tensor=self.embedding_output,
+ attention_mask=attention_mask,
+ hidden_size=config.hidden_size,
+ num_hidden_layers=config.num_hidden_layers,
+ num_attention_heads=config.num_attention_heads,
+ intermediate_size=config.intermediate_size,
+ intermediate_act_fn=get_activation(config.hidden_act),
+ hidden_dropout_prob=config.hidden_dropout_prob,
+ attention_probs_dropout_prob=config.attention_probs_dropout_prob,
+ initializer_range=config.initializer_range,
+ do_return_all_layers=True)
+
+ self.sequence_output = self.all_encoder_layers[-1]
+ # The "pooler" converts the encoded sequence tensor of shape
+ # [batch_size, seq_length, hidden_size] to a tensor of shape
+ # [batch_size, hidden_size]. This is necessary for segment-level
+ # (or segment-pair-level) classification tasks where we need a fixed
+ # dimensional representation of the segment.
+ with tf.variable_scope("pooler"):
+ # We "pool" the model by simply taking the hidden state corresponding
+ # to the first token. We assume that this has been pre-trained
+ first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
+ self.pooled_output = tf.layers.dense(
+ first_token_tensor,
+ config.hidden_size,
+ activation=tf.tanh,
+ kernel_initializer=create_initializer(config.initializer_range))
+
+ def get_pooled_output(self):
+ return self.pooled_output
+
+ def get_sequence_output(self):
+ """Gets final hidden layer of encoder.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
+ to the final hidden of the transformer encoder.
+ """
+ return self.sequence_output
+
+ def get_all_encoder_layers(self):
+ return self.all_encoder_layers
+
+ def get_embedding_output(self):
+ """Gets output of the embedding lookup (i.e., input to the transformer).
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
+ to the output of the embedding layer, after summing the word
+ embeddings with the positional embeddings and the token type embeddings,
+ then performing layer normalization. This is the input to the transformer.
+ """
+ return self.embedding_output
+
+ def get_embedding_table(self):
+ return self.embedding_table
+
+
+def gelu(x):
+ """Gaussian Error Linear Unit.
+
+  This is a smoother version of the ReLU.
+ Original paper: https://arxiv.org/abs/1606.08415
+ Args:
+ x: float Tensor to perform activation.
+
+ Returns:
+ `x` with the GELU activation applied.
+ """
+  # Original tanh approximation, kept for reference; it is replaced below by
+  # the fused NPU kernel:
+  #   cdf = 0.5 * (1.0 + tf.tanh(
+  #       (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
+  #   return x * cdf
+  return npu_unary_ops.gelu(x)
+
+def get_activation(activation_string):
+ """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.
+
+ Args:
+ activation_string: String name of the activation function.
+
+ Returns:
+ A Python function corresponding to the activation function. If
+ `activation_string` is None, empty, or "linear", this will return None.
+ If `activation_string` is not a string, it will return `activation_string`.
+
+ Raises:
+ ValueError: The `activation_string` does not correspond to a known
+ activation.
+ """
+
+  # We assume that anything that's not a string is already an activation
+  # function, so we just return it.
+ if not isinstance(activation_string, six.string_types):
+ return activation_string
+
+ if not activation_string:
+ return None
+
+ act = activation_string.lower()
+ if act == "linear":
+ return None
+ elif act == "relu":
+ return tf.nn.relu
+ elif act == "gelu":
+ return gelu
+ elif act == "tanh":
+ return tf.tanh
+ else:
+ raise ValueError("Unsupported activation: %s" % act)
+
+
+def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
+ """Compute the union of the current variables and checkpoint variables."""
+ assignment_map = {}
+ initialized_variable_names = {}
+
+ name_to_variable = collections.OrderedDict()
+ for var in tvars:
+ name = var.name
+ m = re.match("^(.*):\\d+$", name)
+ if m is not None:
+ name = m.group(1)
+ name_to_variable[name] = var
+
+ init_vars = tf.train.list_variables(init_checkpoint)
+
+ assignment_map = collections.OrderedDict()
+ for x in init_vars:
+ (name, var) = (x[0], x[1])
+ if name not in name_to_variable:
+ continue
+ assignment_map[name] = name
+ initialized_variable_names[name] = 1
+ initialized_variable_names[name + ":0"] = 1
+
+ return (assignment_map, initialized_variable_names)
+
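+# For example, if both the checkpoint and the current graph contain
+# "bert/embeddings/word_embeddings", the map gains an identity entry for
+# that name, and both the bare name and its ":0" tensor form are recorded
+# in `initialized_variable_names`.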
+
+def dropout(input_tensor, dropout_prob):
+ """Perform dropout.
+
+ Args:
+ input_tensor: float Tensor.
+ dropout_prob: Python float. The probability of dropping out a value (NOT of
+ *keeping* a dimension as in `tf.nn.dropout`).
+
+ Returns:
+ A version of `input_tensor` with dropout applied.
+ """
+ if dropout_prob is None or dropout_prob == 0.0:
+ return input_tensor
+
+ #output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob)
+ output = npu_ops.dropout(input_tensor, 1.0 - dropout_prob)
+ return output
+
+
+def layer_norm(input_tensor, name=None):
+ """Run layer normalization on the last dimension of the tensor."""
+ return tf.contrib.layers.layer_norm(
+ inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
+
+
+def layer_norm_and_dropout(input_tensor, dropout_prob, name=None):
+ """Runs layer normalization followed by dropout."""
+ output_tensor = layer_norm(input_tensor, name)
+ output_tensor = dropout(output_tensor, dropout_prob)
+ return output_tensor
+
+
+def create_initializer(initializer_range=0.02):
+ """Creates a `truncated_normal_initializer` with the given range."""
+ return tf.truncated_normal_initializer(stddev=initializer_range)
+
+
+def embedding_lookup(input_ids,
+ vocab_size,
+ embedding_size=128,
+ initializer_range=0.02,
+ word_embedding_name="word_embeddings",
+ use_one_hot_embeddings=False):
+ """Looks up words embeddings for id tensor.
+
+ Args:
+ input_ids: int32 Tensor of shape [batch_size, seq_length] containing word
+ ids.
+ vocab_size: int. Size of the embedding vocabulary.
+ embedding_size: int. Width of the word embeddings.
+ initializer_range: float. Embedding initialization range.
+ word_embedding_name: string. Name of the embedding table.
+ use_one_hot_embeddings: bool. If True, use one-hot method for word
+ embeddings. If False, use `tf.gather()`.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, embedding_size].
+ """
+ # This function assumes that the input is of shape [batch_size, seq_length,
+ # num_inputs].
+ #
+ # If the input is a 2D tensor of shape [batch_size, seq_length], we
+ # reshape to [batch_size, seq_length, 1].
+ if input_ids.shape.ndims == 2:
+ input_ids = tf.expand_dims(input_ids, axis=[-1])
+
+ embedding_table = tf.get_variable(
+ name=word_embedding_name,
+ shape=[vocab_size, embedding_size],
+ initializer=create_initializer(initializer_range))
+
+ flat_input_ids = tf.reshape(input_ids, [-1])
+ if use_one_hot_embeddings:
+ one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
+ output = tf.matmul(one_hot_input_ids, embedding_table)
+ else:
+ output = tf.gather(embedding_table, flat_input_ids)
+
+ input_shape = get_shape_list(input_ids)
+
+ output = tf.reshape(output,
+ input_shape[0:-1] + [input_shape[-1] * embedding_size])
+ return (output, embedding_table)
+
+
+def embedding_postprocessor(input_tensor,
+ use_token_type=False,
+ token_type_ids=None,
+ token_type_vocab_size=16,
+ token_type_embedding_name="token_type_embeddings",
+ use_position_embeddings=True,
+ position_embedding_name="position_embeddings",
+ initializer_range=0.02,
+ max_position_embeddings=512,
+ dropout_prob=0.1):
+ """Performs various post-processing on a word embedding tensor.
+
+ Args:
+ input_tensor: float Tensor of shape [batch_size, seq_length,
+ embedding_size].
+ use_token_type: bool. Whether to add embeddings for `token_type_ids`.
+ token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
+ Must be specified if `use_token_type` is True.
+ token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
+ token_type_embedding_name: string. The name of the embedding table variable
+ for token type ids.
+ use_position_embeddings: bool. Whether to add position embeddings for the
+ position of each token in the sequence.
+ position_embedding_name: string. The name of the embedding table variable
+ for positional embeddings.
+ initializer_range: float. Range of the weight initialization.
+ max_position_embeddings: int. Maximum sequence length that might ever be
+ used with this model. This can be longer than the sequence length of
+ input_tensor, but cannot be shorter.
+ dropout_prob: float. Dropout probability applied to the final output tensor.
+
+ Returns:
+ float tensor with same shape as `input_tensor`.
+
+ Raises:
+ ValueError: One of the tensor shapes or input values is invalid.
+ """
+ input_shape = get_shape_list(input_tensor, expected_rank=3)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+ width = input_shape[2]
+
+ output = input_tensor
+
+ if use_token_type:
+ if token_type_ids is None:
+      raise ValueError("`token_type_ids` must be specified if "
+                       "`use_token_type` is True.")
+ token_type_table = tf.get_variable(
+ name=token_type_embedding_name,
+ shape=[token_type_vocab_size, width],
+ initializer=create_initializer(initializer_range))
+ # This vocab will be small so we always do one-hot here, since it is always
+ # faster for a small vocabulary.
+ flat_token_type_ids = tf.reshape(token_type_ids, [-1])
+ one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
+ token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
+ token_type_embeddings = tf.reshape(token_type_embeddings,
+ [batch_size, seq_length, width])
+ output += token_type_embeddings
+
+ if use_position_embeddings:
+ assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
+ with tf.control_dependencies([assert_op]):
+ full_position_embeddings = tf.get_variable(
+ name=position_embedding_name,
+ shape=[max_position_embeddings, width],
+ initializer=create_initializer(initializer_range))
+ # Since the position embedding table is a learned variable, we create it
+ # using a (long) sequence length `max_position_embeddings`. The actual
+ # sequence length might be shorter than this, for faster training of
+ # tasks that do not have long sequences.
+ #
+ # So `full_position_embeddings` is effectively an embedding table
+ # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
+ # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
+ # perform a slice.
+ position_embeddings = tf.slice(full_position_embeddings, [0, 0],
+ [seq_length, -1])
+ num_dims = len(output.shape.as_list())
+
+ # Only the last two dimensions are relevant (`seq_length` and `width`), so
+ # we broadcast among the first dimensions, which is typically just
+ # the batch size.
+ position_broadcast_shape = []
+ for _ in range(num_dims - 2):
+ position_broadcast_shape.append(1)
+ position_broadcast_shape.extend([seq_length, width])
+ position_embeddings = tf.reshape(position_embeddings,
+ position_broadcast_shape)
+ output += position_embeddings
+
+ output = layer_norm_and_dropout(output, dropout_prob)
+ return output
+
+
+def create_attention_mask_from_input_mask(from_tensor, to_mask):
+ """Create 3D attention mask from a 2D tensor mask.
+
+ Args:
+ from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...].
+ to_mask: int32 Tensor of shape [batch_size, to_seq_length].
+
+ Returns:
+ float Tensor of shape [batch_size, from_seq_length, to_seq_length].
+ """
+ from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+ batch_size = from_shape[0]
+ from_seq_length = from_shape[1]
+
+ to_shape = get_shape_list(to_mask, expected_rank=2)
+ to_seq_length = to_shape[1]
+
+ to_mask = tf.cast(
+ tf.reshape(to_mask, [batch_size, 1, to_seq_length]), tf.float32)
+
+ # We don't assume that `from_tensor` is a mask (although it could be). We
+ # don't actually care if we attend *from* padding tokens (only *to* padding)
+ # tokens so we create a tensor of all ones.
+ #
+ # `broadcast_ones` = [batch_size, from_seq_length, 1]
+ broadcast_ones = tf.ones(
+ shape=[batch_size, from_seq_length, 1], dtype=tf.float32)
+
+ # Here we broadcast along two dimensions to create the mask.
+ mask = broadcast_ones * to_mask
+
+ return mask
+
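+# For example, with batch_size=1, from_seq_length=3 and to_mask=[[1, 1, 0]],
+# broadcasting gives
+#   mask = [[[1., 1., 0.],
+#            [1., 1., 0.],
+#            [1., 1., 0.]]]
+# i.e. every query position may attend to the first two key positions only.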
+
+def attention_layer(from_tensor,
+ to_tensor,
+ attention_mask=None,
+ num_attention_heads=1,
+ size_per_head=512,
+ query_act=None,
+ key_act=None,
+ value_act=None,
+ attention_probs_dropout_prob=0.0,
+ initializer_range=0.02,
+ do_return_2d_tensor=False,
+ batch_size=None,
+ from_seq_length=None,
+ to_seq_length=None):
+ """Performs multi-headed attention from `from_tensor` to `to_tensor`.
+
+  This is an implementation of multi-headed attention based on "Attention
+  Is All You Need". If `from_tensor` and `to_tensor` are the same, then
+  this is self-attention. Each timestep in `from_tensor` attends to the
+  corresponding sequence in `to_tensor`, and returns a fixed-width vector.
+
+ This function first projects `from_tensor` into a "query" tensor and
+ `to_tensor` into "key" and "value" tensors. These are (effectively) a list
+ of tensors of length `num_attention_heads`, where each tensor is of shape
+ [batch_size, seq_length, size_per_head].
+
+ Then, the query and key tensors are dot-producted and scaled. These are
+ softmaxed to obtain attention probabilities. The value tensors are then
+ interpolated by these probabilities, then concatenated back to a single
+ tensor and returned.
+
+  In practice, the multi-headed attention is done with transposes and
+  reshapes rather than with actual separate tensors.
+
+ Args:
+ from_tensor: float Tensor of shape [batch_size, from_seq_length,
+ from_width].
+ to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
+ attention_mask: (optional) int32 Tensor of shape [batch_size,
+ from_seq_length, to_seq_length]. The values should be 1 or 0. The
+ attention scores will effectively be set to -infinity for any positions in
+ the mask that are 0, and will be unchanged for positions that are 1.
+ num_attention_heads: int. Number of attention heads.
+ size_per_head: int. Size of each attention head.
+ query_act: (optional) Activation function for the query transform.
+ key_act: (optional) Activation function for the key transform.
+ value_act: (optional) Activation function for the value transform.
+ attention_probs_dropout_prob: (optional) float. Dropout probability of the
+ attention probabilities.
+ initializer_range: float. Range of the weight initializer.
+ do_return_2d_tensor: bool. If True, the output will be of shape [batch_size
+ * from_seq_length, num_attention_heads * size_per_head]. If False, the
+ output will be of shape [batch_size, from_seq_length, num_attention_heads
+ * size_per_head].
+ batch_size: (Optional) int. If the input is 2D, this might be the batch size
+ of the 3D version of the `from_tensor` and `to_tensor`.
+ from_seq_length: (Optional) If the input is 2D, this might be the seq length
+ of the 3D version of the `from_tensor`.
+ to_seq_length: (Optional) If the input is 2D, this might be the seq length
+ of the 3D version of the `to_tensor`.
+
+ Returns:
+ float Tensor of shape [batch_size, from_seq_length,
+ num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
+ true, this will be of shape [batch_size * from_seq_length,
+ num_attention_heads * size_per_head]).
+
+ Raises:
+ ValueError: Any of the arguments or tensor shapes are invalid.
+ """
+
+ def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
+ seq_length, width):
+ output_tensor = tf.reshape(
+ input_tensor, [batch_size, seq_length, num_attention_heads, width])
+
+ output_tensor = tf.transpose(output_tensor, [0, 2, 1, 3])
+ return output_tensor
+
+ from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+ to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])
+
+ if len(from_shape) != len(to_shape):
+ raise ValueError(
+ "The rank of `from_tensor` must match the rank of `to_tensor`.")
+
+ if len(from_shape) == 3:
+ batch_size = from_shape[0]
+ from_seq_length = from_shape[1]
+ to_seq_length = to_shape[1]
+ elif len(from_shape) == 2:
+ if (batch_size is None or from_seq_length is None or to_seq_length is None):
+ raise ValueError(
+ "When passing in rank 2 tensors to attention_layer, the values "
+ "for `batch_size`, `from_seq_length`, and `to_seq_length` "
+ "must all be specified.")
+
+ # Scalar dimensions referenced here:
+ # B = batch size (number of sequences)
+ # F = `from_tensor` sequence length
+ # T = `to_tensor` sequence length
+ # N = `num_attention_heads`
+ # H = `size_per_head`
+
+ from_tensor_2d = reshape_to_matrix(from_tensor)
+ to_tensor_2d = reshape_to_matrix(to_tensor)
+
+ # `query_layer` = [B*F, N*H]
+ query_layer = tf.layers.dense(
+ from_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=query_act,
+ name="query",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `key_layer` = [B*T, N*H]
+ key_layer = tf.layers.dense(
+ to_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=key_act,
+ name="key",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `value_layer` = [B*T, N*H]
+ value_layer = tf.layers.dense(
+ to_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=value_act,
+ name="value",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `query_layer` = [B, N, F, H]
+ query_layer = transpose_for_scores(query_layer, batch_size,
+ num_attention_heads, from_seq_length,
+ size_per_head)
+
+ # `key_layer` = [B, N, T, H]
+ key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads,
+ to_seq_length, size_per_head)
+
+ # Take the dot product between "query" and "key" to get the raw
+ # attention scores.
+ # `attention_scores` = [B, N, F, T]
+ attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
+ attention_scores = tf.multiply(attention_scores,
+ 1.0 / math.sqrt(float(size_per_head)))
+
+ if attention_mask is not None:
+ # `attention_mask` = [B, 1, F, T]
+ attention_mask = tf.expand_dims(attention_mask, axis=[1])
+
+ # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+ # masked positions, this operation will create a tensor which is 0.0 for
+ # positions we want to attend and -10000.0 for masked positions.
+ adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
+
+ # Since we are adding it to the raw scores before the softmax, this is
+ # effectively the same as removing these entirely.
+ attention_scores += adder
+
+ # Normalize the attention scores to probabilities.
+ # `attention_probs` = [B, N, F, T]
+ attention_probs = tf.nn.softmax(attention_scores)
+
+ # This is actually dropping out entire tokens to attend to, which might
+ # seem a bit unusual, but is taken from the original Transformer paper.
+ attention_probs = dropout(attention_probs, attention_probs_dropout_prob)
+
+ # `value_layer` = [B, T, N, H]
+ value_layer = tf.reshape(
+ value_layer,
+ [batch_size, to_seq_length, num_attention_heads, size_per_head])
+
+ # `value_layer` = [B, N, T, H]
+ value_layer = tf.transpose(value_layer, [0, 2, 1, 3])
+
+ # `context_layer` = [B, N, F, H]
+ context_layer = tf.matmul(attention_probs, value_layer)
+
+ # `context_layer` = [B, F, N, H]
+ context_layer = tf.transpose(context_layer, [0, 2, 1, 3])
+
+ if do_return_2d_tensor:
+ # `context_layer` = [B*F, N*H]
+ context_layer = tf.reshape(
+ context_layer,
+ [batch_size * from_seq_length, num_attention_heads * size_per_head])
+ else:
+ # `context_layer` = [B, F, N*H]
+ context_layer = tf.reshape(
+ context_layer,
+ [batch_size, from_seq_length, num_attention_heads * size_per_head])
+
+ return context_layer
+
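+# A minimal NumPy reference for the computation above (single head, no mask,
+# no dropout), included purely to illustrate the math; it is not used by the
+# model:
+def _reference_single_head_attention(q, k, v):
+  """q, k, v: float arrays of shape [seq_length, size_per_head]."""
+  scores = np.matmul(q, k.T) / np.sqrt(q.shape[-1])  # scaled dot product
+  scores = scores - scores.max(axis=-1, keepdims=True)  # stable softmax
+  probs = np.exp(scores)
+  probs = probs / probs.sum(axis=-1, keepdims=True)  # attention probabilities
+  return np.matmul(probs, v)
+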
+
+def transformer_model(input_tensor,
+ attention_mask=None,
+ hidden_size=768,
+ num_hidden_layers=12,
+ num_attention_heads=12,
+ intermediate_size=3072,
+ intermediate_act_fn=gelu,
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ initializer_range=0.02,
+ do_return_all_layers=False):
+ """Multi-headed, multi-layer Transformer from "Attention is All You Need".
+
+ This is almost an exact implementation of the original Transformer encoder.
+
+ See the original paper:
+ https://arxiv.org/abs/1706.03762
+
+ Also see:
+ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py
+
+ Args:
+ input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size].
+ attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length,
+ seq_length], with 1 for positions that can be attended to and 0 in
+ positions that should not be.
+ hidden_size: int. Hidden size of the Transformer.
+ num_hidden_layers: int. Number of layers (blocks) in the Transformer.
+ num_attention_heads: int. Number of attention heads in the Transformer.
+ intermediate_size: int. The size of the "intermediate" (a.k.a., feed
+ forward) layer.
+ intermediate_act_fn: function. The non-linear activation function to apply
+ to the output of the intermediate/feed-forward layer.
+ hidden_dropout_prob: float. Dropout probability for the hidden layers.
+ attention_probs_dropout_prob: float. Dropout probability of the attention
+ probabilities.
+ initializer_range: float. Range of the initializer (stddev of truncated
+ normal).
+ do_return_all_layers: Whether to also return all layers or just the final
+ layer.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size], the final
+ hidden layer of the Transformer.
+
+ Raises:
+ ValueError: A Tensor shape or parameter is invalid.
+ """
+ if hidden_size % num_attention_heads != 0:
+ raise ValueError(
+ "The hidden size (%d) is not a multiple of the number of attention "
+ "heads (%d)" % (hidden_size, num_attention_heads))
+
+ attention_head_size = int(hidden_size / num_attention_heads)
+ input_shape = get_shape_list(input_tensor, expected_rank=3)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+ input_width = input_shape[2]
+
+ # The Transformer performs sum residuals on all layers so the input needs
+ # to be the same as the hidden size.
+ if input_width != hidden_size:
+ raise ValueError("The width of the input tensor (%d) != hidden size (%d)" %
+ (input_width, hidden_size))
+
+ # We keep the representation as a 2D tensor to avoid re-shaping it back and
+ # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on
+ # the GPU/CPU but may not be free on the TPU, so we want to minimize them to
+ # help the optimizer.
+ prev_output = reshape_to_matrix(input_tensor)
+
+ all_layer_outputs = []
+ for layer_idx in range(num_hidden_layers):
+ with tf.variable_scope("layer_%d" % layer_idx):
+ layer_input = prev_output
+
+ with tf.variable_scope("attention"):
+ attention_heads = []
+ with tf.variable_scope("self"):
+ attention_head = attention_layer(
+ from_tensor=layer_input,
+ to_tensor=layer_input,
+ attention_mask=attention_mask,
+ num_attention_heads=num_attention_heads,
+ size_per_head=attention_head_size,
+ attention_probs_dropout_prob=attention_probs_dropout_prob,
+ initializer_range=initializer_range,
+ do_return_2d_tensor=True,
+ batch_size=batch_size,
+ from_seq_length=seq_length,
+ to_seq_length=seq_length)
+ attention_heads.append(attention_head)
+
+ attention_output = None
+ if len(attention_heads) == 1:
+ attention_output = attention_heads[0]
+ else:
+ # In the case where we have other sequences, we just concatenate
+ # them to the self-attention head before the projection.
+ attention_output = tf.concat(attention_heads, axis=-1)
+
+ # Run a linear projection of `hidden_size` then add a residual
+ # with `layer_input`.
+ with tf.variable_scope("output"):
+ attention_output = tf.layers.dense(
+ attention_output,
+ hidden_size,
+ kernel_initializer=create_initializer(initializer_range))
+ attention_output = dropout(attention_output, hidden_dropout_prob)
+ attention_output = layer_norm(attention_output + layer_input)
+
+ # The activation is only applied to the "intermediate" hidden layer.
+ with tf.variable_scope("intermediate"):
+ intermediate_output = tf.layers.dense(
+ attention_output,
+ intermediate_size,
+ activation=intermediate_act_fn,
+ kernel_initializer=create_initializer(initializer_range))
+
+ # Down-project back to `hidden_size` then add the residual.
+ with tf.variable_scope("output"):
+ layer_output = tf.layers.dense(
+ intermediate_output,
+ hidden_size,
+ kernel_initializer=create_initializer(initializer_range))
+ layer_output = dropout(layer_output, hidden_dropout_prob)
+ layer_output = layer_norm(layer_output + attention_output)
+ prev_output = layer_output
+ all_layer_outputs.append(layer_output)
+
+ if do_return_all_layers:
+ final_outputs = []
+ for layer_output in all_layer_outputs:
+ final_output = reshape_from_matrix(layer_output, input_shape)
+ final_outputs.append(final_output)
+ return final_outputs
+ else:
+ final_output = reshape_from_matrix(prev_output, input_shape)
+ return final_output
+
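+# Shape flow through one encoder block (B = batch, F = seq_length): the 2D
+# activations of shape [B*F, hidden_size] go through self-attention plus an
+# output projection (residual + layer norm), then a dense expansion to
+# intermediate_size and a projection back down (residual + layer norm again),
+# so every block consumes and produces [B*F, hidden_size].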
+
+def get_shape_list(tensor, expected_rank=None, name=None):
+ """Returns a list of the shape of tensor, preferring static dimensions.
+
+ Args:
+ tensor: A tf.Tensor object to find the shape of.
+    expected_rank: (optional) int. The expected rank of `tensor`. If this is
+      specified and the `tensor` has a different rank, an exception will be
+      raised.
+ name: Optional name of the tensor for the error message.
+
+ Returns:
+ A list of dimensions of the shape of tensor. All static dimensions will
+ be returned as python integers, and dynamic dimensions will be returned
+ as tf.Tensor scalars.
+ """
+ if name is None:
+ name = tensor.name
+
+ if expected_rank is not None:
+ assert_rank(tensor, expected_rank, name)
+
+ shape = tensor.shape.as_list()
+
+ non_static_indexes = []
+ for (index, dim) in enumerate(shape):
+ if dim is None:
+ non_static_indexes.append(index)
+
+ if not non_static_indexes:
+ return shape
+
+ dyn_shape = tf.shape(tensor)
+ for index in non_static_indexes:
+ shape[index] = dyn_shape[index]
+ return shape
+
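+# For example, for a placeholder of shape [None, 128] this returns
+# [<scalar int32 Tensor from tf.shape>, 128]: static dimensions come back as
+# Python ints and unknown ones as dynamic scalars.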
+
+def reshape_to_matrix(input_tensor):
+ """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix)."""
+ ndims = input_tensor.shape.ndims
+ if ndims < 2:
+ raise ValueError("Input tensor must have at least rank 2. Shape = %s" %
+ (input_tensor.shape))
+ if ndims == 2:
+ return input_tensor
+
+ width = input_tensor.shape[-1]
+ output_tensor = tf.reshape(input_tensor, [-1, width])
+ return output_tensor
+
+
+def reshape_from_matrix(output_tensor, orig_shape_list):
+ """Reshapes a rank 2 tensor back to its original rank >= 2 tensor."""
+ if len(orig_shape_list) == 2:
+ return output_tensor
+
+ output_shape = get_shape_list(output_tensor)
+
+ orig_dims = orig_shape_list[0:-1]
+ width = output_shape[-1]
+
+ return tf.reshape(output_tensor, orig_dims + [width])
+
+
+def assert_rank(tensor, expected_rank, name=None):
+ """Raises an exception if the tensor rank is not of the expected rank.
+
+ Args:
+ tensor: A tf.Tensor to check the rank of.
+ expected_rank: Python integer or list of integers, expected rank.
+ name: Optional name of the tensor for the error message.
+
+ Raises:
+ ValueError: If the expected shape doesn't match the actual shape.
+ """
+ if name is None:
+ name = tensor.name
+
+ expected_rank_dict = {}
+ if isinstance(expected_rank, six.integer_types):
+ expected_rank_dict[expected_rank] = True
+ else:
+ for x in expected_rank:
+ expected_rank_dict[x] = True
+
+ actual_rank = tensor.shape.ndims
+ if actual_rank not in expected_rank_dict:
+ scope_name = tf.get_variable_scope().name
+ raise ValueError(
+ "For the tensor `%s` in scope `%s`, the actual rank "
+ "`%d` (shape = %s) is not equal to the expected rank `%s`" %
+ (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank)))
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling_test.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling_test.py
new file mode 100644
index 000000000..817ad2db5
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modeling_test.py
@@ -0,0 +1,277 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import random
+import re
+
+import modeling
+import six
+import tensorflow as tf
+
+
+class BertModelTest(tf.test.TestCase):
+
+ class BertModelTester(object):
+
+ def __init__(self,
+ parent,
+ batch_size=13,
+ seq_length=7,
+ is_training=True,
+ use_input_mask=True,
+ use_token_type_ids=True,
+ vocab_size=99,
+ hidden_size=32,
+ num_hidden_layers=5,
+ num_attention_heads=4,
+ intermediate_size=37,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ initializer_range=0.02,
+ scope=None):
+ self.parent = parent
+ self.batch_size = batch_size
+ self.seq_length = seq_length
+ self.is_training = is_training
+ self.use_input_mask = use_input_mask
+ self.use_token_type_ids = use_token_type_ids
+ self.vocab_size = vocab_size
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.intermediate_size = intermediate_size
+ self.hidden_act = hidden_act
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.max_position_embeddings = max_position_embeddings
+ self.type_vocab_size = type_vocab_size
+ self.initializer_range = initializer_range
+ self.scope = scope
+
+ def create_model(self):
+ input_ids = BertModelTest.ids_tensor([self.batch_size, self.seq_length],
+ self.vocab_size)
+
+ input_mask = None
+ if self.use_input_mask:
+ input_mask = BertModelTest.ids_tensor(
+ [self.batch_size, self.seq_length], vocab_size=2)
+
+ token_type_ids = None
+ if self.use_token_type_ids:
+ token_type_ids = BertModelTest.ids_tensor(
+ [self.batch_size, self.seq_length], self.type_vocab_size)
+
+ config = modeling.BertConfig(
+ vocab_size=self.vocab_size,
+ hidden_size=self.hidden_size,
+ num_hidden_layers=self.num_hidden_layers,
+ num_attention_heads=self.num_attention_heads,
+ intermediate_size=self.intermediate_size,
+ hidden_act=self.hidden_act,
+ hidden_dropout_prob=self.hidden_dropout_prob,
+ attention_probs_dropout_prob=self.attention_probs_dropout_prob,
+ max_position_embeddings=self.max_position_embeddings,
+ type_vocab_size=self.type_vocab_size,
+ initializer_range=self.initializer_range)
+
+ model = modeling.BertModel(
+ config=config,
+ is_training=self.is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=token_type_ids,
+ scope=self.scope)
+
+ outputs = {
+ "embedding_output": model.get_embedding_output(),
+ "sequence_output": model.get_sequence_output(),
+ "pooled_output": model.get_pooled_output(),
+ "all_encoder_layers": model.get_all_encoder_layers(),
+ }
+ return outputs
+
+ def check_output(self, result):
+ self.parent.assertAllEqual(
+ result["embedding_output"].shape,
+ [self.batch_size, self.seq_length, self.hidden_size])
+
+ self.parent.assertAllEqual(
+ result["sequence_output"].shape,
+ [self.batch_size, self.seq_length, self.hidden_size])
+
+ self.parent.assertAllEqual(result["pooled_output"].shape,
+ [self.batch_size, self.hidden_size])
+
+ def test_default(self):
+ self.run_tester(BertModelTest.BertModelTester(self))
+
+ def test_config_to_json_string(self):
+ config = modeling.BertConfig(vocab_size=99, hidden_size=37)
+ obj = json.loads(config.to_json_string())
+ self.assertEqual(obj["vocab_size"], 99)
+ self.assertEqual(obj["hidden_size"], 37)
+
+ def run_tester(self, tester):
+ with self.test_session() as sess:
+ ops = tester.create_model()
+ init_op = tf.group(tf.global_variables_initializer(),
+ tf.local_variables_initializer())
+ sess.run(init_op)
+ output_result = sess.run(ops)
+ tester.check_output(output_result)
+
+ self.assert_all_tensors_reachable(sess, [init_op, ops])
+
+ @classmethod
+ def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
+ """Creates a random int32 tensor of the shape within the vocab size."""
+ if rng is None:
+ rng = random.Random()
+
+ total_dims = 1
+ for dim in shape:
+ total_dims *= dim
+
+ values = []
+ for _ in range(total_dims):
+ values.append(rng.randint(0, vocab_size - 1))
+
+ return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name)
+
+ def assert_all_tensors_reachable(self, sess, outputs):
+ """Checks that all the tensors in the graph are reachable from outputs."""
+ graph = sess.graph
+
+ ignore_strings = [
+ "^.*/assert_less_equal/.*$",
+ "^.*/dilation_rate$",
+ "^.*/Tensordot/concat$",
+ "^.*/Tensordot/concat/axis$",
+ "^testing/.*$",
+ ]
+
+ ignore_regexes = [re.compile(x) for x in ignore_strings]
+
+ unreachable = self.get_unreachable_ops(graph, outputs)
+ filtered_unreachable = []
+ for x in unreachable:
+ do_ignore = False
+ for r in ignore_regexes:
+ m = r.match(x.name)
+ if m is not None:
+ do_ignore = True
+ if do_ignore:
+ continue
+ filtered_unreachable.append(x)
+ unreachable = filtered_unreachable
+
+ self.assertEqual(
+ len(unreachable), 0, "The following ops are unreachable: %s" %
+ (" ".join([x.name for x in unreachable])))
+
+ @classmethod
+ def get_unreachable_ops(cls, graph, outputs):
+ """Finds all of the tensors in graph that are unreachable from outputs."""
+ outputs = cls.flatten_recursive(outputs)
+ output_to_op = collections.defaultdict(list)
+ op_to_all = collections.defaultdict(list)
+ assign_out_to_in = collections.defaultdict(list)
+
+ for op in graph.get_operations():
+ for x in op.inputs:
+ op_to_all[op.name].append(x.name)
+ for y in op.outputs:
+ output_to_op[y.name].append(op.name)
+ op_to_all[op.name].append(y.name)
+ if str(op.type) == "Assign":
+ for y in op.outputs:
+ for x in op.inputs:
+ assign_out_to_in[y.name].append(x.name)
+
+ assign_groups = collections.defaultdict(list)
+ for out_name in assign_out_to_in.keys():
+ name_group = assign_out_to_in[out_name]
+ for n1 in name_group:
+ assign_groups[n1].append(out_name)
+ for n2 in name_group:
+ if n1 != n2:
+ assign_groups[n1].append(n2)
+
+ seen_tensors = {}
+ stack = [x.name for x in outputs]
+ while stack:
+ name = stack.pop()
+ if name in seen_tensors:
+ continue
+ seen_tensors[name] = True
+
+ if name in output_to_op:
+ for op_name in output_to_op[name]:
+ if op_name in op_to_all:
+ for input_name in op_to_all[op_name]:
+ if input_name not in stack:
+ stack.append(input_name)
+
+ expanded_names = []
+ if name in assign_groups:
+ for assign_name in assign_groups[name]:
+ expanded_names.append(assign_name)
+
+ for expanded_name in expanded_names:
+ if expanded_name not in stack:
+ stack.append(expanded_name)
+
+ unreachable_ops = []
+ for op in graph.get_operations():
+ is_unreachable = False
+ all_names = [x.name for x in op.inputs] + [x.name for x in op.outputs]
+ for name in all_names:
+ if name not in seen_tensors:
+ is_unreachable = True
+ if is_unreachable:
+ unreachable_ops.append(op)
+ return unreachable_ops
+
+ @classmethod
+ def flatten_recursive(cls, item):
+ """Flattens (potentially nested) a tuple/dictionary/list to a list."""
+ output = []
+ if isinstance(item, list):
+ output.extend(item)
+ elif isinstance(item, tuple):
+ output.extend(list(item))
+ elif isinstance(item, dict):
+ for (_, v) in six.iteritems(item):
+ output.append(v)
+ else:
+ return [item]
+
+ flat_output = []
+ for x in output:
+ flat_output.extend(cls.flatten_recursive(x))
+ return flat_output
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modelzoo_level.txt b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 000000000..a3e2322b3
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,3 @@
+FuncStatus:OK
+PerfStatus:OK
+PrecisionStatus:OK
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization.py
new file mode 100644
index 000000000..bde1193d5
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization.py
@@ -0,0 +1,200 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions and classes related to optimization (weight updates)."""
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import re
+import tensorflow as tf
+import os
+####################NPU_modify start####################
+from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
+from npu_bridge.estimator.npu import npu_loss_scale_optimizer as lso
+from npu_bridge.estimator.npu import npu_loss_scale_manager as lsm_lib
+####################NPU_modify end######################
+
+# Default to single-device training if the launcher did not export RANK_SIZE.
+rank_size = int(os.getenv("RANK_SIZE", "1"))
+
+def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
+ """Creates an optimizer training op."""
+ global_step = tf.train.get_or_create_global_step()
+
+ learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)
+
+ # Implements linear decay of the learning rate.
+ learning_rate = tf.train.polynomial_decay(
+ learning_rate,
+ global_step,
+ num_train_steps,
+ end_learning_rate=0.0,
+ power=1.0,
+ cycle=False)
+
+ # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
+ # learning rate will be `global_step/num_warmup_steps * init_lr`.
+ if num_warmup_steps:
+ global_steps_int = tf.cast(global_step, tf.int32)
+ warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)
+
+ global_steps_float = tf.cast(global_steps_int, tf.float32)
+ warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
+
+ warmup_percent_done = global_steps_float / warmup_steps_float
+ warmup_learning_rate = init_lr * warmup_percent_done
+
+ is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
+ learning_rate = (
+ (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)
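+    # For example, with init_lr=5e-5 and num_warmup_steps=1000 the rate at
+    # step 100 is 5e-6; it ramps linearly to init_lr at step 1000, after
+    # which the polynomial decay above takes over.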
+
+ # It is recommended that you use this optimizer for fine tuning, since this
+ # is how the model was trained (note that the Adam m/v variables are NOT
+ # loaded from init_checkpoint.)
+ optimizer = AdamWeightDecayOptimizer(
+ learning_rate=learning_rate,
+ weight_decay_rate=0.01,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-6,
+ exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
+
+ if use_tpu:
+ optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
+
+ tvars = tf.trainable_variables()
+ if rank_size == 1:
+ grads = tf.gradients(loss, tvars)
+ else:
+ optimizer = NPUDistributedOptimizer(optimizer)
+ grads_and_vars = optimizer.compute_gradients(loss, tvars)
+ grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
+ grads, tvars = list(zip(*grads_and_vars))
+ # This is how the model was pre-trained.
+ (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
+
+ train_op = optimizer.apply_gradients(
+ zip(grads, tvars), global_step=global_step)
+
+ # Normally the global step update is done inside of `apply_gradients`.
+ # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use
+ # a different optimizer, you should probably take this line out.
+ new_global_step = global_step + 1
+ train_op = tf.group(train_op, [global_step.assign(new_global_step)])
+ return train_op
+
+
+class AdamWeightDecayOptimizer(tf.train.Optimizer):
+ """A basic Adam optimizer that includes "correct" L2 weight decay."""
+
+ def __init__(self,
+ learning_rate,
+ weight_decay_rate=0.0,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-6,
+ exclude_from_weight_decay=None,
+ name="AdamWeightDecayOptimizer"):
+    """Constructs an AdamWeightDecayOptimizer."""
+ super(AdamWeightDecayOptimizer, self).__init__(False, name)
+
+ self.learning_rate = learning_rate
+ self.weight_decay_rate = weight_decay_rate
+ self.beta_1 = beta_1
+ self.beta_2 = beta_2
+ self.epsilon = epsilon
+ self.exclude_from_weight_decay = exclude_from_weight_decay
+
+ def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+ """See base class."""
+ assignments = []
+ for (grad, param) in grads_and_vars:
+ if grad is None or param is None:
+ continue
+
+ param_name = self._get_variable_name(param.name)
+
+ m = tf.get_variable(
+ name=param_name + "/adam_m",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+ v = tf.get_variable(
+ name=param_name + "/adam_v",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+
+ # Standard Adam update.
+ next_m = (
+ tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
+ next_v = (
+ tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
+ tf.square(grad)))
+
+ update = next_m / (tf.sqrt(next_v) + self.epsilon)
+
+ # Just adding the square of the weights to the loss function is *not*
+ # the correct way of using L2 regularization/weight decay with Adam,
+ # since that will interact with the m and v parameters in strange ways.
+ #
+      # Instead we want to decay the weights in a manner that doesn't interact
+ # with the m/v parameters. This is equivalent to adding the square
+ # of the weights to the loss with plain (non-momentum) SGD.
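+      # In symbols, the decoupled update computed below is:
+      #   m_t = beta_1 * m + (1 - beta_1) * g
+      #   v_t = beta_2 * v + (1 - beta_2) * g^2
+      #   theta <- theta - lr * (m_t / (sqrt(v_t) + eps) + weight_decay * theta)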
+ if self._do_use_weight_decay(param_name):
+ update += self.weight_decay_rate * param
+
+ update_with_lr = self.learning_rate * update
+
+ next_param = param - update_with_lr
+
+ assignments.extend(
+ [param.assign(next_param),
+ m.assign(next_m),
+ v.assign(next_v)])
+ return tf.group(*assignments, name=name)
+
+ def _do_use_weight_decay(self, param_name):
+ """Whether to use L2 weight decay for `param_name`."""
+ if not self.weight_decay_rate:
+ return False
+ if self.exclude_from_weight_decay:
+ for r in self.exclude_from_weight_decay:
+ if re.search(r, param_name) is not None:
+ return False
+ return True
+
+ def _get_variable_name(self, param_name):
+ """Get the variable name from the tensor name."""
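+    # e.g. "bert/encoder/layer_0/output/dense/kernel:0" ->
+    # "bert/encoder/layer_0/output/dense/kernel".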
+ m = re.match("^(.*):\\d+$", param_name)
+ if m is not None:
+ param_name = m.group(1)
+ return param_name
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization_test.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization_test.py
new file mode 100644
index 000000000..4f2dcf133
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/optimization_test.py
@@ -0,0 +1,48 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import optimization
+import tensorflow as tf
+
+
+class OptimizationTest(tf.test.TestCase):
+
+ def test_adam(self):
+ with self.test_session() as sess:
+ w = tf.get_variable(
+ "w",
+ shape=[3],
+ initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
+ x = tf.constant([0.4, 0.2, -0.5])
+ loss = tf.reduce_mean(tf.square(x - w))
+ tvars = tf.trainable_variables()
+ grads = tf.gradients(loss, tvars)
+ global_step = tf.train.get_or_create_global_step()
+ optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
+ train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
+ init_op = tf.group(tf.global_variables_initializer(),
+ tf.local_variables_initializer())
+ sess.run(init_op)
+ for _ in range(100):
+ sess.run(train_op)
+ w_np = sess.run(w)
+ self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/requirements.txt b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/requirements.txt
new file mode 100644
index 000000000..3c7ff544b
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/requirements.txt
@@ -0,0 +1 @@
+tensorflow >= 1.11.0 # CPU Version of TensorFlow.
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run.sh
new file mode 100644
index 000000000..7db61b57e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run.sh
@@ -0,0 +1,46 @@
+export install_path=/usr/local/Ascend
+# driver package dependencies
+export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH # only needed when training inside a container
+export LD_LIBRARY_PATH=/usr/local/Ascend/add-ons:$LD_LIBRARY_PATH
+# fwkacllib package dependencies
+export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64:$LD_LIBRARY_PATH
+export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
+export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH
+# tfplugin package dependencies
+export PYTHONPATH=/usr/local/Ascend/tfplugin/python/site-packages:$PYTHONPATH
+# opp package dependencies
+export ASCEND_OPP_PATH=${install_path}/opp
+
+
+export JOB_ID=10086
+#export DEVICE_ID=0
+#export DEVICE_INDEX=0
+#export RANK_ID=0
+export RANK_SIZE=1
+
+#export ASCEND_SLOG_PRINT_TO_STDOUT=3
+#export ASCEND_LOG_DEVICE_FLUSH_TIMEOUT=0
+export ASCEND_GLOBAL_LOG_LEVEL=3
+
+export DUMP_GE_GRAPH=2
+export DUMP_GRAPH_LEVEL=3
+
+BERT_BASE_DIR=model
+SQUAD_DIR=dataset
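+# Expected layout: $BERT_BASE_DIR holds vocab.txt, bert_config.json and the
+# bert_model.ckpt* checkpoint files; $SQUAD_DIR holds train-v1.1.json and
+# dev-v1.1.json.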
+
+#rm -rf output
+
+python3 run_squad.py \
+ --vocab_file=$BERT_BASE_DIR/vocab.txt \
+ --bert_config_file=$BERT_BASE_DIR/bert_config.json \
+ --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
+ --do_train=True \
+ --train_file=$SQUAD_DIR/train-v1.1.json \
+ --do_predict=True \
+ --predict_file=$SQUAD_DIR/dev-v1.1.json \
+ --train_batch_size=32 \
+ --learning_rate=3e-5 \
+ --num_train_epochs=2.0 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+    --output_dir=output
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier.py
new file mode 100644
index 000000000..f84c6a6b8
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier.py
@@ -0,0 +1,981 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT finetuning runner."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import csv
+import os
+import modeling
+import optimization
+import tokenization
+import tensorflow as tf
+####################NPU_modify start####################
+# npu_ops.dropout is used in create_model below.
+from npu_bridge.estimator import npu_ops
+####################NPU_modify end######################
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ "data_dir", None,
+ "The input data dir. Should contain the .tsv files (or other data files) "
+ "for the task.")
+
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string("task_name", None, "The name of the task to train.")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_string(
+ "output_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_bool("do_train", False, "Whether to run training.")
+
+flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_bool(
+ "do_predict", False,
+ "Whether to run the model in inference mode on the test set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.")
+
+flags.DEFINE_integer("predict_batch_size", 8, "Total batch size for predict.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_float("num_train_epochs", 3.0,
+ "Total number of training epochs to perform.")
+
+flags.DEFINE_float(
+ "warmup_proportion", 0.1,
+ "Proportion of training to perform linear learning rate warmup for. "
+ "E.g., 0.1 = 10% of training.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("iterations_per_loop", 1000,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+tf.flags.DEFINE_string(
+ "tpu_name", None,
+ "The Cloud TPU to use for training. This should be either the name "
+ "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
+ "url.")
+
+tf.flags.DEFINE_string(
+    "tpu_zone", None,
+    "[Optional] GCE zone where the Cloud TPU is located. If not "
+    "specified, we will attempt to automatically detect the GCE zone from "
+    "metadata.")
+
+tf.flags.DEFINE_string(
+ "gcp_project", None,
+ "[Optional] Project name for the Cloud TPU-enabled project. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+
+class InputExample(object):
+ """A single training/test example for simple sequence classification."""
+
+ def __init__(self, guid, text_a, text_b=None, label=None):
+    """Constructs an InputExample.
+
+ Args:
+ guid: Unique id for the example.
+ text_a: string. The untokenized text of the first sequence. For single
+ sequence tasks, only this sequence must be specified.
+ text_b: (Optional) string. The untokenized text of the second sequence.
+ Only must be specified for sequence pair tasks.
+ label: (Optional) string. The label of the example. This should be
+ specified for train and dev examples, but not for test examples.
+ """
+ self.guid = guid
+ self.text_a = text_a
+ self.text_b = text_b
+ self.label = label
+
+
+class PaddingInputExample(object):
+  """Fake example so the number of examples is a multiple of the batch size.
+
+ When running eval/predict on the TPU, we need to pad the number of examples
+ to be a multiple of the batch size, because the TPU requires a fixed batch
+ size. The alternative is to drop the last batch, which is bad because it means
+ the entire output data won't be generated.
+
+ We use this class instead of `None` because treating `None` as padding
+  batches could cause silent errors.
+ """
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self,
+ input_ids,
+ input_mask,
+ segment_ids,
+ label_id,
+ is_real_example=True):
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.segment_ids = segment_ids
+ self.label_id = label_id
+ self.is_real_example = is_real_example
+
+
+class DataProcessor(object):
+ """Base class for data converters for sequence classification data sets."""
+
+ def get_train_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for the train set."""
+ raise NotImplementedError()
+
+ def get_dev_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for the dev set."""
+ raise NotImplementedError()
+
+ def get_test_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for prediction."""
+ raise NotImplementedError()
+
+ def get_labels(self):
+ """Gets the list of labels for this data set."""
+ raise NotImplementedError()
+
+ @classmethod
+ def _read_tsv(cls, input_file, quotechar=None):
+ """Reads a tab separated value file."""
+ with tf.gfile.Open(input_file, "r") as f:
+ reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
+ lines = []
+ for line in reader:
+ lines.append(line)
+ return lines
+
+
+class XnliProcessor(DataProcessor):
+ """Processor for the XNLI data set."""
+
+ def __init__(self):
+ self.language = "zh"
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ lines = self._read_tsv(
+ os.path.join(data_dir, "multinli",
+ "multinli.train.%s.tsv" % self.language))
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "train-%d" % (i)
+ text_a = tokenization.convert_to_unicode(line[0])
+ text_b = tokenization.convert_to_unicode(line[1])
+ label = tokenization.convert_to_unicode(line[2])
+ if label == tokenization.convert_to_unicode("contradictory"):
+ label = tokenization.convert_to_unicode("contradiction")
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv"))
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "dev-%d" % (i)
+ language = tokenization.convert_to_unicode(line[0])
+ if language != tokenization.convert_to_unicode(self.language):
+ continue
+ text_a = tokenization.convert_to_unicode(line[6])
+ text_b = tokenization.convert_to_unicode(line[7])
+ label = tokenization.convert_to_unicode(line[1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+ def get_labels(self):
+ """See base class."""
+ return ["contradiction", "entailment", "neutral"]
+
+
+class MnliProcessor(DataProcessor):
+ """Processor for the MultiNLI data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")),
+ "dev_matched")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["contradiction", "entailment", "neutral"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "%s-%s" % (set_type, tokenization.convert_to_unicode(line[0]))
+ text_a = tokenization.convert_to_unicode(line[8])
+ text_b = tokenization.convert_to_unicode(line[9])
+ if set_type == "test":
+ label = "contradiction"
+ else:
+ label = tokenization.convert_to_unicode(line[-1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+
+class MrpcProcessor(DataProcessor):
+ """Processor for the MRPC data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["0", "1"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "%s-%s" % (set_type, i)
+ text_a = tokenization.convert_to_unicode(line[3])
+ text_b = tokenization.convert_to_unicode(line[4])
+ if set_type == "test":
+ label = "0"
+ else:
+ label = tokenization.convert_to_unicode(line[0])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+
+class ColaProcessor(DataProcessor):
+ """Processor for the CoLA data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["0", "1"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ # Only the test set has a header
+ if set_type == "test" and i == 0:
+ continue
+ guid = "%s-%s" % (set_type, i)
+ if set_type == "test":
+ text_a = tokenization.convert_to_unicode(line[1])
+ label = "0"
+ else:
+ text_a = tokenization.convert_to_unicode(line[3])
+ label = tokenization.convert_to_unicode(line[1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
+ return examples
+
+
+def convert_single_example(ex_index, example, label_list, max_seq_length,
+ tokenizer):
+ """Converts a single `InputExample` into a single `InputFeatures`."""
+
+ if isinstance(example, PaddingInputExample):
+ return InputFeatures(
+ input_ids=[0] * max_seq_length,
+ input_mask=[0] * max_seq_length,
+ segment_ids=[0] * max_seq_length,
+ label_id=0,
+ is_real_example=False)
+
+ label_map = {}
+ for (i, label) in enumerate(label_list):
+ label_map[label] = i
+
+ tokens_a = tokenizer.tokenize(example.text_a)
+ tokens_b = None
+ if example.text_b:
+ tokens_b = tokenizer.tokenize(example.text_b)
+
+ if tokens_b:
+ # Modifies `tokens_a` and `tokens_b` in place so that the total
+ # length is less than the specified length.
+ # Account for [CLS], [SEP], [SEP] with "- 3"
+ _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+ else:
+ # Account for [CLS] and [SEP] with "- 2"
+ if len(tokens_a) > max_seq_length - 2:
+ tokens_a = tokens_a[0:(max_seq_length - 2)]
+
+ # The convention in BERT is:
+ # (a) For sequence pairs:
+ # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
+ # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
+ # (b) For single sequences:
+ # tokens: [CLS] the dog is hairy . [SEP]
+ # type_ids: 0 0 0 0 0 0 0
+ #
+ # Where "type_ids" are used to indicate whether this is the first
+ # sequence or the second sequence. The embedding vectors for `type=0` and
+ # `type=1` were learned during pre-training and are added to the wordpiece
+ # embedding vector (and position vector). This is not *strictly* necessary
+ # since the [SEP] token unambiguously separates the sequences, but it makes
+ # it easier for the model to learn the concept of sequences.
+ #
+ # For classification tasks, the first vector (corresponding to [CLS]) is
+ # used as the "sentence vector". Note that this only makes sense because
+ # the entire model is fine-tuned.
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ if tokens_b:
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ label_id = label_map[example.label]
+ if ex_index < 5:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("guid: %s" % (example.guid))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ tf.logging.info("label: %s (id = %d)" % (example.label, label_id))
+
+ feature = InputFeatures(
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ label_id=label_id,
+ is_real_example=True)
+ return feature
+
+
+def file_based_convert_examples_to_features(
+ examples, label_list, max_seq_length, tokenizer, output_file):
+ """Convert a set of `InputExample`s to a TFRecord file."""
+
+ writer = tf.python_io.TFRecordWriter(output_file)
+
+ for (ex_index, example) in enumerate(examples):
+ if ex_index % 10000 == 0:
+ tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+ feature = convert_single_example(ex_index, example, label_list,
+ max_seq_length, tokenizer)
+
+ def create_int_feature(values):
+ f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return f
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(feature.input_ids)
+ features["input_mask"] = create_int_feature(feature.input_mask)
+ features["segment_ids"] = create_int_feature(feature.segment_ids)
+ features["label_ids"] = create_int_feature([feature.label_id])
+ features["is_real_example"] = create_int_feature(
+ [int(feature.is_real_example)])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ writer.write(tf_example.SerializeToString())
+ writer.close()
+
+
+def file_based_input_fn_builder(input_file, seq_length, is_training,
+ drop_remainder):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ name_to_features = {
+ "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
+ "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ "label_ids": tf.FixedLenFeature([], tf.int64),
+ "is_real_example": tf.FixedLenFeature([], tf.int64),
+ }
+
+ def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
+ # So cast all int64 to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ d = tf.data.TFRecordDataset(input_file)
+ if is_training:
+ d = d.repeat()
+ d = d.shuffle(buffer_size=100)
+
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ drop_remainder=drop_remainder))
+
+ return d
+
+ return input_fn
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+ """Truncates a sequence pair in place to the maximum length."""
+
+ # This is a simple heuristic which will always truncate the longer sequence
+ # one token at a time. This makes more sense than truncating an equal percent
+ # of tokens from each, since if one sequence is very short then each token
+ # that's truncated likely contains more information than a longer sequence.
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_length:
+ break
+ if len(tokens_a) > len(tokens_b):
+ tokens_a.pop()
+ else:
+ tokens_b.pop()
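+  # For example, with max_length=8, a 6-token tokens_a and a 5-token tokens_b
+  # are trimmed to 4 and 4 tokens: the currently longer list loses one token
+  # per iteration (a: 6->5->5->4, b: 5->5->4->4).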
+
+
+def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
+ labels, num_labels, use_one_hot_embeddings):
+ """Creates a classification model."""
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ # In the demo, we are doing a simple classification task on the entire
+ # segment.
+ #
+ # If you want to use the token-level output, use model.get_sequence_output()
+ # instead.
+ output_layer = model.get_pooled_output()
+
+ hidden_size = output_layer.shape[-1].value
+
+ output_weights = tf.get_variable(
+ "output_weights", [num_labels, hidden_size],
+ initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+ output_bias = tf.get_variable(
+ "output_bias", [num_labels], initializer=tf.zeros_initializer())
+
+ with tf.variable_scope("loss"):
+ if is_training:
+ # I.e., 0.1 dropout
+ output_layer = npu_ops.dropout(output_layer, keep_prob=0.9)
+
+ logits = tf.matmul(output_layer, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ probabilities = tf.nn.softmax(logits, axis=-1)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
+
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+
+ return (loss, per_example_loss, logits, probabilities)
+
+
+def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ label_ids = features["label_ids"]
+ is_real_example = None
+ if "is_real_example" in features:
+ is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
+ else:
+ is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ (total_loss, per_example_loss, logits, probabilities) = create_model(
+ bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
+ num_labels, use_one_hot_embeddings)
+
+ tvars = tf.trainable_variables()
+ initialized_variable_names = {}
+ scaffold_fn = None
+ if init_checkpoint:
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op,
+ scaffold_fn=scaffold_fn)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(per_example_loss, label_ids, logits, is_real_example):
+ predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
+ accuracy = tf.metrics.accuracy(
+ labels=label_ids, predictions=predictions, weights=is_real_example)
+ loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
+ return {
+ "eval_accuracy": accuracy,
+ "eval_loss": loss,
+ }
+
+ eval_metrics = (metric_fn,
+ [per_example_loss, label_ids, logits, is_real_example])
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metrics=eval_metrics,
+ scaffold_fn=scaffold_fn)
+ else:
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ predictions={"probabilities": probabilities},
+ scaffold_fn=scaffold_fn)
+ return output_spec
+
+ return model_fn
+
+
+# This function is not used by this file but is still used by the Colab and
+# people who depend on it.
+def input_fn_builder(features, seq_length, is_training, drop_remainder):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ all_input_ids = []
+ all_input_mask = []
+ all_segment_ids = []
+ all_label_ids = []
+
+ for feature in features:
+ all_input_ids.append(feature.input_ids)
+ all_input_mask.append(feature.input_mask)
+ all_segment_ids.append(feature.segment_ids)
+ all_label_ids.append(feature.label_id)
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ num_examples = len(features)
+
+ # This is for demo purposes and does NOT scale to large data sets. We do
+ # not use Dataset.from_generator() because that uses tf.py_func which is
+ # not TPU compatible. The right way to load data is with TFRecordReader.
+ d = tf.data.Dataset.from_tensor_slices({
+ "input_ids":
+ tf.constant(
+ all_input_ids, shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_mask":
+ tf.constant(
+ all_input_mask,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "segment_ids":
+ tf.constant(
+ all_segment_ids,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "label_ids":
+ tf.constant(all_label_ids, shape=[num_examples], dtype=tf.int32),
+ })
+
+ if is_training:
+ d = d.repeat()
+ d = d.shuffle(buffer_size=100)
+
+ d = d.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+ return d
+
+ return input_fn
+
+
+# This function is not used by this file but is still used by the Colab and
+# people who depend on it.
+def convert_examples_to_features(examples, label_list, max_seq_length,
+ tokenizer):
+ """Convert a set of `InputExample`s to a list of `InputFeatures`."""
+
+ features = []
+ for (ex_index, example) in enumerate(examples):
+ if ex_index % 10000 == 0:
+ tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+ feature = convert_single_example(ex_index, example, label_list,
+ max_seq_length, tokenizer)
+
+ features.append(feature)
+ return features
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ processors = {
+ "cola": ColaProcessor,
+ "mnli": MnliProcessor,
+ "mrpc": MrpcProcessor,
+ "xnli": XnliProcessor,
+ }
+
+ tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
+ FLAGS.init_checkpoint)
+
+ if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
+ raise ValueError(
+        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ if FLAGS.max_seq_length > bert_config.max_position_embeddings:
+ raise ValueError(
+ "Cannot use sequence length %d because the BERT model "
+ "was only trained up to sequence length %d" %
+ (FLAGS.max_seq_length, bert_config.max_position_embeddings))
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ task_name = FLAGS.task_name.lower()
+
+ if task_name not in processors:
+ raise ValueError("Task not found: %s" % (task_name))
+
+ processor = processors[task_name]()
+
+ label_list = processor.get_labels()
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ cluster=tpu_cluster_resolver,
+ master=FLAGS.master,
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ train_examples = None
+ num_train_steps = None
+ num_warmup_steps = None
+ if FLAGS.do_train:
+ train_examples = processor.get_train_examples(FLAGS.data_dir)
+ num_train_steps = int(
+ len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
+ num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ num_labels=len(label_list),
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_tpu)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ train_batch_size=FLAGS.train_batch_size,
+ eval_batch_size=FLAGS.eval_batch_size,
+ predict_batch_size=FLAGS.predict_batch_size)
+
+ if FLAGS.do_train:
+ train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
+ file_based_convert_examples_to_features(
+ train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Num examples = %d", len(train_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ tf.logging.info(" Num steps = %d", num_train_steps)
+ train_input_fn = file_based_input_fn_builder(
+ input_file=train_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=True,
+ drop_remainder=True)
+ estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+
+ if FLAGS.do_eval:
+ eval_examples = processor.get_dev_examples(FLAGS.data_dir)
+ num_actual_eval_examples = len(eval_examples)
+ if FLAGS.use_tpu:
+ # TPU requires a fixed batch size for all batches, therefore the number
+ # of examples must be a multiple of the batch size, or else examples
+ # will get dropped. So we pad with fake examples which are ignored
+ # later on. These do NOT count towards the metric (all tf.metrics
+ # support a per-instance weight, and these get a weight of 0.0).
+ while len(eval_examples) % FLAGS.eval_batch_size != 0:
+ eval_examples.append(PaddingInputExample())
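+      # e.g., 103 eval examples with eval_batch_size=8 are padded with one
+      # PaddingInputExample to reach 104 = 13 full batches.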
+
+ eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
+ file_based_convert_examples_to_features(
+ eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
+
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Num examples = %d (%d actual, %d padding)",
+ len(eval_examples), num_actual_eval_examples,
+ len(eval_examples) - num_actual_eval_examples)
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ # This tells the estimator to run through the entire set.
+ eval_steps = None
+ # However, if running eval on the TPU, you will need to specify the
+ # number of steps.
+ if FLAGS.use_tpu:
+ assert len(eval_examples) % FLAGS.eval_batch_size == 0
+ eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
+
+ eval_drop_remainder = True if FLAGS.use_tpu else False
+ eval_input_fn = file_based_input_fn_builder(
+ input_file=eval_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=eval_drop_remainder)
+
+ result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+ if FLAGS.do_predict:
+ predict_examples = processor.get_test_examples(FLAGS.data_dir)
+ num_actual_predict_examples = len(predict_examples)
+ if FLAGS.use_tpu:
+ # TPU requires a fixed batch size for all batches, therefore the number
+ # of examples must be a multiple of the batch size, or else examples
+ # will get dropped. So we pad with fake examples which are ignored
+ # later on.
+ while len(predict_examples) % FLAGS.predict_batch_size != 0:
+ predict_examples.append(PaddingInputExample())
+
+ predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
+ file_based_convert_examples_to_features(predict_examples, label_list,
+ FLAGS.max_seq_length, tokenizer,
+ predict_file)
+
+    tf.logging.info("***** Running prediction *****")
+ tf.logging.info(" Num examples = %d (%d actual, %d padding)",
+ len(predict_examples), num_actual_predict_examples,
+ len(predict_examples) - num_actual_predict_examples)
+ tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
+
+ predict_drop_remainder = True if FLAGS.use_tpu else False
+ predict_input_fn = file_based_input_fn_builder(
+ input_file=predict_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=predict_drop_remainder)
+
+ result = estimator.predict(input_fn=predict_input_fn)
+
+ output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
+ with tf.gfile.GFile(output_predict_file, "w") as writer:
+ num_written_lines = 0
+ tf.logging.info("***** Predict results *****")
+ for (i, prediction) in enumerate(result):
+ probabilities = prediction["probabilities"]
+ if i >= num_actual_predict_examples:
+ break
+ output_line = "\t".join(
+ str(class_probability)
+ for class_probability in probabilities) + "\n"
+ writer.write(output_line)
+ num_written_lines += 1
+ assert num_written_lines == num_actual_predict_examples
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("data_dir")
+ flags.mark_flag_as_required("task_name")
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier_with_tfhub.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier_with_tfhub.py
new file mode 100644
index 000000000..9d2f80f6b
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_classifier_with_tfhub.py
@@ -0,0 +1,314 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT finetuning runner with TF-Hub."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import optimization
+import run_classifier
+import tokenization
+import tensorflow as tf
+import tensorflow_hub as hub
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string(
+ "bert_hub_module_handle", None,
+ "Handle for the BERT TF-Hub module.")
+
+
+def create_model(is_training, input_ids, input_mask, segment_ids, labels,
+ num_labels, bert_hub_module_handle):
+ """Creates a classification model."""
+ tags = set()
+ if is_training:
+ tags.add("train")
+ bert_module = hub.Module(bert_hub_module_handle, tags=tags, trainable=True)
+ bert_inputs = dict(
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids)
+ bert_outputs = bert_module(
+ inputs=bert_inputs,
+ signature="tokens",
+ as_dict=True)
+
+ # In the demo, we are doing a simple classification task on the entire
+ # segment.
+ #
+ # If you want to use the token-level output, use
+ # bert_outputs["sequence_output"] instead.
+ output_layer = bert_outputs["pooled_output"]
+
+ hidden_size = output_layer.shape[-1].value
+
+ output_weights = tf.get_variable(
+ "output_weights", [num_labels, hidden_size],
+ initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+ output_bias = tf.get_variable(
+ "output_bias", [num_labels], initializer=tf.zeros_initializer())
+
+ with tf.variable_scope("loss"):
+ if is_training:
+ # I.e., 0.1 dropout
+ output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
+
+ logits = tf.matmul(output_layer, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ probabilities = tf.nn.softmax(logits, axis=-1)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
+
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+
+ return (loss, per_example_loss, logits, probabilities)
+
+
+def model_fn_builder(num_labels, learning_rate, num_train_steps,
+ num_warmup_steps, use_tpu, bert_hub_module_handle):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ label_ids = features["label_ids"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ (total_loss, per_example_loss, logits, probabilities) = create_model(
+ is_training, input_ids, input_mask, segment_ids, label_ids, num_labels,
+ bert_hub_module_handle)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(per_example_loss, label_ids, logits):
+ predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
+ accuracy = tf.metrics.accuracy(label_ids, predictions)
+ loss = tf.metrics.mean(per_example_loss)
+ return {
+ "eval_accuracy": accuracy,
+ "eval_loss": loss,
+ }
+
+ eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metrics=eval_metrics)
+ elif mode == tf.estimator.ModeKeys.PREDICT:
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode, predictions={"probabilities": probabilities})
+ else:
+ raise ValueError(
+ "Only TRAIN, EVAL and PREDICT modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def create_tokenizer_from_hub_module(bert_hub_module_handle):
+ """Get the vocab file and casing info from the Hub module."""
+ with tf.Graph().as_default():
+ bert_module = hub.Module(bert_hub_module_handle)
+ tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
+ with tf.Session() as sess:
+ vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
+ tokenization_info["do_lower_case"]])
+ return tokenization.FullTokenizer(
+ vocab_file=vocab_file, do_lower_case=do_lower_case)
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ processors = {
+ "cola": run_classifier.ColaProcessor,
+ "mnli": run_classifier.MnliProcessor,
+ "mrpc": run_classifier.MrpcProcessor,
+ }
+
+ if not FLAGS.do_train and not FLAGS.do_eval:
+ raise ValueError("At least one of `do_train` or `do_eval` must be True.")
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ task_name = FLAGS.task_name.lower()
+
+ if task_name not in processors:
+ raise ValueError("Task not found: %s" % (task_name))
+
+ processor = processors[task_name]()
+
+ label_list = processor.get_labels()
+
+ tokenizer = create_tokenizer_from_hub_module(FLAGS.bert_hub_module_handle)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ cluster=tpu_cluster_resolver,
+ master=FLAGS.master,
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ train_examples = None
+ num_train_steps = None
+ num_warmup_steps = None
+ if FLAGS.do_train:
+ train_examples = processor.get_train_examples(FLAGS.data_dir)
+ num_train_steps = int(
+ len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
+ num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
+
+ model_fn = model_fn_builder(
+ num_labels=len(label_list),
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ bert_hub_module_handle=FLAGS.bert_hub_module_handle)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ train_batch_size=FLAGS.train_batch_size,
+ eval_batch_size=FLAGS.eval_batch_size,
+ predict_batch_size=FLAGS.predict_batch_size)
+
+ if FLAGS.do_train:
+ train_features = run_classifier.convert_examples_to_features(
+ train_examples, label_list, FLAGS.max_seq_length, tokenizer)
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Num examples = %d", len(train_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ tf.logging.info(" Num steps = %d", num_train_steps)
+ train_input_fn = run_classifier.input_fn_builder(
+ features=train_features,
+ seq_length=FLAGS.max_seq_length,
+ is_training=True,
+ drop_remainder=True)
+ estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+
+ if FLAGS.do_eval:
+ eval_examples = processor.get_dev_examples(FLAGS.data_dir)
+ eval_features = run_classifier.convert_examples_to_features(
+ eval_examples, label_list, FLAGS.max_seq_length, tokenizer)
+
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Num examples = %d", len(eval_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ # This tells the estimator to run through the entire set.
+ eval_steps = None
+ # However, if running eval on the TPU, you will need to specify the
+ # number of steps.
+ if FLAGS.use_tpu:
+ # Eval will be slightly WRONG on the TPU because it will truncate
+ # the last batch.
+ eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
+
+ eval_drop_remainder = True if FLAGS.use_tpu else False
+ eval_input_fn = run_classifier.input_fn_builder(
+ features=eval_features,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=eval_drop_remainder)
+
+ result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+ if FLAGS.do_predict:
+ predict_examples = processor.get_test_examples(FLAGS.data_dir)
+ if FLAGS.use_tpu:
+ # Discard batch remainder if running on TPU
+ n = len(predict_examples)
+ predict_examples = predict_examples[:(n - n % FLAGS.predict_batch_size)]
+
+ predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
+ run_classifier.file_based_convert_examples_to_features(
+ predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
+ predict_file)
+
+    tf.logging.info("***** Running prediction *****")
+ tf.logging.info(" Num examples = %d", len(predict_examples))
+ tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
+
+ predict_input_fn = run_classifier.file_based_input_fn_builder(
+ input_file=predict_file,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=FLAGS.use_tpu)
+
+ result = estimator.predict(input_fn=predict_input_fn)
+
+ output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
+ with tf.gfile.GFile(output_predict_file, "w") as writer:
+ tf.logging.info("***** Predict results *****")
+ for prediction in result:
+ probabilities = prediction["probabilities"]
+ output_line = "\t".join(
+ str(class_probability)
+ for class_probability in probabilities) + "\n"
+ writer.write(output_line)
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("data_dir")
+ flags.mark_flag_as_required("task_name")
+ flags.mark_flag_as_required("bert_hub_module_handle")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_pretraining.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_pretraining.py
new file mode 100644
index 000000000..2c4ded524
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_pretraining.py
@@ -0,0 +1,676 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Run masked LM/next sentence masked_lm pre-training for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import modeling
+import optimization
+import tensorflow as tf
+
+####################NPU_modify start####################
+import time
+from utils.utils import LogEvalRunHook
+from gpu_environment import get_custom_getter
+
+from npu_bridge.estimator.npu.npu_config import *
+from npu_bridge.estimator.npu.npu_estimator import *
+from npu_bridge.estimator.npu.npu_config import NPURunConfig
+from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
+
+os.environ['WHICH_OP'] = 'GEOP'
+os.environ['NEW_GE_FE_ID'] = '1'
+os.environ['GE_AICPU_FLAG'] = '1'
+os.environ['GE_USE_STATIC_MEMORY'] = '1'
+os.environ['OPTION_EXEC_HCCL_FLAG'] = '1'
+os.environ['HCCL_CONNECT_TIMEOUT'] = '600'
+####################NPU_modify end######################
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string(
+ "input_file", None,
+ "Input TF example files (can be a glob or comma separated).")
+
+flags.DEFINE_string(
+ "output_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded. Must match data generation.")
+
+flags.DEFINE_integer(
+ "max_predictions_per_seq", 20,
+ "Maximum number of masked LM predictions per sequence. "
+ "Must match data generation.")
+
+flags.DEFINE_bool("do_train", False, "Whether to run training.")
+
+flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_integer("num_train_steps", 100000, "Number of training steps.")
+
+flags.DEFINE_integer("num_warmup_steps", 10000, "Number of warmup steps.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("iterations_per_loop", 1000,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_integer("max_eval_steps", 100, "Maximum number of eval steps.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+tf.flags.DEFINE_string(
+ "tpu_name", None,
+ "The Cloud TPU to use for training. This should be either the name "
+ "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
+ "url.")
+
+tf.flags.DEFINE_string(
+    "tpu_zone", None,
+    "[Optional] GCE zone where the Cloud TPU is located. If not "
+    "specified, we will attempt to automatically detect the GCE zone from "
+    "metadata.")
+
+tf.flags.DEFINE_string(
+ "gcp_project", None,
+ "[Optional] Project name for the Cloud TPU-enabled project. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+####################NPU_modify start####################
+flags.DEFINE_bool("manual_fp16", False, "Whether to use fp32 or fp16 arithmetic on GPU. "
+ "Manual casting is done instead of using AMP")
+
+flags.DEFINE_bool("use_fp16", False, "Whether to enable AMP ops.")
+
+flags.DEFINE_integer("display_loss_steps", 10, "How often to print loss")
+
+flags.DEFINE_bool("report_loss", True, "Whether to report total loss during training.")
+
+flags.DEFINE_bool("distributed", False, "Whether to use multi-npu")
+
+flags.DEFINE_bool("use_fp16_cls", False, "Whether to use fp16 in cls and pooler.")
+
+flags.DEFINE_bool('npu_bert_fused_gelu', True, "Whether to use npu defined gelu op.")
+
+flags.DEFINE_bool('npu_bert_debug', False, "If True, dropout and dataset shuffling are disabled.")
+
+flags.DEFINE_bool('npu_bert_npu_dropout', True, 'Whether to use the NPU-defined dropout op.')
+
+flags.DEFINE_bool('customer_bert_gather', False, 'Whether to use the custom-defined gather op.')
+
+flags.DEFINE_integer("npu_bert_loss_scale", -1, "Whether to use loss scale, -1 is disable, 0 is dynamic loss scale, >=1 is static loss scale")
+
+flags.DEFINE_integer('init_loss_scale_value', 2**32, 'Initial loss scale value for loss scale optimizer')
+
+flags.DEFINE_bool("npu_bert_clip_by_global_norm", True, "Use clip_by_global_norm if True, or use clip_by_norm for each gradient")
+
+flags.DEFINE_bool('npu_bert_use_tdt', True, 'Whether to feed the dataset through TDT (tensor data transfer).')
+
+flags.DEFINE_bool('hcom_parallel', True, 'Whether to use parallel allreduce')
+
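+# Example invocation (hypothetical paths, for illustration only):
+#   python run_pretraining.py \
+#     --input_file=/data/pretrain_tfrecords \
+#     --output_dir=/tmp/pretrain_out \
+#     --bert_config_file=configs/bert_base_config.json \
+#     --do_train=True \
+#     --train_batch_size=32 \
+#     --use_fp16=True \
+#     --npu_bert_loss_scale=0
+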
+class _LogSessionRunHook(tf.train.SessionRunHook):
+ def __init__(self, global_batch_size, display_every=10):
+ self.global_batch_size = global_batch_size
+ self.display_every = display_every
+ def after_create_session(self, session, coord):
+ self.elapsed_secs = 0.
+ self.count = 0
+ self.all_count = 0
+ self.avg_loss = 0.0
+
+ def before_run(self, run_context):
+ self.t0 = time.time()
+
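+    # Fetch tensors by graph name; these match the tf.identity(..., name=...)
+    # ops added in model_fn ('total_loss', 'nsp_loss', 'mlm_loss') and the
+    # loss-scale tensors created by the optimizer.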
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ return tf.train.SessionRunArgs(
+ fetches=['global_step:0', 'total_loss:0',
+ 'learning_rate:0', 'nsp_loss:0',
+                   'mlm_loss:0', 'loss_scale:0', 'apply_grads/overflow_status_reduce_all:0'])
+ else:
+ return tf.train.SessionRunArgs(
+ fetches=['global_step:0', 'total_loss:0',
+ 'learning_rate:0', 'nsp_loss:0',
+ 'mlm_loss:0'])
+
+ def after_run(self, run_context, run_values):
+ self.elapsed_secs += time.time() - self.t0
+
+    if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+      global_step, total_loss, lr, nsp_loss, mlm_loss, loss_scaler, custom_arg = run_values.results
+    else:
+      global_step, total_loss, lr, nsp_loss, mlm_loss = run_values.results
+    update_step = True
+
+ print_step = global_step + 1 # One-based index for printing.
+ self.avg_loss += total_loss
+ self.all_count += 1
+ if update_step:
+ self.count += 1
+ dt = self.elapsed_secs / self.count
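+      # Each session.run spans FLAGS.iterations_per_loop device steps, so
+      # throughput is global_batch_size * iterations_per_loop / elapsed time.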
+ sent_per_sec = self.global_batch_size / dt * FLAGS.iterations_per_loop
+ avg_loss_step = self.avg_loss / self.all_count
+
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e isFinite = %6i' %
+ (print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler, custom_arg), flush=True)
+ else:
+ print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e' %
+ (print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr), flush=True)
+ self.elapsed_secs = 0.
+ self.count = 0
+ self.avg_loss = 0.0
+ self.all_count = 0
+
+####################NPU_modify end######################
+
+def model_fn_builder(bert_config, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ masked_lm_positions = features["masked_lm_positions"]
+ masked_lm_ids = features["masked_lm_ids"]
+ masked_lm_weights = features["masked_lm_weights"]
+ next_sentence_labels = features["next_sentence_labels"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings,
+ compute_type=tf.float16 if FLAGS.manual_fp16 else tf.float32)
+
+ (masked_lm_loss,
+ masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
+ bert_config, model.get_sequence_output(), model.get_embedding_table(),
+ masked_lm_positions, masked_lm_ids, masked_lm_weights)
+
+ (next_sentence_loss, next_sentence_example_loss,
+ next_sentence_log_probs) = get_next_sentence_output(
+ bert_config, model.get_pooled_output(), next_sentence_labels)
+
+ ####################NPU_modify start####################
+ masked_lm_loss = tf.identity(masked_lm_loss, name="mlm_loss")
+ next_sentence_loss = tf.identity(next_sentence_loss, name="nsp_loss")
+ ####################NPU_modify end######################
+
+ total_loss = masked_lm_loss + next_sentence_loss
+
+ ####################NPU_modify start####################
+ total_loss = tf.identity(total_loss, name='total_loss')
+ ####################NPU_modify end######################
+
+ tvars = tf.trainable_variables()
+
+ initialized_variable_names = {}
+ scaffold_fn = None
+ if init_checkpoint:
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, FLAGS.manual_fp16, use_tpu)
+
+ ####################NPU_modify start####################
+ if not use_tpu:
+ output_spec = tf.estimator.EstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op)
+ else:
+ ####################NPU_modify end######################
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op,
+ scaffold_fn=scaffold_fn)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
+ masked_lm_weights, next_sentence_example_loss,
+ next_sentence_log_probs, next_sentence_labels):
+ """Computes the loss and accuracy of the model."""
+ masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
+ [-1, masked_lm_log_probs.shape[-1]])
+ masked_lm_predictions = tf.argmax(
+ masked_lm_log_probs, axis=-1, output_type=tf.int32)
+ masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
+ masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
+ masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
+ masked_lm_accuracy = tf.metrics.accuracy(
+ labels=masked_lm_ids,
+ predictions=masked_lm_predictions,
+ weights=masked_lm_weights)
+ masked_lm_mean_loss = tf.metrics.mean(
+ values=masked_lm_example_loss, weights=masked_lm_weights)
+
+ next_sentence_log_probs = tf.reshape(
+ next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
+ next_sentence_predictions = tf.argmax(
+ next_sentence_log_probs, axis=-1, output_type=tf.int32)
+ next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
+ next_sentence_accuracy = tf.metrics.accuracy(
+ labels=next_sentence_labels, predictions=next_sentence_predictions)
+ next_sentence_mean_loss = tf.metrics.mean(
+ values=next_sentence_example_loss)
+
+ return {
+ "masked_lm_accuracy": masked_lm_accuracy,
+ "masked_lm_loss": masked_lm_mean_loss,
+ "next_sentence_accuracy": next_sentence_accuracy,
+ "next_sentence_loss": next_sentence_mean_loss,
+ }
+
+ eval_metrics = (metric_fn, [
+ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
+ masked_lm_weights, next_sentence_example_loss,
+ next_sentence_log_probs, next_sentence_labels
+ ])
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metrics=eval_metrics,
+ scaffold_fn=scaffold_fn)
+ else:
+ raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
+ label_ids, label_weights):
+ """Get loss and log probs for the masked LM."""
+ input_tensor = gather_indexes(input_tensor, positions)
+
+ with tf.variable_scope("cls/predictions"):
+ # We apply one more non-linear transformation before the output layer.
+ # This matrix is not used after pre-training.
+ with tf.variable_scope("transform", custom_getter=get_custom_getter(compute_type=tf.float16 if FLAGS.use_fp16_cls else tf.float32)):
+ ####################NPU_modify start####################
+ if FLAGS.use_fp16_cls:
+ input_tensor = tf.cast(input_tensor, tf.float16)
+ ####################NPU_modify end######################
+ input_tensor = tf.layers.dense(
+ input_tensor,
+ units=bert_config.hidden_size,
+ activation=modeling.get_activation(bert_config.hidden_act),
+ kernel_initializer=modeling.create_initializer(
+ bert_config.initializer_range))
+ ####################NPU_modify start####################
+ input_tensor = tf.cast(input_tensor, tf.float32)
+ ####################NPU_modify end######################
+ input_tensor = modeling.layer_norm(input_tensor)
+
+ # The output weights are the same as the input embeddings, but there is
+ # an output-only bias for each token.
+ output_bias = tf.get_variable(
+ "output_bias",
+ shape=[bert_config.vocab_size],
+ initializer=tf.zeros_initializer())
+ ####################NPU_modify start####################
+ if FLAGS.use_fp16_cls:
+ input_tensor = tf.cast(input_tensor, tf.float16)
+ logits = tf.matmul(input_tensor, tf.cast(output_weights, tf.float16), transpose_b=True)
+ logits = tf.cast(logits, tf.float32)
+ else:
+ ####################NPU_modify end######################
+ logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
+
+ logits = tf.nn.bias_add(logits, output_bias)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ label_ids = tf.reshape(label_ids, [-1])
+ label_weights = tf.reshape(label_weights, [-1])
+
+ one_hot_labels = tf.one_hot(
+ label_ids, depth=bert_config.vocab_size, dtype=tf.float32)
+
+ # The `positions` tensor might be zero-padded (if the sequence is too
+ # short to have the maximum number of predictions). The `label_weights`
+ # tensor has a value of 1.0 for every real prediction and 0.0 for the
+ # padding predictions.
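+    # E.g. with 20 prediction slots of which 18 are real (weight 1.0) and 2
+    # are padding (weight 0.0), the loss is the sum of the 18 real
+    # per-example losses divided by 18; the 1e-5 guards against an
+    # all-padding denominator of zero.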
+ per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
+ numerator = tf.reduce_sum(label_weights * per_example_loss)
+ denominator = tf.reduce_sum(label_weights) + 1e-5
+ loss = numerator / denominator
+
+ return (loss, per_example_loss, log_probs)
+
+
+def get_next_sentence_output(bert_config, input_tensor, labels):
+ """Get loss and log probs for the next sentence prediction."""
+
+ # Simple binary classification. Note that 0 is "next sentence" and 1 is
+ # "random sentence". This weight matrix is not used after pre-training.
+ with tf.variable_scope("cls/seq_relationship"):
+ output_weights = tf.get_variable(
+ "output_weights",
+ shape=[2, bert_config.hidden_size],
+ initializer=modeling.create_initializer(bert_config.initializer_range))
+ output_bias = tf.get_variable(
+ "output_bias", shape=[2], initializer=tf.zeros_initializer())
+
+ ####################NPU_modify start####################
+ if FLAGS.use_fp16_cls:
+ input_tensor = tf.cast(input_tensor, tf.float16)
+ logits = tf.matmul(input_tensor, tf.cast(output_weights, tf.float16), transpose_b=True)
+ logits = tf.cast(logits, tf.float32)
+ else:
+ ####################NPU_modify end######################
+ logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+ labels = tf.reshape(labels, [-1])
+ one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+ return (loss, per_example_loss, log_probs)
+
+
+def gather_indexes(sequence_tensor, positions):
+ """Gathers the vectors at the specific positions over a minibatch."""
+ sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3)
+ batch_size = sequence_shape[0]
+ seq_length = sequence_shape[1]
+ width = sequence_shape[2]
+
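+  # Worked example: batch_size=2, seq_length=4, positions=[[1, 3], [0, 2]]
+  # gives flat_offsets=[[0], [4]] and flat_positions=[1, 3, 4, 6], i.e. rows
+  # 1 and 3 of example 0 and rows 0 and 2 of example 1 in the flattened
+  # [batch_size * seq_length, width] tensor.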
+ flat_offsets = tf.reshape(
+ tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
+ flat_positions = tf.reshape(positions + flat_offsets, [-1])
+ flat_sequence_tensor = tf.reshape(sequence_tensor,
+ [batch_size * seq_length, width])
+ output_tensor = tf.gather(flat_sequence_tensor, flat_positions)
+ return output_tensor
+
+
+def input_fn_builder(input_files,
+ batch_size,
+ max_seq_length,
+ max_predictions_per_seq,
+ is_training,
+ num_cpu_threads=4):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ def input_fn(params):
+ """The actual input function."""
+
+ name_to_features = {
+ "input_ids":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "input_mask":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "segment_ids":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "masked_lm_positions":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
+ "masked_lm_ids":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
+ "masked_lm_weights":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.float32),
+ "next_sentence_labels":
+ tf.FixedLenFeature([1], tf.int64),
+ }
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ if is_training:
+ d = tf.data.Dataset.from_tensor_slices(tf.constant(input_files))
+ ####################NPU_modify start####################
+ if FLAGS.distributed:
+ rank_size = int(os.getenv('RANK_SIZE'))
+ local_rank = int(os.getenv('RANK_ID'))
+ tf.logging.info("RANK_SIZE=%d, local_rank=%d", rank_size, local_rank)
+ d = d.shard(rank_size, local_rank)
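+        # E.g. with RANK_SIZE=8 and RANK_ID=3, this device reads files
+        # 3, 11, 19, ... so each rank trains on a disjoint slice of the data.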
+ ####################NPU_modify end######################
+ d = d.repeat()
+ d = d.shuffle(buffer_size=len(input_files))
+
+ # `cycle_length` is the number of parallel files that get read.
+ ####################NPU_modify start####################
+ if not FLAGS.npu_bert_debug:
+ cycle_length = min(num_cpu_threads, int(len(input_files) / int(os.getenv('RANK_SIZE'))))
+ else:
+ cycle_length = 1
+ ####################NPU_modify end######################
+
+      # The deprecated tf.contrib.data.parallel_interleave (with `sloppy`
+      # interleaving) was replaced with Dataset.interleave; AUTOTUNE picks
+      # the parallelism for reading the shards.
+      d = d.interleave(
+          tf.data.TFRecordDataset,
+          cycle_length=cycle_length,
+          num_parallel_calls=tf.data.experimental.AUTOTUNE)
+ d = d.shuffle(buffer_size=100)
+ else:
+ d = tf.data.TFRecordDataset(input_files)
+ # Since we evaluate for a fixed number of steps we don't want to encounter
+ # out-of-range exceptions.
+ d = d.repeat()
+
+ # We must `drop_remainder` on training because the TPU requires fixed
+ # size dimensions. For eval, we assume we are evaluating on the CPU or GPU
+    # and we *don't* want to drop the remainder, otherwise we won't cover
+ # every sample.
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ num_parallel_batches=num_cpu_threads,
+ drop_remainder=True))
+ return d
+
+ return input_fn
+
+
+def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
+ # So cast all int64 to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ if not FLAGS.do_train and not FLAGS.do_eval:
+ raise ValueError("At least one of `do_train` or `do_eval` must be True.")
+
+ ####################NPU_modify start####################
+ if FLAGS.use_fp16:
+ os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"
+ ####################NPU_modify end######################
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ input_files = []
+ for input_pattern in FLAGS.input_file.split(","):
+ ####################NPU_modify start####################
+ input_files.extend(tf.gfile.Glob(os.path.join(input_pattern, "*")))
+
+ input_files.sort()
+ ####################NPU_modify end######################
+
+
+ tf.logging.info("*** Input Files ***")
+ for input_file in input_files:
+ tf.logging.info(" %s" % input_file)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ ####################NPU_modify start####################
+ config = tf.ConfigProto()
+
+ run_config = NPURunConfig(
+ model_dir=FLAGS.output_dir,
+ save_summary_steps=0,
+ session_config=config,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ log_step_count_steps=1 if FLAGS.report_loss else 100,
+ enable_data_pre_proc=FLAGS.npu_bert_use_tdt,
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ hcom_parallel=FLAGS.hcom_parallel)
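+  # With iterations_per_loop=N the device executes N training steps per
+  # Estimator session call, so host-side hooks and logs observe only every
+  # Nth global step.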
+
+ if FLAGS.distributed:
+ rank_size = int(os.getenv('RANK_SIZE'))
+
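+  # Linear learning-rate scaling: with rank_size devices the effective global
+  # batch is rank_size times larger, so the base learning rate is multiplied
+  # by rank_size to match.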
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate if not (FLAGS.distributed) else FLAGS.learning_rate*rank_size,
+ num_train_steps=FLAGS.num_train_steps,
+ num_warmup_steps=FLAGS.num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_tpu)
+
+ training_hooks = []
+
+ if FLAGS.report_loss:
+ global_batch_size = FLAGS.train_batch_size if not FLAGS.distributed else FLAGS.train_batch_size * rank_size
+ training_hooks.append(
+ _LogSessionRunHook(global_batch_size, FLAGS.display_loss_steps))
+
+
+ estimator = NPUEstimator(model_fn=model_fn, config=run_config)
+ ####################NPU_modify end######################
+
+ if FLAGS.do_train:
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ train_input_fn = input_fn_builder(
+ input_files=input_files,
+ batch_size=FLAGS.train_batch_size,
+ max_seq_length=FLAGS.max_seq_length,
+ max_predictions_per_seq=FLAGS.max_predictions_per_seq,
+ is_training=True)
+ estimator.train(input_fn=train_input_fn, hooks=training_hooks, max_steps=FLAGS.num_train_steps)
+
+ if FLAGS.do_eval:
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ eval_input_fn = input_fn_builder(
+ input_files=input_files,
+ batch_size=FLAGS.eval_batch_size,
+ max_seq_length=FLAGS.max_seq_length,
+ max_predictions_per_seq=FLAGS.max_predictions_per_seq,
+ is_training=False)
+
+ result = estimator.evaluate(
+ input_fn=eval_input_fn, steps=FLAGS.max_eval_steps)
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
new file mode 100644
index 000000000..11126636d
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
@@ -0,0 +1,1333 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Run BERT on SQuAD 1.1 and SQuAD 2.0."""
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import math
+import os
+import random
+import modeling
+import optimization
+import tokenization
+import six
+import tensorflow as tf
+from npu_bridge.estimator.npu.npu_config import NPURunConfig
+from npu_bridge.estimator import npu_ops
+from npu_bridge.estimator.npu.npu_estimator import NPUEstimator, NPUEstimatorSpec
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+rank_size = int(os.getenv("RANK_SIZE", "1"))
+rank_id = int(os.getenv("RANK_ID", "0"))
+
+## Required parameters
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_string(
+ "output_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+flags.DEFINE_string("train_file", None,
+ "SQuAD json for training. E.g., train-v1.1.json")
+
+flags.DEFINE_string(
+ "predict_file", None,
+ "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 384,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_integer(
+ "doc_stride", 128,
+ "When splitting up a long document into chunks, how much stride to "
+ "take between chunks.")
+
+flags.DEFINE_integer(
+ "max_query_length", 64,
+ "The maximum number of tokens for the question. Questions longer than "
+ "this will be truncated to this length.")
+
+flags.DEFINE_bool("do_train", True, "Whether to run training.")
+
+flags.DEFINE_bool("do_predict", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("predict_batch_size", 32,
+ "Total batch size for predictions.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_float("num_train_epochs", 3.0,
+ "Total number of training epochs to perform.")
+
+flags.DEFINE_float(
+ "warmup_proportion", 0.1,
+ "Proportion of training to perform linear learning rate warmup for. "
+ "E.g., 0.1 = 10% of training.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("num_train_steps", 0,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("iterations_per_loop", 100,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_integer(
+ "n_best_size", 20,
+ "The total number of n-best predictions to generate in the "
+ "nbest_predictions.json output file.")
+
+flags.DEFINE_integer(
+ "max_answer_length", 30,
+ "The maximum length of an answer that can be generated. This is needed "
+ "because the start and end predictions are not conditioned on one another.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+tf.flags.DEFINE_string(
+ "tpu_name", None,
+ "The Cloud TPU to use for training. This should be either the name "
+ "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
+ "url.")
+
+tf.flags.DEFINE_string(
+ "tpu_zone", None,
+ "[Optional] GCE zone where the Cloud TPU is located in. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string(
+ "gcp_project", None,
+ "[Optional] Project name for the Cloud TPU-enabled project. If not "
+ "specified, we will attempt to automatically detect the GCE project from "
+ "metadata.")
+
+tf.flags.DEFINE_string("master", None, "[Optional] TensorFlow master URL.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+flags.DEFINE_bool(
+ "verbose_logging", False,
+ "If true, all of the warnings related to data processing will be printed. "
+ "A number of warnings are expected for a normal SQuAD evaluation.")
+
+flags.DEFINE_bool(
+ "version_2_with_negative", False,
+ "If true, the SQuAD examples contain some that do not have an answer.")
+
+flags.DEFINE_float(
+ "null_score_diff_threshold", 0.0,
+ "If null_score - best_non_null is greater than the threshold predict null.")
+
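+# Example invocation (hypothetical paths, for illustration only):
+#   python run_squad.py \
+#     --vocab_file=vocab.txt \
+#     --bert_config_file=bert_config.json \
+#     --init_checkpoint=bert_model.ckpt \
+#     --do_train=True --train_file=train-v1.1.json \
+#     --do_predict=True --predict_file=dev-v1.1.json \
+#     --max_seq_length=384 --doc_stride=128 \
+#     --output_dir=/tmp/squad_out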
+
+class SquadExample(object):
+  """A single training/test example for the SQuAD dataset.
+
+ For examples without an answer, the start and end position are -1.
+ """
+
+ def __init__(self,
+ qas_id,
+ question_text,
+ doc_tokens,
+ orig_answer_text=None,
+ start_position=None,
+ end_position=None,
+ is_impossible=False):
+ self.qas_id = qas_id
+ self.question_text = question_text
+ self.doc_tokens = doc_tokens
+ self.orig_answer_text = orig_answer_text
+ self.start_position = start_position
+ self.end_position = end_position
+ self.is_impossible = is_impossible
+
+ def __str__(self):
+ return self.__repr__()
+
+ def __repr__(self):
+ s = ""
+ s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
+ s += ", question_text: %s" % (
+ tokenization.printable_text(self.question_text))
+ s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
+    if self.start_position:
+      s += ", start_position: %d" % (self.start_position)
+    if self.end_position:
+      s += ", end_position: %d" % (self.end_position)
+    if self.is_impossible:
+      s += ", is_impossible: %r" % (self.is_impossible)
+ return s
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self,
+ unique_id,
+ example_index,
+ doc_span_index,
+ tokens,
+ token_to_orig_map,
+ token_is_max_context,
+ input_ids,
+ input_mask,
+ segment_ids,
+ start_position=None,
+ end_position=None,
+ is_impossible=None):
+ self.unique_id = unique_id
+ self.example_index = example_index
+ self.doc_span_index = doc_span_index
+ self.tokens = tokens
+ self.token_to_orig_map = token_to_orig_map
+ self.token_is_max_context = token_is_max_context
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.segment_ids = segment_ids
+ self.start_position = start_position
+ self.end_position = end_position
+ self.is_impossible = is_impossible
+
+
+def read_squad_examples(input_file, is_training):
+ """Read a SQuAD json file into a list of SquadExample."""
+ with tf.gfile.Open(input_file, "r") as reader:
+ input_data = json.load(reader)["data"]
+
+ def is_whitespace(c):
+ if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
+ return True
+ return False
+
+ examples = []
+ for entry in input_data:
+ for paragraph in entry["paragraphs"]:
+ paragraph_text = paragraph["context"]
+ doc_tokens = []
+ char_to_word_offset = []
+ prev_is_whitespace = True
+ for c in paragraph_text:
+ if is_whitespace(c):
+ prev_is_whitespace = True
+ else:
+ if prev_is_whitespace:
+ doc_tokens.append(c)
+ else:
+ doc_tokens[-1] += c
+ prev_is_whitespace = False
+ char_to_word_offset.append(len(doc_tokens) - 1)
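+      # E.g. "John Smith" -> doc_tokens=["John", "Smith"] and
+      # char_to_word_offset=[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]: every character,
+      # including the space, maps to its containing (or preceding) token.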
+
+ for qa in paragraph["qas"]:
+ qas_id = qa["id"]
+ question_text = qa["question"]
+ start_position = None
+ end_position = None
+ orig_answer_text = None
+ is_impossible = False
+ if is_training:
+
+ if FLAGS.version_2_with_negative:
+ is_impossible = qa["is_impossible"]
+ if (len(qa["answers"]) != 1) and (not is_impossible):
+ raise ValueError(
+ "For training, each question should have exactly 1 answer.")
+ if not is_impossible:
+ answer = qa["answers"][0]
+ orig_answer_text = answer["text"]
+ answer_offset = answer["answer_start"]
+ answer_length = len(orig_answer_text)
+ start_position = char_to_word_offset[answer_offset]
+ end_position = char_to_word_offset[answer_offset + answer_length -
+ 1]
+ # Only add answers where the text can be exactly recovered from the
+ # document. If this CAN'T happen it's likely due to weird Unicode
+ # stuff so we will just skip the example.
+ #
+ # Note that this means for training mode, every example is NOT
+ # guaranteed to be preserved.
+ actual_text = " ".join(
+ doc_tokens[start_position:(end_position + 1)])
+ cleaned_answer_text = " ".join(
+ tokenization.whitespace_tokenize(orig_answer_text))
+ if actual_text.find(cleaned_answer_text) == -1:
+ tf.logging.warning("Could not find answer: '%s' vs. '%s'",
+ actual_text, cleaned_answer_text)
+ continue
+ else:
+ start_position = -1
+ end_position = -1
+ orig_answer_text = ""
+
+ example = SquadExample(
+ qas_id=qas_id,
+ question_text=question_text,
+ doc_tokens=doc_tokens,
+ orig_answer_text=orig_answer_text,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=is_impossible)
+ examples.append(example)
+
+ return examples
+
+
+def convert_examples_to_features(examples, tokenizer, max_seq_length,
+ doc_stride, max_query_length, is_training,
+ output_fn):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ unique_id = 1000000000
+
+ for (example_index, example) in enumerate(examples):
+ query_tokens = tokenizer.tokenize(example.question_text)
+
+ if len(query_tokens) > max_query_length:
+ query_tokens = query_tokens[0:max_query_length]
+
+ tok_to_orig_index = []
+ orig_to_tok_index = []
+ all_doc_tokens = []
+ for (i, token) in enumerate(example.doc_tokens):
+ orig_to_tok_index.append(len(all_doc_tokens))
+ sub_tokens = tokenizer.tokenize(token)
+ for sub_token in sub_tokens:
+ tok_to_orig_index.append(i)
+ all_doc_tokens.append(sub_token)
+
+ tok_start_position = None
+ tok_end_position = None
+ if is_training and example.is_impossible:
+ tok_start_position = -1
+ tok_end_position = -1
+ if is_training and not example.is_impossible:
+ tok_start_position = orig_to_tok_index[example.start_position]
+ if example.end_position < len(example.doc_tokens) - 1:
+ tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+ else:
+ tok_end_position = len(all_doc_tokens) - 1
+ (tok_start_position, tok_end_position) = _improve_answer_span(
+ all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
+ example.orig_answer_text)
+
+ # The -3 accounts for [CLS], [SEP] and [SEP]
+ max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
+
+ # We can have documents that are longer than the maximum sequence length.
+ # To deal with this we do a sliding window approach, where we take chunks
+ # of the up to our max length with a stride of `doc_stride`.
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name
+ "DocSpan", ["start", "length"])
+ doc_spans = []
+ start_offset = 0
+ while start_offset < len(all_doc_tokens):
+ length = len(all_doc_tokens) - start_offset
+ if length > max_tokens_for_doc:
+ length = max_tokens_for_doc
+ doc_spans.append(_DocSpan(start=start_offset, length=length))
+ if start_offset + length == len(all_doc_tokens):
+ break
+ start_offset += min(length, doc_stride)
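+    # E.g. 500 document tokens with max_tokens_for_doc=253 and doc_stride=128
+    # yield overlapping spans (start=0, length=253), (128, 253), (256, 244).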
+
+ for (doc_span_index, doc_span) in enumerate(doc_spans):
+ tokens = []
+ token_to_orig_map = {}
+ token_is_max_context = {}
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in query_tokens:
+ tokens.append(token)
+ segment_ids.append(0)
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for i in range(doc_span.length):
+ split_token_index = doc_span.start + i
+ token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
+
+ is_max_context = _check_is_max_context(doc_spans, doc_span_index,
+ split_token_index)
+ token_is_max_context[len(tokens)] = is_max_context
+ tokens.append(all_doc_tokens[split_token_index])
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ start_position = None
+ end_position = None
+ if is_training and not example.is_impossible:
+ # For training, if our document chunk does not contain an annotation
+ # we throw it out, since there is nothing to predict.
+ doc_start = doc_span.start
+ doc_end = doc_span.start + doc_span.length - 1
+ out_of_span = False
+ if not (tok_start_position >= doc_start and
+ tok_end_position <= doc_end):
+ out_of_span = True
+ if out_of_span:
+ start_position = 0
+ end_position = 0
+ else:
+ doc_offset = len(query_tokens) + 2
+ start_position = tok_start_position - doc_start + doc_offset
+ end_position = tok_end_position - doc_start + doc_offset
+
+ if is_training and example.is_impossible:
+ start_position = 0
+ end_position = 0
+
+ if example_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (unique_id))
+ tf.logging.info("example_index: %s" % (example_index))
+ tf.logging.info("doc_span_index: %s" % (doc_span_index))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("token_to_orig_map: %s" % " ".join(
+ ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
+ tf.logging.info("token_is_max_context: %s" % " ".join([
+ "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
+ ]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info(
+ "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ if is_training and example.is_impossible:
+ tf.logging.info("impossible example")
+ if is_training and not example.is_impossible:
+ answer_text = " ".join(tokens[start_position:(end_position + 1)])
+ tf.logging.info("start_position: %d" % (start_position))
+ tf.logging.info("end_position: %d" % (end_position))
+ tf.logging.info(
+ "answer: %s" % (tokenization.printable_text(answer_text)))
+
+ feature = InputFeatures(
+ unique_id=unique_id,
+ example_index=example_index,
+ doc_span_index=doc_span_index,
+ tokens=tokens,
+ token_to_orig_map=token_to_orig_map,
+ token_is_max_context=token_is_max_context,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=example.is_impossible)
+
+ # Run callback
+ output_fn(feature)
+
+ unique_id += 1
+
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+ orig_answer_text):
+ """Returns tokenized answer spans that better match the annotated answer."""
+
+ # The SQuAD annotations are character based. We first project them to
+ # whitespace-tokenized words. But then after WordPiece tokenization, we can
+ # often find a "better match". For example:
+ #
+ # Question: What year was John Smith born?
+ # Context: The leader was John Smith (1895-1943).
+ # Answer: 1895
+ #
+ # The original whitespace-tokenized answer will be "(1895-1943).". However
+ # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+ # the exact answer, 1895.
+ #
+ # However, this is not always possible. Consider the following:
+ #
+  #   Question: What country is the top exporter of electronics?
+  #   Context: The Japanese electronics industry is the largest in the world.
+ # Answer: Japan
+ #
+ # In this case, the annotator chose "Japan" as a character sub-span of
+ # the word "Japanese". Since our WordPiece tokenizer does not split
+ # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+ # in SQuAD, but does happen.
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
+
+ for new_start in range(input_start, input_end + 1):
+ for new_end in range(input_end, new_start - 1, -1):
+ text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
+ if text_span == tok_answer_text:
+ return (new_start, new_end)
+
+ return (input_start, input_end)
+
+
+def _check_is_max_context(doc_spans, cur_span_index, position):
+ """Check if this is the 'max context' doc span for the token."""
+
+ # Because of the sliding window approach taken to scoring documents, a single
+ # token can appear in multiple documents. E.g.
+ # Doc: the man went to the store and bought a gallon of milk
+ # Span A: the man went to the
+ # Span B: to the store and bought
+ # Span C: and bought a gallon of
+ # ...
+ #
+ # Now the word 'bought' will have two scores from spans B and C. We only
+ # want to consider the score with "maximum context", which we define as
+ # the *minimum* of its left and right context (the *sum* of left and
+ # right context will always be the same, of course).
+ #
+ # In the example the maximum context for 'bought' would be span C since
+ # it has 1 left context and 3 right context, while span B has 4 left context
+ # and 0 right context.
+ best_score = None
+ best_span_index = None
+ for (span_index, doc_span) in enumerate(doc_spans):
+ end = doc_span.start + doc_span.length - 1
+ if position < doc_span.start:
+ continue
+ if position > end:
+ continue
+ num_left_context = position - doc_span.start
+ num_right_context = end - position
+ score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
+ if best_score is None or score > best_score:
+ best_score = score
+ best_span_index = span_index
+
+ return cur_span_index == best_span_index
+
+
+def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
+ use_one_hot_embeddings):
+  """Creates a SQuAD span-prediction model (start/end logits) on top of BERT."""
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ final_hidden = model.get_sequence_output()
+
+ final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
+ batch_size = final_hidden_shape[0]
+ seq_length = final_hidden_shape[1]
+ hidden_size = final_hidden_shape[2]
+
+ output_weights = tf.get_variable(
+ "cls/squad/output_weights", [2, hidden_size],
+ initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+ output_bias = tf.get_variable(
+ "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())
+
+ final_hidden_matrix = tf.reshape(final_hidden,
+ [batch_size * seq_length, hidden_size])
+ logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+
+ logits = tf.reshape(logits, [batch_size, seq_length, 2])
+ logits = tf.transpose(logits, [2, 0, 1])
+
+ unstacked_logits = tf.unstack(logits, axis=0)
+
+ (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])
+
+ return (start_logits, end_logits)
+
+
+def model_fn_builder(bert_config, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ unique_ids = features["unique_ids"]
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ (start_logits, end_logits) = create_model(
+ bert_config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ tvars = tf.trainable_variables()
+
+ initialized_variable_names = {}
+ scaffold_fn = None
+ if init_checkpoint:
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ seq_length = modeling.get_shape_list(input_ids)[1]
+
+ def compute_loss(logits, positions):
+ one_hot_positions = tf.one_hot(
+ positions, depth=seq_length, dtype=tf.float32)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+ loss = -tf.reduce_mean(
+ tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
+ return loss
+
+ start_positions = features["start_positions"]
+ end_positions = features["end_positions"]
+
+ start_loss = compute_loss(start_logits, start_positions)
+ end_loss = compute_loss(end_logits, end_positions)
+
+ total_loss = (start_loss + end_loss) / 2.0
+
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
+
+ output_spec = NPUEstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op,
+ scaffold=scaffold_fn)
+ elif mode == tf.estimator.ModeKeys.PREDICT:
+ predictions = {
+ "unique_ids": unique_ids,
+ "start_logits": start_logits,
+ "end_logits": end_logits,
+ }
+ output_spec = NPUEstimatorSpec(
+ mode=mode, predictions=predictions, scaffold=scaffold_fn)
+ else:
+ raise ValueError(
+ "Only TRAIN and PREDICT modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def input_fn_builder(input_file, seq_length, is_training, drop_remainder=True):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ name_to_features = {
+ "unique_ids": tf.FixedLenFeature([], tf.int64),
+ "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
+ "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
+ }
+
+ if is_training:
+ name_to_features["start_positions"] = tf.FixedLenFeature([], tf.int64)
+ name_to_features["end_positions"] = tf.FixedLenFeature([], tf.int64)
+
+ def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
+ # So cast all int64 to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ d = tf.data.TFRecordDataset(input_file)
+ if is_training:
+ d = d.repeat()
+ if rank_size > 1:
+ d = d.shard(rank_size, rank_id)
+ d = d.shuffle(buffer_size=100)
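+    # drop_remainder keeps batch shapes static (needed to build a fixed NPU
+    # graph); at predict time the final partial batch is therefore dropped,
+    # which write_predictions compensates for via max_unique_id.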
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ drop_remainder=drop_remainder))
+
+ return d
+
+ return input_fn
+
+
+RawResult = collections.namedtuple("RawResult",
+ ["unique_id", "start_logits", "end_logits"])
+
+
+def write_predictions(all_examples, all_features, all_results, n_best_size,
+ max_answer_length, do_lower_case, output_prediction_file,
+ output_nbest_file, output_null_log_odds_file):
+ """Write final predictions to the json file and log-odds of null if needed."""
+ tf.logging.info("Writing predictions to: %s" % (output_prediction_file))
+ tf.logging.info("Writing nbest to: %s" % (output_nbest_file))
+
+ example_index_to_features = collections.defaultdict(list)
+ for feature in all_features:
+ example_index_to_features[feature.example_index].append(feature)
+
+ unique_id_to_result = {}
+ for result in all_results:
+ unique_id_to_result[result.unique_id] = result
+
+  # Predictions can be missing for the last features when the input pipeline
+  # drops the final partial batch, so ignore features beyond the largest
+  # unique_id that has a result.
+  max_unique_id = all_results[-1].unique_id
+  tf.logging.info("max_unique_id=%d" % max_unique_id)
+
+ _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
+ "PrelimPrediction",
+ ["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
+
+ all_predictions = collections.OrderedDict()
+ all_nbest_json = collections.OrderedDict()
+ scores_diff_json = collections.OrderedDict()
+
+ for (example_index, example) in enumerate(all_examples):
+ features = example_index_to_features[example_index]
+
+ prelim_predictions = []
+ # keep track of the minimum score of null start+end of position 0
+ score_null = 1000000 # large and positive
+    min_null_feature_index = 0  # the paragraph slice with min null score
+ null_start_logit = 0 # the start logit at the slice with min null score
+ null_end_logit = 0 # the end logit at the slice with min null score
+ for (feature_index, feature) in enumerate(features):
+ if feature.unique_id > max_unique_id:
+ continue
+ result = unique_id_to_result[feature.unique_id]
+ start_indexes = _get_best_indexes(result.start_logits, n_best_size)
+ end_indexes = _get_best_indexes(result.end_logits, n_best_size)
+ # if we could have irrelevant answers, get the min score of irrelevant
+ if FLAGS.version_2_with_negative:
+ feature_null_score = result.start_logits[0] + result.end_logits[0]
+ if feature_null_score < score_null:
+ score_null = feature_null_score
+ min_null_feature_index = feature_index
+ null_start_logit = result.start_logits[0]
+ null_end_logit = result.end_logits[0]
+ for start_index in start_indexes:
+ for end_index in end_indexes:
+ # We could hypothetically create invalid predictions, e.g., predict
+ # that the start of the span is in the question. We throw out all
+ # invalid predictions.
+ if start_index >= len(feature.tokens):
+ continue
+ if end_index >= len(feature.tokens):
+ continue
+ if start_index not in feature.token_to_orig_map:
+ continue
+ if end_index not in feature.token_to_orig_map:
+ continue
+ if not feature.token_is_max_context.get(start_index, False):
+ continue
+ if end_index < start_index:
+ continue
+ length = end_index - start_index + 1
+ if length > max_answer_length:
+ continue
+ prelim_predictions.append(
+ _PrelimPrediction(
+ feature_index=feature_index,
+ start_index=start_index,
+ end_index=end_index,
+ start_logit=result.start_logits[start_index],
+ end_logit=result.end_logits[end_index]))
+
+ if FLAGS.version_2_with_negative:
+ prelim_predictions.append(
+ _PrelimPrediction(
+ feature_index=min_null_feature_index,
+ start_index=0,
+ end_index=0,
+ start_logit=null_start_logit,
+ end_logit=null_end_logit))
+ prelim_predictions = sorted(
+ prelim_predictions,
+ key=lambda x: (x.start_logit + x.end_logit),
+ reverse=True)
+
+ _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
+ "NbestPrediction", ["text", "start_logit", "end_logit"])
+
+ seen_predictions = {}
+ nbest = []
+ for pred in prelim_predictions:
+ if len(nbest) >= n_best_size:
+ break
+ feature = features[pred.feature_index]
+ if pred.start_index > 0: # this is a non-null prediction
+ tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
+ orig_doc_start = feature.token_to_orig_map[pred.start_index]
+ orig_doc_end = feature.token_to_orig_map[pred.end_index]
+ orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
+ tok_text = " ".join(tok_tokens)
+
+ # De-tokenize WordPieces that have been split off.
+ tok_text = tok_text.replace(" ##", "")
+ tok_text = tok_text.replace("##", "")
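+        # E.g. "john ##son" -> "johnson"; WordPiece continuation markers are
+        # removed so the text can be aligned with the original document.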
+
+ # Clean whitespace
+ tok_text = tok_text.strip()
+ tok_text = " ".join(tok_text.split())
+ orig_text = " ".join(orig_tokens)
+
+ final_text = get_final_text(tok_text, orig_text, do_lower_case)
+ if final_text in seen_predictions:
+ continue
+
+ seen_predictions[final_text] = True
+ else:
+ final_text = ""
+ seen_predictions[final_text] = True
+
+ nbest.append(
+ _NbestPrediction(
+ text=final_text,
+ start_logit=pred.start_logit,
+ end_logit=pred.end_logit))
+
+    # if we didn't include the empty option in the n-best, include it
+ if FLAGS.version_2_with_negative:
+ if "" not in seen_predictions:
+ nbest.append(
+ _NbestPrediction(
+ text="", start_logit=null_start_logit,
+ end_logit=null_end_logit))
+ # In very rare edge cases we could have no valid predictions. So we
+ # just create a nonce prediction in this case to avoid failure.
+ if not nbest:
+ nbest.append(
+ _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
+
+ assert len(nbest) >= 1
+
+ total_scores = []
+ best_non_null_entry = None
+ for entry in nbest:
+ total_scores.append(entry.start_logit + entry.end_logit)
+ if not best_non_null_entry:
+ if entry.text:
+ best_non_null_entry = entry
+
+ probs = _compute_softmax(total_scores)
+
+ nbest_json = []
+ for (i, entry) in enumerate(nbest):
+ output = collections.OrderedDict()
+ output["text"] = entry.text
+ output["probability"] = probs[i]
+ output["start_logit"] = entry.start_logit
+ output["end_logit"] = entry.end_logit
+ nbest_json.append(output)
+
+ assert len(nbest_json) >= 1
+
+ if not FLAGS.version_2_with_negative:
+ all_predictions[example.qas_id] = nbest_json[0]["text"]
+ else:
+ # predict "" iff the null score - the score of best non-null > threshold
+ score_diff = score_null - best_non_null_entry.start_logit - (
+ best_non_null_entry.end_logit)
+ scores_diff_json[example.qas_id] = score_diff
+ if score_diff > FLAGS.null_score_diff_threshold:
+ all_predictions[example.qas_id] = ""
+ else:
+ all_predictions[example.qas_id] = best_non_null_entry.text
+
+ all_nbest_json[example.qas_id] = nbest_json
+
+ with tf.gfile.GFile(output_prediction_file, "w") as writer:
+ writer.write(json.dumps(all_predictions, indent=4) + "\n")
+
+ with tf.gfile.GFile(output_nbest_file, "w") as writer:
+ writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
+
+ if FLAGS.version_2_with_negative:
+ with tf.gfile.GFile(output_null_log_odds_file, "w") as writer:
+ writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
+
+
+def get_final_text(pred_text, orig_text, do_lower_case):
+ """Project the tokenized prediction back to the original text."""
+
+ # When we created the data, we kept track of the alignment between original
+ # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
+ # now `orig_text` contains the span of our original text corresponding to the
+ # span that we predicted.
+ #
+ # However, `orig_text` may contain extra characters that we don't want in
+ # our prediction.
+ #
+ # For example, let's say:
+ # pred_text = steve smith
+ # orig_text = Steve Smith's
+ #
+ # We don't want to return `orig_text` because it contains the extra "'s".
+ #
+ # We don't want to return `pred_text` because it's already been normalized
+ # (the SQuAD eval script also does punctuation stripping/lower casing but
+ # our tokenizer does additional normalization like stripping accent
+ # characters).
+ #
+ # What we really want to return is "Steve Smith".
+ #
+  # Therefore, we have to apply a semi-complicated alignment heuristic between
+  # `pred_text` and `orig_text` to get a character-to-character alignment. This
+ # can fail in certain cases in which case we just return `orig_text`.
+
+ def _strip_spaces(text):
+ ns_chars = []
+ ns_to_s_map = collections.OrderedDict()
+ for (i, c) in enumerate(text):
+ if c == " ":
+ continue
+ ns_to_s_map[len(ns_chars)] = i
+ ns_chars.append(c)
+ ns_text = "".join(ns_chars)
+ return (ns_text, ns_to_s_map)
+
+ # We first tokenize `orig_text`, strip whitespace from the result
+ # and `pred_text`, and check if they are the same length. If they are
+ # NOT the same length, the heuristic has failed. If they are the same
+ # length, we assume the characters are one-to-one aligned.
+ tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case)
+
+ tok_text = " ".join(tokenizer.tokenize(orig_text))
+
+ start_position = tok_text.find(pred_text)
+ if start_position == -1:
+ if FLAGS.verbose_logging:
+ tf.logging.info(
+ "Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
+ return orig_text
+ end_position = start_position + len(pred_text) - 1
+
+ (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
+ (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)
+
+ if len(orig_ns_text) != len(tok_ns_text):
+ if FLAGS.verbose_logging:
+ tf.logging.info("Length not equal after stripping spaces: '%s' vs '%s'",
+ orig_ns_text, tok_ns_text)
+ return orig_text
+
+ # We then project the characters in `pred_text` back to `orig_text` using
+ # the character-to-character alignment.
+ tok_s_to_ns_map = {}
+ for (i, tok_index) in six.iteritems(tok_ns_to_s_map):
+ tok_s_to_ns_map[tok_index] = i
+
+ orig_start_position = None
+ if start_position in tok_s_to_ns_map:
+ ns_start_position = tok_s_to_ns_map[start_position]
+ if ns_start_position in orig_ns_to_s_map:
+ orig_start_position = orig_ns_to_s_map[ns_start_position]
+
+ if orig_start_position is None:
+ if FLAGS.verbose_logging:
+ tf.logging.info("Couldn't map start position")
+ return orig_text
+
+ orig_end_position = None
+ if end_position in tok_s_to_ns_map:
+ ns_end_position = tok_s_to_ns_map[end_position]
+ if ns_end_position in orig_ns_to_s_map:
+ orig_end_position = orig_ns_to_s_map[ns_end_position]
+
+ if orig_end_position is None:
+ if FLAGS.verbose_logging:
+ tf.logging.info("Couldn't map end position")
+ return orig_text
+
+ output_text = orig_text[orig_start_position:(orig_end_position + 1)]
+ return output_text
+
+
+def _get_best_indexes(logits, n_best_size):
+ """Get the n-best logits from a list."""
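+  # For example, _get_best_indexes([0.1, 0.5, 0.3], n_best_size=2) -> [1, 2]:
+  # indexes sorted by logit, highest first, truncated to n_best_size entries.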
+ index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
+
+ best_indexes = []
+ for i in range(len(index_and_score)):
+ if i >= n_best_size:
+ break
+ best_indexes.append(index_and_score[i][0])
+ return best_indexes
+
+
+def _compute_softmax(scores):
+ """Compute softmax probability over raw logits."""
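+  # The running max is subtracted before exponentiation for numerical
+  # stability; e.g. _compute_softmax([1.0, 1.0]) returns [0.5, 0.5].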
+ if not scores:
+ return []
+
+ max_score = None
+ for score in scores:
+ if max_score is None or score > max_score:
+ max_score = score
+
+ exp_scores = []
+ total_sum = 0.0
+ for score in scores:
+ x = math.exp(score - max_score)
+ exp_scores.append(x)
+ total_sum += x
+
+ probs = []
+ for score in exp_scores:
+ probs.append(score / total_sum)
+ return probs
+
+
+class FeatureWriter(object):
+ """Writes InputFeature to TF example file."""
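+
+  # Typical usage (sketch): stream each InputFeature into a TFRecord file,
+  # then close the writer.
+  #   writer = FeatureWriter(filename="train.tf_record", is_training=True)
+  #   writer.process_feature(feature)  # once per feature
+  #   writer.close()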
+
+ def __init__(self, filename, is_training):
+ self.filename = filename
+ self.is_training = is_training
+ self.num_features = 0
+ self._writer = tf.python_io.TFRecordWriter(filename)
+
+ def process_feature(self, feature):
+    """Write an InputFeature to the TFRecordWriter as a tf.train.Example."""
+ self.num_features += 1
+
+ def create_int_feature(values):
+ feature = tf.train.Feature(
+ int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+ features = collections.OrderedDict()
+ features["unique_ids"] = create_int_feature([feature.unique_id])
+ features["input_ids"] = create_int_feature(feature.input_ids)
+ features["input_mask"] = create_int_feature(feature.input_mask)
+ features["segment_ids"] = create_int_feature(feature.segment_ids)
+
+ if self.is_training:
+ features["start_positions"] = create_int_feature([feature.start_position])
+ features["end_positions"] = create_int_feature([feature.end_position])
+ impossible = 0
+ if feature.is_impossible:
+ impossible = 1
+ features["is_impossible"] = create_int_feature([impossible])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ self._writer.write(tf_example.SerializeToString())
+
+ def close(self):
+ self._writer.close()
+
+
+def validate_flags_or_throw(bert_config):
+ """Validate the input FLAGS or throw an exception."""
+ tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
+ FLAGS.init_checkpoint)
+
+ if not FLAGS.do_train and not FLAGS.do_predict:
+ raise ValueError("At least one of `do_train` or `do_predict` must be True.")
+
+ if FLAGS.do_train:
+ if not FLAGS.train_file:
+ raise ValueError(
+ "If `do_train` is True, then `train_file` must be specified.")
+ if FLAGS.do_predict:
+ if not FLAGS.predict_file:
+ raise ValueError(
+ "If `do_predict` is True, then `predict_file` must be specified.")
+
+ if FLAGS.max_seq_length > bert_config.max_position_embeddings:
+ raise ValueError(
+ "Cannot use sequence length %d because the BERT model "
+ "was only trained up to sequence length %d" %
+ (FLAGS.max_seq_length, bert_config.max_position_embeddings))
+
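+  # The input is packed as [CLS] query [SEP] document [SEP]; the "+ 3" below
+  # reserves room for those three special tokens so that at least one document
+  # token can still fit.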
+ if FLAGS.max_seq_length <= FLAGS.max_query_length + 3:
+ raise ValueError(
+ "The max_seq_length (%d) must be greater than max_query_length "
+ "(%d) + 3" % (FLAGS.max_seq_length, FLAGS.max_query_length))
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ validate_flags_or_throw(bert_config)
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ tpu_cluster_resolver = None
+ if FLAGS.use_tpu and FLAGS.tpu_name:
+ tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+ FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ #run_config = tf.contrib.tpu.RunConfig(
+ # cluster=tpu_cluster_resolver,
+ # master=FLAGS.master,
+ # model_dir=FLAGS.output_dir,
+ # save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ # tpu_config=tf.contrib.tpu.TPUConfig(
+ # iterations_per_loop=FLAGS.iterations_per_loop,
+ # num_shards=FLAGS.num_tpu_cores,
+ # per_host_input_for_training=is_per_host))
+
+ config = tf.ConfigProto(
+ inter_op_parallelism_threads=0,
+ intra_op_parallelism_threads=0,
+ allow_soft_placement=True)
+  # config.gpu_options.allow_growth = True
+
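+  # NPURunConfig comes from the Ascend NPU TensorFlow adapter and takes the
+  # place of the TPU RunConfig commented out above; "allow_mix_precision"
+  # enables automatic mixed precision on the NPU.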
+ run_config = NPURunConfig(
+ model_dir=FLAGS.output_dir,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps,
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ session_config=config,
+ precision_mode="allow_mix_precision",
+ keep_checkpoint_max=5)
+
+ train_examples = None
+ num_train_steps = FLAGS.num_train_steps
+ num_warmup_steps = None
+ if FLAGS.do_train:
+ train_examples = read_squad_examples(
+ input_file=FLAGS.train_file, is_training=True)
+
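+    # --num_train_steps=0 means "derive the step count from num_train_epochs";
+    # a non-zero value (as passed by the performance test scripts) wins.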
+ if num_train_steps == 0:
+ num_train_steps = int(
+ len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
+
+ #print("lenof train_examples = %s , num_train_epochs = %s" %(len(train_examples), FLAGS.num_train_epochs))
+ #num_train_steps = int(num_train_steps / rank_size)
+ num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
+
+ # Pre-shuffle the input to avoid having to make a very large shuffle
+    # buffer in the `input_fn`.
+ rng = random.Random(12345)
+ rng.shuffle(train_examples)
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_tpu)
+
+  # The upstream script used TPUEstimator with a CPU/GPU fallback; this
+  # adaptation drives the model through NPUEstimator instead.
+ estimator = NPUEstimator(
+ model_fn=model_fn,
+ config=run_config,
+ model_dir=FLAGS.output_dir,
+ params={"batch_size": FLAGS.train_batch_size, "predict_batch_size": FLAGS.predict_batch_size})
+ #train_batch_size=FLAGS.train_batch_size,
+ #predict_batch_size=FLAGS.predict_batch_size)
+
+ if FLAGS.do_train:
+ # We write to a temporary file to avoid storing very large constant tensors
+ # in memory.
+ train_writer = FeatureWriter(
+ filename=os.path.join(FLAGS.output_dir, "train.tf_record"),
+ is_training=True)
+ convert_examples_to_features(
+ examples=train_examples,
+ tokenizer=tokenizer,
+ max_seq_length=FLAGS.max_seq_length,
+ doc_stride=FLAGS.doc_stride,
+ max_query_length=FLAGS.max_query_length,
+ is_training=True,
+ output_fn=train_writer.process_feature)
+ train_writer.close()
+
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Num orig examples = %d", len(train_examples))
+ tf.logging.info(" Num split examples = %d", train_writer.num_features)
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ tf.logging.info(" Num steps = %d", num_train_steps)
+ del train_examples
+
+ train_input_fn = input_fn_builder(
+ input_file=train_writer.filename,
+ seq_length=FLAGS.max_seq_length,
+ is_training=True,
+ drop_remainder=True)
+ estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+
+ if FLAGS.do_predict:
+ eval_examples = read_squad_examples(
+ input_file=FLAGS.predict_file, is_training=False)
+
+ eval_writer = FeatureWriter(
+ filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
+ is_training=False)
+ eval_features = []
+
+ def append_feature(feature):
+ eval_features.append(feature)
+ eval_writer.process_feature(feature)
+
+ convert_examples_to_features(
+ examples=eval_examples,
+ tokenizer=tokenizer,
+ max_seq_length=FLAGS.max_seq_length,
+ doc_stride=FLAGS.doc_stride,
+ max_query_length=FLAGS.max_query_length,
+ is_training=False,
+ output_fn=append_feature)
+ eval_writer.close()
+
+ tf.logging.info("***** Running predictions *****")
+ tf.logging.info(" Num orig examples = %d", len(eval_examples))
+ tf.logging.info(" Num split examples = %d", len(eval_features))
+ tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
+
+
+ predict_input_fn = input_fn_builder(
+ input_file=eval_writer.filename,
+ seq_length=FLAGS.max_seq_length,
+ is_training=False,
+ drop_remainder=True)
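+  # drop_remainder=True keeps batch shapes static for the NPU, but it also
+  # drops the final partial batch, so a few eval features may go unscored.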
+
+ # If running eval on the TPU, you will need to specify the number of
+ # steps.
+ all_results = []
+ for result in estimator.predict(
+ predict_input_fn, yield_single_examples=True):
+ if len(all_results) % 1000 == 0:
+ tf.logging.info("Processing example: %d" % (len(all_results)))
+ unique_id = int(result["unique_ids"])
+ start_logits = [float(x) for x in result["start_logits"].flat]
+ end_logits = [float(x) for x in result["end_logits"].flat]
+ all_results.append(
+ RawResult(
+ unique_id=unique_id,
+ start_logits=start_logits,
+ end_logits=end_logits))
+
+ output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json")
+ output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json")
+ output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json")
+
+ write_predictions(eval_examples, eval_features, all_results,
+ FLAGS.n_best_size, FLAGS.max_answer_length,
+ FLAGS.do_lower_case, output_prediction_file,
+ output_nbest_file, output_null_log_odds_file)
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/sample_text.txt b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/sample_text.txt
new file mode 100644
index 000000000..a42812060
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/sample_text.txt
@@ -0,0 +1,33 @@
+This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত
+Text should be one-sentence-per-line, with empty lines between documents.
+This sample text is public domain and was randomly selected from Project Gutenberg.
+
+The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors.
+Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity.
+Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them.
+"Cass" Beard had risen early that morning, but not with a view to discovery.
+A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets.
+The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency.
+This was nearly opposite.
+Mr. Cassius crossed the highway, and stopped suddenly.
+Something glittered in the nearest red pool before him.
+Gold, surely!
+But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring.
+Looking at it more attentively, he saw that it bore the inscription, "May to Cass."
+Like most of his fellow gold-seekers, Cass was superstitious.
+
+The fountain of classic wisdom, Hypatia herself.
+As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge.
+From my youth I felt in me a soul above the matter-entangled herd.
+She revealed to me the glorious fact, that I am a spark of Divinity itself.
+A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's.
+There is a philosophic pleasure in opening one's treasures to the modest young.
+Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street.
+Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide;
+but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind.
+Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now.
+His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert;
+while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts.
+At last they reached the quay at the opposite end of the street;
+and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers.
+He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him.
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
new file mode 100644
index 000000000..9cb721078
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bertsquad_ID0495_for_TensorFlow"
+batch_size=32
+epoch=2
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID0495_Bert-Squad_full_1p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+vocab_file=${data_path}/model/vocab.txt
+bert_config_file=${data_path}/model/bert_config.json
+init_checkpoint=${data_path}/model/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=32 \
+ --num_train_epochs=2 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+ wait
+ python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
+ ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Report training accuracy; review and modify per model
+train_accuracy=`grep -a "f1" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $4}'|tr -d }`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+
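+# ActualFPS = steps/sec * batch_size, i.e. samples per second; TrainingTime is
+# then roughly the time per training step in milliseconds.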
+ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
+temp1=`echo "1000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
new file mode 100644
index 000000000..7feb417ff
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bertsquad_ID0495_for_TensorFlow"
+batch_size=32
+epoch=2
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID0495_Bert-Squad_full_8p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+vocab_file=${data_path}/model/vocab.txt
+bert_config_file=${data_path}/model/bert_config.json
+init_checkpoint=${data_path}/model/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=32 \
+ --num_train_epochs=2 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+ wait
+ python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
+ ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Report training accuracy; review and modify per model
+train_accuracy=`grep -a "f1" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $4}'|tr -d }`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
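+# Cluster throughput: per-device samples/sec scaled by RANK_SIZE; with the 8000
+# factor, TrainingTime still works out to roughly milliseconds per step.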
+temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
+ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
+temp1=`echo "8000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
new file mode 100644
index 000000000..ce142ad5e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bertsquad_ID0495_for_TensorFlow"
+batch_size=32
+epoch=1
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID0495_Bert-Squad_performance_1p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+vocab_file=${data_path}/model/vocab.txt
+bert_config_file=${data_path}/model/bert_config.json
+init_checkpoint=${data_path}/model/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=32 \
+ --num_train_epochs=1 \
+ --num_train_steps=1000 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# End-to-end (E2E) training duration, computed directly; no modification needed
+echo "E2E training Duration sec: $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+
+ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
+temp1=`echo "1000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
new file mode 100644
index 000000000..ab3954948
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bertsquad_ID0495_for_TensorFlow"
+batch_size=32
+epoch=1
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID0495_Bert-Squad_performance_8p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+vocab_file=${data_path}/model/vocab.txt
+bert_config_file=${data_path}/model/bert_config.json
+init_checkpoint=${data_path}/model/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=32 \
+ --num_train_epochs=1 \
+ --num_train_steps=1000 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# End-to-end (E2E) training duration, computed directly; no modification needed
+echo "E2E training Duration sec: $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
+ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
+temp1=`echo "8000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh
new file mode 100644
index 000000000..7814192ab
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-Squad_ID3082_for_TensorFlow"
+batch_size=24
+epoch=2
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3082_BertLarge-Squad_full_1p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+vocab_file=${data_path}/uncased_L-24_H-1024_A-16/vocab.txt
+bert_config_file=${data_path}/uncased_L-24_H-1024_A-16/bert_config.json
+init_checkpoint=${data_path}/uncased_L-24_H-1024_A-16/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=24 \
+ --num_train_epochs=2 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+ wait
+ python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
+ ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Report training accuracy; review and modify per model
+train_accuracy=`grep -a "f1" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $4}'|tr -d }`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+
+ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
+temp1=`echo "1000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
new file mode 100644
index 000000000..b86be9a9f
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-Squad_ID3082_for_TensorFlow"
+batch_size=24
+epoch=2
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3082_BertLarge-Squad_full_8p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+vocab_file=${data_path}/uncased_L-24_H-1024_A-16/vocab.txt
+bert_config_file=${data_path}/uncased_L-24_H-1024_A-16/bert_config.json
+init_checkpoint=${data_path}/uncased_L-24_H-1024_A-16/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=24 \
+ --num_train_epochs=2 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+ wait
+ python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
+ ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Report training accuracy; review and modify per model
+train_accuracy=`grep -a "f1" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $4}'|tr -d }`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
+ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
+temp1=`echo "8000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
new file mode 100644
index 000000000..e04e3c410
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-Squad_ID3082_for_TensorFlow"
+batch_size=24
+epoch=1
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3082_BertLarge-Squad_performance_1p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+vocab_file=${data_path}/uncased_L-24_H-1024_A-16/vocab.txt
+bert_config_file=${data_path}/uncased_L-24_H-1024_A-16/bert_config.json
+init_checkpoint=${data_path}/uncased_L-24_H-1024_A-16/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=24 \
+ --num_train_epochs=1 \
+ --num_train_steps=1000 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# End-to-end (E2E) training duration, computed directly; no modification needed
+echo "E2E training Duration sec: $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+
+ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
+temp1=`echo "1000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
new file mode 100644
index 000000000..e5efad108
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+# Current path; no modification needed
+cur_path=`pwd`
+parent_path=$(dirname $(pwd))
+
+# Collective communication parameters; no modification needed
+# Make sure the rank table file rank_table_8p.json sits in the configs directory at the same level as test
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-Squad_ID3082_for_TensorFlow"
+batch_size=24
+epoch=1
+
+# Maintenance parameters; no modification needed
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3082_BertLarge-Squad_performance_8p.sh --data_path=<path>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         whether to enable data dump, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+vocab_file=${data_path}/uncased_L-24_H-1024_A-16/vocab.txt
+bert_config_file=${data_path}/uncased_L-24_H-1024_A-16/bert_config.json
+init_checkpoint=${data_path}/uncased_L-24_H-1024_A-16/bert_model.ckpt
+train_file=${data_path}/dataset/train-v1.1.json
+predict_file=${data_path}/dataset/dev-v1.1.json
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then
+        rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; review and modify per model
+ nohup python3.7 ${parent_path}/run_squad.py \
+ --vocab_file=$vocab_file \
+ --bert_config_file=$bert_config_file \
+ --init_checkpoint=$init_checkpoint \
+ --train_file=$train_file \
+ --do_predict=True \
+ --do_train=True \
+ --predict_file=$predict_file \
+ --train_batch_size=24 \
+ --num_train_epochs=1 \
+ --num_train_steps=1000 \
+ --learning_rate=3e-5 \
+ --max_seq_length=384 \
+ --doc_stride=128 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# E2E end-to-end training duration, computed directly; no modification needed
+echo "E2E training Duration sec: $e2e_time"
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data
+fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
+ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
+temp1=`echo "8000 * ${batch_size}"|bc`
+TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
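+# TrainingTime = (8000 * batch_size) / ActualFPS: the estimated seconds to
+# consume 8000*batch_size samples at the measured aggregate throughput
+# (roughly 1000 global steps at the logged global_step/sec rate).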
+
+ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+sed -i -e '/ModuleNotFoundError/d' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization.py
new file mode 100644
index 000000000..0ee135953
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization.py
@@ -0,0 +1,399 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+import unicodedata
+import six
+import tensorflow as tf
+
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+ """Checks whether the casing config is consistent with the checkpoint name."""
+
+ # The casing has to be passed in by the user and there is no explicit check
+ # as to whether it matches the checkpoint. The casing information probably
+ # should have been stored in the bert_config.json file, but it's not, so
+ # we have to heuristically detect it to validate.
+
+ if not init_checkpoint:
+ return
+
+ m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+ if m is None:
+ return
+
+ model_name = m.group(1)
+
+ lower_models = [
+ "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+ "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
+ ]
+
+ cased_models = [
+ "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+ "multi_cased_L-12_H-768_A-12"
+ ]
+
+ is_bad_config = False
+ if model_name in lower_models and not do_lower_case:
+ is_bad_config = True
+ actual_flag = "False"
+ case_name = "lowercased"
+ opposite_flag = "True"
+
+ if model_name in cased_models and do_lower_case:
+ is_bad_config = True
+ actual_flag = "True"
+ case_name = "cased"
+ opposite_flag = "False"
+
+ if is_bad_config:
+ raise ValueError(
+ "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+ "However, `%s` seems to be a %s model, so you "
+ "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+ "how the model was pre-training. If this error is wrong, please "
+ "just comment out this check." % (actual_flag, init_checkpoint,
+ model_name, case_name, opposite_flag))
+
+
+def convert_to_unicode(text):
+ """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
+ if six.PY3:
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, bytes):
+ return text.decode("utf-8", "ignore")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ elif six.PY2:
+ if isinstance(text, str):
+ return text.decode("utf-8", "ignore")
+ elif isinstance(text, unicode):
+ return text
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ else:
+ raise ValueError("Not running on Python2 or Python 3?")
+
+
+def printable_text(text):
+ """Returns text encoded in a way suitable for print or `tf.logging`."""
+
+ # These functions want `str` for both Python2 and Python3, but in one case
+ # it's a Unicode string and in the other it's a byte string.
+ if six.PY3:
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, bytes):
+ return text.decode("utf-8", "ignore")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ elif six.PY2:
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, unicode):
+ return text.encode("utf-8")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+ else:
+ raise ValueError("Not running on Python2 or Python 3?")
+
+
+def load_vocab(vocab_file):
+ """Loads a vocabulary file into a dictionary."""
+ vocab = collections.OrderedDict()
+ index = 0
+ with tf.gfile.GFile(vocab_file, "r") as reader:
+ while True:
+ token = convert_to_unicode(reader.readline())
+ if not token:
+ break
+ token = token.strip()
+ vocab[token] = index
+ index += 1
+ return vocab
+
+
+def convert_by_vocab(vocab, items):
+ """Converts a sequence of [tokens|ids] using the vocab."""
+ output = []
+ for item in items:
+ output.append(vocab[item])
+ return output
+
+
+def convert_tokens_to_ids(vocab, tokens):
+ return convert_by_vocab(vocab, tokens)
+
+
+def convert_ids_to_tokens(inv_vocab, ids):
+ return convert_by_vocab(inv_vocab, ids)
+
+
+def whitespace_tokenize(text):
+ """Runs basic whitespace cleaning and splitting on a piece of text."""
+ text = text.strip()
+ if not text:
+ return []
+ tokens = text.split()
+ return tokens
+
+
+class FullTokenizer(object):
+ """Runs end-to-end tokenziation."""
+
+ def __init__(self, vocab_file, do_lower_case=True):
+ self.vocab = load_vocab(vocab_file)
+ self.inv_vocab = {v: k for k, v in self.vocab.items()}
+ self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+ self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+ def tokenize(self, text):
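+    # Basic tokenization (punctuation splitting, optional lower-casing) runs
+    # first, then WordPiece on each resulting token. Illustrative,
+    # vocab-dependent example: "UNwanted,running" ->
+    # ["un", "##want", "##ed", ",", "runn", "##ing"].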
+ split_tokens = []
+ for token in self.basic_tokenizer.tokenize(text):
+ for sub_token in self.wordpiece_tokenizer.tokenize(token):
+ split_tokens.append(sub_token)
+
+ return split_tokens
+
+ def convert_tokens_to_ids(self, tokens):
+ return convert_by_vocab(self.vocab, tokens)
+
+ def convert_ids_to_tokens(self, ids):
+ return convert_by_vocab(self.inv_vocab, ids)
+
+
+class BasicTokenizer(object):
+ """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+ def __init__(self, do_lower_case=True):
+ """Constructs a BasicTokenizer.
+
+ Args:
+ do_lower_case: Whether to lower case the input.
+ """
+ self.do_lower_case = do_lower_case
+
+ def tokenize(self, text):
+ """Tokenizes a piece of text."""
+ text = convert_to_unicode(text)
+ text = self._clean_text(text)
+
+ # This was added on November 1st, 2018 for the multilingual and Chinese
+ # models. This is also applied to the English models now, but it doesn't
+ # matter since the English models were not trained on any Chinese data
+ # and generally don't have any Chinese data in them (there are Chinese
+ # characters in the vocabulary because Wikipedia does have some Chinese
+ # words in the English Wikipedia.).
+ text = self._tokenize_chinese_chars(text)
+
+ orig_tokens = whitespace_tokenize(text)
+ split_tokens = []
+ for token in orig_tokens:
+ if self.do_lower_case:
+ token = token.lower()
+ token = self._run_strip_accents(token)
+ split_tokens.extend(self._run_split_on_punc(token))
+
+ output_tokens = whitespace_tokenize(" ".join(split_tokens))
+ return output_tokens
+
+ def _run_strip_accents(self, text):
+ """Strips accents from a piece of text."""
+ text = unicodedata.normalize("NFD", text)
+ output = []
+ for char in text:
+ cat = unicodedata.category(char)
+ if cat == "Mn":
+ continue
+ output.append(char)
+ return "".join(output)
+
+ def _run_split_on_punc(self, text):
+ """Splits punctuation on a piece of text."""
+ chars = list(text)
+ i = 0
+ start_new_word = True
+ output = []
+ while i < len(chars):
+ char = chars[i]
+ if _is_punctuation(char):
+ output.append([char])
+ start_new_word = True
+ else:
+ if start_new_word:
+ output.append([])
+ start_new_word = False
+ output[-1].append(char)
+ i += 1
+
+ return ["".join(x) for x in output]
+
+ def _tokenize_chinese_chars(self, text):
+ """Adds whitespace around any CJK character."""
+ output = []
+ for char in text:
+ cp = ord(char)
+ if self._is_chinese_char(cp):
+ output.append(" ")
+ output.append(char)
+ output.append(" ")
+ else:
+ output.append(char)
+ return "".join(output)
+
+ def _is_chinese_char(self, cp):
+ """Checks whether CP is the codepoint of a CJK character."""
+ # This defines a "chinese character" as anything in the CJK Unicode block:
+ # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+ #
+ # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+ # despite its name. The modern Korean Hangul alphabet is a different block,
+ # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+ # space-separated words, so they are not treated specially and handled
+    # like all of the other languages.
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
+ (cp >= 0x3400 and cp <= 0x4DBF) or #
+ (cp >= 0x20000 and cp <= 0x2A6DF) or #
+ (cp >= 0x2A700 and cp <= 0x2B73F) or #
+ (cp >= 0x2B740 and cp <= 0x2B81F) or #
+ (cp >= 0x2B820 and cp <= 0x2CEAF) or
+ (cp >= 0xF900 and cp <= 0xFAFF) or #
+ (cp >= 0x2F800 and cp <= 0x2FA1F)): #
+ return True
+
+ return False
+
+ def _clean_text(self, text):
+ """Performs invalid character removal and whitespace cleanup on text."""
+ output = []
+ for char in text:
+ cp = ord(char)
+ if cp == 0 or cp == 0xfffd or _is_control(char):
+ continue
+ if _is_whitespace(char):
+ output.append(" ")
+ else:
+ output.append(char)
+ return "".join(output)
+
+
+class WordpieceTokenizer(object):
+ """Runs WordPiece tokenziation."""
+
+ def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
+ self.vocab = vocab
+ self.unk_token = unk_token
+ self.max_input_chars_per_word = max_input_chars_per_word
+
+ def tokenize(self, text):
+ """Tokenizes a piece of text into its word pieces.
+
+ This uses a greedy longest-match-first algorithm to perform tokenization
+ using the given vocabulary.
+
+ For example:
+ input = "unaffable"
+ output = ["un", "##aff", "##able"]
+
+ Args:
+ text: A single token or whitespace separated tokens. This should have
+        already been passed through `BasicTokenizer`.
+
+ Returns:
+ A list of wordpiece tokens.
+ """
+
+ text = convert_to_unicode(text)
+
+ output_tokens = []
+ for token in whitespace_tokenize(text):
+ chars = list(token)
+ if len(chars) > self.max_input_chars_per_word:
+ output_tokens.append(self.unk_token)
+ continue
+
+ is_bad = False
+ start = 0
+ sub_tokens = []
+ while start < len(chars):
+ end = len(chars)
+ cur_substr = None
+ while start < end:
+ substr = "".join(chars[start:end])
+ if start > 0:
+ substr = "##" + substr
+ if substr in self.vocab:
+ cur_substr = substr
+ break
+ end -= 1
+ if cur_substr is None:
+ is_bad = True
+ break
+ sub_tokens.append(cur_substr)
+ start = end
+
+ if is_bad:
+ output_tokens.append(self.unk_token)
+ else:
+ output_tokens.extend(sub_tokens)
+ return output_tokens
+
+
+def _is_whitespace(char):
+ """Checks whether `chars` is a whitespace character."""
+  # \t, \n, and \r are technically control characters but we treat them
+ # as whitespace since they are generally considered as such.
+ if char == " " or char == "\t" or char == "\n" or char == "\r":
+ return True
+ cat = unicodedata.category(char)
+ if cat == "Zs":
+ return True
+ return False
+
+
+def _is_control(char):
+ """Checks whether `chars` is a control character."""
+ # These are technically control characters but we count them as whitespace
+ # characters.
+ if char == "\t" or char == "\n" or char == "\r":
+ return False
+ cat = unicodedata.category(char)
+ if cat in ("Cc", "Cf"):
+ return True
+ return False
+
+
+def _is_punctuation(char):
+ """Checks whether `chars` is a punctuation character."""
+ cp = ord(char)
+ # We treat all non-letter/number ASCII as punctuation.
+ # Characters such as "^", "$", and "`" are not in the Unicode
+ # Punctuation class but we treat them as punctuation anyways, for
+ # consistency.
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+ (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+ return True
+ cat = unicodedata.category(char)
+ if cat.startswith("P"):
+ return True
+ return False
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization_test.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization_test.py
new file mode 100644
index 000000000..0afaedd2e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/tokenization_test.py
@@ -0,0 +1,137 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+import tokenization
+import six
+import tensorflow as tf
+
+
+class TokenizationTest(tf.test.TestCase):
+
+ def test_full_tokenizer(self):
+ vocab_tokens = [
+ "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
+ "##ing", ","
+ ]
+ with tempfile.NamedTemporaryFile(delete=False) as vocab_writer:
+ if six.PY2:
+ vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
+ else:
+ vocab_writer.write("".join(
+ [x + "\n" for x in vocab_tokens]).encode("utf-8"))
+
+ vocab_file = vocab_writer.name
+
+ tokenizer = tokenization.FullTokenizer(vocab_file)
+ os.unlink(vocab_file)
+
+ tokens = tokenizer.tokenize(u"UNwant\u00E9d,running")
+ self.assertAllEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
+
+ self.assertAllEqual(
+ tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
+
+ def test_chinese(self):
+ tokenizer = tokenization.BasicTokenizer()
+
+ self.assertAllEqual(
+ tokenizer.tokenize(u"ah\u535A\u63A8zz"),
+ [u"ah", u"\u535A", u"\u63A8", u"zz"])
+
+ def test_basic_tokenizer_lower(self):
+ tokenizer = tokenization.BasicTokenizer(do_lower_case=True)
+
+ self.assertAllEqual(
+ tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
+ ["hello", "!", "how", "are", "you", "?"])
+ self.assertAllEqual(tokenizer.tokenize(u"H\u00E9llo"), ["hello"])
+
+ def test_basic_tokenizer_no_lower(self):
+ tokenizer = tokenization.BasicTokenizer(do_lower_case=False)
+
+ self.assertAllEqual(
+ tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
+ ["HeLLo", "!", "how", "Are", "yoU", "?"])
+
+ def test_wordpiece_tokenizer(self):
+ vocab_tokens = [
+ "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
+ "##ing"
+ ]
+
+ vocab = {}
+ for (i, token) in enumerate(vocab_tokens):
+ vocab[token] = i
+ tokenizer = tokenization.WordpieceTokenizer(vocab=vocab)
+
+ self.assertAllEqual(tokenizer.tokenize(""), [])
+
+ self.assertAllEqual(
+ tokenizer.tokenize("unwanted running"),
+ ["un", "##want", "##ed", "runn", "##ing"])
+
+ self.assertAllEqual(
+ tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"])
+
+ def test_convert_tokens_to_ids(self):
+ vocab_tokens = [
+ "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
+ "##ing"
+ ]
+
+ vocab = {}
+ for (i, token) in enumerate(vocab_tokens):
+ vocab[token] = i
+
+ self.assertAllEqual(
+ tokenization.convert_tokens_to_ids(
+ vocab, ["un", "##want", "##ed", "runn", "##ing"]), [7, 4, 5, 8, 9])
+
+ def test_is_whitespace(self):
+ self.assertTrue(tokenization._is_whitespace(u" "))
+ self.assertTrue(tokenization._is_whitespace(u"\t"))
+ self.assertTrue(tokenization._is_whitespace(u"\r"))
+ self.assertTrue(tokenization._is_whitespace(u"\n"))
+ self.assertTrue(tokenization._is_whitespace(u"\u00A0"))
+
+ self.assertFalse(tokenization._is_whitespace(u"A"))
+ self.assertFalse(tokenization._is_whitespace(u"-"))
+
+ def test_is_control(self):
+ self.assertTrue(tokenization._is_control(u"\u0005"))
+
+ self.assertFalse(tokenization._is_control(u"A"))
+ self.assertFalse(tokenization._is_control(u" "))
+ self.assertFalse(tokenization._is_control(u"\t"))
+ self.assertFalse(tokenization._is_control(u"\r"))
+ self.assertFalse(tokenization._is_control(u"\U0001F4A9"))
+
+ def test_is_punctuation(self):
+ self.assertTrue(tokenization._is_punctuation(u"-"))
+ self.assertTrue(tokenization._is_punctuation(u"$"))
+ self.assertTrue(tokenization._is_punctuation(u"`"))
+ self.assertTrue(tokenization._is_punctuation(u"."))
+
+ self.assertFalse(tokenization._is_punctuation(u"A"))
+ self.assertFalse(tokenization._is_punctuation(u" "))
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_glue_data.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_glue_data.py
new file mode 100644
index 000000000..de21962f5
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_glue_data.py
@@ -0,0 +1,512 @@
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import math
+import os
+import random
+import modeling
+import optimization
+import tokenization
+import six
+import tensorflow as tf
+import horovod.tensorflow as hvd
+import time
+import csv
+
+flags = tf.flags
+FLAGS = None
+
+def extract_flags():
+
+ ## Required parameters
+ flags.DEFINE_string(
+ "data_dir", None,
+ "The input data dir. Should contain the .tsv files (or other data files) "
+ "for the task.")
+
+ flags.DEFINE_string("task_name", None, "The name of the task to train.")
+
+ flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+ flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+ flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+ flags.DEFINE_bool(
+ "verbose_logging", False,
+ "If true, all of the warnings related to data processing will be printed. "
+ "A number of warnings are expected for a normal SQuAD evaluation.")
+ flags.mark_flag_as_required("data_dir")
+ flags.mark_flag_as_required("task_name")
+ flags.mark_flag_as_required("vocab_file")
+ return flags.FLAGS
+
+
+class InputExample(object):
+ """A single training/test example for simple sequence classification."""
+
+ def __init__(self, guid, text_a, text_b=None, label=None):
+ """Constructs a InputExample.
+ Args:
+ guid: Unique id for the example.
+ text_a: string. The untokenized text of the first sequence. For single
+ sequence tasks, only this sequence must be specified.
+ text_b: (Optional) string. The untokenized text of the second sequence.
+ Only must be specified for sequence pair tasks.
+ label: (Optional) string. The label of the example. This should be
+ specified for train and dev examples, but not for test examples.
+ """
+ self.guid = guid
+ self.text_a = text_a
+ self.text_b = text_b
+ self.label = label
+
+class PaddingInputExample(object):
+ """Fake example so the num input examples is a multiple of the batch size.
+
+ When running eval/predict on the TPU, we need to pad the number of examples
+ to be a multiple of the batch size, because the TPU requires a fixed batch
+ size. The alternative is to drop the last batch, which is bad because it means
+ the entire output data won't be generated.
+
+ We use this class instead of `None` because treating `None` as padding
+  batches could cause silent errors.
+ """
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self,
+ input_ids,
+ input_mask,
+ segment_ids,
+ label_id,
+ is_real_example=True):
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.segment_ids = segment_ids
+ self.label_id = label_id
+ self.is_real_example = is_real_example
+
+
+class DataProcessor(object):
+ """Base class for data converters for sequence classification data sets."""
+
+ def get_train_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for the train set."""
+ raise NotImplementedError()
+
+ def get_dev_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for the dev set."""
+ raise NotImplementedError()
+
+ def get_test_examples(self, data_dir):
+ """Gets a collection of `InputExample`s for prediction."""
+ raise NotImplementedError()
+
+ def get_labels(self):
+ """Gets the list of labels for this data set."""
+ raise NotImplementedError()
+
+ @classmethod
+ def _read_tsv(cls, input_file, quotechar=None):
+ """Reads a tab separated value file."""
+ with tf.gfile.Open(input_file, "r") as f:
+ reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
+ lines = []
+ for line in reader:
+ lines.append(line)
+ return lines
+
+
+class XnliProcessor(DataProcessor):
+ """Processor for the XNLI data set."""
+
+ def __init__(self):
+ self.language = "zh"
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ lines = self._read_tsv(
+ os.path.join(data_dir, "multinli",
+ "multinli.train.%s.tsv" % self.language))
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "train-%d" % (i)
+ text_a = tokenization.convert_to_unicode(line[0])
+ text_b = tokenization.convert_to_unicode(line[1])
+ label = tokenization.convert_to_unicode(line[2])
+ if label == tokenization.convert_to_unicode("contradictory"):
+ label = tokenization.convert_to_unicode("contradiction")
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv"))
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "dev-%d" % (i)
+ language = tokenization.convert_to_unicode(line[0])
+ if language != tokenization.convert_to_unicode(self.language):
+ continue
+ text_a = tokenization.convert_to_unicode(line[6])
+ text_b = tokenization.convert_to_unicode(line[7])
+ label = tokenization.convert_to_unicode(line[1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+ def get_labels(self):
+ """See base class."""
+ return ["contradiction", "entailment", "neutral"]
+
+
+class MnliProcessor(DataProcessor):
+ """Processor for the MultiNLI data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")),
+ "dev_matched")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["contradiction", "entailment", "neutral"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "%s-%s" % (set_type, tokenization.convert_to_unicode(line[0]))
+ text_a = tokenization.convert_to_unicode(line[8])
+ text_b = tokenization.convert_to_unicode(line[9])
+ if set_type == "test":
+ label = "contradiction"
+ else:
+ label = tokenization.convert_to_unicode(line[-1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+
+class MrpcProcessor(DataProcessor):
+ """Processor for the MRPC data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["0", "1"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ if i == 0:
+ continue
+ guid = "%s-%s" % (set_type, i)
+ text_a = tokenization.convert_to_unicode(line[3])
+ text_b = tokenization.convert_to_unicode(line[4])
+ if set_type == "test":
+ label = "0"
+ else:
+ label = tokenization.convert_to_unicode(line[0])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+ return examples
+
+
+class ColaProcessor(DataProcessor):
+ """Processor for the CoLA data set (GLUE version)."""
+
+ def get_train_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+
+ def get_dev_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+
+ def get_test_examples(self, data_dir):
+ """See base class."""
+ return self._create_examples(
+ self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+
+ def get_labels(self):
+ """See base class."""
+ return ["0", "1"]
+
+ def _create_examples(self, lines, set_type):
+ """Creates examples for the training and dev sets."""
+ examples = []
+ for (i, line) in enumerate(lines):
+ # Only the test set has a header
+ if set_type == "test" and i == 0:
+ continue
+ guid = "%s-%s" % (set_type, i)
+ if set_type == "test":
+ text_a = tokenization.convert_to_unicode(line[1])
+ label = "0"
+ else:
+ text_a = tokenization.convert_to_unicode(line[3])
+ label = tokenization.convert_to_unicode(line[1])
+ examples.append(
+ InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
+ return examples
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+ """Truncates a sequence pair in place to the maximum length."""
+
+ # This is a simple heuristic which will always truncate the longer sequence
+ # one token at a time. This makes more sense than truncating an equal percent
+ # of tokens from each, since if one sequence is very short then each token
+ # that's truncated likely contains more information than a longer sequence.
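+  # Illustrative example: with max_length=8, lengths (6, 5) are truncated
+  # step by step to (4, 4).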
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_length:
+ break
+ if len(tokens_a) > len(tokens_b):
+ tokens_a.pop()
+ else:
+ tokens_b.pop()
+
+def convert_single_example(ex_index, example, label_list, max_seq_length,
+ tokenizer, verbose_logging=False):
+ """Converts a single `InputExample` into a single `InputFeatures`."""
+
+ if isinstance(example, PaddingInputExample):
+ return InputFeatures(
+ input_ids=[0] * max_seq_length,
+ input_mask=[0] * max_seq_length,
+ segment_ids=[0] * max_seq_length,
+ label_id=0,
+ is_real_example=False)
+
+ label_map = {}
+ for (i, label) in enumerate(label_list):
+ label_map[label] = i
+
+ tokens_a = tokenizer.tokenize(example.text_a)
+ tokens_b = None
+ if example.text_b:
+ tokens_b = tokenizer.tokenize(example.text_b)
+
+ if tokens_b:
+ # Modifies `tokens_a` and `tokens_b` in place so that the total
+ # length is less than the specified length.
+ # Account for [CLS], [SEP], [SEP] with "- 3"
+ _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+ else:
+ # Account for [CLS] and [SEP] with "- 2"
+ if len(tokens_a) > max_seq_length - 2:
+ tokens_a = tokens_a[0:(max_seq_length - 2)]
+
+ # The convention in BERT is:
+ # (a) For sequence pairs:
+ # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
+ # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
+ # (b) For single sequences:
+ # tokens: [CLS] the dog is hairy . [SEP]
+ # type_ids: 0 0 0 0 0 0 0
+ #
+ # Where "type_ids" are used to indicate whether this is the first
+ # sequence or the second sequence. The embedding vectors for `type=0` and
+ # `type=1` were learned during pre-training and are added to the wordpiece
+ # embedding vector (and position vector). This is not *strictly* necessary
+ # since the [SEP] token unambiguously separates the sequences, but it makes
+ # it easier for the model to learn the concept of sequences.
+ #
+ # For classification tasks, the first vector (corresponding to [CLS]) is
+ # used as the "sentence vector". Note that this only makes sense because
+ # the entire model is fine-tuned.
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ if tokens_b:
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ label_id = label_map[example.label]
+ if ex_index < 5 and verbose_logging:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("guid: %s" % (example.guid))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ tf.logging.info("label: %s (id = %d)" % (example.label, label_id))
+
+ feature = InputFeatures(
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ label_id=label_id,
+ is_real_example=True)
+ return feature
+
+# This function is not used by this file but is kept for the Colab notebook
+# and other code that depends on it.
+def convert_examples_to_features(examples, label_list, max_seq_length,
+ tokenizer):
+ """Convert a set of `InputExample`s to a list of `InputFeatures`."""
+
+ features = []
+ for (ex_index, example) in enumerate(examples):
+ if ex_index % 10000 == 0:
+ tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+ feature = convert_single_example(ex_index, example, label_list,
+ max_seq_length, tokenizer, FLAGS.verbose_logging)
+
+ features.append(feature)
+ return features
+
+def file_based_convert_examples_to_features(
+ examples, label_list, max_seq_length, tokenizer, output_file):
+ """Convert a set of `InputExample`s to a TFRecord file."""
+
+ writer = tf.python_io.TFRecordWriter(output_file)
+
+ for (ex_index, example) in enumerate(examples):
+ if ex_index % 10000 == 0:
+ tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+ feature = convert_single_example(ex_index, example, label_list,
+ max_seq_length, tokenizer)
+
+ def create_int_feature(values):
+ f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return f
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(feature.input_ids)
+ features["input_mask"] = create_int_feature(feature.input_mask)
+ features["segment_ids"] = create_int_feature(feature.segment_ids)
+ features["label_ids"] = create_int_feature([feature.label_id])
+ features["is_real_example"] = create_int_feature(
+ [int(feature.is_real_example)])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ writer.write(tf_example.SerializeToString())
+ writer.close()
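+
+# A record written above can be read back with TF1-style parsing, e.g.
+# (sketch; `max_seq_length` is assumed to match the value used at write time):
+#   name_to_features = {
+#       "input_ids": tf.FixedLenFeature([max_seq_length], tf.int64),
+#       "label_ids": tf.FixedLenFeature([], tf.int64),
+#   }
+#   parsed = tf.parse_single_example(serialized_record, name_to_features)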
+
+def main():
+ processors = {
+ "cola": ColaProcessor,
+ "mnli": MnliProcessor,
+ "mrpc": MrpcProcessor,
+ "xnli": XnliProcessor,
+ }
+ task_name = FLAGS.task_name.lower()
+ if task_name not in processors:
+ raise ValueError("Task not found: %s" % (task_name))
+ processor = processors[task_name]()
+ label_list = processor.get_labels()
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+  tf.gfile.MakeDirs(os.path.join(FLAGS.data_dir, "final_tfrecords_sharded"))
+ train_examples = processor.get_train_examples(FLAGS.data_dir)
+  train_file = os.path.join(FLAGS.data_dir, "final_tfrecords_sharded/" + task_name + "_train.tf_record")
+ file_based_convert_examples_to_features(
+ train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
+
+ eval_examples = processor.get_dev_examples(FLAGS.data_dir)
+  eval_file = os.path.join(FLAGS.data_dir, "final_tfrecords_sharded/" + task_name + "_eval.tf_record")
+ file_based_convert_examples_to_features(
+ eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
+
+ predict_examples = processor.get_test_examples(FLAGS.data_dir)
+  predict_file = os.path.join(FLAGS.data_dir, "final_tfrecords_sharded/" + task_name + "_predict.tf_record")
+ file_based_convert_examples_to_features(predict_examples, label_list,
+ FLAGS.max_seq_length, tokenizer,
+ predict_file)
+
+if __name__ == "__main__":
+  FLAGS = extract_flags()
+  main()
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_pretraining_data.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_pretraining_data.py
new file mode 100644
index 000000000..d62809185
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_pretraining_data.py
@@ -0,0 +1,501 @@
+# coding=utf-8
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Create masked LM/next sentence masked_lm TF examples for BERT."""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import argparse
+import logging
+import os
+import random
+from io import open
+import h5py
+import tensorflow as tf
+import numpy as np
+from tqdm import tqdm, trange
+
+from tokenization import FullTokenizer
+import tokenization
+
+import collections
+
+class TrainingInstance(object):
+ """A single training instance (sentence pair)."""
+
+ def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
+ is_random_next):
+ self.tokens = tokens
+ self.segment_ids = segment_ids
+ self.is_random_next = is_random_next
+ self.masked_lm_positions = masked_lm_positions
+ self.masked_lm_labels = masked_lm_labels
+
+ def __str__(self):
+ s = ""
+ s += "tokens: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.tokens]))
+ s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
+ s += "is_random_next: %s\n" % self.is_random_next
+ s += "masked_lm_positions: %s\n" % (" ".join(
+ [str(x) for x in self.masked_lm_positions]))
+ s += "masked_lm_labels: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.masked_lm_labels]))
+ s += "\n"
+ return s
+
+ def __repr__(self):
+ return self.__str__()
+
+
+def write_instance_to_example_files(instances, tokenizer, max_seq_length,
+ max_predictions_per_seq, output_files, output_formats="tfrecord"):
+ """Create TF example files from `TrainingInstance`s."""
+ writers = []
+ for output_file in output_files:
+ writers.append(tf.python_io.TFRecordWriter(output_file))
+
+ writer_index = 0
+
+ total_written = 0
+ if 'hdf5' in output_formats:
+ features_hdf5 = collections.OrderedDict()
+ num_instances = len(instances)
+ features_hdf5["input_ids"] = np.zeros([num_instances, max_seq_length], dtype="int32")
+ features_hdf5["input_mask"] = np.zeros([num_instances, max_seq_length], dtype="int32")
+ features_hdf5["segment_ids"] = np.zeros([num_instances, max_seq_length], dtype="int32")
+ features_hdf5["masked_lm_positions"] = np.zeros([num_instances, max_predictions_per_seq], dtype="int32")
+ features_hdf5["masked_lm_ids"] = np.zeros([num_instances, max_predictions_per_seq], dtype="int32")
+ features_hdf5["next_sentence_labels"] = np.zeros(num_instances, dtype="int32")
+
+ for (inst_index, instance) in enumerate(instances):
+ input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
+ input_mask = [1] * len(input_ids)
+ segment_ids = list(instance.segment_ids)
+ assert len(input_ids) <= max_seq_length
+
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ masked_lm_positions = list(instance.masked_lm_positions)
+ masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
+ masked_lm_weights = [1.0] * len(masked_lm_ids)
+
+ while len(masked_lm_positions) < max_predictions_per_seq:
+ masked_lm_positions.append(0)
+ masked_lm_ids.append(0)
+ masked_lm_weights.append(0.0)
+
+ next_sentence_label = 1 if instance.is_random_next else 0
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(input_ids)
+ features["input_mask"] = create_int_feature(input_mask)
+ features["segment_ids"] = create_int_feature(segment_ids)
+ features["masked_lm_positions"] = create_int_feature(masked_lm_positions)
+ features["masked_lm_ids"] = create_int_feature(masked_lm_ids)
+ features["masked_lm_weights"] = create_float_feature(masked_lm_weights)
+ features["next_sentence_labels"] = create_int_feature([next_sentence_label])
+
+ if 'tfrecord' in output_formats:
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ writers[writer_index].write(tf_example.SerializeToString())
+ if 'hdf5' in output_formats:
+ features_hdf5["input_ids"][inst_index] = input_ids
+ features_hdf5["input_mask"][inst_index] = input_mask
+ features_hdf5["segment_ids"][inst_index] = segment_ids
+ features_hdf5["masked_lm_positions"][inst_index] = masked_lm_positions
+ features_hdf5["masked_lm_ids"][inst_index] = masked_lm_ids
+ features_hdf5["next_sentence_labels"][inst_index] = next_sentence_label
+ if 'tfrecord' not in output_formats and 'hdf5' not in output_formats:
+ assert False, 'Either empty output_formats list or unsupported type specified. Try: tfrecord or hdf5'
+
+ writer_index = (writer_index + 1) % len(writers)
+
+ total_written += 1
+
+ if inst_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in instance.tokens]))
+
+ for feature_name in features.keys():
+ feature = features[feature_name]
+ values = []
+ if feature.int64_list.value:
+ values = feature.int64_list.value
+ elif feature.float_list.value:
+ values = feature.float_list.value
+ tf.logging.info(
+ "%s: %s" % (feature_name, " ".join([str(x) for x in values])))
+
+ for writer in writers:
+ writer.close()
+
+ if 'hdf5' in output_formats:
+    # Note: the HDF5 output path reuses the last entry of `output_files`.
+    f = h5py.File(output_files[-1], 'w')
+ f.create_dataset("input_ids", data=features_hdf5["input_ids"], dtype='i4', compression='gzip')
+ f.create_dataset("input_mask", data=features_hdf5["input_mask"], dtype='i1', compression='gzip')
+ f.create_dataset("segment_ids", data=features_hdf5["segment_ids"], dtype='i1', compression='gzip')
+ f.create_dataset("masked_lm_positions", data=features_hdf5["masked_lm_positions"], dtype='i4', compression='gzip')
+ f.create_dataset("masked_lm_ids", data=features_hdf5["masked_lm_ids"], dtype='i4', compression='gzip')
+ f.create_dataset("next_sentence_labels", data=features_hdf5["next_sentence_labels"], dtype='i1', compression='gzip')
+ f.flush()
+ f.close()
+
+ tf.logging.info("Wrote %d total instances", total_written)
+
+
+def create_int_feature(values):
+ feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+
+def create_float_feature(values):
+ feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
+ return feature
+
+
+def create_training_instances(input_files, tokenizer, max_seq_length,
+ dupe_factor, short_seq_prob, masked_lm_prob,
+ max_predictions_per_seq, rng):
+ """Create `TrainingInstance`s from raw text."""
+ all_documents = [[]]
+
+ # Input file format:
+ # (1) One sentence per line. These should ideally be actual sentences, not
+ # entire paragraphs or arbitrary spans of text. (Because we use the
+ # sentence boundaries for the "next sentence prediction" task).
+ # (2) Blank lines between documents. Document boundaries are needed so
+ # that the "next sentence prediction" task doesn't span between documents.
+ for input_file in input_files:
+ print("creating instance from {}".format(input_file))
+ with open(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+
+ # Empty lines are used as document delimiters
+ if not line:
+ all_documents.append([])
+ tokens = tokenizer.tokenize(line)
+ if tokens:
+ all_documents[-1].append(tokens)
+
+ # Remove empty documents
+ all_documents = [x for x in all_documents if x]
+ rng.shuffle(all_documents)
+
+ vocab_words = list(tokenizer.vocab.keys())
+ instances = []
+ for _ in range(dupe_factor):
+ for document_index in range(len(all_documents)):
+ instances.extend(
+ create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
+
+ rng.shuffle(instances)
+ return instances
+
+
+def create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
+ """Creates `TrainingInstance`s for a single document."""
+ document = all_documents[document_index]
+
+ # Account for [CLS], [SEP], [SEP]
+ max_num_tokens = max_seq_length - 3
+
+ # We *usually* want to fill up the entire sequence since we are padding
+ # to `max_seq_length` anyways, so short sequences are generally wasted
+ # computation. However, we *sometimes*
+ # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+ # sequences to minimize the mismatch between pre-training and fine-tuning.
+ # The `target_seq_length` is just a rough target however, whereas
+ # `max_seq_length` is a hard limit.
+ target_seq_length = max_num_tokens
+ if rng.random() < short_seq_prob:
+ target_seq_length = rng.randint(2, max_num_tokens)
+
+ # We DON'T just concatenate all of the tokens from a document into a long
+ # sequence and choose an arbitrary split point because this would make the
+ # next sentence prediction task too easy. Instead, we split the input into
+ # segments "A" and "B" based on the actual "sentences" provided by the user
+ # input.
+ instances = []
+ current_chunk = []
+ current_length = 0
+ i = 0
+ while i < len(document):
+ segment = document[i]
+ current_chunk.append(segment)
+ current_length += len(segment)
+ if i == len(document) - 1 or current_length >= target_seq_length:
+ if current_chunk:
+ # `a_end` is how many segments from `current_chunk` go into the `A`
+ # (first) sentence.
+ a_end = 1
+ if len(current_chunk) >= 2:
+ a_end = rng.randint(1, len(current_chunk) - 1)
+
+ tokens_a = []
+ for j in range(a_end):
+ tokens_a.extend(current_chunk[j])
+
+ tokens_b = []
+ # Random next
+ is_random_next = False
+ if len(current_chunk) == 1 or rng.random() < 0.5:
+ is_random_next = True
+ target_b_length = target_seq_length - len(tokens_a)
+
+ # This should rarely go for more than one iteration for large
+ # corpora. However, just to be careful, we try to make sure that
+ # the random document is not the same as the document
+ # we're processing.
+ for _ in range(10):
+ random_document_index = rng.randint(0, len(all_documents) - 1)
+ if random_document_index != document_index:
+ break
+
+ random_document = all_documents[random_document_index]
+ random_start = rng.randint(0, len(random_document) - 1)
+ for j in range(random_start, len(random_document)):
+ tokens_b.extend(random_document[j])
+ if len(tokens_b) >= target_b_length:
+ break
+ # We didn't actually use these segments so we "put them back" so
+ # they don't go to waste.
+ num_unused_segments = len(current_chunk) - a_end
+ i -= num_unused_segments
+ # Actual next
+ else:
+ is_random_next = False
+ for j in range(a_end, len(current_chunk)):
+ tokens_b.extend(current_chunk[j])
+ truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
+
+ assert len(tokens_a) >= 1
+ assert len(tokens_b) >= 1
+
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ (tokens, masked_lm_positions,
+ masked_lm_labels) = create_masked_lm_predictions(
+ tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
+ instance = TrainingInstance(
+ tokens=tokens,
+ segment_ids=segment_ids,
+ is_random_next=is_random_next,
+ masked_lm_positions=masked_lm_positions,
+ masked_lm_labels=masked_lm_labels)
+ instances.append(instance)
+ current_chunk = []
+ current_length = 0
+ i += 1
+
+ return instances
+
+
+MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
+ ["index", "label"])
+
+
+def create_masked_lm_predictions(tokens, masked_lm_prob,
+ max_predictions_per_seq, vocab_words, rng):
+ """Creates the predictions for the masked LM objective."""
+
+ cand_indexes = []
+ for (i, token) in enumerate(tokens):
+ if token == "[CLS]" or token == "[SEP]":
+ continue
+ cand_indexes.append(i)
+
+ rng.shuffle(cand_indexes)
+
+ output_tokens = list(tokens)
+
+ num_to_predict = min(max_predictions_per_seq,
+ max(1, int(round(len(tokens) * masked_lm_prob))))
+
+ masked_lms = []
+ covered_indexes = set()
+ for index in cand_indexes:
+ if len(masked_lms) >= num_to_predict:
+ break
+ if index in covered_indexes:
+ continue
+ covered_indexes.add(index)
+
+ masked_token = None
+ # 80% of the time, replace with [MASK]
+ if rng.random() < 0.8:
+ masked_token = "[MASK]"
+ else:
+ # 10% of the time, keep original
+ if rng.random() < 0.5:
+ masked_token = tokens[index]
+ # 10% of the time, replace with random word
+ else:
+ masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+
+ output_tokens[index] = masked_token
+
+ masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+
+ masked_lms = sorted(masked_lms, key=lambda x: x.index)
+
+ masked_lm_positions = []
+ masked_lm_labels = []
+ for p in masked_lms:
+ masked_lm_positions.append(p.index)
+ masked_lm_labels.append(p.label)
+
+ return (output_tokens, masked_lm_positions, masked_lm_labels)
+
+
+def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
+ """Truncates a pair of sequences to a maximum sequence length."""
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_num_tokens:
+ break
+
+ trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
+ assert len(trunc_tokens) >= 1
+
+ # We want to sometimes truncate from the front and sometimes from the
+ # back to add more randomness and avoid biases.
+ if rng.random() < 0.5:
+ del trunc_tokens[0]
+ else:
+ trunc_tokens.pop()
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ ## Required parameters
+ parser.add_argument("--vocab_file",
+ default=None,
+ type=str,
+ required=True,
+ help="The vocabulary the BERT model will train on.")
+ parser.add_argument("--input_file",
+ default=None,
+ type=str,
+ required=True,
+ help="The input train corpus. can be directory with .txt files or a path to a single file")
+ parser.add_argument("--output_file",
+ default=None,
+ type=str,
+ required=True,
+ help="The output file where the model checkpoints will be written.")
+
+ ## Other parameters
+ # int
+ parser.add_argument("--max_seq_length",
+ default=128,
+ type=int,
+ help="The maximum total input sequence length after WordPiece tokenization. \n"
+ "Sequences longer than this will be truncated, and sequences shorter \n"
+ "than this will be padded.")
+ parser.add_argument("--dupe_factor",
+ default=10,
+ type=int,
+ help="Number of times to duplicate the input data (with different masks).")
+ parser.add_argument("--max_predictions_per_seq",
+ default=20,
+ type=int,
+ help="Maximum sequence length.")
+
+ # floats
+
+ parser.add_argument("--masked_lm_prob",
+ default=0.15,
+ type=float,
+ help="Masked LM probability.")
+
+ parser.add_argument("--short_seq_prob",
+ default=0.1,
+ type=float,
+ help="Probability to create a sequence shorter than maximum sequence length")
+
+ parser.add_argument("--do_lower_case",
+ action='store_true',
+ default=True,
+ help="Whether to lower case the input text. True for uncased models, False for cased models.")
+ parser.add_argument('--random_seed',
+ type=int,
+ default=12345,
+ help="random seed for initialization")
+
+ args = parser.parse_args()
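+
+    # Illustrative invocation (hypothetical paths):
+    #   python3 create_pretraining_data.py --vocab_file=vocab.txt \
+    #     --input_file=./corpus_txt_dir --output_file=pretrain.tfrecord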
+
+    tokenizer = FullTokenizer(args.vocab_file, do_lower_case=args.do_lower_case)
+
+ input_files = []
+ if os.path.isfile(args.input_file):
+ input_files.append(args.input_file)
+ elif os.path.isdir(args.input_file):
+ input_files = [os.path.join(args.input_file, f) for f in os.listdir(args.input_file) if
+ (os.path.isfile(os.path.join(args.input_file, f)) and f.endswith('.txt'))]
+ else:
+ raise ValueError("{} is not a valid path".format(args.input_file))
+
+ rng = random.Random(args.random_seed)
+ instances = create_training_instances(
+ input_files, tokenizer, args.max_seq_length, args.dupe_factor,
+ args.short_seq_prob, args.masked_lm_prob, args.max_predictions_per_seq,
+ rng)
+
+ output_files = args.output_file.split(",")
+ print("*** Writing to output files ***")
+ for output_file in output_files:
+ print(output_file)
+
+
+ write_instance_to_example_files(instances, tokenizer, args.max_seq_length,
+ args.max_predictions_per_seq, output_files)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_squad_data.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_squad_data.py
new file mode 100644
index 000000000..fe3767540
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/create_squad_data.py
@@ -0,0 +1,561 @@
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+import math
+import os
+import random
+import modeling
+import optimization
+import tokenization
+import six
+import tensorflow as tf
+import horovod.tensorflow as hvd
+import time
+
+flags = tf.flags
+FLAGS = None
+
+def extract_flags():
+ flags.DEFINE_integer(
+ "max_seq_length", 384,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+ flags.DEFINE_integer(
+ "doc_stride", 128,
+ "When splitting up a long document into chunks, how much stride to "
+ "take between chunks.")
+
+ flags.DEFINE_integer(
+ "max_query_length", 64,
+ "The maximum number of tokens for the question. Questions longer than "
+ "this will be truncated to this length.")
+
+ flags.DEFINE_bool(
+ "version_2_with_negative", False,
+ "If true, the SQuAD examples contain some that do not have an answer.")
+
+ flags.DEFINE_string("train_file", None,
+ "SQuAD json for training. E.g., train-v1.1.json")
+
+ flags.DEFINE_string(
+ "predict_file", None,
+ "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
+
+ flags.DEFINE_string(
+ "squad_dir", None,
+ "The output directory where the model checkpoints will be written.")
+
+ flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+ flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+ flags.DEFINE_bool(
+ "verbose_logging", False,
+ "If true, all of the warnings related to data processing will be printed. "
+ "A number of warnings are expected for a normal SQuAD evaluation.")
+ flags.mark_flag_as_required("train_file")
+ flags.mark_flag_as_required("predict_file")
+ flags.mark_flag_as_required("squad_dir")
+ flags.mark_flag_as_required("vocab_file")
+ return flags.FLAGS
+
+class SquadExample(object):
+ """A single training/test example for simple sequence classification.
+
+ For examples without an answer, the start and end position are -1.
+ """
+
+ def __init__(self,
+ qas_id,
+ question_text,
+ doc_tokens,
+ orig_answer_text=None,
+ start_position=None,
+ end_position=None,
+ is_impossible=False):
+ self.qas_id = qas_id
+ self.question_text = question_text
+ self.doc_tokens = doc_tokens
+ self.orig_answer_text = orig_answer_text
+ self.start_position = start_position
+ self.end_position = end_position
+ self.is_impossible = is_impossible
+
+ def __str__(self):
+ return self.__repr__()
+
+ def __repr__(self):
+ s = ""
+ s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
+ s += ", question_text: %s" % (
+ tokenization.printable_text(self.question_text))
+ s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
+    if self.start_position:
+      s += ", start_position: %d" % (self.start_position)
+    if self.end_position:
+      s += ", end_position: %d" % (self.end_position)
+    if self.is_impossible:
+      s += ", is_impossible: %r" % (self.is_impossible)
+ return s
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self,
+ unique_id,
+ example_index,
+ doc_span_index,
+ tokens,
+ token_to_orig_map,
+ token_is_max_context,
+ input_ids,
+ input_mask,
+ segment_ids,
+ start_position=None,
+ end_position=None,
+ is_impossible=None):
+ self.unique_id = unique_id
+ self.example_index = example_index
+ self.doc_span_index = doc_span_index
+ self.tokens = tokens
+ self.token_to_orig_map = token_to_orig_map
+ self.token_is_max_context = token_is_max_context
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.segment_ids = segment_ids
+ self.start_position = start_position
+ self.end_position = end_position
+ self.is_impossible = is_impossible
+
+def read_squad_examples(input_file, is_training, version_2_with_negative=False):
+ """Read a SQuAD json file into a list of SquadExample."""
+ with tf.gfile.Open(input_file, "r") as reader:
+ input_data = json.load(reader)["data"]
+
+ def is_whitespace(c):
+ if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
+ return True
+ return False
+
+ examples = []
+ for entry in input_data:
+ for paragraph in entry["paragraphs"]:
+ paragraph_text = paragraph["context"]
+ doc_tokens = []
+ char_to_word_offset = []
+ prev_is_whitespace = True
+ for c in paragraph_text:
+ if is_whitespace(c):
+ prev_is_whitespace = True
+ else:
+ if prev_is_whitespace:
+ doc_tokens.append(c)
+ else:
+ doc_tokens[-1] += c
+ prev_is_whitespace = False
+ char_to_word_offset.append(len(doc_tokens) - 1)
+
+ for qa in paragraph["qas"]:
+ qas_id = qa["id"]
+ question_text = qa["question"]
+ start_position = None
+ end_position = None
+ orig_answer_text = None
+ is_impossible = False
+ if is_training:
+
+ if version_2_with_negative:
+ is_impossible = qa["is_impossible"]
+ if (len(qa["answers"]) != 1) and (not is_impossible):
+ raise ValueError(
+ "For training, each question should have exactly 1 answer.")
+ if not is_impossible:
+ answer = qa["answers"][0]
+ orig_answer_text = answer["text"]
+ answer_offset = answer["answer_start"]
+ answer_length = len(orig_answer_text)
+ start_position = char_to_word_offset[answer_offset]
+ end_position = char_to_word_offset[answer_offset + answer_length -
+ 1]
+ # Only add answers where the text can be exactly recovered from the
+ # document. If this CAN'T happen it's likely due to weird Unicode
+ # stuff so we will just skip the example.
+ #
+ # Note that this means for training mode, every example is NOT
+ # guaranteed to be preserved.
+ actual_text = " ".join(
+ doc_tokens[start_position:(end_position + 1)])
+ cleaned_answer_text = " ".join(
+ tokenization.whitespace_tokenize(orig_answer_text))
+ if actual_text.find(cleaned_answer_text) == -1:
+ tf.logging.warning("Could not find answer: '%s' vs. '%s'",
+ actual_text, cleaned_answer_text)
+ continue
+ else:
+ start_position = -1
+ end_position = -1
+ orig_answer_text = ""
+
+ example = SquadExample(
+ qas_id=qas_id,
+ question_text=question_text,
+ doc_tokens=doc_tokens,
+ orig_answer_text=orig_answer_text,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=is_impossible)
+ examples.append(example)
+
+ return examples
+
+def _check_is_max_context(doc_spans, cur_span_index, position):
+ """Check if this is the 'max context' doc span for the token."""
+
+  # Because of the sliding window approach taken to scoring documents, a single
+  # token can appear in multiple document spans. E.g.
+ # Doc: the man went to the store and bought a gallon of milk
+ # Span A: the man went to the
+ # Span B: to the store and bought
+ # Span C: and bought a gallon of
+ # ...
+ #
+ # Now the word 'bought' will have two scores from spans B and C. We only
+ # want to consider the score with "maximum context", which we define as
+ # the *minimum* of its left and right context (the *sum* of left and
+ # right context will always be the same, of course).
+ #
+ # In the example the maximum context for 'bought' would be span C since
+ # it has 1 left context and 3 right context, while span B has 4 left context
+ # and 0 right context.
+ best_score = None
+ best_span_index = None
+ for (span_index, doc_span) in enumerate(doc_spans):
+ end = doc_span.start + doc_span.length - 1
+ if position < doc_span.start:
+ continue
+ if position > end:
+ continue
+ num_left_context = position - doc_span.start
+ num_right_context = end - position
+ score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
+ if best_score is None or score > best_score:
+ best_score = score
+ best_span_index = span_index
+
+ return cur_span_index == best_span_index
+
+def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
+ orig_answer_text):
+ """Returns tokenized answer spans that better match the annotated answer."""
+
+ # The SQuAD annotations are character based. We first project them to
+ # whitespace-tokenized words. But then after WordPiece tokenization, we can
+ # often find a "better match". For example:
+ #
+ # Question: What year was John Smith born?
+ # Context: The leader was John Smith (1895-1943).
+ # Answer: 1895
+ #
+ # The original whitespace-tokenized answer will be "(1895-1943).". However
+ # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
+ # the exact answer, 1895.
+ #
+ # However, this is not always possible. Consider the following:
+ #
+  # Question: What country is the top exporter of electronics?
+  # Context: The Japanese electronics industry is the largest in the world.
+ # Answer: Japan
+ #
+ # In this case, the annotator chose "Japan" as a character sub-span of
+ # the word "Japanese". Since our WordPiece tokenizer does not split
+ # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
+ # in SQuAD, but does happen.
+ tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
+
+ for new_start in range(input_start, input_end + 1):
+ for new_end in range(input_end, new_start - 1, -1):
+ text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
+ if text_span == tok_answer_text:
+ return (new_start, new_end)
+
+ return (input_start, input_end)
+
+
+def convert_examples_to_features(examples, tokenizer, max_seq_length,
+ doc_stride, max_query_length, is_training,
+ output_fn, verbose_logging=False):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ unique_id = 1000000000
+
+ for (example_index, example) in enumerate(examples):
+ query_tokens = tokenizer.tokenize(example.question_text)
+
+ if len(query_tokens) > max_query_length:
+ query_tokens = query_tokens[0:max_query_length]
+
+ tok_to_orig_index = []
+ orig_to_tok_index = []
+ all_doc_tokens = []
+ for (i, token) in enumerate(example.doc_tokens):
+ orig_to_tok_index.append(len(all_doc_tokens))
+ sub_tokens = tokenizer.tokenize(token)
+ for sub_token in sub_tokens:
+ tok_to_orig_index.append(i)
+ all_doc_tokens.append(sub_token)
+
+ tok_start_position = None
+ tok_end_position = None
+ if is_training and example.is_impossible:
+ tok_start_position = -1
+ tok_end_position = -1
+ if is_training and not example.is_impossible:
+ tok_start_position = orig_to_tok_index[example.start_position]
+ if example.end_position < len(example.doc_tokens) - 1:
+ tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+ else:
+ tok_end_position = len(all_doc_tokens) - 1
+ (tok_start_position, tok_end_position) = _improve_answer_span(
+ all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
+ example.orig_answer_text)
+
+ # The -3 accounts for [CLS], [SEP] and [SEP]
+ max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
+
+ # We can have documents that are longer than the maximum sequence length.
+    # To deal with this we do a sliding window approach, where we take chunks
+    # of up to our max length with a stride of `doc_stride`.
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name
+ "DocSpan", ["start", "length"])
+ doc_spans = []
+ start_offset = 0
+ while start_offset < len(all_doc_tokens):
+ length = len(all_doc_tokens) - start_offset
+ if length > max_tokens_for_doc:
+ length = max_tokens_for_doc
+ doc_spans.append(_DocSpan(start=start_offset, length=length))
+ if start_offset + length == len(all_doc_tokens):
+ break
+ start_offset += min(length, doc_stride)
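+
+    # Illustrative example (not from the original): with len(all_doc_tokens) == 10,
+    # max_tokens_for_doc == 5 and doc_stride == 3, the loop above yields
+    # DocSpan(start=0, length=5), DocSpan(start=3, length=5) and
+    # DocSpan(start=6, length=4) -- overlapping windows covering every token.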
+
+ for (doc_span_index, doc_span) in enumerate(doc_spans):
+ tokens = []
+ token_to_orig_map = {}
+ token_is_max_context = {}
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in query_tokens:
+ tokens.append(token)
+ segment_ids.append(0)
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for i in range(doc_span.length):
+ split_token_index = doc_span.start + i
+ token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
+
+ is_max_context = _check_is_max_context(doc_spans, doc_span_index,
+ split_token_index)
+ token_is_max_context[len(tokens)] = is_max_context
+ tokens.append(all_doc_tokens[split_token_index])
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ start_position = None
+ end_position = None
+ if is_training and not example.is_impossible:
+ # For training, if our document chunk does not contain an annotation
+ # we throw it out, since there is nothing to predict.
+ doc_start = doc_span.start
+ doc_end = doc_span.start + doc_span.length - 1
+ out_of_span = False
+ if not (tok_start_position >= doc_start and
+ tok_end_position <= doc_end):
+ out_of_span = True
+ if out_of_span:
+ start_position = 0
+ end_position = 0
+ else:
+ doc_offset = len(query_tokens) + 2
+ start_position = tok_start_position - doc_start + doc_offset
+ end_position = tok_end_position - doc_start + doc_offset
+
+ if is_training and example.is_impossible:
+ start_position = 0
+ end_position = 0
+
+ if verbose_logging and example_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (unique_id))
+ tf.logging.info("example_index: %s" % (example_index))
+ tf.logging.info("doc_span_index: %s" % (doc_span_index))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("token_to_orig_map: %s" % " ".join(
+ ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
+ tf.logging.info("token_is_max_context: %s" % " ".join([
+ "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)
+ ]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info(
+ "input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+ if is_training and example.is_impossible:
+ tf.logging.info("impossible example")
+ if is_training and not example.is_impossible:
+ answer_text = " ".join(tokens[start_position:(end_position + 1)])
+ tf.logging.info("start_position: %d" % (start_position))
+ tf.logging.info("end_position: %d" % (end_position))
+ tf.logging.info(
+ "answer: %s" % (tokenization.printable_text(answer_text)))
+
+ feature = InputFeatures(
+ unique_id=unique_id,
+ example_index=example_index,
+ doc_span_index=doc_span_index,
+ tokens=tokens,
+ token_to_orig_map=token_to_orig_map,
+ token_is_max_context=token_is_max_context,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ segment_ids=segment_ids,
+ start_position=start_position,
+ end_position=end_position,
+ is_impossible=example.is_impossible)
+
+ # Run callback
+ output_fn(feature)
+
+ unique_id += 1
+
+class FeatureWriter(object):
+ """Writes InputFeature to TF example file."""
+
+ def __init__(self, filename, is_training):
+ self.filename = filename
+ self.is_training = is_training
+ self.num_features = 0
+ self._writer = tf.python_io.TFRecordWriter(filename)
+
+ def process_feature(self, feature):
+ """Write a InputFeature to the TFRecordWriter as a tf.train.Example."""
+ self.num_features += 1
+
+ def create_int_feature(values):
+ feature = tf.train.Feature(
+ int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+ features = collections.OrderedDict()
+ features["unique_ids"] = create_int_feature([feature.unique_id])
+ features["input_ids"] = create_int_feature(feature.input_ids)
+ features["input_mask"] = create_int_feature(feature.input_mask)
+ features["segment_ids"] = create_int_feature(feature.segment_ids)
+
+ if self.is_training:
+ features["start_positions"] = create_int_feature([feature.start_position])
+ features["end_positions"] = create_int_feature([feature.end_position])
+ impossible = 0
+ if feature.is_impossible:
+ impossible = 1
+ features["is_impossible"] = create_int_feature([impossible])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+ self._writer.write(tf_example.SerializeToString())
+
+ def close(self):
+ self._writer.close()
+
+def main():
+
+ FLAGS = extract_flags()
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+ tf.gfile.MakeDirs(FLAGS.squad_dir + "/final_tfrecords_sharded")
+  # Convert the raw SQuAD json into TFRecord files up front so that training
+  # does not have to hold very large constant tensors in memory.
+ train_examples = read_squad_examples(
+ input_file=FLAGS.train_file, is_training=True,
+ version_2_with_negative=FLAGS.version_2_with_negative)
+ train_writer = FeatureWriter(
+ filename=os.path.join(FLAGS.squad_dir, "final_tfrecords_sharded/train.tf_record"),
+ is_training=True)
+ convert_examples_to_features(
+ examples=train_examples,
+ tokenizer=tokenizer,
+ max_seq_length=FLAGS.max_seq_length,
+ doc_stride=FLAGS.doc_stride,
+ max_query_length=FLAGS.max_query_length,
+ is_training=True,
+ output_fn=train_writer.process_feature,
+ verbose_logging=FLAGS.verbose_logging)
+ train_writer.close()
+
+
+ eval_examples = read_squad_examples(
+ input_file=FLAGS.predict_file, is_training=False,
+ version_2_with_negative=FLAGS.version_2_with_negative)
+
+ eval_writer = FeatureWriter(
+ filename=os.path.join(FLAGS.squad_dir, "final_tfrecords_sharded/eval.tf_record"),
+ is_training=False)
+ eval_features = []
+
+ def append_feature(feature):
+ eval_features.append(feature)
+ eval_writer.process_feature(feature)
+
+ convert_examples_to_features(
+ examples=eval_examples,
+ tokenizer=tokenizer,
+ max_seq_length=FLAGS.max_seq_length,
+ doc_stride=FLAGS.doc_stride,
+ max_query_length=FLAGS.max_query_length,
+ is_training=False,
+ output_fn=append_feature,
+ verbose_logging=FLAGS.verbose_logging)
+ eval_writer.close()
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/utils.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/utils.py
new file mode 100644
index 000000000..84affeebb
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/utils/utils.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+import time
+
+# report latency and throughput during eval
+class LogEvalRunHook(tf.train.SessionRunHook):
+ def __init__(self, global_batch_size, hvd_rank=-1):
+ self.global_batch_size = global_batch_size
+ self.hvd_rank = hvd_rank
+ self.total_time = 0.0
+ self.count = 0
+ self.skipped = 0
+ self.time_list = []
+
+ def before_run(self, run_context):
+ self.t0 = time.time()
+
+ def after_run(self, run_context, run_values):
+ elapsed_secs = time.time() - self.t0
+ self.count += 1
+
+    # Skip the first 2 (arbitrarily chosen) startup iterations, which carry
+    # one-time overhead, when measuring performance
+ if self.count <= 2:
+ print("Skipping time record for ", self.count, " due to overhead")
+ self.skipped += 1
+ else:
+ self.time_list.append(elapsed_secs)
+ self.total_time += elapsed_secs
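+
+  # Note (illustrative, not in the original): callers typically derive eval
+  # throughput as global_batch_size * (count - skipped) / total_time.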
+
+# report throughput during training
+class LogTrainRunHook(tf.train.SessionRunHook):
+ def __init__(self, global_batch_size, hvd_rank=-1, save_checkpoints_steps=1000):
+ self.global_batch_size = global_batch_size
+ self.hvd_rank = hvd_rank
+ self.save_checkpoints_steps = save_checkpoints_steps
+
+ self.total_time = 0.0
+ self.count = 0 # Holds number of iterations, including skipped iterations for fp16 loss scaling
+
+ def after_create_session(self, session, coord):
+ self.init_global_step = session.run(tf.train.get_global_step())
+
+ def before_run(self, run_context):
+ self.t0 = time.time()
+ return tf.train.SessionRunArgs(
+ fetches=['step_update:0'])
+
+ def after_run(self, run_context, run_values):
+ elapsed_secs = time.time() - self.t0
+ self.global_step = run_values.results[0]
+ self.count += 1
+
+    # Skip the two steps at the start of training and after every checkpoint
+    # save, which include warmup and checkpoint-saving overhead
+ if (self.global_step - self.init_global_step) % self.save_checkpoints_steps <= 1:
+ print("Skipping time record for ", self.global_step, " due to checkpoint-saving/warmup overhead")
+ else:
+ self.total_time += elapsed_secs
+
+ def end(self, session):
+ num_global_steps = self.global_step - self.init_global_step
+
+ self.skipped = (num_global_steps // self.save_checkpoints_steps) * 2 + \
+ min(2, num_global_steps % self.save_checkpoints_steps) - 1
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/CONTRIBUTING.md b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/CONTRIBUTING.md
new file mode 100644
index 000000000..124b4b32c
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/CONTRIBUTING.md
@@ -0,0 +1,31 @@
+# How to Contribute
+
+BERT needs to maintain permanent compatibility with the pre-trained model files,
+so we do not plan to make any major changes to this library (other than what was
+promised in the README). However, we can accept small patches related to
+re-factoring and documentation. To submit contributions, there are just a few
+small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+## Community Guidelines
+
+This project follows
+[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/Dockerfile b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/Dockerfile
new file mode 100644
index 000000000..0e891026e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/Dockerfile
@@ -0,0 +1,5 @@
+ARG FROM_IMAGE_NAME=ascend-tensorflow-arm:20.1.0
+FROM ${FROM_IMAGE_NAME}
+
+COPY requirements.txt .
+RUN pip3.7 install -r requirements.txt
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/LICENSE b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/LICENSE
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/NOTICE b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/NOTICE
new file mode 100644
index 000000000..917c2a631
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/NOTICE
@@ -0,0 +1,4 @@
+BERT TensorFlow
+
+This repository includes software from https://github.com/google-research/bert
+licensed under the Apache License, Version 2.0 (the "License")
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md
new file mode 100644
index 000000000..16c43a5cc
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md
@@ -0,0 +1,318 @@
+- [Basic Information](#基本信息.md)
+- [Overview](#概述.md)
+- [Training Environment Setup](#训练环境准备.md)
+- [Quick Start](#快速上手.md)
+- [Transfer Learning Guide](#迁移学习指导.md)
+- [Advanced Reference](#高级参考.md)
+## Basic Information
+
+**Publisher: Huawei**
+
+**Application Domain: Natural Language Processing**
+
+**Version: 1.1**
+
+**Modified: 2020.10.14**
+
+**Size: 1331.2M**
+
+**Framework: TensorFlow 1.15.0**
+
+**Model Format: ckpt**
+
+**Precision: Mixed**
+
+**Processor: Ascend 910**
+
+**Categories: Benchmark**
+
+**Description: BERT-Base and downstream-task code based on the TensorFlow framework**
+
+## Overview
+
+BERT is a pre-trained language model architecture released by Google in 2018. Through self-supervised training it learns context-dependent semantic encodings, and it is the foundation of many of today's NLP applications.
+
+- Reference paper:
+
+  [Devlin, J., Chang, M. W., Lee, K., & Toutanova, K. (2018). Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv
+  preprint arXiv:1810.04805.](https://arxiv.org/pdf/1810.04805.pdf)
+
+- Reference implementation:
+
+  [https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT)
+
+- Implementation adapted for the Ascend AI processor:
+
+  https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Bert-base_ID0060_for_TensorFlow
+
+- To obtain the code at the corresponding commit_id via Git:
+
+  ```
+  git clone {repository_url}        # clone the repository
+  cd {repository_name}              # enter the model code directory
+  git checkout {branch}             # switch to the corresponding branch
+  git reset --hard {commit_id}      # reset the code to the corresponding commit_id
+  cd {code_path}                    # enter the model code path; skip if the repository contains only this model
+  ```
+
+## Default Configuration
+
+- Network configuration
+
+  Learning rate: 1e-5 with polynomial decay
+
+  Optimizer: Adam
+
+  Optimizer weight decay: 0.01
+
+  Optimizer epsilon: 1e-4
+
+  Single-device batch size: 128
+
+  32-device batch size: 128*32
+
+  Total number of steps: 500000
+
+  Warmup steps: 10000
+
+- Training data preprocessing (wikipedia as an example, for reference only):
+
+  In principle the sequence length can be chosen by the user.
+
+  With the common setting of 128, 20 of the tokens are masked as targets of the autoencoding objective.
+
+  Preprocessing for downstream tasks depends on the user's needs.
+
+- Test data preprocessing (wikipedia as an example, for reference only):
+
+  Same as the training data preprocessing.
+
+## Supported Features
+
+| Feature | Supported |
+|-------|------|
+| Distributed training | Yes |
+| Mixed precision | Yes |
+| Data parallelism | Yes |
+
+
+## Mixed Precision Training
+
+The Ascend 910 AI processor provides automatic mixed precision: following a built-in optimization policy, it automatically lowers selected float32 operators across the network to float16, improving system performance and reducing memory usage with very little loss of accuracy.
+
+## Enabling Mixed Precision
+
+Sample code for enabling mixed precision:
+
+```
+ run_config = NPURunConfig(
+ model_dir=self.config.model_dir,
+ session_config=session_config,
+ keep_checkpoint_max=5,
+ save_checkpoints_steps=5000,
+ enable_data_pre_proc=True,
+ iterations_per_loop=iterations_per_loop,
+ precision_mode='allow_mix_precision',
+ hcom_parallel=True
+ )
+```
+
+
+## Training Environment Setup
+
+1. For hardware setup, see the "[Driver and Firmware Installation and Upgrade Guide](https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)" for your hardware product. The firmware and driver matching your CANN version must be installed on the device.
+2. Docker must be installed on the host, and you need to log in to the [Ascend Hub](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm) to obtain the image.
+
+   The images supported by this model are listed in [Table 1](#zh-cn_topic_0000001074498056_table1519011227314).
+
+   **Table 1** _Image list_
+
+   | Image name | Image version | Matching CANN version |
+   |------------|---------------|-----------------------|
+   | ascend-tensorflow-arm | 20.2.0 | 20.2 |
+
+## Quick Start
+
+- Click "Download Now" to download the source package.
+
+- Dataset preparation
+
+The dataset is plain text with paragraphs separated by blank lines, e.g. wikipedia.
+Run the following command to convert the dataset to tfrecord format.
+
+```
+ python src/pretrain/create_pretraining_data.py \
+ --input_file= \
+ --output_file=/some_output_data.tfrecord \
+ --vocab_file= \
+ --do_lower_case=True \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --masked_lm_prob=0.15 \
+ --random_seed=12345 \
+ --dupe_factor=5
+```
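+
+To sanity-check the generated file, the records can be inspected with a short script. The following is a minimal sketch (not part of this repository's scripts); the feature names follow create_pretraining_data.py, and the record path is a placeholder.
+
+```
+import tensorflow as tf  # TF 1.15, matching this repository
+
+record_path = "/path/to/some_output_data.tfrecord"  # hypothetical path
+for record in tf.python_io.tf_record_iterator(record_path):
+    example = tf.train.Example.FromString(record)
+    # Each example carries int64 features such as input_ids, input_mask,
+    # segment_ids, masked_lm_positions, masked_lm_ids and
+    # next_sentence_labels, plus float masked_lm_weights.
+    for name, feature in example.features.feature.items():
+        values = feature.int64_list.value or feature.float_list.value
+        print(name, list(values)[:10])
+    break  # one record is enough for a sanity check
+```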
+
+- Model training
+- Before launching training, configure the environment variables required by the program. For details, see:
+
+[Ascend 910 training platform environment variable setup](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+
+- Script modification:
+  For versions earlier than Atlas Data Center Solution V100R020C30:
+
+  Modify src/pretrain/run_pretraining.py, changing apply_grads/overflow_status_reduce_all to apply_grads/All
+
+
+  - Single-device training
+
+    1. Configure the parameters in `run_pretraining.sh` under the `scripts` directory, making sure `--input_files_dir` and `--eval_files_dir` point to the user's dataset paths, as follows:
+
+```
+  --input_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \   # training dataset path
+  --eval_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \    # evaluation dataset path
+```
+
+    2. Single-device training: run the following command in the ModelZoo_BertBase_TF directory:
+
+           bash scripts/run_pretraining.sh
+
+
+
+
+- 8-device training
+  1. Configure the parameters in `train_8p.sh` under the `scripts` directory, making sure `--input_files_dir` and `--eval_files_dir` point to the user's dataset paths, as follows:
+     ```
+     --input_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \   # training dataset path
+     --eval_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \    # evaluation dataset path
+     ```
+  2. 8-device training: run the following command in the ModelZoo_BertBase_TF directory:
+
+     ```
+     bash scripts/run_8p.sh
+     ```
+
+- Note: the downstream tasks of this Bert-base network (src/downstream) have not yet been tuned and verified.
+
+## Advanced Reference
+
+    Scripts and sample code
+    ├── configs
+    │    ├──BERT_base_64p_poc.json          // 8*8p rank table configuration file
+    │    ├──nezha_large_config.json         // NEZHA large model configuration file
+    │    ├──nezha_large_vocab.txt           // NEZHA large Chinese vocabulary
+    ├── scripts
+    │    ├──npu_set_env.sh                  // cluster configuration
+    │    ├──run_downstream_classifier.sh    // run the downstream classification task
+    │    ├──run_downstream_ner.sh           // run the downstream sequence-labeling task
+    │    ├──run_downstream_reading.sh       // run the downstream reading-comprehension task
+    │    ├──run_pretraining.sh              // single-device pretraining script
+    │    ├──run_8p.sh                       // 8-device pretraining entry script
+    │    ├──train_8p.sh                     // 8-device pretraining script
+    ├── src/downstream
+    │    ├──gpu_environment.py              // original gpu_environment settings
+    │    ├──metrics_impl.py                 // metrics_impl.py adapted for NPU
+    │    ├──modeling.py                     // NEZHA model script
+    │    ├──optimization.py                 // optimizer script
+    │    ├──reading_evaluate.py             // reading-comprehension evaluation script
+    │    ├──run_classifier.py               // downstream classification script
+    │    ├──run_ner.py                      // downstream sequence-labeling script
+    │    ├──run_reading.py                  // downstream reading-comprehension script
+    │    ├──tf_metrics.py                   // tf metrics script
+    │    ├──tokenization.py                 // tokenizer script
+    ├── src/pretrain
+    │    ├──gpu_environment.py              // original gpu_environment settings
+    │    ├──create_pretraining_data.py      // pretraining data generation script
+    │    ├──modeling.py                     // NEZHA model script
+    │    ├──optimization.py                 // optimizer script
+    │    ├──extract_features.py             // feature extraction script
+    │    ├──fp16_utils.py                   // fp16 utils script
+    │    ├──fused_layer_norm.py             // fused layer norm script
+    │    ├──run_pretraining.py              // pretraining launch script
+    │    ├──tf_metrics.py                   // tf metrics script
+    │    ├──tokenization.py                 // tokenizer script
+    │    ├──utils.py                        // utils script
+    ├── CONTRIBUTING.md                     // CONTRIBUTING.md
+    ├── LICENSE                             // LICENSE
+    ├── NOTICE                              // NOTICE
+    ├── README.md                           // documentation
+
+
+## Script Parameters
+
+
+```
+  --train_batch_size=128 \      # training batch size per NPU, default: 128
+  --learning_rate=1e-4 \        # learning rate, default: 1e-4
+  --num_warmup_steps=10000 \    # number of initial warmup training steps, default: 10000
+  --num_train_steps=500000 \    # number of training steps, single-device default: 500000
+  --input_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \   # training dataset path
+  --eval_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \    # evaluation dataset path
+  --iterations_per_loop=100 \   # iterations sunk to the device side per loop on NPU, default: 1000
+```
+
+
+## Training Process
+
+Start training with the training commands given in "Quick Start". The log below is a sample of the training output.
+
+```
+I0521 19:45:05.731803 281473752813584 basic_session_run_hooks.py:692] global_step/sec: 2.451
+I0521 19:45:05.732023 281473228546064 basic_session_run_hooks.py:260] global_step = 1323600, masked_lm_loss = 0.7687549, next_sentence_loss = 0.005564222, total_loss = 0.7743191 (81.600 sec)
+I0521 19:45:05.732058 281473117769744 basic_session_run_hooks.py:260] global_step = 1323600, masked_lm_loss = 0.74314255, next_sentence_loss = 0.023222845, total_loss = 0.7663654 (81.600 sec)
+2020-05-21 19:45:05.732132: I tf_adapter/kernels/geop_npu.cc:526] [GEOP] RunGraphAsync callback, status:0, kernel_name:GeOp15_0[ 2409us]
+I0521 19:45:05.732016 281473584246800 basic_session_run_hooks.py:692] global_step/sec: 2.451
+I0521 19:45:05.732048 281472971046928 basic_session_run_hooks.py:692] global_step/sec: 2.451
+loss_scale: loss_scale:[65536.0]
+2020-05-21 19:45:05.732378: I tf_adapter/kernels/geop_npu.cc:526] [GEOP] RunGraphAsync callback, status:0, kernel_name:GeOp15_0[ 2445us]
+loss_scale:[65536.0]
+I0521 19:45:05.732480 281473752813584 basic_session_run_hooks.py:260] global_step = 1323600, masked_lm_loss = 0.94164073, next_sentence_loss = 0.023505606, total_loss = 0.96514636 (81.600 sec)
+I0521 19:45:05.732715 281473584246800 basic_session_run_hooks.py:260] global_step = 1323600, masked_lm_loss = 0.738043, next_sentence_loss = 0.03810045, total_loss = 0.77614343 (81.599 sec)
+I0521 19:45:05.732658 281473385623568 basic_session_run_hooks.py:692] global_step/sec: 2.451
+I0521 19:45:05.732574 281473416220688 basic_session_run_hooks.py:692] global_step/sec: 2.45098
+I0521 19:45:05.732777 281472971046928 basic_session_run_hooks.py:260] global_step = 1323600, masked_lm_loss = 0.7797201, next_sentence_loss = 0.05669275, total_loss = 0.8364129 (81.600 sec)loss_scale: [65536.0]
+loss_scale:[65536.0]
+I0521 19:45:05.733291 281473385623568 basic_session_run_hooks.py:260] global_step = 1323600, masked_lm_loss = 0.8004036, next_sentence_loss = 0.12787658, total_loss = 0.9282802 (81.600 sec)[65536.0]
+
+```
+
+
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/8p.json b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/8p.json
new file mode 100644
index 000000000..5dc192400
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/8p.json
@@ -0,0 +1,15 @@
+{
+"server_count":"1",
+"server_list":[{
+ "device":[{"device_id":"0","device_ip":"192.168.100.101","rank_id":"0"},
+ {"device_id":"1","device_ip":"192.168.101.101","rank_id":"1"},
+ {"device_id":"2","device_ip":"192.168.102.101","rank_id":"2"},
+ {"device_id":"3","device_ip":"192.168.103.101","rank_id":"3"},
+ {"device_id":"4","device_ip":"192.168.100.100","rank_id":"4"},
+ {"device_id":"5","device_ip":"192.168.101.100","rank_id":"5"},
+ {"device_id":"6","device_ip":"192.168.102.100","rank_id":"6"},
+ {"device_id":"7","device_ip":"192.168.103.100","rank_id":"7"}],
+ "server_id":"127.0.0.2"}],
+"status":"completed",
+"version":"1.0"
+}
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_config.json b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_config.json
new file mode 100644
index 000000000..012ec3b35
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_config.json
@@ -0,0 +1,13 @@
+{
+ "attention_probs_dropout_prob": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "max_position_embeddings": 512,
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "type_vocab_size": 2,
+ "vocab_size": 30522
+}
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_vocab.txt b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_vocab.txt
new file mode 100644
index 000000000..ca4f97810
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_base_vocab.txt
@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+
+
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+£
+¤
+¥
+§
+©
+«
+®
+°
+±
+²
+³
+µ
+·
+¹
+º
+»
+¼
+×
+ß
+æ
+÷
+ø
+đ
+ŋ
+ɔ
+ə
+ɡ
+ʰ
+ˇ
+ˈ
+ˊ
+ˋ
+ˍ
+ː
+˙
+˚
+ˢ
+α
+β
+γ
+δ
+ε
+η
+θ
+ι
+κ
+λ
+μ
+ν
+ο
+π
+ρ
+ς
+σ
+τ
+υ
+φ
+χ
+ψ
+ω
+а
+б
+в
+г
+д
+е
+ж
+з
+и
+к
+л
+м
+н
+о
+п
+р
+с
+т
+у
+ф
+х
+ц
+ч
+ш
+ы
+ь
+я
+і
+ا
+ب
+ة
+ت
+د
+ر
+س
+ع
+ل
+م
+ن
+ه
+و
+ي
+۩
+ก
+ง
+น
+ม
+ย
+ร
+อ
+า
+เ
+๑
+་
+ღ
+ᄀ
+ᄁ
+ᄂ
+ᄃ
+ᄅ
+ᄆ
+ᄇ
+ᄈ
+ᄉ
+ᄋ
+ᄌ
+ᄎ
+ᄏ
+ᄐ
+ᄑ
+ᄒ
+ᅡ
+ᅢ
+ᅣ
+ᅥ
+ᅦ
+ᅧ
+ᅨ
+ᅩ
+ᅪ
+ᅬ
+ᅭ
+ᅮ
+ᅯ
+ᅲ
+ᅳ
+ᅴ
+ᅵ
+ᆨ
+ᆫ
+ᆯ
+ᆷ
+ᆸ
+ᆺ
+ᆻ
+ᆼ
+ᗜ
+ᵃ
+ᵉ
+ᵍ
+ᵏ
+ᵐ
+ᵒ
+ᵘ
+‖
+„
+†
+•
+‥
+‧
+
+‰
+′
+″
+‹
+›
+※
+‿
+⁄
+ⁱ
+⁺
+ⁿ
+₁
+₂
+₃
+₄
+€
+℃
+№
+™
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+←
+↑
+→
+↓
+↔
+↗
+↘
+⇒
+∀
+−
+∕
+∙
+√
+∞
+∟
+∠
+∣
+∥
+∩
+∮
+∶
+∼
+∽
+≈
+≒
+≡
+≤
+≥
+≦
+≧
+≪
+≫
+⊙
+⋅
+⋈
+⋯
+⌒
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+⑴
+⑵
+⑶
+⑷
+⑸
+⒈
+⒉
+⒊
+⒋
+ⓒ
+ⓔ
+ⓘ
+─
+━
+│
+┃
+┅
+┆
+┊
+┌
+└
+├
+┣
+═
+║
+╚
+╞
+╠
+╭
+╮
+╯
+╰
+╱
+╳
+▂
+▃
+▅
+▇
+█
+▉
+▋
+▌
+▍
+▎
+■
+□
+▪
+▫
+▬
+▲
+△
+▶
+►
+▼
+▽
+◆
+◇
+○
+◎
+●
+◕
+◠
+◢
+◤
+☀
+★
+☆
+☕
+☞
+☺
+☼
+♀
+♂
+♠
+♡
+♣
+♥
+♦
+♪
+♫
+♬
+✈
+✔
+✕
+✖
+✦
+✨
+✪
+✰
+✿
+❀
+❤
+➜
+➤
+⦿
+、
+。
+〃
+々
+〇
+〈
+〉
+《
+》
+「
+」
+『
+』
+【
+】
+〓
+〔
+〕
+〖
+〗
+〜
+〝
+〞
+ぁ
+あ
+ぃ
+い
+う
+ぇ
+え
+お
+か
+き
+く
+け
+こ
+さ
+し
+す
+せ
+そ
+た
+ち
+っ
+つ
+て
+と
+な
+に
+ぬ
+ね
+の
+は
+ひ
+ふ
+へ
+ほ
+ま
+み
+む
+め
+も
+ゃ
+や
+ゅ
+ゆ
+ょ
+よ
+ら
+り
+る
+れ
+ろ
+わ
+を
+ん
+゜
+ゝ
+ァ
+ア
+ィ
+イ
+ゥ
+ウ
+ェ
+エ
+ォ
+オ
+カ
+キ
+ク
+ケ
+コ
+サ
+シ
+ス
+セ
+ソ
+タ
+チ
+ッ
+ツ
+テ
+ト
+ナ
+ニ
+ヌ
+ネ
+ノ
+ハ
+ヒ
+フ
+ヘ
+ホ
+マ
+ミ
+ム
+メ
+モ
+ャ
+ヤ
+ュ
+ユ
+ョ
+ヨ
+ラ
+リ
+ル
+レ
+ロ
+ワ
+ヲ
+ン
+ヶ
+・
+ー
+ヽ
+ㄅ
+ㄆ
+ㄇ
+ㄉ
+ㄋ
+ㄌ
+ㄍ
+ㄎ
+ㄏ
+ㄒ
+ㄚ
+ㄛ
+ㄞ
+ㄟ
+ㄢ
+ㄤ
+ㄥ
+ㄧ
+ㄨ
+ㆍ
+㈦
+㊣
+㎡
+㗎
+一
+丁
+七
+万
+丈
+三
+上
+下
+不
+与
+丐
+丑
+专
+且
+丕
+世
+丘
+丙
+业
+丛
+东
+丝
+丞
+丟
+両
+丢
+两
+严
+並
+丧
+丨
+个
+丫
+中
+丰
+串
+临
+丶
+丸
+丹
+为
+主
+丼
+丽
+举
+丿
+乂
+乃
+久
+么
+义
+之
+乌
+乍
+乎
+乏
+乐
+乒
+乓
+乔
+乖
+乗
+乘
+乙
+乜
+九
+乞
+也
+习
+乡
+书
+乩
+买
+乱
+乳
+乾
+亀
+亂
+了
+予
+争
+事
+二
+于
+亏
+云
+互
+五
+井
+亘
+亙
+亚
+些
+亜
+亞
+亟
+亡
+亢
+交
+亥
+亦
+产
+亨
+亩
+享
+京
+亭
+亮
+亲
+亳
+亵
+人
+亿
+什
+仁
+仃
+仄
+仅
+仆
+仇
+今
+介
+仍
+从
+仏
+仑
+仓
+仔
+仕
+他
+仗
+付
+仙
+仝
+仞
+仟
+代
+令
+以
+仨
+仪
+们
+仮
+仰
+仲
+件
+价
+任
+份
+仿
+企
+伉
+伊
+伍
+伎
+伏
+伐
+休
+伕
+众
+优
+伙
+会
+伝
+伞
+伟
+传
+伢
+伤
+伦
+伪
+伫
+伯
+估
+伴
+伶
+伸
+伺
+似
+伽
+佃
+但
+佇
+佈
+位
+低
+住
+佐
+佑
+体
+佔
+何
+佗
+佘
+余
+佚
+佛
+作
+佝
+佞
+佟
+你
+佢
+佣
+佤
+佥
+佩
+佬
+佯
+佰
+佳
+併
+佶
+佻
+佼
+使
+侃
+侄
+來
+侈
+例
+侍
+侏
+侑
+侖
+侗
+供
+依
+侠
+価
+侣
+侥
+侦
+侧
+侨
+侬
+侮
+侯
+侵
+侶
+侷
+便
+係
+促
+俄
+俊
+俎
+俏
+俐
+俑
+俗
+俘
+俚
+保
+俞
+俟
+俠
+信
+俨
+俩
+俪
+俬
+俭
+修
+俯
+俱
+俳
+俸
+俺
+俾
+倆
+倉
+個
+倌
+倍
+倏
+們
+倒
+倔
+倖
+倘
+候
+倚
+倜
+借
+倡
+値
+倦
+倩
+倪
+倫
+倬
+倭
+倶
+债
+值
+倾
+偃
+假
+偈
+偉
+偌
+偎
+偏
+偕
+做
+停
+健
+側
+偵
+偶
+偷
+偻
+偽
+偿
+傀
+傅
+傍
+傑
+傘
+備
+傚
+傢
+傣
+傥
+储
+傩
+催
+傭
+傲
+傳
+債
+傷
+傻
+傾
+僅
+働
+像
+僑
+僕
+僖
+僚
+僥
+僧
+僭
+僮
+僱
+僵
+價
+僻
+儀
+儂
+億
+儆
+儉
+儋
+儒
+儕
+儘
+償
+儡
+優
+儲
+儷
+儼
+儿
+兀
+允
+元
+兄
+充
+兆
+兇
+先
+光
+克
+兌
+免
+児
+兑
+兒
+兔
+兖
+党
+兜
+兢
+入
+內
+全
+兩
+八
+公
+六
+兮
+兰
+共
+兲
+关
+兴
+兵
+其
+具
+典
+兹
+养
+兼
+兽
+冀
+内
+円
+冇
+冈
+冉
+冊
+册
+再
+冏
+冒
+冕
+冗
+写
+军
+农
+冠
+冢
+冤
+冥
+冨
+冪
+冬
+冯
+冰
+冲
+决
+况
+冶
+冷
+冻
+冼
+冽
+冾
+净
+凄
+准
+凇
+凈
+凉
+凋
+凌
+凍
+减
+凑
+凛
+凜
+凝
+几
+凡
+凤
+処
+凪
+凭
+凯
+凰
+凱
+凳
+凶
+凸
+凹
+出
+击
+函
+凿
+刀
+刁
+刃
+分
+切
+刈
+刊
+刍
+刎
+刑
+划
+列
+刘
+则
+刚
+创
+初
+删
+判
+別
+刨
+利
+刪
+别
+刮
+到
+制
+刷
+券
+刹
+刺
+刻
+刽
+剁
+剂
+剃
+則
+剉
+削
+剋
+剌
+前
+剎
+剐
+剑
+剔
+剖
+剛
+剜
+剝
+剣
+剤
+剥
+剧
+剩
+剪
+副
+割
+創
+剷
+剽
+剿
+劃
+劇
+劈
+劉
+劊
+劍
+劏
+劑
+力
+劝
+办
+功
+加
+务
+劣
+动
+助
+努
+劫
+劭
+励
+劲
+劳
+労
+劵
+効
+劾
+势
+勁
+勃
+勇
+勉
+勋
+勐
+勒
+動
+勖
+勘
+務
+勛
+勝
+勞
+募
+勢
+勤
+勧
+勳
+勵
+勸
+勺
+勻
+勾
+勿
+匀
+包
+匆
+匈
+匍
+匐
+匕
+化
+北
+匙
+匝
+匠
+匡
+匣
+匪
+匮
+匯
+匱
+匹
+区
+医
+匾
+匿
+區
+十
+千
+卅
+升
+午
+卉
+半
+卍
+华
+协
+卑
+卒
+卓
+協
+单
+卖
+南
+単
+博
+卜
+卞
+卟
+占
+卡
+卢
+卤
+卦
+卧
+卫
+卮
+卯
+印
+危
+即
+却
+卵
+卷
+卸
+卻
+卿
+厂
+厄
+厅
+历
+厉
+压
+厌
+厕
+厘
+厚
+厝
+原
+厢
+厥
+厦
+厨
+厩
+厭
+厮
+厲
+厳
+去
+县
+叁
+参
+參
+又
+叉
+及
+友
+双
+反
+収
+发
+叔
+取
+受
+变
+叙
+叛
+叟
+叠
+叡
+叢
+口
+古
+句
+另
+叨
+叩
+只
+叫
+召
+叭
+叮
+可
+台
+叱
+史
+右
+叵
+叶
+号
+司
+叹
+叻
+叼
+叽
+吁
+吃
+各
+吆
+合
+吉
+吊
+吋
+同
+名
+后
+吏
+吐
+向
+吒
+吓
+吕
+吖
+吗
+君
+吝
+吞
+吟
+吠
+吡
+否
+吧
+吨
+吩
+含
+听
+吭
+吮
+启
+吱
+吳
+吴
+吵
+吶
+吸
+吹
+吻
+吼
+吽
+吾
+呀
+呂
+呃
+呆
+呈
+告
+呋
+呎
+呐
+呓
+呕
+呗
+员
+呛
+呜
+呢
+呤
+呦
+周
+呱
+呲
+味
+呵
+呷
+呸
+呻
+呼
+命
+咀
+咁
+咂
+咄
+咆
+咋
+和
+咎
+咏
+咐
+咒
+咔
+咕
+咖
+咗
+咘
+咙
+咚
+咛
+咣
+咤
+咦
+咧
+咨
+咩
+咪
+咫
+咬
+咭
+咯
+咱
+咲
+咳
+咸
+咻
+咽
+咿
+哀
+品
+哂
+哄
+哆
+哇
+哈
+哉
+哋
+哌
+响
+哎
+哏
+哐
+哑
+哒
+哔
+哗
+哟
+員
+哥
+哦
+哧
+哨
+哩
+哪
+哭
+哮
+哲
+哺
+哼
+哽
+唁
+唄
+唆
+唇
+唉
+唏
+唐
+唑
+唔
+唠
+唤
+唧
+唬
+售
+唯
+唰
+唱
+唳
+唷
+唸
+唾
+啃
+啄
+商
+啉
+啊
+問
+啓
+啕
+啖
+啜
+啞
+啟
+啡
+啤
+啥
+啦
+啧
+啪
+啫
+啬
+啮
+啰
+啱
+啲
+啵
+啶
+啷
+啸
+啻
+啼
+啾
+喀
+喂
+喃
+善
+喆
+喇
+喉
+喊
+喋
+喎
+喏
+喔
+喘
+喙
+喚
+喜
+喝
+喟
+喧
+喪
+喫
+喬
+單
+喰
+喱
+喲
+喳
+喵
+営
+喷
+喹
+喺
+喻
+喽
+嗅
+嗆
+嗇
+嗎
+嗑
+嗒
+嗓
+嗔
+嗖
+嗚
+嗜
+嗝
+嗟
+嗡
+嗣
+嗤
+嗦
+嗨
+嗪
+嗬
+嗯
+嗰
+嗲
+嗳
+嗶
+嗷
+嗽
+嘀
+嘅
+嘆
+嘈
+嘉
+嘌
+嘍
+嘎
+嘔
+嘖
+嘗
+嘘
+嘚
+嘛
+嘜
+嘞
+嘟
+嘢
+嘣
+嘤
+嘧
+嘩
+嘭
+嘮
+嘯
+嘰
+嘱
+嘲
+嘴
+嘶
+嘸
+嘹
+嘻
+嘿
+噁
+噌
+噎
+噓
+噔
+噗
+噙
+噜
+噠
+噢
+噤
+器
+噩
+噪
+噬
+噱
+噴
+噶
+噸
+噹
+噻
+噼
+嚀
+嚇
+嚎
+嚏
+嚐
+嚓
+嚕
+嚟
+嚣
+嚥
+嚨
+嚮
+嚴
+嚷
+嚼
+囂
+囉
+囊
+囍
+囑
+囔
+囗
+囚
+四
+囝
+回
+囟
+因
+囡
+团
+団
+囤
+囧
+囪
+囫
+园
+困
+囱
+囲
+図
+围
+囹
+固
+国
+图
+囿
+圃
+圄
+圆
+圈
+國
+圍
+圏
+園
+圓
+圖
+團
+圜
+土
+圣
+圧
+在
+圩
+圭
+地
+圳
+场
+圻
+圾
+址
+坂
+均
+坊
+坍
+坎
+坏
+坐
+坑
+块
+坚
+坛
+坝
+坞
+坟
+坠
+坡
+坤
+坦
+坨
+坪
+坯
+坳
+坵
+坷
+垂
+垃
+垄
+型
+垒
+垚
+垛
+垠
+垢
+垣
+垦
+垩
+垫
+垭
+垮
+垵
+埂
+埃
+埋
+城
+埔
+埕
+埗
+域
+埠
+埤
+埵
+執
+埸
+培
+基
+埼
+堀
+堂
+堃
+堅
+堆
+堇
+堑
+堕
+堙
+堡
+堤
+堪
+堯
+堰
+報
+場
+堵
+堺
+堿
+塊
+塌
+塑
+塔
+塗
+塘
+塚
+塞
+塢
+塩
+填
+塬
+塭
+塵
+塾
+墀
+境
+墅
+墉
+墊
+墒
+墓
+増
+墘
+墙
+墜
+增
+墟
+墨
+墩
+墮
+墳
+墻
+墾
+壁
+壅
+壆
+壇
+壊
+壑
+壓
+壕
+壘
+壞
+壟
+壢
+壤
+壩
+士
+壬
+壮
+壯
+声
+売
+壳
+壶
+壹
+壺
+壽
+处
+备
+変
+复
+夏
+夔
+夕
+外
+夙
+多
+夜
+够
+夠
+夢
+夥
+大
+天
+太
+夫
+夭
+央
+夯
+失
+头
+夷
+夸
+夹
+夺
+夾
+奂
+奄
+奇
+奈
+奉
+奋
+奎
+奏
+奐
+契
+奔
+奕
+奖
+套
+奘
+奚
+奠
+奢
+奥
+奧
+奪
+奬
+奮
+女
+奴
+奶
+奸
+她
+好
+如
+妃
+妄
+妆
+妇
+妈
+妊
+妍
+妒
+妓
+妖
+妘
+妙
+妝
+妞
+妣
+妤
+妥
+妨
+妩
+妪
+妮
+妲
+妳
+妹
+妻
+妾
+姆
+姉
+姊
+始
+姍
+姐
+姑
+姒
+姓
+委
+姗
+姚
+姜
+姝
+姣
+姥
+姦
+姨
+姪
+姫
+姬
+姹
+姻
+姿
+威
+娃
+娄
+娅
+娆
+娇
+娉
+娑
+娓
+娘
+娛
+娜
+娟
+娠
+娣
+娥
+娩
+娱
+娲
+娴
+娶
+娼
+婀
+婁
+婆
+婉
+婊
+婕
+婚
+婢
+婦
+婧
+婪
+婭
+婴
+婵
+婶
+婷
+婺
+婿
+媒
+媚
+媛
+媞
+媧
+媲
+媳
+媽
+媾
+嫁
+嫂
+嫉
+嫌
+嫑
+嫔
+嫖
+嫘
+嫚
+嫡
+嫣
+嫦
+嫩
+嫲
+嫵
+嫻
+嬅
+嬉
+嬌
+嬗
+嬛
+嬢
+嬤
+嬪
+嬰
+嬴
+嬷
+嬸
+嬿
+孀
+孃
+子
+孑
+孔
+孕
+孖
+字
+存
+孙
+孚
+孛
+孜
+孝
+孟
+孢
+季
+孤
+学
+孩
+孪
+孫
+孬
+孰
+孱
+孳
+孵
+學
+孺
+孽
+孿
+宁
+它
+宅
+宇
+守
+安
+宋
+完
+宏
+宓
+宕
+宗
+官
+宙
+定
+宛
+宜
+宝
+实
+実
+宠
+审
+客
+宣
+室
+宥
+宦
+宪
+宫
+宮
+宰
+害
+宴
+宵
+家
+宸
+容
+宽
+宾
+宿
+寂
+寄
+寅
+密
+寇
+富
+寐
+寒
+寓
+寛
+寝
+寞
+察
+寡
+寢
+寥
+實
+寧
+寨
+審
+寫
+寬
+寮
+寰
+寵
+寶
+寸
+对
+寺
+寻
+导
+対
+寿
+封
+専
+射
+将
+將
+專
+尉
+尊
+尋
+對
+導
+小
+少
+尔
+尕
+尖
+尘
+尚
+尝
+尤
+尧
+尬
+就
+尴
+尷
+尸
+尹
+尺
+尻
+尼
+尽
+尾
+尿
+局
+屁
+层
+屄
+居
+屆
+屈
+屉
+届
+屋
+屌
+屍
+屎
+屏
+屐
+屑
+展
+屜
+属
+屠
+屡
+屢
+層
+履
+屬
+屯
+山
+屹
+屿
+岀
+岁
+岂
+岌
+岐
+岑
+岔
+岖
+岗
+岘
+岙
+岚
+岛
+岡
+岩
+岫
+岬
+岭
+岱
+岳
+岷
+岸
+峇
+峋
+峒
+峙
+峡
+峤
+峥
+峦
+峨
+峪
+峭
+峯
+峰
+峴
+島
+峻
+峽
+崁
+崂
+崆
+崇
+崎
+崑
+崔
+崖
+崗
+崙
+崛
+崧
+崩
+崭
+崴
+崽
+嵇
+嵊
+嵋
+嵌
+嵐
+嵘
+嵩
+嵬
+嵯
+嶂
+嶄
+嶇
+嶋
+嶙
+嶺
+嶼
+嶽
+巅
+巍
+巒
+巔
+巖
+川
+州
+巡
+巢
+工
+左
+巧
+巨
+巩
+巫
+差
+己
+已
+巳
+巴
+巷
+巻
+巽
+巾
+巿
+币
+市
+布
+帅
+帆
+师
+希
+帐
+帑
+帕
+帖
+帘
+帚
+帛
+帜
+帝
+帥
+带
+帧
+師
+席
+帮
+帯
+帰
+帳
+帶
+帷
+常
+帼
+帽
+幀
+幂
+幄
+幅
+幌
+幔
+幕
+幟
+幡
+幢
+幣
+幫
+干
+平
+年
+并
+幸
+幹
+幺
+幻
+幼
+幽
+幾
+广
+庁
+広
+庄
+庆
+庇
+床
+序
+庐
+库
+应
+底
+庖
+店
+庙
+庚
+府
+庞
+废
+庠
+度
+座
+庫
+庭
+庵
+庶
+康
+庸
+庹
+庾
+廁
+廂
+廃
+廈
+廉
+廊
+廓
+廖
+廚
+廝
+廟
+廠
+廢
+廣
+廬
+廳
+延
+廷
+建
+廿
+开
+弁
+异
+弃
+弄
+弈
+弊
+弋
+式
+弑
+弒
+弓
+弔
+引
+弗
+弘
+弛
+弟
+张
+弥
+弦
+弧
+弩
+弭
+弯
+弱
+張
+強
+弹
+强
+弼
+弾
+彅
+彆
+彈
+彌
+彎
+归
+当
+录
+彗
+彙
+彝
+形
+彤
+彥
+彦
+彧
+彩
+彪
+彫
+彬
+彭
+彰
+影
+彷
+役
+彻
+彼
+彿
+往
+征
+径
+待
+徇
+很
+徉
+徊
+律
+後
+徐
+徑
+徒
+従
+徕
+得
+徘
+徙
+徜
+從
+徠
+御
+徨
+復
+循
+徬
+微
+徳
+徴
+徵
+德
+徹
+徼
+徽
+心
+必
+忆
+忌
+忍
+忏
+忐
+忑
+忒
+忖
+志
+忘
+忙
+応
+忠
+忡
+忤
+忧
+忪
+快
+忱
+念
+忻
+忽
+忿
+怀
+态
+怂
+怅
+怆
+怎
+怏
+怒
+怔
+怕
+怖
+怙
+怜
+思
+怠
+怡
+急
+怦
+性
+怨
+怪
+怯
+怵
+总
+怼
+恁
+恃
+恆
+恋
+恍
+恐
+恒
+恕
+恙
+恚
+恢
+恣
+恤
+恥
+恨
+恩
+恪
+恫
+恬
+恭
+息
+恰
+恳
+恵
+恶
+恸
+恺
+恻
+恼
+恿
+悄
+悅
+悉
+悌
+悍
+悔
+悖
+悚
+悟
+悠
+患
+悦
+您
+悩
+悪
+悬
+悯
+悱
+悲
+悴
+悵
+悶
+悸
+悻
+悼
+悽
+情
+惆
+惇
+惊
+惋
+惑
+惕
+惘
+惚
+惜
+惟
+惠
+惡
+惦
+惧
+惨
+惩
+惫
+惬
+惭
+惮
+惯
+惰
+惱
+想
+惴
+惶
+惹
+惺
+愁
+愆
+愈
+愉
+愍
+意
+愕
+愚
+愛
+愜
+感
+愣
+愤
+愧
+愫
+愷
+愿
+慄
+慈
+態
+慌
+慎
+慑
+慕
+慘
+慚
+慟
+慢
+慣
+慧
+慨
+慫
+慮
+慰
+慳
+慵
+慶
+慷
+慾
+憂
+憊
+憋
+憎
+憐
+憑
+憔
+憚
+憤
+憧
+憨
+憩
+憫
+憬
+憲
+憶
+憾
+懂
+懇
+懈
+應
+懊
+懋
+懑
+懒
+懦
+懲
+懵
+懶
+懷
+懸
+懺
+懼
+懾
+懿
+戀
+戈
+戊
+戌
+戍
+戎
+戏
+成
+我
+戒
+戕
+或
+战
+戚
+戛
+戟
+戡
+戦
+截
+戬
+戮
+戰
+戲
+戳
+戴
+戶
+户
+戸
+戻
+戾
+房
+所
+扁
+扇
+扈
+扉
+手
+才
+扎
+扑
+扒
+打
+扔
+払
+托
+扛
+扣
+扦
+执
+扩
+扪
+扫
+扬
+扭
+扮
+扯
+扰
+扱
+扳
+扶
+批
+扼
+找
+承
+技
+抄
+抉
+把
+抑
+抒
+抓
+投
+抖
+抗
+折
+抚
+抛
+抜
+択
+抟
+抠
+抡
+抢
+护
+报
+抨
+披
+抬
+抱
+抵
+抹
+押
+抽
+抿
+拂
+拄
+担
+拆
+拇
+拈
+拉
+拋
+拌
+拍
+拎
+拐
+拒
+拓
+拔
+拖
+拗
+拘
+拙
+拚
+招
+拜
+拟
+拡
+拢
+拣
+拥
+拦
+拧
+拨
+择
+括
+拭
+拮
+拯
+拱
+拳
+拴
+拷
+拼
+拽
+拾
+拿
+持
+挂
+指
+挈
+按
+挎
+挑
+挖
+挙
+挚
+挛
+挝
+挞
+挟
+挠
+挡
+挣
+挤
+挥
+挨
+挪
+挫
+振
+挲
+挹
+挺
+挽
+挾
+捂
+捅
+捆
+捉
+捋
+捌
+捍
+捎
+捏
+捐
+捕
+捞
+损
+捡
+换
+捣
+捧
+捨
+捩
+据
+捱
+捲
+捶
+捷
+捺
+捻
+掀
+掂
+掃
+掇
+授
+掉
+掌
+掏
+掐
+排
+掖
+掘
+掙
+掛
+掠
+採
+探
+掣
+接
+控
+推
+掩
+措
+掬
+掰
+掲
+掳
+掴
+掷
+掸
+掺
+揀
+揃
+揄
+揆
+揉
+揍
+描
+提
+插
+揖
+揚
+換
+握
+揣
+揩
+揪
+揭
+揮
+援
+揶
+揸
+揹
+揽
+搀
+搁
+搂
+搅
+損
+搏
+搐
+搓
+搔
+搖
+搗
+搜
+搞
+搡
+搪
+搬
+搭
+搵
+搶
+携
+搽
+摀
+摁
+摄
+摆
+摇
+摈
+摊
+摒
+摔
+摘
+摞
+摟
+摧
+摩
+摯
+摳
+摸
+摹
+摺
+摻
+撂
+撃
+撅
+撇
+撈
+撐
+撑
+撒
+撓
+撕
+撚
+撞
+撤
+撥
+撩
+撫
+撬
+播
+撮
+撰
+撲
+撵
+撷
+撸
+撻
+撼
+撿
+擀
+擁
+擂
+擄
+擅
+擇
+擊
+擋
+操
+擎
+擒
+擔
+擘
+據
+擞
+擠
+擡
+擢
+擦
+擬
+擰
+擱
+擲
+擴
+擷
+擺
+擼
+擾
+攀
+攏
+攒
+攔
+攘
+攙
+攜
+攝
+攞
+攢
+攣
+攤
+攥
+攪
+攫
+攬
+支
+收
+攸
+改
+攻
+放
+政
+故
+效
+敌
+敍
+敎
+敏
+救
+敕
+敖
+敗
+敘
+教
+敛
+敝
+敞
+敢
+散
+敦
+敬
+数
+敲
+整
+敵
+敷
+數
+斂
+斃
+文
+斋
+斌
+斎
+斐
+斑
+斓
+斗
+料
+斛
+斜
+斟
+斡
+斤
+斥
+斧
+斩
+斫
+斬
+断
+斯
+新
+斷
+方
+於
+施
+旁
+旃
+旅
+旋
+旌
+旎
+族
+旖
+旗
+无
+既
+日
+旦
+旧
+旨
+早
+旬
+旭
+旮
+旱
+时
+旷
+旺
+旻
+昀
+昂
+昆
+昇
+昉
+昊
+昌
+明
+昏
+易
+昔
+昕
+昙
+星
+映
+春
+昧
+昨
+昭
+是
+昱
+昴
+昵
+昶
+昼
+显
+晁
+時
+晃
+晉
+晋
+晌
+晏
+晒
+晓
+晔
+晕
+晖
+晗
+晚
+晝
+晞
+晟
+晤
+晦
+晨
+晩
+普
+景
+晰
+晴
+晶
+晷
+智
+晾
+暂
+暄
+暇
+暈
+暉
+暌
+暐
+暑
+暖
+暗
+暝
+暢
+暧
+暨
+暫
+暮
+暱
+暴
+暸
+暹
+曄
+曆
+曇
+曉
+曖
+曙
+曜
+曝
+曠
+曦
+曬
+曰
+曲
+曳
+更
+書
+曹
+曼
+曾
+替
+最
+會
+月
+有
+朋
+服
+朐
+朔
+朕
+朗
+望
+朝
+期
+朦
+朧
+木
+未
+末
+本
+札
+朮
+术
+朱
+朴
+朵
+机
+朽
+杀
+杂
+权
+杆
+杈
+杉
+李
+杏
+材
+村
+杓
+杖
+杜
+杞
+束
+杠
+条
+来
+杨
+杭
+杯
+杰
+東
+杳
+杵
+杷
+杼
+松
+板
+极
+构
+枇
+枉
+枋
+析
+枕
+林
+枚
+果
+枝
+枢
+枣
+枪
+枫
+枭
+枯
+枰
+枱
+枳
+架
+枷
+枸
+柄
+柏
+某
+柑
+柒
+染
+柔
+柘
+柚
+柜
+柞
+柠
+柢
+查
+柩
+柬
+柯
+柱
+柳
+柴
+柵
+査
+柿
+栀
+栃
+栄
+栅
+标
+栈
+栉
+栋
+栎
+栏
+树
+栓
+栖
+栗
+校
+栩
+株
+样
+核
+根
+格
+栽
+栾
+桀
+桁
+桂
+桃
+桅
+框
+案
+桉
+桌
+桎
+桐
+桑
+桓
+桔
+桜
+桠
+桡
+桢
+档
+桥
+桦
+桧
+桨
+桩
+桶
+桿
+梁
+梅
+梆
+梏
+梓
+梗
+條
+梟
+梢
+梦
+梧
+梨
+梭
+梯
+械
+梳
+梵
+梶
+检
+棂
+棄
+棉
+棋
+棍
+棒
+棕
+棗
+棘
+棚
+棟
+棠
+棣
+棧
+森
+棱
+棲
+棵
+棹
+棺
+椁
+椅
+椋
+植
+椎
+椒
+検
+椪
+椭
+椰
+椹
+椽
+椿
+楂
+楊
+楓
+楔
+楚
+楝
+楞
+楠
+楣
+楨
+楫
+業
+楮
+極
+楷
+楸
+楹
+楼
+楽
+概
+榄
+榆
+榈
+榉
+榔
+榕
+榖
+榛
+榜
+榨
+榫
+榭
+榮
+榱
+榴
+榷
+榻
+槁
+槃
+構
+槌
+槍
+槎
+槐
+槓
+様
+槛
+槟
+槤
+槭
+槲
+槳
+槻
+槽
+槿
+樁
+樂
+樊
+樑
+樓
+標
+樞
+樟
+模
+樣
+権
+横
+樫
+樯
+樱
+樵
+樸
+樹
+樺
+樽
+樾
+橄
+橇
+橋
+橐
+橘
+橙
+機
+橡
+橢
+橫
+橱
+橹
+橼
+檀
+檄
+檎
+檐
+檔
+檗
+檜
+檢
+檬
+檯
+檳
+檸
+檻
+櫃
+櫚
+櫛
+櫥
+櫸
+櫻
+欄
+權
+欒
+欖
+欠
+次
+欢
+欣
+欧
+欲
+欸
+欺
+欽
+款
+歆
+歇
+歉
+歌
+歎
+歐
+歓
+歙
+歛
+歡
+止
+正
+此
+步
+武
+歧
+歩
+歪
+歯
+歲
+歳
+歴
+歷
+歸
+歹
+死
+歼
+殁
+殃
+殆
+殇
+殉
+殊
+残
+殒
+殓
+殖
+殘
+殞
+殡
+殤
+殭
+殯
+殲
+殴
+段
+殷
+殺
+殼
+殿
+毀
+毁
+毂
+毅
+毆
+毋
+母
+毎
+每
+毒
+毓
+比
+毕
+毗
+毘
+毙
+毛
+毡
+毫
+毯
+毽
+氈
+氏
+氐
+民
+氓
+气
+氖
+気
+氙
+氛
+氟
+氡
+氢
+氣
+氤
+氦
+氧
+氨
+氪
+氫
+氮
+氯
+氰
+氲
+水
+氷
+永
+氹
+氾
+汀
+汁
+求
+汆
+汇
+汉
+汎
+汐
+汕
+汗
+汙
+汛
+汝
+汞
+江
+池
+污
+汤
+汨
+汩
+汪
+汰
+汲
+汴
+汶
+汹
+決
+汽
+汾
+沁
+沂
+沃
+沅
+沈
+沉
+沌
+沏
+沐
+沒
+沓
+沖
+沙
+沛
+沟
+没
+沢
+沣
+沥
+沦
+沧
+沪
+沫
+沭
+沮
+沱
+河
+沸
+油
+治
+沼
+沽
+沾
+沿
+況
+泄
+泉
+泊
+泌
+泓
+法
+泗
+泛
+泞
+泠
+泡
+波
+泣
+泥
+注
+泪
+泫
+泮
+泯
+泰
+泱
+泳
+泵
+泷
+泸
+泻
+泼
+泽
+泾
+洁
+洄
+洋
+洒
+洗
+洙
+洛
+洞
+津
+洩
+洪
+洮
+洱
+洲
+洵
+洶
+洸
+洹
+活
+洼
+洽
+派
+流
+浃
+浄
+浅
+浆
+浇
+浊
+测
+济
+浏
+浑
+浒
+浓
+浔
+浙
+浚
+浜
+浣
+浦
+浩
+浪
+浬
+浮
+浯
+浴
+海
+浸
+涂
+涅
+涇
+消
+涉
+涌
+涎
+涓
+涔
+涕
+涙
+涛
+涝
+涞
+涟
+涠
+涡
+涣
+涤
+润
+涧
+涨
+涩
+涪
+涮
+涯
+液
+涵
+涸
+涼
+涿
+淀
+淄
+淅
+淆
+淇
+淋
+淌
+淑
+淒
+淖
+淘
+淙
+淚
+淞
+淡
+淤
+淦
+淨
+淩
+淪
+淫
+淬
+淮
+深
+淳
+淵
+混
+淹
+淺
+添
+淼
+清
+済
+渉
+渊
+渋
+渍
+渎
+渐
+渔
+渗
+渙
+渚
+減
+渝
+渠
+渡
+渣
+渤
+渥
+渦
+温
+測
+渭
+港
+渲
+渴
+游
+渺
+渾
+湃
+湄
+湊
+湍
+湖
+湘
+湛
+湟
+湧
+湫
+湮
+湯
+湳
+湾
+湿
+満
+溃
+溅
+溉
+溏
+源
+準
+溜
+溝
+溟
+溢
+溥
+溧
+溪
+溫
+溯
+溱
+溴
+溶
+溺
+溼
+滁
+滂
+滄
+滅
+滇
+滋
+滌
+滑
+滓
+滔
+滕
+滙
+滚
+滝
+滞
+滟
+满
+滢
+滤
+滥
+滦
+滨
+滩
+滬
+滯
+滲
+滴
+滷
+滸
+滾
+滿
+漁
+漂
+漆
+漉
+漏
+漓
+演
+漕
+漠
+漢
+漣
+漩
+漪
+漫
+漬
+漯
+漱
+漲
+漳
+漸
+漾
+漿
+潆
+潇
+潋
+潍
+潑
+潔
+潘
+潛
+潜
+潞
+潟
+潢
+潤
+潦
+潧
+潭
+潮
+潰
+潴
+潸
+潺
+潼
+澀
+澄
+澆
+澈
+澍
+澎
+澗
+澜
+澡
+澤
+澧
+澱
+澳
+澹
+激
+濁
+濂
+濃
+濑
+濒
+濕
+濘
+濛
+濟
+濠
+濡
+濤
+濫
+濬
+濮
+濯
+濱
+濺
+濾
+瀅
+瀆
+瀉
+瀋
+瀏
+瀑
+瀕
+瀘
+瀚
+瀛
+瀝
+瀞
+瀟
+瀧
+瀨
+瀬
+瀰
+瀾
+灌
+灏
+灑
+灘
+灝
+灞
+灣
+火
+灬
+灭
+灯
+灰
+灵
+灶
+灸
+灼
+災
+灾
+灿
+炀
+炁
+炅
+炉
+炊
+炎
+炒
+炔
+炕
+炖
+炙
+炜
+炫
+炬
+炭
+炮
+炯
+炳
+炷
+炸
+点
+為
+炼
+炽
+烁
+烂
+烃
+烈
+烊
+烏
+烘
+烙
+烛
+烟
+烤
+烦
+烧
+烨
+烩
+烫
+烬
+热
+烯
+烷
+烹
+烽
+焉
+焊
+焕
+焖
+焗
+焘
+焙
+焚
+焜
+無
+焦
+焯
+焰
+焱
+然
+焼
+煅
+煉
+煊
+煌
+煎
+煒
+煖
+煙
+煜
+煞
+煤
+煥
+煦
+照
+煨
+煩
+煮
+煲
+煸
+煽
+熄
+熊
+熏
+熒
+熔
+熙
+熟
+熠
+熨
+熬
+熱
+熵
+熹
+熾
+燁
+燃
+燄
+燈
+燉
+燊
+燎
+燒
+燔
+燕
+燙
+燜
+營
+燥
+燦
+燧
+燭
+燮
+燴
+燻
+燼
+燿
+爆
+爍
+爐
+爛
+爪
+爬
+爭
+爰
+爱
+爲
+爵
+父
+爷
+爸
+爹
+爺
+爻
+爽
+爾
+牆
+片
+版
+牌
+牍
+牒
+牙
+牛
+牝
+牟
+牠
+牡
+牢
+牦
+牧
+物
+牯
+牲
+牴
+牵
+特
+牺
+牽
+犀
+犁
+犄
+犊
+犍
+犒
+犢
+犧
+犬
+犯
+状
+犷
+犸
+犹
+狀
+狂
+狄
+狈
+狎
+狐
+狒
+狗
+狙
+狞
+狠
+狡
+狩
+独
+狭
+狮
+狰
+狱
+狸
+狹
+狼
+狽
+猎
+猕
+猖
+猗
+猙
+猛
+猜
+猝
+猥
+猩
+猪
+猫
+猬
+献
+猴
+猶
+猷
+猾
+猿
+獄
+獅
+獎
+獐
+獒
+獗
+獠
+獣
+獨
+獭
+獰
+獲
+獵
+獷
+獸
+獺
+獻
+獼
+獾
+玄
+率
+玉
+王
+玑
+玖
+玛
+玟
+玠
+玥
+玩
+玫
+玮
+环
+现
+玲
+玳
+玷
+玺
+玻
+珀
+珂
+珅
+珈
+珉
+珊
+珍
+珏
+珐
+珑
+珙
+珞
+珠
+珣
+珥
+珩
+珪
+班
+珮
+珲
+珺
+現
+球
+琅
+理
+琇
+琉
+琊
+琍
+琏
+琐
+琛
+琢
+琥
+琦
+琨
+琪
+琬
+琮
+琰
+琲
+琳
+琴
+琵
+琶
+琺
+琼
+瑀
+瑁
+瑄
+瑋
+瑕
+瑗
+瑙
+瑚
+瑛
+瑜
+瑞
+瑟
+瑠
+瑣
+瑤
+瑩
+瑪
+瑯
+瑰
+瑶
+瑾
+璀
+璁
+璃
+璇
+璉
+璋
+璎
+璐
+璜
+璞
+璟
+璧
+璨
+環
+璽
+璿
+瓊
+瓏
+瓒
+瓜
+瓢
+瓣
+瓤
+瓦
+瓮
+瓯
+瓴
+瓶
+瓷
+甄
+甌
+甕
+甘
+甙
+甚
+甜
+生
+產
+産
+甥
+甦
+用
+甩
+甫
+甬
+甭
+甯
+田
+由
+甲
+申
+电
+男
+甸
+町
+画
+甾
+畀
+畅
+界
+畏
+畑
+畔
+留
+畜
+畝
+畢
+略
+畦
+番
+畫
+異
+畲
+畳
+畴
+當
+畸
+畹
+畿
+疆
+疇
+疊
+疏
+疑
+疔
+疖
+疗
+疙
+疚
+疝
+疟
+疡
+疣
+疤
+疥
+疫
+疮
+疯
+疱
+疲
+疳
+疵
+疸
+疹
+疼
+疽
+疾
+痂
+病
+症
+痈
+痉
+痊
+痍
+痒
+痔
+痕
+痘
+痙
+痛
+痞
+痠
+痢
+痣
+痤
+痧
+痨
+痪
+痫
+痰
+痱
+痴
+痹
+痺
+痼
+痿
+瘀
+瘁
+瘋
+瘍
+瘓
+瘘
+瘙
+瘟
+瘠
+瘡
+瘢
+瘤
+瘦
+瘧
+瘩
+瘪
+瘫
+瘴
+瘸
+瘾
+療
+癇
+癌
+癒
+癖
+癜
+癞
+癡
+癢
+癣
+癥
+癫
+癬
+癮
+癱
+癲
+癸
+発
+登
+發
+白
+百
+皂
+的
+皆
+皇
+皈
+皋
+皎
+皑
+皓
+皖
+皙
+皚
+皮
+皰
+皱
+皴
+皺
+皿
+盂
+盃
+盅
+盆
+盈
+益
+盎
+盏
+盐
+监
+盒
+盔
+盖
+盗
+盘
+盛
+盜
+盞
+盟
+盡
+監
+盤
+盥
+盧
+盪
+目
+盯
+盱
+盲
+直
+相
+盹
+盼
+盾
+省
+眈
+眉
+看
+県
+眙
+眞
+真
+眠
+眦
+眨
+眩
+眯
+眶
+眷
+眸
+眺
+眼
+眾
+着
+睁
+睇
+睏
+睐
+睑
+睛
+睜
+睞
+睡
+睢
+督
+睥
+睦
+睨
+睪
+睫
+睬
+睹
+睽
+睾
+睿
+瞄
+瞅
+瞇
+瞋
+瞌
+瞎
+瞑
+瞒
+瞓
+瞞
+瞟
+瞠
+瞥
+瞧
+瞩
+瞪
+瞬
+瞭
+瞰
+瞳
+瞻
+瞼
+瞿
+矇
+矍
+矗
+矚
+矛
+矜
+矢
+矣
+知
+矩
+矫
+短
+矮
+矯
+石
+矶
+矽
+矾
+矿
+码
+砂
+砌
+砍
+砒
+研
+砖
+砗
+砚
+砝
+砣
+砥
+砧
+砭
+砰
+砲
+破
+砷
+砸
+砺
+砼
+砾
+础
+硅
+硐
+硒
+硕
+硝
+硫
+硬
+确
+硯
+硼
+碁
+碇
+碉
+碌
+碍
+碎
+碑
+碓
+碗
+碘
+碚
+碛
+碟
+碣
+碧
+碩
+碰
+碱
+碳
+碴
+確
+碼
+碾
+磁
+磅
+磊
+磋
+磐
+磕
+磚
+磡
+磨
+磬
+磯
+磲
+磷
+磺
+礁
+礎
+礙
+礡
+礦
+礪
+礫
+礴
+示
+礼
+社
+祀
+祁
+祂
+祇
+祈
+祉
+祎
+祐
+祕
+祖
+祗
+祚
+祛
+祜
+祝
+神
+祟
+祠
+祢
+祥
+票
+祭
+祯
+祷
+祸
+祺
+祿
+禀
+禁
+禄
+禅
+禍
+禎
+福
+禛
+禦
+禧
+禪
+禮
+禱
+禹
+禺
+离
+禽
+禾
+禿
+秀
+私
+秃
+秆
+秉
+秋
+种
+科
+秒
+秘
+租
+秣
+秤
+秦
+秧
+秩
+秭
+积
+称
+秸
+移
+秽
+稀
+稅
+程
+稍
+税
+稔
+稗
+稚
+稜
+稞
+稟
+稠
+稣
+種
+稱
+稲
+稳
+稷
+稹
+稻
+稼
+稽
+稿
+穀
+穂
+穆
+穌
+積
+穎
+穗
+穢
+穩
+穫
+穴
+究
+穷
+穹
+空
+穿
+突
+窃
+窄
+窈
+窍
+窑
+窒
+窓
+窕
+窖
+窗
+窘
+窜
+窝
+窟
+窠
+窥
+窦
+窨
+窩
+窪
+窮
+窯
+窺
+窿
+竄
+竅
+竇
+竊
+立
+竖
+站
+竜
+竞
+竟
+章
+竣
+童
+竭
+端
+競
+竹
+竺
+竽
+竿
+笃
+笆
+笈
+笋
+笏
+笑
+笔
+笙
+笛
+笞
+笠
+符
+笨
+第
+笹
+笺
+笼
+筆
+等
+筊
+筋
+筍
+筏
+筐
+筑
+筒
+答
+策
+筛
+筝
+筠
+筱
+筲
+筵
+筷
+筹
+签
+简
+箇
+箋
+箍
+箏
+箐
+箔
+箕
+算
+箝
+管
+箩
+箫
+箭
+箱
+箴
+箸
+節
+篁
+範
+篆
+篇
+築
+篑
+篓
+篙
+篝
+篠
+篡
+篤
+篩
+篪
+篮
+篱
+篷
+簇
+簌
+簍
+簡
+簦
+簧
+簪
+簫
+簷
+簸
+簽
+簾
+簿
+籁
+籃
+籌
+籍
+籐
+籟
+籠
+籤
+籬
+籮
+籲
+米
+类
+籼
+籽
+粄
+粉
+粑
+粒
+粕
+粗
+粘
+粟
+粤
+粥
+粧
+粪
+粮
+粱
+粲
+粳
+粵
+粹
+粼
+粽
+精
+粿
+糅
+糊
+糍
+糕
+糖
+糗
+糙
+糜
+糞
+糟
+糠
+糧
+糬
+糯
+糰
+糸
+系
+糾
+紀
+紂
+約
+紅
+紉
+紊
+紋
+納
+紐
+紓
+純
+紗
+紘
+紙
+級
+紛
+紜
+素
+紡
+索
+紧
+紫
+紮
+累
+細
+紳
+紹
+紺
+終
+絃
+組
+絆
+経
+結
+絕
+絞
+絡
+絢
+給
+絨
+絮
+統
+絲
+絳
+絵
+絶
+絹
+綁
+綏
+綑
+經
+継
+続
+綜
+綠
+綢
+綦
+綫
+綬
+維
+綱
+網
+綴
+綵
+綸
+綺
+綻
+綽
+綾
+綿
+緊
+緋
+総
+緑
+緒
+緘
+線
+緝
+緞
+締
+緣
+編
+緩
+緬
+緯
+練
+緹
+緻
+縁
+縄
+縈
+縛
+縝
+縣
+縫
+縮
+縱
+縴
+縷
+總
+績
+繁
+繃
+繆
+繇
+繋
+織
+繕
+繚
+繞
+繡
+繩
+繪
+繫
+繭
+繳
+繹
+繼
+繽
+纂
+續
+纍
+纏
+纓
+纔
+纖
+纜
+纠
+红
+纣
+纤
+约
+级
+纨
+纪
+纫
+纬
+纭
+纯
+纰
+纱
+纲
+纳
+纵
+纶
+纷
+纸
+纹
+纺
+纽
+纾
+线
+绀
+练
+组
+绅
+细
+织
+终
+绊
+绍
+绎
+经
+绑
+绒
+结
+绔
+绕
+绘
+给
+绚
+绛
+络
+绝
+绞
+统
+绡
+绢
+绣
+绥
+绦
+继
+绩
+绪
+绫
+续
+绮
+绯
+绰
+绳
+维
+绵
+绶
+绷
+绸
+绻
+综
+绽
+绾
+绿
+缀
+缄
+缅
+缆
+缇
+缈
+缉
+缎
+缓
+缔
+缕
+编
+缘
+缙
+缚
+缜
+缝
+缠
+缢
+缤
+缥
+缨
+缩
+缪
+缭
+缮
+缰
+缱
+缴
+缸
+缺
+缽
+罂
+罄
+罌
+罐
+网
+罔
+罕
+罗
+罚
+罡
+罢
+罩
+罪
+置
+罰
+署
+罵
+罷
+罹
+羁
+羅
+羈
+羊
+羌
+美
+羔
+羚
+羞
+羟
+羡
+羣
+群
+羥
+羧
+羨
+義
+羯
+羲
+羸
+羹
+羽
+羿
+翁
+翅
+翊
+翌
+翎
+習
+翔
+翘
+翟
+翠
+翡
+翦
+翩
+翰
+翱
+翳
+翹
+翻
+翼
+耀
+老
+考
+耄
+者
+耆
+耋
+而
+耍
+耐
+耒
+耕
+耗
+耘
+耙
+耦
+耨
+耳
+耶
+耷
+耸
+耻
+耽
+耿
+聂
+聆
+聊
+聋
+职
+聒
+联
+聖
+聘
+聚
+聞
+聪
+聯
+聰
+聲
+聳
+聴
+聶
+職
+聽
+聾
+聿
+肃
+肄
+肅
+肆
+肇
+肉
+肋
+肌
+肏
+肓
+肖
+肘
+肚
+肛
+肝
+肠
+股
+肢
+肤
+肥
+肩
+肪
+肮
+肯
+肱
+育
+肴
+肺
+肽
+肾
+肿
+胀
+胁
+胃
+胄
+胆
+背
+胍
+胎
+胖
+胚
+胛
+胜
+胝
+胞
+胡
+胤
+胥
+胧
+胫
+胭
+胯
+胰
+胱
+胳
+胴
+胶
+胸
+胺
+能
+脂
+脅
+脆
+脇
+脈
+脉
+脊
+脍
+脏
+脐
+脑
+脓
+脖
+脘
+脚
+脛
+脣
+脩
+脫
+脯
+脱
+脲
+脳
+脸
+脹
+脾
+腆
+腈
+腊
+腋
+腌
+腎
+腐
+腑
+腓
+腔
+腕
+腥
+腦
+腩
+腫
+腭
+腮
+腰
+腱
+腳
+腴
+腸
+腹
+腺
+腻
+腼
+腾
+腿
+膀
+膈
+膊
+膏
+膑
+膘
+膚
+膛
+膜
+膝
+膠
+膦
+膨
+膩
+膳
+膺
+膻
+膽
+膾
+膿
+臀
+臂
+臃
+臆
+臉
+臊
+臍
+臓
+臘
+臟
+臣
+臥
+臧
+臨
+自
+臬
+臭
+至
+致
+臺
+臻
+臼
+臾
+舀
+舂
+舅
+舆
+與
+興
+舉
+舊
+舌
+舍
+舎
+舐
+舒
+舔
+舖
+舗
+舛
+舜
+舞
+舟
+航
+舫
+般
+舰
+舱
+舵
+舶
+舷
+舸
+船
+舺
+舾
+艇
+艋
+艘
+艙
+艦
+艮
+良
+艰
+艱
+色
+艳
+艷
+艹
+艺
+艾
+节
+芃
+芈
+芊
+芋
+芍
+芎
+芒
+芙
+芜
+芝
+芡
+芥
+芦
+芩
+芪
+芫
+芬
+芭
+芮
+芯
+花
+芳
+芷
+芸
+芹
+芻
+芽
+芾
+苁
+苄
+苇
+苋
+苍
+苏
+苑
+苒
+苓
+苔
+苕
+苗
+苛
+苜
+苞
+苟
+苡
+苣
+若
+苦
+苫
+苯
+英
+苷
+苹
+苻
+茁
+茂
+范
+茄
+茅
+茉
+茎
+茏
+茗
+茜
+茧
+茨
+茫
+茬
+茭
+茯
+茱
+茲
+茴
+茵
+茶
+茸
+茹
+茼
+荀
+荃
+荆
+草
+荊
+荏
+荐
+荒
+荔
+荖
+荘
+荚
+荞
+荟
+荠
+荡
+荣
+荤
+荥
+荧
+荨
+荪
+荫
+药
+荳
+荷
+荸
+荻
+荼
+荽
+莅
+莆
+莉
+莊
+莎
+莒
+莓
+莖
+莘
+莞
+莠
+莢
+莧
+莪
+莫
+莱
+莲
+莴
+获
+莹
+莺
+莽
+莿
+菀
+菁
+菅
+菇
+菈
+菊
+菌
+菏
+菓
+菖
+菘
+菜
+菟
+菠
+菡
+菩
+華
+菱
+菲
+菸
+菽
+萁
+萃
+萄
+萊
+萋
+萌
+萍
+萎
+萘
+萝
+萤
+营
+萦
+萧
+萨
+萩
+萬
+萱
+萵
+萸
+萼
+落
+葆
+葉
+著
+葚
+葛
+葡
+董
+葦
+葩
+葫
+葬
+葭
+葯
+葱
+葳
+葵
+葷
+葺
+蒂
+蒋
+蒐
+蒔
+蒙
+蒜
+蒞
+蒟
+蒡
+蒨
+蒲
+蒸
+蒹
+蒻
+蒼
+蒿
+蓁
+蓄
+蓆
+蓉
+蓋
+蓑
+蓓
+蓖
+蓝
+蓟
+蓦
+蓬
+蓮
+蓼
+蓿
+蔑
+蔓
+蔔
+蔗
+蔘
+蔚
+蔡
+蔣
+蔥
+蔫
+蔬
+蔭
+蔵
+蔷
+蔺
+蔻
+蔼
+蔽
+蕁
+蕃
+蕈
+蕉
+蕊
+蕎
+蕙
+蕤
+蕨
+蕩
+蕪
+蕭
+蕲
+蕴
+蕻
+蕾
+薄
+薅
+薇
+薈
+薊
+薏
+薑
+薔
+薙
+薛
+薦
+薨
+薩
+薪
+薬
+薯
+薰
+薹
+藉
+藍
+藏
+藐
+藓
+藕
+藜
+藝
+藤
+藥
+藩
+藹
+藻
+藿
+蘆
+蘇
+蘊
+蘋
+蘑
+蘚
+蘭
+蘸
+蘼
+蘿
+虎
+虏
+虐
+虑
+虔
+處
+虚
+虛
+虜
+虞
+號
+虢
+虧
+虫
+虬
+虱
+虹
+虻
+虽
+虾
+蚀
+蚁
+蚂
+蚊
+蚌
+蚓
+蚕
+蚜
+蚝
+蚣
+蚤
+蚩
+蚪
+蚯
+蚱
+蚵
+蛀
+蛆
+蛇
+蛊
+蛋
+蛎
+蛐
+蛔
+蛙
+蛛
+蛟
+蛤
+蛭
+蛮
+蛰
+蛳
+蛹
+蛻
+蛾
+蜀
+蜂
+蜃
+蜆
+蜇
+蜈
+蜊
+蜍
+蜒
+蜓
+蜕
+蜗
+蜘
+蜚
+蜜
+蜡
+蜢
+蜥
+蜱
+蜴
+蜷
+蜻
+蜿
+蝇
+蝈
+蝉
+蝌
+蝎
+蝕
+蝗
+蝙
+蝟
+蝠
+蝦
+蝨
+蝴
+蝶
+蝸
+蝼
+螂
+螃
+融
+螞
+螢
+螨
+螯
+螳
+螺
+蟀
+蟄
+蟆
+蟋
+蟎
+蟑
+蟒
+蟠
+蟬
+蟲
+蟹
+蟻
+蟾
+蠅
+蠍
+蠔
+蠕
+蠛
+蠟
+蠡
+蠢
+蠣
+蠱
+蠶
+蠹
+蠻
+血
+衄
+衅
+衆
+行
+衍
+術
+衔
+街
+衙
+衛
+衝
+衞
+衡
+衢
+衣
+补
+表
+衩
+衫
+衬
+衮
+衰
+衲
+衷
+衹
+衾
+衿
+袁
+袂
+袄
+袅
+袈
+袋
+袍
+袒
+袖
+袜
+袞
+袤
+袪
+被
+袭
+袱
+裁
+裂
+装
+裆
+裊
+裏
+裔
+裕
+裘
+裙
+補
+裝
+裟
+裡
+裤
+裨
+裱
+裳
+裴
+裸
+裹
+製
+裾
+褂
+複
+褐
+褒
+褓
+褔
+褚
+褥
+褪
+褫
+褲
+褶
+褻
+襁
+襄
+襟
+襠
+襪
+襬
+襯
+襲
+西
+要
+覃
+覆
+覇
+見
+規
+覓
+視
+覚
+覦
+覧
+親
+覬
+観
+覷
+覺
+覽
+觀
+见
+观
+规
+觅
+视
+览
+觉
+觊
+觎
+觐
+觑
+角
+觞
+解
+觥
+触
+觸
+言
+訂
+計
+訊
+討
+訓
+訕
+訖
+託
+記
+訛
+訝
+訟
+訣
+訥
+訪
+設
+許
+訳
+訴
+訶
+診
+註
+証
+詆
+詐
+詔
+評
+詛
+詞
+詠
+詡
+詢
+詣
+試
+詩
+詫
+詬
+詭
+詮
+詰
+話
+該
+詳
+詹
+詼
+誅
+誇
+誉
+誌
+認
+誓
+誕
+誘
+語
+誠
+誡
+誣
+誤
+誥
+誦
+誨
+說
+説
+読
+誰
+課
+誹
+誼
+調
+諄
+談
+請
+諏
+諒
+論
+諗
+諜
+諡
+諦
+諧
+諫
+諭
+諮
+諱
+諳
+諷
+諸
+諺
+諾
+謀
+謁
+謂
+謄
+謊
+謎
+謐
+謔
+謗
+謙
+講
+謝
+謠
+謨
+謬
+謹
+謾
+譁
+證
+譎
+譏
+識
+譙
+譚
+譜
+警
+譬
+譯
+議
+譲
+譴
+護
+譽
+讀
+變
+讓
+讚
+讞
+计
+订
+认
+讥
+讧
+讨
+让
+讪
+讫
+训
+议
+讯
+记
+讲
+讳
+讴
+讶
+讷
+许
+讹
+论
+讼
+讽
+设
+访
+诀
+证
+诃
+评
+诅
+识
+诈
+诉
+诊
+诋
+词
+诏
+译
+试
+诗
+诘
+诙
+诚
+诛
+话
+诞
+诟
+诠
+诡
+询
+诣
+诤
+该
+详
+诧
+诩
+诫
+诬
+语
+误
+诰
+诱
+诲
+说
+诵
+诶
+请
+诸
+诺
+读
+诽
+课
+诿
+谀
+谁
+调
+谄
+谅
+谆
+谈
+谊
+谋
+谌
+谍
+谎
+谏
+谐
+谑
+谒
+谓
+谔
+谕
+谗
+谘
+谙
+谚
+谛
+谜
+谟
+谢
+谣
+谤
+谥
+谦
+谧
+谨
+谩
+谪
+谬
+谭
+谯
+谱
+谲
+谴
+谶
+谷
+豁
+豆
+豇
+豈
+豉
+豊
+豌
+豎
+豐
+豔
+豚
+象
+豢
+豪
+豫
+豬
+豹
+豺
+貂
+貅
+貌
+貓
+貔
+貘
+貝
+貞
+負
+財
+貢
+貧
+貨
+販
+貪
+貫
+責
+貯
+貰
+貳
+貴
+貶
+買
+貸
+費
+貼
+貽
+貿
+賀
+賁
+賂
+賃
+賄
+資
+賈
+賊
+賑
+賓
+賜
+賞
+賠
+賡
+賢
+賣
+賤
+賦
+質
+賬
+賭
+賴
+賺
+購
+賽
+贅
+贈
+贊
+贍
+贏
+贓
+贖
+贛
+贝
+贞
+负
+贡
+财
+责
+贤
+败
+账
+货
+质
+贩
+贪
+贫
+贬
+购
+贮
+贯
+贰
+贱
+贲
+贴
+贵
+贷
+贸
+费
+贺
+贻
+贼
+贾
+贿
+赁
+赂
+赃
+资
+赅
+赈
+赊
+赋
+赌
+赎
+赏
+赐
+赓
+赔
+赖
+赘
+赚
+赛
+赝
+赞
+赠
+赡
+赢
+赣
+赤
+赦
+赧
+赫
+赭
+走
+赳
+赴
+赵
+赶
+起
+趁
+超
+越
+趋
+趕
+趙
+趟
+趣
+趨
+足
+趴
+趵
+趸
+趺
+趾
+跃
+跄
+跆
+跋
+跌
+跎
+跑
+跖
+跚
+跛
+距
+跟
+跡
+跤
+跨
+跩
+跪
+路
+跳
+践
+跷
+跹
+跺
+跻
+踉
+踊
+踌
+踏
+踐
+踝
+踞
+踟
+踢
+踩
+踪
+踮
+踱
+踴
+踵
+踹
+蹂
+蹄
+蹇
+蹈
+蹉
+蹊
+蹋
+蹑
+蹒
+蹙
+蹟
+蹣
+蹤
+蹦
+蹩
+蹬
+蹭
+蹲
+蹴
+蹶
+蹺
+蹼
+蹿
+躁
+躇
+躉
+躊
+躋
+躍
+躏
+躪
+身
+躬
+躯
+躲
+躺
+軀
+車
+軋
+軌
+軍
+軒
+軟
+転
+軸
+軼
+軽
+軾
+較
+載
+輒
+輓
+輔
+輕
+輛
+輝
+輟
+輩
+輪
+輯
+輸
+輻
+輾
+輿
+轄
+轅
+轆
+轉
+轍
+轎
+轟
+车
+轧
+轨
+轩
+转
+轭
+轮
+软
+轰
+轲
+轴
+轶
+轻
+轼
+载
+轿
+较
+辄
+辅
+辆
+辇
+辈
+辉
+辊
+辍
+辐
+辑
+输
+辕
+辖
+辗
+辘
+辙
+辛
+辜
+辞
+辟
+辣
+辦
+辨
+辩
+辫
+辭
+辮
+辯
+辰
+辱
+農
+边
+辺
+辻
+込
+辽
+达
+迁
+迂
+迄
+迅
+过
+迈
+迎
+运
+近
+返
+还
+这
+进
+远
+违
+连
+迟
+迢
+迤
+迥
+迦
+迩
+迪
+迫
+迭
+述
+迴
+迷
+迸
+迹
+迺
+追
+退
+送
+适
+逃
+逅
+逆
+选
+逊
+逍
+透
+逐
+递
+途
+逕
+逗
+這
+通
+逛
+逝
+逞
+速
+造
+逢
+連
+逮
+週
+進
+逵
+逶
+逸
+逻
+逼
+逾
+遁
+遂
+遅
+遇
+遊
+運
+遍
+過
+遏
+遐
+遑
+遒
+道
+達
+違
+遗
+遙
+遛
+遜
+遞
+遠
+遢
+遣
+遥
+遨
+適
+遭
+遮
+遲
+遴
+遵
+遶
+遷
+選
+遺
+遼
+遽
+避
+邀
+邁
+邂
+邃
+還
+邇
+邈
+邊
+邋
+邏
+邑
+邓
+邕
+邛
+邝
+邢
+那
+邦
+邨
+邪
+邬
+邮
+邯
+邰
+邱
+邳
+邵
+邸
+邹
+邺
+邻
+郁
+郅
+郊
+郎
+郑
+郜
+郝
+郡
+郢
+郤
+郦
+郧
+部
+郫
+郭
+郴
+郵
+郷
+郸
+都
+鄂
+鄉
+鄒
+鄔
+鄙
+鄞
+鄢
+鄧
+鄭
+鄰
+鄱
+鄲
+鄺
+酉
+酊
+酋
+酌
+配
+酐
+酒
+酗
+酚
+酝
+酢
+酣
+酥
+酩
+酪
+酬
+酮
+酯
+酰
+酱
+酵
+酶
+酷
+酸
+酿
+醃
+醇
+醉
+醋
+醍
+醐
+醒
+醚
+醛
+醜
+醞
+醣
+醪
+醫
+醬
+醮
+醯
+醴
+醺
+釀
+釁
+采
+釉
+释
+釋
+里
+重
+野
+量
+釐
+金
+釗
+釘
+釜
+針
+釣
+釦
+釧
+釵
+鈀
+鈉
+鈍
+鈎
+鈔
+鈕
+鈞
+鈣
+鈦
+鈪
+鈴
+鈺
+鈾
+鉀
+鉄
+鉅
+鉉
+鉑
+鉗
+鉚
+鉛
+鉤
+鉴
+鉻
+銀
+銃
+銅
+銑
+銓
+銖
+銘
+銜
+銬
+銭
+銮
+銳
+銷
+銹
+鋁
+鋅
+鋒
+鋤
+鋪
+鋰
+鋸
+鋼
+錄
+錐
+錘
+錚
+錠
+錢
+錦
+錨
+錫
+錮
+錯
+録
+錳
+錶
+鍊
+鍋
+鍍
+鍛
+鍥
+鍰
+鍵
+鍺
+鍾
+鎂
+鎊
+鎌
+鎏
+鎔
+鎖
+鎗
+鎚
+鎧
+鎬
+鎮
+鎳
+鏈
+鏖
+鏗
+鏘
+鏞
+鏟
+鏡
+鏢
+鏤
+鏽
+鐘
+鐮
+鐲
+鐳
+鐵
+鐸
+鐺
+鑄
+鑊
+鑑
+鑒
+鑣
+鑫
+鑰
+鑲
+鑼
+鑽
+鑾
+鑿
+针
+钉
+钊
+钎
+钏
+钒
+钓
+钗
+钙
+钛
+钜
+钝
+钞
+钟
+钠
+钡
+钢
+钣
+钤
+钥
+钦
+钧
+钨
+钩
+钮
+钯
+钰
+钱
+钳
+钴
+钵
+钺
+钻
+钼
+钾
+钿
+铀
+铁
+铂
+铃
+铄
+铅
+铆
+铉
+铎
+铐
+铛
+铜
+铝
+铠
+铡
+铢
+铣
+铤
+铨
+铩
+铬
+铭
+铮
+铰
+铲
+铵
+银
+铸
+铺
+链
+铿
+销
+锁
+锂
+锄
+锅
+锆
+锈
+锉
+锋
+锌
+锏
+锐
+锑
+错
+锚
+锟
+锡
+锢
+锣
+锤
+锥
+锦
+锭
+键
+锯
+锰
+锲
+锵
+锹
+锺
+锻
+镀
+镁
+镂
+镇
+镉
+镌
+镍
+镐
+镑
+镕
+镖
+镗
+镛
+镜
+镣
+镭
+镯
+镰
+镳
+镶
+長
+长
+門
+閃
+閉
+開
+閎
+閏
+閑
+閒
+間
+閔
+閘
+閡
+関
+閣
+閥
+閨
+閩
+閱
+閲
+閹
+閻
+閾
+闆
+闇
+闊
+闌
+闍
+闔
+闕
+闖
+闘
+關
+闡
+闢
+门
+闪
+闫
+闭
+问
+闯
+闰
+闲
+间
+闵
+闷
+闸
+闹
+闺
+闻
+闽
+闾
+阀
+阁
+阂
+阅
+阆
+阇
+阈
+阉
+阎
+阐
+阑
+阔
+阕
+阖
+阙
+阚
+阜
+队
+阡
+阪
+阮
+阱
+防
+阳
+阴
+阵
+阶
+阻
+阿
+陀
+陂
+附
+际
+陆
+陇
+陈
+陋
+陌
+降
+限
+陕
+陛
+陝
+陞
+陟
+陡
+院
+陣
+除
+陨
+险
+陪
+陰
+陲
+陳
+陵
+陶
+陷
+陸
+険
+陽
+隅
+隆
+隈
+隊
+隋
+隍
+階
+随
+隐
+隔
+隕
+隘
+隙
+際
+障
+隠
+隣
+隧
+隨
+險
+隱
+隴
+隶
+隸
+隻
+隼
+隽
+难
+雀
+雁
+雄
+雅
+集
+雇
+雉
+雋
+雌
+雍
+雎
+雏
+雑
+雒
+雕
+雖
+雙
+雛
+雜
+雞
+離
+難
+雨
+雪
+雯
+雰
+雲
+雳
+零
+雷
+雹
+電
+雾
+需
+霁
+霄
+霆
+震
+霈
+霉
+霊
+霍
+霎
+霏
+霑
+霓
+霖
+霜
+霞
+霧
+霭
+霰
+露
+霸
+霹
+霽
+霾
+靂
+靄
+靈
+青
+靓
+靖
+静
+靚
+靛
+靜
+非
+靠
+靡
+面
+靥
+靦
+革
+靳
+靴
+靶
+靼
+鞅
+鞋
+鞍
+鞏
+鞑
+鞘
+鞠
+鞣
+鞦
+鞭
+韆
+韋
+韌
+韓
+韜
+韦
+韧
+韩
+韬
+韭
+音
+韵
+韶
+韻
+響
+頁
+頂
+頃
+項
+順
+須
+頌
+預
+頑
+頒
+頓
+頗
+領
+頜
+頡
+頤
+頫
+頭
+頰
+頷
+頸
+頹
+頻
+頼
+顆
+題
+額
+顎
+顏
+顔
+願
+顛
+類
+顧
+顫
+顯
+顱
+顴
+页
+顶
+顷
+项
+顺
+须
+顼
+顽
+顾
+顿
+颁
+颂
+预
+颅
+领
+颇
+颈
+颉
+颊
+颌
+颍
+颐
+频
+颓
+颔
+颖
+颗
+题
+颚
+颛
+颜
+额
+颞
+颠
+颡
+颢
+颤
+颦
+颧
+風
+颯
+颱
+颳
+颶
+颼
+飄
+飆
+风
+飒
+飓
+飕
+飘
+飙
+飚
+飛
+飞
+食
+飢
+飨
+飩
+飪
+飯
+飲
+飼
+飽
+飾
+餃
+餅
+餉
+養
+餌
+餐
+餒
+餓
+餘
+餚
+餛
+餞
+餡
+館
+餮
+餵
+餾
+饅
+饈
+饋
+饌
+饍
+饑
+饒
+饕
+饗
+饞
+饥
+饨
+饪
+饬
+饭
+饮
+饯
+饰
+饱
+饲
+饴
+饵
+饶
+饷
+饺
+饼
+饽
+饿
+馀
+馁
+馄
+馅
+馆
+馈
+馋
+馍
+馏
+馒
+馔
+首
+馗
+香
+馥
+馨
+馬
+馭
+馮
+馳
+馴
+駁
+駄
+駅
+駆
+駐
+駒
+駕
+駛
+駝
+駭
+駱
+駿
+騁
+騎
+騏
+験
+騙
+騨
+騰
+騷
+驀
+驅
+驊
+驍
+驒
+驕
+驗
+驚
+驛
+驟
+驢
+驥
+马
+驭
+驮
+驯
+驰
+驱
+驳
+驴
+驶
+驷
+驸
+驹
+驻
+驼
+驾
+驿
+骁
+骂
+骄
+骅
+骆
+骇
+骈
+骊
+骋
+验
+骏
+骐
+骑
+骗
+骚
+骛
+骜
+骞
+骠
+骡
+骤
+骥
+骧
+骨
+骯
+骰
+骶
+骷
+骸
+骼
+髂
+髅
+髋
+髏
+髒
+髓
+體
+髖
+高
+髦
+髪
+髮
+髯
+髻
+鬃
+鬆
+鬍
+鬓
+鬚
+鬟
+鬢
+鬣
+鬥
+鬧
+鬱
+鬼
+魁
+魂
+魄
+魅
+魇
+魍
+魏
+魔
+魘
+魚
+魯
+魷
+鮑
+鮨
+鮪
+鮭
+鮮
+鯉
+鯊
+鯖
+鯛
+鯨
+鯰
+鯽
+鰍
+鰓
+鰭
+鰲
+鰻
+鰾
+鱈
+鱉
+鱔
+鱗
+鱷
+鱸
+鱼
+鱿
+鲁
+鲈
+鲍
+鲑
+鲛
+鲜
+鲟
+鲢
+鲤
+鲨
+鲫
+鲱
+鲲
+鲶
+鲷
+鲸
+鳃
+鳄
+鳅
+鳌
+鳍
+鳕
+鳖
+鳗
+鳝
+鳞
+鳥
+鳩
+鳳
+鳴
+鳶
+鴉
+鴕
+鴛
+鴦
+鴨
+鴻
+鴿
+鵑
+鵜
+鵝
+鵡
+鵬
+鵰
+鵲
+鶘
+鶩
+鶯
+鶴
+鷗
+鷲
+鷹
+鷺
+鸚
+鸞
+鸟
+鸠
+鸡
+鸢
+鸣
+鸥
+鸦
+鸨
+鸪
+鸭
+鸯
+鸳
+鸵
+鸽
+鸾
+鸿
+鹂
+鹃
+鹄
+鹅
+鹈
+鹉
+鹊
+鹌
+鹏
+鹑
+鹕
+鹘
+鹜
+鹞
+鹤
+鹦
+鹧
+鹫
+鹭
+鹰
+鹳
+鹵
+鹹
+鹼
+鹽
+鹿
+麂
+麋
+麒
+麓
+麗
+麝
+麟
+麥
+麦
+麩
+麴
+麵
+麸
+麺
+麻
+麼
+麽
+麾
+黃
+黄
+黍
+黎
+黏
+黑
+黒
+黔
+默
+黛
+黜
+黝
+點
+黠
+黨
+黯
+黴
+鼋
+鼎
+鼐
+鼓
+鼠
+鼬
+鼹
+鼻
+鼾
+齁
+齊
+齋
+齐
+齒
+齡
+齢
+齣
+齦
+齿
+龄
+龅
+龈
+龊
+龋
+龌
+龍
+龐
+龔
+龕
+龙
+龚
+龛
+龜
+龟
+︰
+︱
+︶
+︿
+﹁
+﹂
+﹍
+﹏
+﹐
+﹑
+﹒
+﹔
+﹕
+﹖
+﹗
+﹙
+﹚
+﹝
+﹞
+﹡
+﹣
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+。
+「
+」
+、
+・
+ッ
+ー
+イ
+ク
+シ
+ス
+ト
+ノ
+フ
+ラ
+ル
+ン
+゙
+゚
+ ̄
+¥
+👍
+🔥
+😂
+😎
+...
+yam
+10
+2017
+12
+11
+2016
+20
+30
+15
+06
+lofter
+##s
+2015
+by
+16
+14
+18
+13
+24
+17
+2014
+21
+##0
+22
+19
+25
+23
+com
+100
+00
+05
+2013
+##a
+03
+09
+08
+28
+##2
+50
+01
+04
+##1
+27
+02
+2012
+##3
+26
+##e
+07
+##8
+##5
+##6
+##4
+##9
+##7
+29
+2011
+40
+##t
+2010
+##o
+##d
+##i
+2009
+##n
+app
+www
+the
+##m
+31
+##c
+##l
+##y
+##r
+##g
+2008
+60
+http
+200
+qq
+##p
+80
+##f
+google
+pixnet
+90
+cookies
+tripadvisor
+500
+##er
+##k
+35
+##h
+facebook
+2007
+2000
+70
+##b
+of
+##x
+##u
+45
+300
+iphone
+32
+1000
+2006
+48
+ip
+36
+in
+38
+3d
+##w
+##ing
+55
+ctrip
+##on
+##v
+33
+##の
+to
+34
+400
+id
+2005
+it
+37
+windows
+llc
+top
+99
+42
+39
+000
+led
+at
+##an
+41
+51
+52
+46
+49
+43
+53
+44
+##z
+android
+58
+and
+59
+2004
+56
+vr
+##か
+5000
+2003
+47
+blogthis
+twitter
+54
+##le
+150
+ok
+2018
+57
+75
+cn
+no
+ios
+##in
+##mm
+##00
+800
+on
+te
+3000
+65
+2001
+360
+95
+ig
+lv
+120
+##ng
+##を
+##us
+##に
+pc
+てす
+──
+600
+##te
+85
+2002
+88
+##ed
+html
+ncc
+wifi
+email
+64
+blog
+is
+##10
+##て
+mail
+online
+##al
+dvd
+##ic
+studio
+##は
+##℃
+##ia
+##と
+line
+vip
+72
+##q
+98
+##ce
+##en
+for
+##is
+##ra
+##es
+##j
+usb
+net
+cp
+1999
+asia
+4g
+##cm
+diy
+new
+3c
+##お
+ta
+66
+language
+vs
+apple
+tw
+86
+web
+##ne
+ipad
+62
+you
+##re
+101
+68
+##tion
+ps
+de
+bt
+pony
+atm
+##2017
+1998
+67
+##ch
+ceo
+##or
+go
+##na
+av
+pro
+cafe
+96
+pinterest
+97
+63
+pixstyleme3c
+##ta
+more
+said
+##2016
+1997
+mp3
+700
+##ll
+nba
+jun
+##20
+92
+tv
+1995
+pm
+61
+76
+nbsp
+250
+##ie
+linux
+##ma
+cd
+110
+hd
+##17
+78
+##ion
+77
+6000
+am
+##th
+##st
+94
+##se
+##et
+69
+180
+gdp
+my
+105
+81
+abc
+89
+flash
+79
+one
+93
+1990
+1996
+##ck
+gps
+##も
+##ly
+web885
+106
+2020
+91
+##ge
+4000
+1500
+xd
+boss
+isbn
+1994
+org
+##ry
+me
+love
+##11
+0fork
+73
+##12
+3g
+##ter
+##ar
+71
+82
+##la
+hotel
+130
+1970
+pk
+83
+87
+140
+ie
+##os
+##30
+##el
+74
+##50
+seo
+cpu
+##ml
+p2p
+84
+may
+##る
+sun
+tue
+internet
+cc
+posted
+youtube
+##at
+##ン
+##man
+ii
+##ル
+##15
+abs
+nt
+pdf
+yahoo
+ago
+1980
+##it
+news
+mac
+104
+##てす
+##me
+##り
+java
+1992
+spa
+##de
+##nt
+hk
+all
+plus
+la
+1993
+##mb
+##16
+##ve
+west
+##da
+160
+air
+##い
+##ps
+から
+##to
+1989
+logo
+htc
+php
+https
+fi
+momo
+##son
+sat
+##ke
+##80
+ebd
+suv
+wi
+day
+apk
+##88
+##um
+mv
+galaxy
+wiki
+or
+brake
+##ス
+1200
+する
+this
+1991
+mon
+##こ
+❤2017
+po
+##ない
+javascript
+life
+home
+june
+##ss
+system
+900
+##ー
+##0
+pp
+1988
+world
+fb
+4k
+br
+##as
+ic
+ai
+leonardo
+safari
+##60
+live
+free
+xx
+wed
+win7
+kiehl
+##co
+lg
+o2o
+##go
+us
+235
+1949
+mm
+しい
+vfm
+kanye
+##90
+##2015
+##id
+jr
+##ey
+123
+rss
+##sa
+##ro
+##am
+##no
+thu
+fri
+350
+##sh
+##ki
+103
+comments
+name
+##のて
+##pe
+##ine
+max
+1987
+8000
+uber
+##mi
+##ton
+wordpress
+office
+1986
+1985
+##ment
+107
+bd
+win10
+##ld
+##li
+gmail
+bb
+dior
+##rs
+##ri
+##rd
+##ます
+up
+cad
+##®
+dr
+して
+read
+##21
+をお
+##io
+##99
+url
+1984
+pvc
+paypal
+show
+policy
+##40
+##ty
+##18
+with
+##★
+##01
+txt
+102
+##ba
+dna
+from
+post
+mini
+ar
+taiwan
+john
+##ga
+privacy
+agoda
+##13
+##ny
+word
+##24
+##22
+##by
+##ur
+##hz
+1982
+##ang
+265
+cookie
+netscape
+108
+##ka
+##~
+##ad
+house
+share
+note
+ibm
+code
+hello
+nike
+sim
+survey
+##016
+1979
+1950
+wikia
+##32
+##017
+5g
+cbc
+##tor
+##kg
+1983
+##rt
+##14
+campaign
+store
+2500
+os
+##ct
+##ts
+##°
+170
+api
+##ns
+365
+excel
+##な
+##ao
+##ら
+##し
+~~
+##nd
+university
+163
+には
+518
+##70
+##ya
+##il
+##25
+pierre
+ipo
+0020
+897
+##23
+hotels
+##ian
+のお
+125
+years
+6606
+##ers
+##26
+high
+##day
+time
+##ay
+bug
+##line
+##く
+##す
+##be
+xp
+talk2yam
+yamservice
+10000
+coco
+##dy
+sony
+##ies
+1978
+microsoft
+david
+people
+##ha
+1960
+instagram
+intel
+その
+##ot
+iso
+1981
+##va
+115
+##mo
+##land
+xxx
+man
+co
+ltxsw
+##ation
+baby
+220
+##pa
+##ol
+1945
+7000
+tag
+450
+##ue
+msn
+##31
+oppo
+##ト
+##ca
+control
+##om
+st
+chrome
+##ure
+##ん
+be
+##き
+lol
+##19
+した
+##bo
+240
+lady
+##100
+##way
+##から
+4600
+##ko
+##do
+##un
+4s
+corporation
+168
+##ni
+herme
+##28
+cp
+978
+##up
+##06
+ui
+##ds
+ppt
+admin
+three
+します
+bbc
+re
+128
+##48
+ca
+##015
+##35
+hp
+##ee
+tpp
+##た
+##ive
+××
+root
+##cc
+##ました
+##ble
+##ity
+adobe
+park
+114
+et
+oled
+city
+##ex
+##ler
+##ap
+china
+##book
+20000
+view
+##ice
+global
+##km
+your
+hong
+##mg
+out
+##ms
+ng
+ebay
+##29
+menu
+ubuntu
+##cy
+rom
+##view
+open
+ktv
+do
+server
+##lo
+if
+english
+##ね
+##5
+##oo
+1600
+##02
+step1
+kong
+club
+135
+july
+inc
+1976
+mr
+hi
+##net
+touch
+##ls
+##ii
+michael
+lcd
+##05
+##33
+phone
+james
+step2
+1300
+ios9
+##box
+dc
+##2
+##ley
+samsung
+111
+280
+pokemon
+css
+##ent
+##les
+いいえ
+##1
+s8
+atom
+play
+bmw
+##said
+sa
+etf
+ctrl
+♥yoyo♥
+##55
+2025
+##2014
+##66
+adidas
+amazon
+1958
+##ber
+##ner
+visa
+##77
+##der
+1800
+connectivity
+##hi
+firefox
+109
+118
+hr
+so
+style
+mark
+pop
+ol
+skip
+1975
+as
+##27
+##ir
+##61
+190
+mba
+##う
+##ai
+le
+##ver
+1900
+cafe2017
+lte
+super
+113
+129
+##ron
+amd
+like
+##☆
+are
+##ster
+we
+##sk
+paul
+data
+international
+##ft
+longchamp
+ssd
+good
+##ート
+##ti
+reply
+##my
+↓↓↓
+apr
+star
+##ker
+source
+136
+js
+112
+get
+force
+photo
+##one
+126
+##2013
+##ow
+link
+bbs
+1972
+goods
+##lin
+python
+119
+##ip
+game
+##ics
+##ません
+blue
+##●
+520
+##45
+page
+itunes
+##03
+1955
+260
+1968
+gt
+gif
+618
+##ff
+##47
+group
+くたさい
+about
+bar
+ganji
+##nce
+music
+lee
+not
+1977
+1971
+1973
+##per
+an
+faq
+comment
+##って
+days
+##ock
+116
+##bs
+1974
+1969
+v1
+player
+1956
+xbox
+sql
+fm
+f1
+139
+##ah
+210
+##lv
+##mp
+##000
+melody
+1957
+##3
+550
+17life
+199
+1966
+xml
+market
+##au
+##71
+999
+##04
+what
+gl
+##95
+##age
+tips
+##68
+book
+##ting
+mysql
+can
+1959
+230
+##ung
+wonderland
+watch
+10℃
+##ction
+9000
+mar
+mobile
+1946
+1962
+article
+##db
+part
+▲top
+party
+って
+1967
+1964
+1948
+##07
+##ore
+##op
+この
+dj
+##78
+##38
+010
+main
+225
+1965
+##ong
+art
+320
+ad
+134
+020
+##73
+117
+pm2
+japan
+228
+##08
+ts
+1963
+##ica
+der
+sm
+##36
+2019
+##wa
+ct
+##7
+##や
+##64
+1937
+homemesh
+search
+##85
+##れは
+##tv
+##di
+macbook
+##9
+##くたさい
+service
+##♥
+type
+った
+750
+##ier
+##si
+##75
+##います
+##ok
+best
+##ット
+goris
+lock
+##った
+cf
+3m
+big
+##ut
+ftp
+carol
+##vi
+10
+1961
+happy
+sd
+##ac
+122
+anti
+pe
+cnn
+iii
+1920
+138
+##ラ
+1940
+esp
+jan
+tags
+##98
+##51
+august
+vol
+##86
+154
+##™
+##fs
+##れ
+##sion
+design
+ac
+##ム
+press
+jordan
+ppp
+that
+key
+check
+##6
+##tt
+##㎡
+1080p
+##lt
+power
+##42
+1952
+##bc
+vivi
+##ック
+he
+133
+121
+jpg
+##rry
+201
+175
+3500
+1947
+nb
+##ted
+##rn
+しています
+1954
+usd
+##t00
+master
+##ンク
+001
+model
+##58
+al
+##09
+1953
+##34
+ram
+goo
+ても
+##ui
+127
+1930
+red
+##ary
+rpg
+item
+##pm
+##41
+270
+##za
+project
+##2012
+hot
+td
+blogabstract
+##ger
+##62
+650
+##44
+gr2
+##します
+##m
+black
+electronic
+nfc
+year
+asus
+また
+html5
+cindy
+##hd
+m3
+132
+esc
+##od
+booking
+##53
+fed
+tvb
+##81
+##ina
+mit
+165
+##いる
+chan
+192
+distribution
+next
+になる
+peter
+bios
+steam
+cm
+1941
+にも
+pk10
+##ix
+##65
+##91
+dec
+nasa
+##ana
+icecat
+00z
+b1
+will
+##46
+li
+se
+##ji
+##み
+##ard
+oct
+##ain
+jp
+##ze
+##bi
+cio
+##56
+smart
+h5
+##39
+##port
+curve
+vpn
+##nm
+##dia
+utc
+##あり
+12345678910
+##52
+rmvb
+chanel
+a4
+miss
+##and
+##im
+media
+who
+##63
+she
+girl
+5s
+124
+vera
+##して
+class
+vivo
+king
+##フ
+##ei
+national
+ab
+1951
+5cm
+888
+145
+ipod
+ap
+1100
+5mm
+211
+ms
+2756
+##69
+mp4
+msci
+##po
+##89
+131
+mg
+index
+380
+##bit
+##out
+##zz
+##97
+##67
+158
+apec
+##8
+photoshop
+opec
+¥799
+ては
+##96
+##tes
+##ast
+2g
+○○
+##ール
+¥2899
+##ling
+##よ
+##ory
+1938
+##ical
+kitty
+content
+##43
+step3
+##cn
+win8
+155
+vc
+1400
+iphone7
+robert
+##した
+tcl
+137
+beauty
+##87
+en
+dollars
+##ys
+##oc
+step
+pay
+yy
+a1
+##2011
+##lly
+##ks
+##♪
+1939
+188
+download
+1944
+sep
+exe
+ph
+います
+school
+gb
+center
+pr
+street
+##board
+uv
+##37
+##lan
+winrar
+##que
+##ua
+##com
+1942
+1936
+480
+gpu
+##4
+ettoday
+fu
+tom
+##54
+##ren
+##via
+149
+##72
+b2b
+144
+##79
+##tch
+rose
+arm
+mb
+##49
+##ial
+##nn
+nvidia
+step4
+mvp
+00㎡
+york
+156
+##イ
+how
+cpi
+591
+2765
+gov
+kg
+joe
+##xx
+mandy
+pa
+##ser
+copyright
+fashion
+1935
+don
+##け
+ecu
+##ist
+##art
+erp
+wap
+have
+##lm
+talk
+##ek
+##ning
+##if
+ch
+##ite
+video
+1943
+cs
+san
+iot
+look
+##84
+##2010
+##ku
+october
+##ux
+trump
+##hs
+##ide
+box
+141
+first
+##ins
+april
+##ight
+##83
+185
+angel
+protected
+aa
+151
+162
+x1
+m2
+##fe
+##×
+##ho
+size
+143
+min
+ofo
+fun
+gomaji
+ex
+hdmi
+food
+dns
+march
+chris
+kevin
+##のか
+##lla
+##pp
+##ec
+ag
+ems
+6s
+720p
+##rm
+##ham
+off
+##92
+asp
+team
+fandom
+ed
+299
+▌♥
+##ell
+info
+されています
+##82
+sina
+4066
+161
+##able
+##ctor
+330
+399
+315
+dll
+rights
+ltd
+idc
+jul
+3kg
+1927
+142
+ma
+surface
+##76
+##ク
+~~~
+304
+mall
+eps
+146
+green
+##59
+map
+space
+donald
+v2
+sodu
+##light
+1931
+148
+1700
+まて
+310
+reserved
+htm
+##han
+##57
+2d
+178
+mod
+##ise
+##tions
+152
+ti
+##shi
+doc
+1933
+icp
+055
+wang
+##ram
+shopping
+aug
+##pi
+##well
+now
+wam
+b2
+からお
+##hu
+236
+1928
+##gb
+266
+f2
+##93
+153
+mix
+##ef
+##uan
+bwl
+##plus
+##res
+core
+##ess
+tea
+5℃
+hktvmall
+nhk
+##ate
+list
+##ese
+301
+feb
+4m
+inn
+ての
+nov
+159
+12345
+daniel
+##ci
+pass
+##bet
+##nk
+coffee
+202
+ssl
+airbnb
+##ute
+fbi
+woshipm
+skype
+ea
+cg
+sp
+##fc
+##www
+yes
+edge
+alt
+007
+##94
+fpga
+##ght
+##gs
+iso9001
+さい
+##ile
+##wood
+##uo
+image
+lin
+icon
+american
+##em
+1932
+set
+says
+##king
+##tive
+blogger
+##74
+なと
+256
+147
+##ox
+##zy
+##red
+##ium
+##lf
+nokia
+claire
+##リ
+##ding
+november
+lohas
+##500
+##tic
+##マ
+##cs
+##ある
+##che
+##ire
+##gy
+##ult
+db
+january
+win
+##カ
+166
+road
+ptt
+##ま
+##つ
+198
+##fa
+##mer
+anna
+pchome
+はい
+udn
+ef
+420
+##time
+##tte
+2030
+##ア
+g20
+white
+かかります
+1929
+308
+garden
+eleven
+di
+##おります
+chen
+309b
+777
+172
+young
+cosplay
+ちてない
+4500
+bat
+##123
+##tra
+##ては
+kindle
+npc
+steve
+etc
+##ern
+##|
+call
+xperia
+ces
+travel
+sk
+s7
+##ous
+1934
+##int
+みいたたけます
+183
+edu
+file
+cho
+qr
+##car
+##our
+186
+##ant
+##d
+eric
+1914
+rends
+##jo
+##する
+mastercard
+##2000
+kb
+##min
+290
+##ino
+vista
+##ris
+##ud
+jack
+2400
+##set
+169
+pos
+1912
+##her
+##ou
+taipei
+しく
+205
+beta
+##ませんか
+232
+##fi
+express
+255
+body
+##ill
+aphojoy
+user
+december
+meiki
+##ick
+tweet
+richard
+##av
+##ᆫ
+iphone6
+##dd
+ちてすか
+views
+##mark
+321
+pd
+##00
+times
+##▲
+level
+##ash
+10g
+point
+5l
+##ome
+208
+koreanmall
+##ak
+george
+q2
+206
+wma
+tcp
+##200
+スタッフ
+full
+mlb
+##lle
+##watch
+tm
+run
+179
+911
+smith
+business
+##und
+1919
+color
+##tal
+222
+171
+##less
+moon
+4399
+##rl
+update
+pcb
+shop
+499
+157
+little
+なし
+end
+##mhz
+van
+dsp
+easy
+660
+##house
+##key
+history
+##o
+oh
+##001
+##hy
+##web
+oem
+let
+was
+##2009
+##gg
+review
+##wan
+182
+##°c
+203
+uc
+title
+##val
+united
+233
+2021
+##ons
+doi
+trivago
+overdope
+sbs
+##ance
+##ち
+grand
+special
+573032185
+imf
+216
+wx17house
+##so
+##ーム
+audi
+##he
+london
+william
+##rp
+##ake
+science
+beach
+cfa
+amp
+ps4
+880
+##800
+##link
+##hp
+crm
+ferragamo
+bell
+make
+##eng
+195
+under
+zh
+photos
+2300
+##style
+##ント
+via
+176
+da
+##gi
+company
+i7
+##ray
+thomas
+370
+ufo
+i5
+##max
+plc
+ben
+back
+research
+8g
+173
+mike
+##pc
+##ッフ
+september
+189
+##ace
+vps
+february
+167
+pantos
+wp
+lisa
+1921
+★★
+jquery
+night
+long
+offer
+##berg
+##news
+1911
+##いて
+ray
+fks
+wto
+せます
+over
+164
+340
+##all
+##rus
+1924
+##888
+##works
+blogtitle
+loftpermalink
+##→
+187
+martin
+test
+ling
+km
+##め
+15000
+fda
+v3
+##ja
+##ロ
+wedding
+かある
+outlet
+family
+##ea
+をこ
+##top
+story
+##ness
+salvatore
+##lu
+204
+swift
+215
+room
+している
+oracle
+##ul
+1925
+sam
+b2c
+week
+pi
+rock
+##のは
+##a
+##けと
+##ean
+##300
+##gle
+cctv
+after
+chinese
+##back
+powered
+x2
+##tan
+1918
+##nes
+##イン
+canon
+only
+181
+##zi
+##las
+say
+##oe
+184
+##sd
+221
+##bot
+##world
+##zo
+sky
+made
+top100
+just
+1926
+pmi
+802
+234
+gap
+##vr
+177
+les
+174
+▲topoct
+ball
+vogue
+vi
+ing
+ofweek
+cos
+##list
+##ort
+▲topmay
+##なら
+##lon
+として
+last
+##tc
+##of
+##bus
+##gen
+real
+eva
+##コ
+a3
+nas
+##lie
+##ria
+##coin
+##bt
+▲topapr
+his
+212
+cat
+nata
+vive
+health
+⋯⋯
+drive
+sir
+▲topmar
+du
+cup
+##カー
+##ook
+##よう
+##sy
+alex
+msg
+tour
+しました
+3ce
+##word
+193
+ebooks
+r8
+block
+318
+##より
+2200
+nice
+pvp
+207
+months
+1905
+rewards
+##ther
+1917
+0800
+##xi
+##チ
+##sc
+micro
+850
+gg
+blogfp
+op
+1922
+daily
+m1
+264
+true
+##bb
+ml
+##tar
+##のお
+##ky
+anthony
+196
+253
+##yo
+state
+218
+##ara
+##aa
+##rc
+##tz
+##ston
+より
+gear
+##eo
+##ade
+ge
+see
+1923
+##win
+##ura
+ss
+heart
+##den
+##ita
+down
+##sm
+el
+png
+2100
+610
+rakuten
+whatsapp
+bay
+dream
+add
+##use
+680
+311
+pad
+gucci
+mpv
+##ode
+##fo
+island
+▲topjun
+##▼
+223
+jason
+214
+chicago
+##❤
+しの
+##hone
+io
+##れる
+##ことか
+sogo
+be2
+##ology
+990
+cloud
+vcd
+##con
+2~3
+##ford
+##joy
+##kb
+##こさいます
+##rade
+but
+##ach
+docker
+##ful
+rfid
+ul
+##ase
+hit
+ford
+##star
+580
+##○
+11
+a2
+sdk
+reading
+edited
+##are
+cmos
+##mc
+238
+siri
+light
+##ella
+##ため
+bloomberg
+##read
+pizza
+##ison
+jimmy
+##vm
+college
+node
+journal
+ba
+18k
+##play
+245
+##cer
+20
+magic
+##yu
+191
+jump
+288
+tt
+##ings
+asr
+##lia
+3200
+step5
+network
+##cd
+mc
+いします
+1234
+pixstyleme
+273
+##600
+2800
+money
+★★★★★
+1280
+12
+430
+bl
+みの
+act
+##tus
+tokyo
+##rial
+##life
+emba
+##ae
+saas
+tcs
+##rk
+##wang
+summer
+##sp
+ko
+##ving
+390
+premium
+##その
+netflix
+##ヒ
+uk
+mt
+##lton
+right
+frank
+two
+209
+える
+##ple
+##cal
+021
+##んな
+##sen
+##ville
+hold
+nexus
+dd
+##ius
+てお
+##mah
+##なく
+tila
+zero
+820
+ce
+##tin
+resort
+##ws
+charles
+old
+p10
+5d
+report
+##360
+##ru
+##には
+bus
+vans
+lt
+##est
+pv
+##レ
+links
+rebecca
+##ツ
+##dm
+azure
+##365
+きな
+limited
+bit
+4gb
+##mon
+1910
+moto
+##eam
+213
+1913
+var
+eos
+なとの
+226
+blogspot
+された
+699
+e3
+dos
+dm
+fc
+##ments
+##ik
+##kw
+boy
+##bin
+##ata
+960
+er
+##せ
+219
+##vin
+##tu
+##ula
+194
+##∥
+station
+##ろ
+##ature
+835
+files
+zara
+hdr
+top10
+nature
+950
+magazine
+s6
+marriott
+##シ
+avira
+case
+##っと
+tab
+##ran
+tony
+##home
+oculus
+im
+##ral
+jean
+saint
+cry
+307
+rosie
+##force
+##ini
+ice
+##bert
+のある
+##nder
+##mber
+pet
+2600
+##◆
+plurk
+▲topdec
+##sis
+00kg
+▲topnov
+720
+##ence
+tim
+##ω
+##nc
+##ても
+##name
+log
+ips
+great
+ikea
+malaysia
+unix
+##イト
+3600
+##ncy
+##nie
+12000
+akb48
+##ye
+##oid
+404
+##chi
+##いた
+oa
+xuehai
+##1000
+##orm
+##rf
+275
+さん
+##ware
+##リー
+980
+ho
+##pro
+text
+##era
+560
+bob
+227
+##ub
+##2008
+8891
+scp
+avi
+##zen
+2022
+mi
+wu
+museum
+qvod
+apache
+lake
+jcb
+▲topaug
+★★★
+ni
+##hr
+hill
+302
+ne
+weibo
+490
+ruby
+##ーシ
+##ヶ
+##row
+4d
+▲topjul
+iv
+##ish
+github
+306
+mate
+312
+##スト
+##lot
+##ane
+andrew
+のハイト
+##tina
+t1
+rf
+ed2k
+##vel
+##900
+way
+final
+りの
+ns
+5a
+705
+197
+##メ
+sweet
+bytes
+##ene
+▲topjan
+231
+##cker
+##2007
+##px
+100g
+topapp
+229
+helpapp
+rs
+low
+14k
+g4g
+care
+630
+ldquo
+あり
+##fork
+leave
+rm
+edition
+##gan
+##zon
+##qq
+▲topsep
+##google
+##ism
+gold
+224
+explorer
+##zer
+toyota
+category
+select
+visual
+##labels
+restaurant
+##md
+posts
+s1
+##ico
+もっと
+angelababy
+123456
+217
+sports
+s3
+mbc
+1915
+してくたさい
+shell
+x86
+candy
+##new
+kbs
+face
+xl
+470
+##here
+4a
+swissinfo
+v8
+▲topfeb
+dram
+##ual
+##vice
+3a
+##wer
+sport
+q1
+ios10
+public
+int
+card
+##c
+ep
+au
+rt
+##れた
+1080
+bill
+##mll
+kim
+30
+460
+wan
+##uk
+##ミ
+x3
+298
+0t
+scott
+##ming
+239
+e5
+##3d
+h7n9
+worldcat
+brown
+##あります
+##vo
+##led
+##580
+##ax
+249
+410
+##ert
+paris
+##~6
+polo
+925
+##lr
+599
+##ナ
+capital
+##hing
+bank
+cv
+1g
+##chat
+##s
+##たい
+adc
+##ule
+2m
+##e
+digital
+hotmail
+268
+##pad
+870
+bbq
+quot
+##ring
+before
+wali
+##まて
+mcu
+2k
+2b
+という
+costco
+316
+north
+333
+switch
+##city
+##p
+philips
+##mann
+management
+panasonic
+##cl
+##vd
+##ping
+##rge
+alice
+##lk
+##ましょう
+css3
+##ney
+vision
+alpha
+##ular
+##400
+##tter
+lz
+にお
+##ありません
+mode
+gre
+1916
+pci
+##tm
+237
+1~2
+##yan
+##そ
+について
+##let
+##キ
+work
+war
+coach
+ah
+mary
+##ᅵ
+huang
+##pt
+a8
+pt
+follow
+##berry
+1895
+##ew
+a5
+ghost
+##ション
+##wn
+##og
+south
+##code
+girls
+##rid
+action
+villa
+git
+r11
+table
+games
+##cket
+error
+##anonymoussaid
+##ag
+here
+##ame
+##gc
+qa
+##■
+##lis
+gmp
+##gin
+vmalife
+##cher
+yu
+wedding
+##tis
+demo
+dragon
+530
+soho
+social
+bye
+##rant
+river
+orz
+acer
+325
+##↑
+##ース
+##ats
+261
+del
+##ven
+440
+ups
+##ように
+##ター
+305
+value
+macd
+yougou
+##dn
+661
+##ano
+ll
+##urt
+##rent
+continue
+script
+##wen
+##ect
+paper
+263
+319
+shift
+##chel
+##フト
+##cat
+258
+x5
+fox
+243
+##さん
+car
+aaa
+##blog
+loading
+##yn
+##tp
+kuso
+799
+si
+sns
+イカせるテンマ
+ヒンクテンマ3
+rmb
+vdc
+forest
+central
+prime
+help
+ultra
+##rmb
+##ような
+241
+square
+688
+##しい
+のないフロクに
+##field
+##reen
+##ors
+##ju
+c1
+start
+510
+##air
+##map
+cdn
+##wo
+cba
+stephen
+m8
+100km
+##get
+opera
+##base
+##ood
+vsa
+com™
+##aw
+##ail
+251
+なのて
+count
+t2
+##ᅡ
+##een
+2700
+hop
+##gp
+vsc
+tree
+##eg
+##ose
+816
+285
+##ories
+##shop
+alphago
+v4
+1909
+simon
+##ᆼ
+fluke62max
+zip
+スホンサー
+##sta
+louis
+cr
+bas
+##~10
+bc
+##yer
+hadoop
+##ube
+##wi
+1906
+0755
+hola
+##low
+place
+centre
+5v
+d3
+##fer
+252
+##750
+##media
+281
+540
+0l
+exchange
+262
+series
+##ハー
+##san
+eb
+##bank
+##k
+q3
+##nge
+##mail
+take
+##lp
+259
+1888
+client
+east
+cache
+event
+vincent
+##ールを
+きを
+##nse
+sui
+855
+adchoice
+##и
+##stry
+##なたの
+246
+##zone
+ga
+apps
+sea
+##ab
+248
+cisco
+##タ
+##rner
+kymco
+##care
+dha
+##pu
+##yi
+minkoff
+royal
+p1
+への
+annie
+269
+collection
+kpi
+playstation
+257
+になります
+866
+bh
+##bar
+queen
+505
+radio
+1904
+andy
+armani
+##xy
+manager
+iherb
+##ery
+##share
+spring
+raid
+johnson
+1908
+##ob
+volvo
+hall
+##ball
+v6
+our
+taylor
+##hk
+bi
+242
+##cp
+kate
+bo
+water
+technology
+##rie
+サイトは
+277
+##ona
+##sl
+hpv
+303
+gtx
+hip
+rdquo
+jayz
+stone
+##lex
+##rum
+namespace
+##やり
+620
+##ale
+##atic
+des
+##erson
+##ql
+##ves
+##type
+enter
+##この
+##てきます
+d2
+##168
+##mix
+##bian
+との
+a9
+jj
+ky
+##lc
+access
+movie
+##hc
+リストに
+tower
+##ration
+##mit
+ます
+##nch
+ua
+tel
+prefix
+##o2
+1907
+##point
+1901
+ott
+~10
+##http
+##ury
+baidu
+##ink
+member
+##logy
+bigbang
+nownews
+##js
+##shot
+##tb
+##こと
+247
+eba
+##tics
+##lus
+ける
+v5
+spark
+##ama
+there
+##ions
+god
+##lls
+##down
+hiv
+##ress
+burberry
+day2
+##kv
+◆◆
+jeff
+related
+film
+edit
+joseph
+283
+##ark
+cx
+32gb
+order
+g9
+30000
+##ans
+##tty
+s5
+##bee
+かあります
+thread
+xr
+buy
+sh
+005
+land
+spotify
+mx
+##ari
+276
+##verse
+×email
+sf
+why
+##ことて
+244
+7headlines
+nego
+sunny
+dom
+exo
+401
+666
+positioning
+fit
+rgb
+##tton
+278
+kiss
+alexa
+adam
+lp
+みリストを
+##g
+mp
+##ties
+##llow
+amy
+##du
+np
+002
+institute
+271
+##rth
+##lar
+2345
+590
+##des
+sidebar
+15
+imax
+site
+##cky
+##kit
+##ime
+##009
+season
+323
+##fun
+##ンター
+##ひ
+gogoro
+a7
+pu
+lily
+fire
+twd600
+##ッセーシを
+いて
+##vis
+30ml
+##cture
+##をお
+information
+##オ
+close
+friday
+##くれる
+yi
+nick
+てすか
+##tta
+##tel
+6500
+##lock
+cbd
+economy
+254
+かお
+267
+tinker
+double
+375
+8gb
+voice
+##app
+oops
+channel
+today
+985
+##right
+raw
+xyz
+##+
+jim
+edm
+##cent
+7500
+supreme
+814
+ds
+##its
+##asia
+dropbox
+##てすか
+##tti
+books
+272
+100ml
+##tle
+##ller
+##ken
+##more
+##boy
+sex
+309
+##dom
+t3
+##ider
+##なります
+##unch
+1903
+810
+feel
+5500
+##かった
+##put
+により
+s2
+mo
+##gh
+men
+ka
+amoled
+div
+##tr
+##n1
+port
+howard
+##tags
+ken
+dnf
+##nus
+adsense
+##а
+ide
+##へ
+buff
+thunder
+##town
+##ique
+has
+##body
+auto
+pin
+##erry
+tee
+てした
+295
+number
+##the
+##013
+object
+psp
+cool
+udnbkk
+16gb
+##mic
+miui
+##tro
+most
+r2
+##alk
+##nity
+1880
+±0
+##いました
+428
+s4
+law
+version
+##oa
+n1
+sgs
+docomo
+##tf
+##ack
+henry
+fc2
+##ded
+##sco
+##014
+##rite
+286
+0mm
+linkedin
+##ada
+##now
+wii
+##ndy
+ucbug
+##◎
+sputniknews
+legalminer
+##ika
+##xp
+2gb
+##bu
+q10
+oo
+b6
+come
+##rman
+cheese
+ming
+maker
+##gm
+nikon
+##fig
+ppi
+kelly
+##ります
+jchere
+てきます
+ted
+md
+003
+fgo
+tech
+##tto
+dan
+soc
+##gl
+##len
+hair
+earth
+640
+521
+img
+##pper
+##a1
+##てきる
+##ロク
+acca
+##ition
+##ference
+suite
+##ig
+outlook
+##mond
+##cation
+398
+##pr
+279
+101vip
+358
+##999
+282
+64gb
+3800
+345
+airport
+##over
+284
+##おり
+jones
+##ith
+lab
+##su
+##いるのて
+co2
+town
+piece
+##llo
+no1
+vmware
+24h
+##qi
+focus
+reader
+##admin
+##ora
+tb
+false
+##log
+1898
+know
+lan
+838
+##ces
+f4
+##ume
+motel
+stop
+##oper
+na
+flickr
+netcomponents
+##af
+##─
+pose
+williams
+local
+##ound
+##cg
+##site
+##iko
+いお
+274
+5m
+gsm
+con
+##ath
+1902
+friends
+##hip
+cell
+317
+##rey
+780
+cream
+##cks
+012
+##dp
+facebooktwitterpinterestgoogle
+sso
+324
+shtml
+song
+swiss
+##mw
+##キンク
+lumia
+xdd
+string
+tiffany
+522
+marc
+られた
+insee
+russell
+sc
+dell
+##ations
+ok
+camera
+289
+##vs
+##flow
+##late
+classic
+287
+##nter
+stay
+g1
+mtv
+512
+##ever
+##lab
+##nger
+qe
+sata
+ryan
+d1
+50ml
+cms
+##cing
+su
+292
+3300
+editor
+296
+##nap
+security
+sunday
+association
+##ens
+##700
+##bra
+acg
+##かり
+sofascore
+とは
+mkv
+##ign
+jonathan
+gary
+build
+labels
+##oto
+tesla
+moba
+qi
+gohappy
+general
+ajax
+1024
+##かる
+サイト
+society
+##test
+##urs
+wps
+fedora
+##ich
+mozilla
+328
+##480
+##dr
+usa
+urn
+##lina
+##r
+grace
+##die
+##try
+##ader
+1250
+##なり
+elle
+570
+##chen
+##ᆯ
+price
+##ten
+uhz
+##ough
+eq
+##hen
+states
+push
+session
+balance
+wow
+506
+##cus
+##py
+when
+##ward
+##ep
+34e
+wong
+library
+prada
+##サイト
+##cle
+running
+##ree
+313
+ck
+date
+q4
+##ctive
+##ool
+##>
+mk
+##ira
+##163
+388
+die
+secret
+rq
+dota
+buffet
+は1ヶ
+e6
+##ez
+pan
+368
+ha
+##card
+##cha
+2a
+##さ
+alan
+day3
+eye
+f3
+##end
+france
+keep
+adi
+rna
+tvbs
+##ala
+solo
+nova
+##え
+##tail
+##ょう
+support
+##ries
+##なる
+##ved
+base
+copy
+iis
+fps
+##ways
+hero
+hgih
+profile
+fish
+mu
+ssh
+entertainment
+chang
+##wd
+click
+cake
+##ond
+pre
+##tom
+kic
+pixel
+##ov
+##fl
+product
+6a
+##pd
+dear
+##gate
+es
+yumi
+audio
+##²
+##sky
+echo
+bin
+where
+##ture
+329
+##ape
+find
+sap
+isis
+##なと
+nand
+##101
+##load
+##ream
+band
+a6
+525
+never
+##post
+festival
+50cm
+##we
+555
+guide
+314
+zenfone
+##ike
+335
+gd
+forum
+jessica
+strong
+alexander
+##ould
+software
+allen
+##ious
+program
+360°
+else
+lohasthree
+##gar
+することかてきます
+please
+##れます
+rc
+##ggle
+##ric
+bim
+50000
+##own
+eclipse
+355
+brian
+3ds
+##side
+061
+361
+##other
+##ける
+##tech
+##ator
+485
+engine
+##ged
+##t
+plaza
+##fit
+cia
+ngo
+westbrook
+shi
+tbs
+50mm
+##みませんか
+sci
+291
+reuters
+##ily
+contextlink
+##hn
+af
+##cil
+bridge
+very
+##cel
+1890
+cambridge
+##ize
+15g
+##aid
+##data
+790
+frm
+##head
+award
+butler
+##sun
+meta
+##mar
+america
+ps3
+puma
+pmid
+##すか
+lc
+670
+kitchen
+##lic
+オーフン5
+きなしソフトサーヒス
+そして
+day1
+future
+★★★★
+##text
+##page
+##rris
+pm1
+##ket
+fans
+##っています
+1001
+christian
+bot
+kids
+trackback
+##hai
+c3
+display
+##hl
+n2
+1896
+idea
+さんも
+##sent
+airmail
+##ug
+##men
+pwm
+けます
+028
+##lution
+369
+852
+awards
+schemas
+354
+asics
+wikipedia
+font
+##tional
+##vy
+c2
+293
+##れている
+##dget
+##ein
+っている
+contact
+pepper
+スキル
+339
+##~5
+294
+##uel
+##ument
+730
+##hang
+みてす
+q5
+##sue
+rain
+##ndi
+wei
+swatch
+##cept
+わせ
+331
+popular
+##ste
+##tag
+p2
+501
+trc
+1899
+##west
+##live
+justin
+honda
+ping
+messenger
+##rap
+v9
+543
+##とは
+unity
+appqq
+はすへて
+025
+leo
+##tone
+##テ
+##ass
+uniqlo
+##010
+502
+her
+jane
+memory
+moneydj
+##tical
+human
+12306
+していると
+##m2
+coc
+miacare
+##mn
+tmt
+##core
+vim
+kk
+##may
+fan
+target
+use
+too
+338
+435
+2050
+867
+737
+fast
+##2c
+services
+##ope
+omega
+energy
+##わ
+pinkoi
+1a
+##なから
+##rain
+jackson
+##ement
+##シャンルの
+374
+366
+そんな
+p9
+rd
+##ᆨ
+1111
+##tier
+##vic
+zone
+##│
+385
+690
+dl
+isofix
+cpa
+m4
+322
+kimi
+めて
+davis
+##lay
+lulu
+##uck
+050
+weeks
+qs
+##hop
+920
+##n
+ae
+##ear
+~5
+eia
+405
+##fly
+korea
+jpeg
+boost
+##ship
+small
+##リア
+1860
+eur
+297
+425
+valley
+##iel
+simple
+##ude
+rn
+k2
+##ena
+されます
+non
+patrick
+しているから
+##ナー
+feed
+5757
+30g
+process
+well
+qqmei
+##thing
+they
+aws
+lu
+pink
+##ters
+##kin
+または
+board
+##vertisement
+wine
+##ien
+unicode
+##dge
+r1
+359
+##tant
+いを
+##twitter
+##3c
+cool1
+される
+##れて
+##l
+isp
+##012
+standard
+45㎡2
+402
+##150
+matt
+##fu
+326
+##iner
+googlemsn
+pixnetfacebookyahoo
+##ラン
+x7
+886
+##uce
+メーカー
+sao
+##ev
+##きました
+##file
+9678
+403
+xddd
+shirt
+6l
+##rio
+##hat
+3mm
+givenchy
+ya
+bang
+##lio
+monday
+crystal
+ロクイン
+##abc
+336
+head
+890
+ubuntuforumwikilinuxpastechat
+##vc
+##~20
+##rity
+cnc
+7866
+ipv6
+null
+1897
+##ost
+yang
+imsean
+tiger
+##fet
+##ンス
+352
+##=
+dji
+327
+ji
+maria
+##come
+##んて
+foundation
+3100
+##beth
+##なった
+1m
+601
+active
+##aft
+##don
+3p
+sr
+349
+emma
+##khz
+living
+415
+353
+1889
+341
+709
+457
+sas
+x6
+##face
+pptv
+x4
+##mate
+han
+sophie
+##jing
+337
+fifa
+##mand
+other
+sale
+inwedding
+##gn
+てきちゃいます
+##mmy
+##pmlast
+bad
+nana
+nbc
+してみてくたさいね
+なとはお
+##wu
+##かあります
+##あ
+note7
+single
+##340
+せからこ
+してくたさい♪この
+しにはとんとんワークケートを
+するとあなたにもっとマッチした
+ならワークケートへ
+もみつかっちゃうかも
+ワークケートの
+##bel
+window
+##dio
+##ht
+union
+age
+382
+14
+##ivity
+##y
+コメント
+domain
+neo
+##isa
+##lter
+5k
+f5
+steven
+##cts
+powerpoint
+tft
+self
+g2
+ft
+##テル
+zol
+##act
+mwc
+381
+343
+もう
+nbapop
+408
+てある
+eds
+ace
+##room
+previous
+author
+tomtom
+il
+##ets
+hu
+financial
+☆☆☆
+っています
+bp
+5t
+chi
+1gb
+##hg
+fairmont
+cross
+008
+gay
+h2
+function
+##けて
+356
+also
+1b
+625
+##ータ
+##raph
+1894
+3~5
+##ils
+i3
+334
+avenue
+##host
+による
+##bon
+##tsu
+message
+navigation
+50g
+fintech
+h6
+##ことを
+8cm
+##ject
+##vas
+##firm
+credit
+##wf
+xxxx
+form
+##nor
+##space
+huawei
+plan
+json
+sbl
+##dc
+machine
+921
+392
+wish
+##120
+##sol
+windows7
+edward
+##ために
+development
+washington
+##nsis
+lo
+818
+##sio
+##ym
+##bor
+planet
+##~8
+##wt
+ieee
+gpa
+##めて
+camp
+ann
+gm
+##tw
+##oka
+connect
+##rss
+##work
+##atus
+wall
+chicken
+soul
+2mm
+##times
+fa
+##ather
+##cord
+009
+##eep
+hitachi
+gui
+harry
+##pan
+e1
+disney
+##press
+##ーション
+wind
+386
+frigidaire
+##tl
+liu
+hsu
+332
+basic
+von
+ev
+いた
+てきる
+スホンサーサイト
+learning
+##ull
+expedia
+archives
+change
+##wei
+santa
+cut
+ins
+6gb
+turbo
+brand
+cf1
+508
+004
+return
+747
+##rip
+h1
+##nis
+##をこ
+128gb
+##にお
+3t
+application
+しており
+emc
+rx
+##oon
+384
+quick
+412
+15058
+wilson
+wing
+chapter
+##bug
+beyond
+##cms
+##dar
+##oh
+zoom
+e2
+trip
+sb
+##nba
+rcep
+342
+aspx
+ci
+080
+gc
+gnu
+める
+##count
+advanced
+dance
+dv
+##url
+##ging
+367
+8591
+am09
+shadow
+battle
+346
+##i
+##cia
+##という
+emily
+##のてす
+##tation
+host
+ff
+techorz
+sars
+##mini
+##mporary
+##ering
+nc
+4200
+798
+##next
+cma
+##mbps
+##gas
+##ift
+##dot
+##ィ
+455
+##~17
+amana
+##りの
+426
+##ros
+ir
+00㎡1
+##eet
+##ible
+##↓
+710
+ˋ▽ˊ
+##aka
+dcs
+iq
+##v
+l1
+##lor
+maggie
+##011
+##iu
+588
+##~1
+830
+##gt
+1tb
+articles
+create
+##burg
+##iki
+database
+fantasy
+##rex
+##cam
+dlc
+dean
+##you
+hard
+path
+gaming
+victoria
+maps
+cb
+##lee
+##itor
+overchicstoretvhome
+systems
+##xt
+416
+p3
+sarah
+760
+##nan
+407
+486
+x9
+install
+second
+626
+##ann
+##ph
+##rcle
+##nic
+860
+##nar
+ec
+##とう
+768
+metro
+chocolate
+##rian
+~4
+##table
+##しています
+skin
+##sn
+395
+mountain
+##0mm
+inparadise
+6m
+7x24
+ib
+4800
+##jia
+eeworld
+creative
+g5
+g3
+357
+parker
+ecfa
+village
+からの
+18000
+sylvia
+サーヒス
+hbl
+##ques
+##onsored
+##x2
+##きます
+##v4
+##tein
+ie6
+383
+##stack
+389
+ver
+##ads
+##baby
+sound
+bbe
+##110
+##lone
+##uid
+ads
+022
+gundam
+351
+thinkpad
+006
+scrum
+match
+##ave
+mems
+##470
+##oy
+##なりました
+##talk
+glass
+lamigo
+span
+##eme
+job
+##a5
+jay
+wade
+kde
+498
+##lace
+ocean
+tvg
+##covery
+##r3
+##ners
+##rea
+junior
+think
+##aine
+cover
+##ision
+##sia
+↓↓
+##bow
+msi
+413
+458
+406
+##love
+711
+801
+soft
+z2
+##pl
+456
+1840
+mobil
+mind
+##uy
+427
+nginx
+##oi
+めた
+##rr
+6221
+##mple
+##sson
+##ーシてす
+371
+##nts
+91tv
+comhd
+crv3000
+##uard
+1868
+397
+deep
+lost
+field
+gallery
+##bia
+rate
+spf
+redis
+traction
+930
+icloud
+011
+なら
+fe
+jose
+372
+##tory
+into
+sohu
+fx
+899
+379
+kicstart2
+##hia
+すく
+##~3
+##sit
+ra
+24
+##walk
+##xure
+500g
+##pact
+pacific
+xa
+natural
+carlo
+##250
+##walker
+1850
+##can
+cto
+gigi
+516
+##サー
+pen
+##hoo
+ob
+matlab
+##b
+##yy
+13913459
+##iti
+mango
+##bbs
+sense
+c5
+oxford
+##ニア
+walker
+jennifer
+##ola
+course
+##bre
+701
+##pus
+##rder
+lucky
+075
+##ぁ
+ivy
+なお
+##nia
+sotheby
+side
+##ugh
+joy
+##orage
+##ush
+##bat
+##dt
+364
+r9
+##2d
+##gio
+511
+country
+wear
+##lax
+##~7
+##moon
+393
+seven
+study
+411
+348
+lonzo
+8k
+##ェ
+evolution
+##イフ
+##kk
+gs
+kd
+##レス
+arduino
+344
+b12
+##lux
+arpg
+##rdon
+cook
+##x5
+dark
+five
+##als
+##ida
+とても
+sign
+362
+##ちの
+something
+20mm
+##nda
+387
+##posted
+fresh
+tf
+1870
+422
+cam
+##mine
+##skip
+##form
+##ssion
+education
+394
+##tee
+dyson
+stage
+##jie
+want
+##night
+epson
+pack
+あります
+##ppy
+テリヘル
+##█
+wd
+##eh
+##rence
+left
+##lvin
+golden
+mhz
+discovery
+##trix
+##n2
+loft
+##uch
+##dra
+##sse
+speed
+~1
+1mdb
+sorry
+welcome
+##urn
+wave
+gaga
+##lmer
+teddy
+##160
+トラックハック
+せよ
+611
+##f2016
+378
+rp
+##sha
+rar
+##あなたに
+##きた
+840
+holiday
+##ュー
+373
+074
+##vg
+##nos
+##rail
+gartner
+gi
+6p
+##dium
+kit
+488
+b3
+eco
+##ろう
+20g
+sean
+##stone
+autocad
+nu
+##np
+f16
+write
+029
+m5
+##ias
+images
+atp
+##dk
+fsm
+504
+1350
+ve
+52kb
+##xxx
+##のに
+##cake
+414
+unit
+lim
+ru
+1v
+##ification
+published
+angela
+16g
+analytics
+ak
+##q
+##nel
+gmt
+##icon
+again
+##₂
+##bby
+ios11
+445
+かこさいます
+waze
+いてす
+##ハ
+9985
+##ust
+##ティー
+framework
+##007
+iptv
+delete
+52sykb
+cl
+wwdc
+027
+30cm
+##fw
+##ての
+1389
+##xon
+brandt
+##ses
+##dragon
+tc
+vetements
+anne
+monte
+modern
+official
+##へて
+##ere
+##nne
+##oud
+もちろん
+50
+etnews
+##a2
+##graphy
+421
+863
+##ちゃん
+444
+##rtex
+##てお
+l2
+##gma
+mount
+ccd
+たと
+archive
+morning
+tan
+ddos
+e7
+##ホ
+day4
+##ウ
+gis
+453
+its
+495
+factory
+bruce
+pg
+##ito
+ってくたさい
+guest
+cdma
+##lling
+536
+n3
+しかし
+3~4
+mega
+eyes
+ro
+13
+women
+dac
+church
+##jun
+singapore
+##facebook
+6991
+starbucks
+##tos
+##stin
+##shine
+zen
+##mu
+tina
+20℃
+1893
+##たけて
+503
+465
+request
+##gence
+qt
+##っ
+1886
+347
+363
+q7
+##zzi
+diary
+##tore
+409
+##ead
+468
+cst
+##osa
+canada
+agent
+va
+##jiang
+##ちは
+##ーク
+##lam
+sg
+##nix
+##sday
+##よって
+g6
+##master
+bing
+##zl
+charlie
+16
+8mm
+nb40
+##ーン
+thai
+##ルフ
+ln284ct
+##itz
+##2f
+bonnie
+##food
+##lent
+originals
+##stro
+##lts
+418
+∟∣
+##bscribe
+children
+ntd
+yesstyle
+##かも
+hmv
+##tment
+d5
+2cm
+arts
+sms
+##pn
+##я
+##いい
+topios9
+539
+lifestyle
+virtual
+##ague
+xz
+##deo
+muji
+024
+unt
+##nnis
+##ᅩ
+faq1
+1884
+396
+##ette
+fly
+64㎡
+はしめまして
+441
+curry
+##pop
+のこ
+release
+##←
+##◆◆
+##cast
+073
+ありな
+500ml
+##ews
+5c
+##stle
+ios7
+##ima
+787
+dog
+lenovo
+##r4
+roger
+013
+cbs
+vornado
+100m
+417
+##desk
+##クok
+##ald
+1867
+9595
+2900
+##van
+oil
+##x
+some
+break
+common
+##jy
+##lines
+g7
+twice
+419
+ella
+nano
+belle
+にこ
+##mes
+##self
+##note
+jb
+##ことかてきます
+benz
+##との
+##ova
+451
+save
+##wing
+##ますのて
+kai
+りは
+##hua
+##rect
+rainer
+##unge
+448
+##0m
+adsl
+##かな
+guestname
+##uma
+##kins
+##zu
+tokichoi
+##price
+county
+##med
+##mus
+rmk
+391
+address
+vm
+えて
+openload
+##group
+##hin
+##iginal
+amg
+urban
+##oz
+jobs
+emi
+##public
+beautiful
+##sch
+album
+##dden
+##bell
+jerry
+works
+hostel
+miller
+##drive
+##rmin
+##10
+376
+boot
+828
+##370
+##fx
+##cm~
+1885
+##nome
+##ctionary
+##oman
+##lish
+##cr
+##hm
+433
+##how
+432
+francis
+xi
+c919
+b5
+evernote
+##uc
+vga
+##3000
+coupe
+##urg
+##cca
+##uality
+019
+6g
+れる
+multi
+##また
+##ett
+em
+hey
+##ani
+##tax
+##rma
+inside
+than
+740
+leonnhurt
+##jin
+ict
+れた
+bird
+notes
+200mm
+くの
+##dical
+##lli
+result
+442
+iu
+ee
+438
+smap
+gopro
+##last
+yin
+pure
+998
+32g
+けた
+5kg
+##dan
+##rame
+mama
+##oot
+bean
+marketing
+##hur
+2l
+bella
+sync
+xuite
+##ground
+515
+discuz
+##getrelax
+##ince
+##bay
+##5s
+cj
+##イス
+gmat
+apt
+##pass
+jing
+##rix
+c4
+rich
+##とても
+niusnews
+##ello
+bag
+770
+##eting
+##mobile
+18
+culture
+015
+##のてすか
+377
+1020
+area
+##ience
+616
+details
+gp
+universal
+silver
+dit
+はお
+private
+ddd
+u11
+kanshu
+##ified
+fung
+##nny
+dx
+##520
+tai
+475
+023
+##fr
+##lean
+3s
+##pin
+429
+##rin
+25000
+ly
+rick
+##bility
+usb3
+banner
+##baru
+##gion
+metal
+dt
+vdf
+1871
+karl
+qualcomm
+bear
+1010
+oldid
+ian
+jo
+##tors
+population
+##ernel
+1882
+mmorpg
+##mv
+##bike
+603
+##©
+ww
+friend
+##ager
+exhibition
+##del
+##pods
+fpx
+structure
+##free
+##tings
+kl
+##rley
+##copyright
+##mma
+california
+3400
+orange
+yoga
+4l
+canmake
+honey
+##anda
+##コメント
+595
+nikkie
+##ルハイト
+dhl
+publishing
+##mall
+##gnet
+20cm
+513
+##クセス
+##┅
+e88
+970
+##dog
+fishbase
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##+
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##=
+##>
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##{
+##|
+##}
+##~
+##£
+##¤
+##¥
+##§
+##«
+##±
+##³
+##µ
+##·
+##¹
+##º
+##»
+##¼
+##ß
+##æ
+##÷
+##ø
+##đ
+##ŋ
+##ɔ
+##ə
+##ɡ
+##ʰ
+##ˇ
+##ˈ
+##ˊ
+##ˋ
+##ˍ
+##ː
+##˙
+##˚
+##ˢ
+##α
+##β
+##γ
+##δ
+##ε
+##η
+##θ
+##ι
+##κ
+##λ
+##μ
+##ν
+##ο
+##π
+##ρ
+##ς
+##σ
+##τ
+##υ
+##φ
+##χ
+##ψ
+##б
+##в
+##г
+##д
+##е
+##ж
+##з
+##к
+##л
+##м
+##н
+##о
+##п
+##р
+##с
+##т
+##у
+##ф
+##х
+##ц
+##ч
+##ш
+##ы
+##ь
+##і
+##ا
+##ب
+##ة
+##ت
+##د
+##ر
+##س
+##ع
+##ل
+##م
+##ن
+##ه
+##و
+##ي
+##۩
+##ก
+##ง
+##น
+##ม
+##ย
+##ร
+##อ
+##า
+##เ
+##๑
+##་
+##ღ
+##ᄀ
+##ᄁ
+##ᄂ
+##ᄃ
+##ᄅ
+##ᄆ
+##ᄇ
+##ᄈ
+##ᄉ
+##ᄋ
+##ᄌ
+##ᄎ
+##ᄏ
+##ᄐ
+##ᄑ
+##ᄒ
+##ᅢ
+##ᅣ
+##ᅥ
+##ᅦ
+##ᅧ
+##ᅨ
+##ᅪ
+##ᅬ
+##ᅭ
+##ᅮ
+##ᅯ
+##ᅲ
+##ᅳ
+##ᅴ
+##ᆷ
+##ᆸ
+##ᆺ
+##ᆻ
+##ᗜ
+##ᵃ
+##ᵉ
+##ᵍ
+##ᵏ
+##ᵐ
+##ᵒ
+##ᵘ
+##‖
+##„
+##†
+##•
+##‥
+##‧
+##
+##‰
+##′
+##″
+##‹
+##›
+##※
+##‿
+##⁄
+##ⁱ
+##⁺
+##ⁿ
+##₁
+##₃
+##₄
+##€
+##№
+##ⅰ
+##ⅱ
+##ⅲ
+##ⅳ
+##ⅴ
+##↔
+##↗
+##↘
+##⇒
+##∀
+##−
+##∕
+##∙
+##√
+##∞
+##∟
+##∠
+##∣
+##∩
+##∮
+##∶
+##∼
+##∽
+##≈
+##≒
+##≡
+##≤
+##≥
+##≦
+##≧
+##≪
+##≫
+##⊙
+##⋅
+##⋈
+##⋯
+##⌒
+##①
+##②
+##③
+##④
+##⑤
+##⑥
+##⑦
+##⑧
+##⑨
+##⑩
+##⑴
+##⑵
+##⑶
+##⑷
+##⑸
+##⒈
+##⒉
+##⒊
+##⒋
+##ⓒ
+##ⓔ
+##ⓘ
+##━
+##┃
+##┆
+##┊
+##┌
+##└
+##├
+##┣
+##═
+##║
+##╚
+##╞
+##╠
+##╭
+##╮
+##╯
+##╰
+##╱
+##╳
+##▂
+##▃
+##▅
+##▇
+##▉
+##▋
+##▌
+##▍
+##▎
+##□
+##▪
+##▫
+##▬
+##△
+##▶
+##►
+##▽
+##◇
+##◕
+##◠
+##◢
+##◤
+##☀
+##☕
+##☞
+##☺
+##☼
+##♀
+##♂
+##♠
+##♡
+##♣
+##♦
+##♫
+##♬
+##✈
+##✔
+##✕
+##✖
+##✦
+##✨
+##✪
+##✰
+##✿
+##❀
+##➜
+##➤
+##⦿
+##、
+##。
+##〃
+##々
+##〇
+##〈
+##〉
+##《
+##》
+##「
+##」
+##『
+##』
+##【
+##】
+##〓
+##〔
+##〕
+##〖
+##〗
+##〜
+##〝
+##〞
+##ぃ
+##ぇ
+##ぬ
+##ふ
+##ほ
+##む
+##ゃ
+##ゅ
+##ゆ
+##ょ
+##゜
+##ゝ
+##ァ
+##ゥ
+##エ
+##ォ
+##ケ
+##サ
+##セ
+##ソ
+##ッ
+##ニ
+##ヌ
+##ネ
+##ノ
+##ヘ
+##モ
+##ャ
+##ヤ
+##ュ
+##ユ
+##ョ
+##ヨ
+##ワ
+##ヲ
+##・
+##ヽ
+##ㄅ
+##ㄆ
+##ㄇ
+##ㄉ
+##ㄋ
+##ㄌ
+##ㄍ
+##ㄎ
+##ㄏ
+##ㄒ
+##ㄚ
+##ㄛ
+##ㄞ
+##ㄟ
+##ㄢ
+##ㄤ
+##ㄥ
+##ㄧ
+##ㄨ
+##ㆍ
+##㈦
+##㊣
+##㗎
+##一
+##丁
+##七
+##万
+##丈
+##三
+##上
+##下
+##不
+##与
+##丐
+##丑
+##专
+##且
+##丕
+##世
+##丘
+##丙
+##业
+##丛
+##东
+##丝
+##丞
+##丟
+##両
+##丢
+##两
+##严
+##並
+##丧
+##丨
+##个
+##丫
+##中
+##丰
+##串
+##临
+##丶
+##丸
+##丹
+##为
+##主
+##丼
+##丽
+##举
+##丿
+##乂
+##乃
+##久
+##么
+##义
+##之
+##乌
+##乍
+##乎
+##乏
+##乐
+##乒
+##乓
+##乔
+##乖
+##乗
+##乘
+##乙
+##乜
+##九
+##乞
+##也
+##习
+##乡
+##书
+##乩
+##买
+##乱
+##乳
+##乾
+##亀
+##亂
+##了
+##予
+##争
+##事
+##二
+##于
+##亏
+##云
+##互
+##五
+##井
+##亘
+##亙
+##亚
+##些
+##亜
+##亞
+##亟
+##亡
+##亢
+##交
+##亥
+##亦
+##产
+##亨
+##亩
+##享
+##京
+##亭
+##亮
+##亲
+##亳
+##亵
+##人
+##亿
+##什
+##仁
+##仃
+##仄
+##仅
+##仆
+##仇
+##今
+##介
+##仍
+##从
+##仏
+##仑
+##仓
+##仔
+##仕
+##他
+##仗
+##付
+##仙
+##仝
+##仞
+##仟
+##代
+##令
+##以
+##仨
+##仪
+##们
+##仮
+##仰
+##仲
+##件
+##价
+##任
+##份
+##仿
+##企
+##伉
+##伊
+##伍
+##伎
+##伏
+##伐
+##休
+##伕
+##众
+##优
+##伙
+##会
+##伝
+##伞
+##伟
+##传
+##伢
+##伤
+##伦
+##伪
+##伫
+##伯
+##估
+##伴
+##伶
+##伸
+##伺
+##似
+##伽
+##佃
+##但
+##佇
+##佈
+##位
+##低
+##住
+##佐
+##佑
+##体
+##佔
+##何
+##佗
+##佘
+##余
+##佚
+##佛
+##作
+##佝
+##佞
+##佟
+##你
+##佢
+##佣
+##佤
+##佥
+##佩
+##佬
+##佯
+##佰
+##佳
+##併
+##佶
+##佻
+##佼
+##使
+##侃
+##侄
+##來
+##侈
+##例
+##侍
+##侏
+##侑
+##侖
+##侗
+##供
+##依
+##侠
+##価
+##侣
+##侥
+##侦
+##侧
+##侨
+##侬
+##侮
+##侯
+##侵
+##侶
+##侷
+##便
+##係
+##促
+##俄
+##俊
+##俎
+##俏
+##俐
+##俑
+##俗
+##俘
+##俚
+##保
+##俞
+##俟
+##俠
+##信
+##俨
+##俩
+##俪
+##俬
+##俭
+##修
+##俯
+##俱
+##俳
+##俸
+##俺
+##俾
+##倆
+##倉
+##個
+##倌
+##倍
+##倏
+##們
+##倒
+##倔
+##倖
+##倘
+##候
+##倚
+##倜
+##借
+##倡
+##値
+##倦
+##倩
+##倪
+##倫
+##倬
+##倭
+##倶
+##债
+##值
+##倾
+##偃
+##假
+##偈
+##偉
+##偌
+##偎
+##偏
+##偕
+##做
+##停
+##健
+##側
+##偵
+##偶
+##偷
+##偻
+##偽
+##偿
+##傀
+##傅
+##傍
+##傑
+##傘
+##備
+##傚
+##傢
+##傣
+##傥
+##储
+##傩
+##催
+##傭
+##傲
+##傳
+##債
+##傷
+##傻
+##傾
+##僅
+##働
+##像
+##僑
+##僕
+##僖
+##僚
+##僥
+##僧
+##僭
+##僮
+##僱
+##僵
+##價
+##僻
+##儀
+##儂
+##億
+##儆
+##儉
+##儋
+##儒
+##儕
+##儘
+##償
+##儡
+##優
+##儲
+##儷
+##儼
+##儿
+##兀
+##允
+##元
+##兄
+##充
+##兆
+##兇
+##先
+##光
+##克
+##兌
+##免
+##児
+##兑
+##兒
+##兔
+##兖
+##党
+##兜
+##兢
+##入
+##內
+##全
+##兩
+##八
+##公
+##六
+##兮
+##兰
+##共
+##兲
+##关
+##兴
+##兵
+##其
+##具
+##典
+##兹
+##养
+##兼
+##兽
+##冀
+##内
+##円
+##冇
+##冈
+##冉
+##冊
+##册
+##再
+##冏
+##冒
+##冕
+##冗
+##写
+##军
+##农
+##冠
+##冢
+##冤
+##冥
+##冨
+##冪
+##冬
+##冯
+##冰
+##冲
+##决
+##况
+##冶
+##冷
+##冻
+##冼
+##冽
+##冾
+##净
+##凄
+##准
+##凇
+##凈
+##凉
+##凋
+##凌
+##凍
+##减
+##凑
+##凛
+##凜
+##凝
+##几
+##凡
+##凤
+##処
+##凪
+##凭
+##凯
+##凰
+##凱
+##凳
+##凶
+##凸
+##凹
+##出
+##击
+##函
+##凿
+##刀
+##刁
+##刃
+##分
+##切
+##刈
+##刊
+##刍
+##刎
+##刑
+##划
+##列
+##刘
+##则
+##刚
+##创
+##初
+##删
+##判
+##別
+##刨
+##利
+##刪
+##别
+##刮
+##到
+##制
+##刷
+##券
+##刹
+##刺
+##刻
+##刽
+##剁
+##剂
+##剃
+##則
+##剉
+##削
+##剋
+##剌
+##前
+##剎
+##剐
+##剑
+##剔
+##剖
+##剛
+##剜
+##剝
+##剣
+##剤
+##剥
+##剧
+##剩
+##剪
+##副
+##割
+##創
+##剷
+##剽
+##剿
+##劃
+##劇
+##劈
+##劉
+##劊
+##劍
+##劏
+##劑
+##力
+##劝
+##办
+##功
+##加
+##务
+##劣
+##动
+##助
+##努
+##劫
+##劭
+##励
+##劲
+##劳
+##労
+##劵
+##効
+##劾
+##势
+##勁
+##勃
+##勇
+##勉
+##勋
+##勐
+##勒
+##動
+##勖
+##勘
+##務
+##勛
+##勝
+##勞
+##募
+##勢
+##勤
+##勧
+##勳
+##勵
+##勸
+##勺
+##勻
+##勾
+##勿
+##匀
+##包
+##匆
+##匈
+##匍
+##匐
+##匕
+##化
+##北
+##匙
+##匝
+##匠
+##匡
+##匣
+##匪
+##匮
+##匯
+##匱
+##匹
+##区
+##医
+##匾
+##匿
+##區
+##十
+##千
+##卅
+##升
+##午
+##卉
+##半
+##卍
+##华
+##协
+##卑
+##卒
+##卓
+##協
+##单
+##卖
+##南
+##単
+##博
+##卜
+##卞
+##卟
+##占
+##卡
+##卢
+##卤
+##卦
+##卧
+##卫
+##卮
+##卯
+##印
+##危
+##即
+##却
+##卵
+##卷
+##卸
+##卻
+##卿
+##厂
+##厄
+##厅
+##历
+##厉
+##压
+##厌
+##厕
+##厘
+##厚
+##厝
+##原
+##厢
+##厥
+##厦
+##厨
+##厩
+##厭
+##厮
+##厲
+##厳
+##去
+##县
+##叁
+##参
+##參
+##又
+##叉
+##及
+##友
+##双
+##反
+##収
+##发
+##叔
+##取
+##受
+##变
+##叙
+##叛
+##叟
+##叠
+##叡
+##叢
+##口
+##古
+##句
+##另
+##叨
+##叩
+##只
+##叫
+##召
+##叭
+##叮
+##可
+##台
+##叱
+##史
+##右
+##叵
+##叶
+##号
+##司
+##叹
+##叻
+##叼
+##叽
+##吁
+##吃
+##各
+##吆
+##合
+##吉
+##吊
+##吋
+##同
+##名
+##后
+##吏
+##吐
+##向
+##吒
+##吓
+##吕
+##吖
+##吗
+##君
+##吝
+##吞
+##吟
+##吠
+##吡
+##否
+##吧
+##吨
+##吩
+##含
+##听
+##吭
+##吮
+##启
+##吱
+##吳
+##吴
+##吵
+##吶
+##吸
+##吹
+##吻
+##吼
+##吽
+##吾
+##呀
+##呂
+##呃
+##呆
+##呈
+##告
+##呋
+##呎
+##呐
+##呓
+##呕
+##呗
+##员
+##呛
+##呜
+##呢
+##呤
+##呦
+##周
+##呱
+##呲
+##味
+##呵
+##呷
+##呸
+##呻
+##呼
+##命
+##咀
+##咁
+##咂
+##咄
+##咆
+##咋
+##和
+##咎
+##咏
+##咐
+##咒
+##咔
+##咕
+##咖
+##咗
+##咘
+##咙
+##咚
+##咛
+##咣
+##咤
+##咦
+##咧
+##咨
+##咩
+##咪
+##咫
+##咬
+##咭
+##咯
+##咱
+##咲
+##咳
+##咸
+##咻
+##咽
+##咿
+##哀
+##品
+##哂
+##哄
+##哆
+##哇
+##哈
+##哉
+##哋
+##哌
+##响
+##哎
+##哏
+##哐
+##哑
+##哒
+##哔
+##哗
+##哟
+##員
+##哥
+##哦
+##哧
+##哨
+##哩
+##哪
+##哭
+##哮
+##哲
+##哺
+##哼
+##哽
+##唁
+##唄
+##唆
+##唇
+##唉
+##唏
+##唐
+##唑
+##唔
+##唠
+##唤
+##唧
+##唬
+##售
+##唯
+##唰
+##唱
+##唳
+##唷
+##唸
+##唾
+##啃
+##啄
+##商
+##啉
+##啊
+##問
+##啓
+##啕
+##啖
+##啜
+##啞
+##啟
+##啡
+##啤
+##啥
+##啦
+##啧
+##啪
+##啫
+##啬
+##啮
+##啰
+##啱
+##啲
+##啵
+##啶
+##啷
+##啸
+##啻
+##啼
+##啾
+##喀
+##喂
+##喃
+##善
+##喆
+##喇
+##喉
+##喊
+##喋
+##喎
+##喏
+##喔
+##喘
+##喙
+##喚
+##喜
+##喝
+##喟
+##喧
+##喪
+##喫
+##喬
+##單
+##喰
+##喱
+##喲
+##喳
+##喵
+##営
+##喷
+##喹
+##喺
+##喻
+##喽
+##嗅
+##嗆
+##嗇
+##嗎
+##嗑
+##嗒
+##嗓
+##嗔
+##嗖
+##嗚
+##嗜
+##嗝
+##嗟
+##嗡
+##嗣
+##嗤
+##嗦
+##嗨
+##嗪
+##嗬
+##嗯
+##嗰
+##嗲
+##嗳
+##嗶
+##嗷
+##嗽
+##嘀
+##嘅
+##嘆
+##嘈
+##嘉
+##嘌
+##嘍
+##嘎
+##嘔
+##嘖
+##嘗
+##嘘
+##嘚
+##嘛
+##嘜
+##嘞
+##嘟
+##嘢
+##嘣
+##嘤
+##嘧
+##嘩
+##嘭
+##嘮
+##嘯
+##嘰
+##嘱
+##嘲
+##嘴
+##嘶
+##嘸
+##嘹
+##嘻
+##嘿
+##噁
+##噌
+##噎
+##噓
+##噔
+##噗
+##噙
+##噜
+##噠
+##噢
+##噤
+##器
+##噩
+##噪
+##噬
+##噱
+##噴
+##噶
+##噸
+##噹
+##噻
+##噼
+##嚀
+##嚇
+##嚎
+##嚏
+##嚐
+##嚓
+##嚕
+##嚟
+##嚣
+##嚥
+##嚨
+##嚮
+##嚴
+##嚷
+##嚼
+##囂
+##囉
+##囊
+##囍
+##囑
+##囔
+##囗
+##囚
+##四
+##囝
+##回
+##囟
+##因
+##囡
+##团
+##団
+##囤
+##囧
+##囪
+##囫
+##园
+##困
+##囱
+##囲
+##図
+##围
+##囹
+##固
+##国
+##图
+##囿
+##圃
+##圄
+##圆
+##圈
+##國
+##圍
+##圏
+##園
+##圓
+##圖
+##團
+##圜
+##土
+##圣
+##圧
+##在
+##圩
+##圭
+##地
+##圳
+##场
+##圻
+##圾
+##址
+##坂
+##均
+##坊
+##坍
+##坎
+##坏
+##坐
+##坑
+##块
+##坚
+##坛
+##坝
+##坞
+##坟
+##坠
+##坡
+##坤
+##坦
+##坨
+##坪
+##坯
+##坳
+##坵
+##坷
+##垂
+##垃
+##垄
+##型
+##垒
+##垚
+##垛
+##垠
+##垢
+##垣
+##垦
+##垩
+##垫
+##垭
+##垮
+##垵
+##埂
+##埃
+##埋
+##城
+##埔
+##埕
+##埗
+##域
+##埠
+##埤
+##埵
+##執
+##埸
+##培
+##基
+##埼
+##堀
+##堂
+##堃
+##堅
+##堆
+##堇
+##堑
+##堕
+##堙
+##堡
+##堤
+##堪
+##堯
+##堰
+##報
+##場
+##堵
+##堺
+##堿
+##塊
+##塌
+##塑
+##塔
+##塗
+##塘
+##塚
+##塞
+##塢
+##塩
+##填
+##塬
+##塭
+##塵
+##塾
+##墀
+##境
+##墅
+##墉
+##墊
+##墒
+##墓
+##増
+##墘
+##墙
+##墜
+##增
+##墟
+##墨
+##墩
+##墮
+##墳
+##墻
+##墾
+##壁
+##壅
+##壆
+##壇
+##壊
+##壑
+##壓
+##壕
+##壘
+##壞
+##壟
+##壢
+##壤
+##壩
+##士
+##壬
+##壮
+##壯
+##声
+##売
+##壳
+##壶
+##壹
+##壺
+##壽
+##处
+##备
+##変
+##复
+##夏
+##夔
+##夕
+##外
+##夙
+##多
+##夜
+##够
+##夠
+##夢
+##夥
+##大
+##天
+##太
+##夫
+##夭
+##央
+##夯
+##失
+##头
+##夷
+##夸
+##夹
+##夺
+##夾
+##奂
+##奄
+##奇
+##奈
+##奉
+##奋
+##奎
+##奏
+##奐
+##契
+##奔
+##奕
+##奖
+##套
+##奘
+##奚
+##奠
+##奢
+##奥
+##奧
+##奪
+##奬
+##奮
+##女
+##奴
+##奶
+##奸
+##她
+##好
+##如
+##妃
+##妄
+##妆
+##妇
+##妈
+##妊
+##妍
+##妒
+##妓
+##妖
+##妘
+##妙
+##妝
+##妞
+##妣
+##妤
+##妥
+##妨
+##妩
+##妪
+##妮
+##妲
+##妳
+##妹
+##妻
+##妾
+##姆
+##姉
+##姊
+##始
+##姍
+##姐
+##姑
+##姒
+##姓
+##委
+##姗
+##姚
+##姜
+##姝
+##姣
+##姥
+##姦
+##姨
+##姪
+##姫
+##姬
+##姹
+##姻
+##姿
+##威
+##娃
+##娄
+##娅
+##娆
+##娇
+##娉
+##娑
+##娓
+##娘
+##娛
+##娜
+##娟
+##娠
+##娣
+##娥
+##娩
+##娱
+##娲
+##娴
+##娶
+##娼
+##婀
+##婁
+##婆
+##婉
+##婊
+##婕
+##婚
+##婢
+##婦
+##婧
+##婪
+##婭
+##婴
+##婵
+##婶
+##婷
+##婺
+##婿
+##媒
+##媚
+##媛
+##媞
+##媧
+##媲
+##媳
+##媽
+##媾
+##嫁
+##嫂
+##嫉
+##嫌
+##嫑
+##嫔
+##嫖
+##嫘
+##嫚
+##嫡
+##嫣
+##嫦
+##嫩
+##嫲
+##嫵
+##嫻
+##嬅
+##嬉
+##嬌
+##嬗
+##嬛
+##嬢
+##嬤
+##嬪
+##嬰
+##嬴
+##嬷
+##嬸
+##嬿
+##孀
+##孃
+##子
+##孑
+##孔
+##孕
+##孖
+##字
+##存
+##孙
+##孚
+##孛
+##孜
+##孝
+##孟
+##孢
+##季
+##孤
+##学
+##孩
+##孪
+##孫
+##孬
+##孰
+##孱
+##孳
+##孵
+##學
+##孺
+##孽
+##孿
+##宁
+##它
+##宅
+##宇
+##守
+##安
+##宋
+##完
+##宏
+##宓
+##宕
+##宗
+##官
+##宙
+##定
+##宛
+##宜
+##宝
+##实
+##実
+##宠
+##审
+##客
+##宣
+##室
+##宥
+##宦
+##宪
+##宫
+##宮
+##宰
+##害
+##宴
+##宵
+##家
+##宸
+##容
+##宽
+##宾
+##宿
+##寂
+##寄
+##寅
+##密
+##寇
+##富
+##寐
+##寒
+##寓
+##寛
+##寝
+##寞
+##察
+##寡
+##寢
+##寥
+##實
+##寧
+##寨
+##審
+##寫
+##寬
+##寮
+##寰
+##寵
+##寶
+##寸
+##对
+##寺
+##寻
+##导
+##対
+##寿
+##封
+##専
+##射
+##将
+##將
+##專
+##尉
+##尊
+##尋
+##對
+##導
+##小
+##少
+##尔
+##尕
+##尖
+##尘
+##尚
+##尝
+##尤
+##尧
+##尬
+##就
+##尴
+##尷
+##尸
+##尹
+##尺
+##尻
+##尼
+##尽
+##尾
+##尿
+##局
+##屁
+##层
+##屄
+##居
+##屆
+##屈
+##屉
+##届
+##屋
+##屌
+##屍
+##屎
+##屏
+##屐
+##屑
+##展
+##屜
+##属
+##屠
+##屡
+##屢
+##層
+##履
+##屬
+##屯
+##山
+##屹
+##屿
+##岀
+##岁
+##岂
+##岌
+##岐
+##岑
+##岔
+##岖
+##岗
+##岘
+##岙
+##岚
+##岛
+##岡
+##岩
+##岫
+##岬
+##岭
+##岱
+##岳
+##岷
+##岸
+##峇
+##峋
+##峒
+##峙
+##峡
+##峤
+##峥
+##峦
+##峨
+##峪
+##峭
+##峯
+##峰
+##峴
+##島
+##峻
+##峽
+##崁
+##崂
+##崆
+##崇
+##崎
+##崑
+##崔
+##崖
+##崗
+##崙
+##崛
+##崧
+##崩
+##崭
+##崴
+##崽
+##嵇
+##嵊
+##嵋
+##嵌
+##嵐
+##嵘
+##嵩
+##嵬
+##嵯
+##嶂
+##嶄
+##嶇
+##嶋
+##嶙
+##嶺
+##嶼
+##嶽
+##巅
+##巍
+##巒
+##巔
+##巖
+##川
+##州
+##巡
+##巢
+##工
+##左
+##巧
+##巨
+##巩
+##巫
+##差
+##己
+##已
+##巳
+##巴
+##巷
+##巻
+##巽
+##巾
+##巿
+##币
+##市
+##布
+##帅
+##帆
+##师
+##希
+##帐
+##帑
+##帕
+##帖
+##帘
+##帚
+##帛
+##帜
+##帝
+##帥
+##带
+##帧
+##師
+##席
+##帮
+##帯
+##帰
+##帳
+##帶
+##帷
+##常
+##帼
+##帽
+##幀
+##幂
+##幄
+##幅
+##幌
+##幔
+##幕
+##幟
+##幡
+##幢
+##幣
+##幫
+##干
+##平
+##年
+##并
+##幸
+##幹
+##幺
+##幻
+##幼
+##幽
+##幾
+##广
+##庁
+##広
+##庄
+##庆
+##庇
+##床
+##序
+##庐
+##库
+##应
+##底
+##庖
+##店
+##庙
+##庚
+##府
+##庞
+##废
+##庠
+##度
+##座
+##庫
+##庭
+##庵
+##庶
+##康
+##庸
+##庹
+##庾
+##廁
+##廂
+##廃
+##廈
+##廉
+##廊
+##廓
+##廖
+##廚
+##廝
+##廟
+##廠
+##廢
+##廣
+##廬
+##廳
+##延
+##廷
+##建
+##廿
+##开
+##弁
+##异
+##弃
+##弄
+##弈
+##弊
+##弋
+##式
+##弑
+##弒
+##弓
+##弔
+##引
+##弗
+##弘
+##弛
+##弟
+##张
+##弥
+##弦
+##弧
+##弩
+##弭
+##弯
+##弱
+##張
+##強
+##弹
+##强
+##弼
+##弾
+##彅
+##彆
+##彈
+##彌
+##彎
+##归
+##当
+##录
+##彗
+##彙
+##彝
+##形
+##彤
+##彥
+##彦
+##彧
+##彩
+##彪
+##彫
+##彬
+##彭
+##彰
+##影
+##彷
+##役
+##彻
+##彼
+##彿
+##往
+##征
+##径
+##待
+##徇
+##很
+##徉
+##徊
+##律
+##後
+##徐
+##徑
+##徒
+##従
+##徕
+##得
+##徘
+##徙
+##徜
+##從
+##徠
+##御
+##徨
+##復
+##循
+##徬
+##微
+##徳
+##徴
+##徵
+##德
+##徹
+##徼
+##徽
+##心
+##必
+##忆
+##忌
+##忍
+##忏
+##忐
+##忑
+##忒
+##忖
+##志
+##忘
+##忙
+##応
+##忠
+##忡
+##忤
+##忧
+##忪
+##快
+##忱
+##念
+##忻
+##忽
+##忿
+##怀
+##态
+##怂
+##怅
+##怆
+##怎
+##怏
+##怒
+##怔
+##怕
+##怖
+##怙
+##怜
+##思
+##怠
+##怡
+##急
+##怦
+##性
+##怨
+##怪
+##怯
+##怵
+##总
+##怼
+##恁
+##恃
+##恆
+##恋
+##恍
+##恐
+##恒
+##恕
+##恙
+##恚
+##恢
+##恣
+##恤
+##恥
+##恨
+##恩
+##恪
+##恫
+##恬
+##恭
+##息
+##恰
+##恳
+##恵
+##恶
+##恸
+##恺
+##恻
+##恼
+##恿
+##悄
+##悅
+##悉
+##悌
+##悍
+##悔
+##悖
+##悚
+##悟
+##悠
+##患
+##悦
+##您
+##悩
+##悪
+##悬
+##悯
+##悱
+##悲
+##悴
+##悵
+##悶
+##悸
+##悻
+##悼
+##悽
+##情
+##惆
+##惇
+##惊
+##惋
+##惑
+##惕
+##惘
+##惚
+##惜
+##惟
+##惠
+##惡
+##惦
+##惧
+##惨
+##惩
+##惫
+##惬
+##惭
+##惮
+##惯
+##惰
+##惱
+##想
+##惴
+##惶
+##惹
+##惺
+##愁
+##愆
+##愈
+##愉
+##愍
+##意
+##愕
+##愚
+##愛
+##愜
+##感
+##愣
+##愤
+##愧
+##愫
+##愷
+##愿
+##慄
+##慈
+##態
+##慌
+##慎
+##慑
+##慕
+##慘
+##慚
+##慟
+##慢
+##慣
+##慧
+##慨
+##慫
+##慮
+##慰
+##慳
+##慵
+##慶
+##慷
+##慾
+##憂
+##憊
+##憋
+##憎
+##憐
+##憑
+##憔
+##憚
+##憤
+##憧
+##憨
+##憩
+##憫
+##憬
+##憲
+##憶
+##憾
+##懂
+##懇
+##懈
+##應
+##懊
+##懋
+##懑
+##懒
+##懦
+##懲
+##懵
+##懶
+##懷
+##懸
+##懺
+##懼
+##懾
+##懿
+##戀
+##戈
+##戊
+##戌
+##戍
+##戎
+##戏
+##成
+##我
+##戒
+##戕
+##或
+##战
+##戚
+##戛
+##戟
+##戡
+##戦
+##截
+##戬
+##戮
+##戰
+##戲
+##戳
+##戴
+##戶
+##户
+##戸
+##戻
+##戾
+##房
+##所
+##扁
+##扇
+##扈
+##扉
+##手
+##才
+##扎
+##扑
+##扒
+##打
+##扔
+##払
+##托
+##扛
+##扣
+##扦
+##执
+##扩
+##扪
+##扫
+##扬
+##扭
+##扮
+##扯
+##扰
+##扱
+##扳
+##扶
+##批
+##扼
+##找
+##承
+##技
+##抄
+##抉
+##把
+##抑
+##抒
+##抓
+##投
+##抖
+##抗
+##折
+##抚
+##抛
+##抜
+##択
+##抟
+##抠
+##抡
+##抢
+##护
+##报
+##抨
+##披
+##抬
+##抱
+##抵
+##抹
+##押
+##抽
+##抿
+##拂
+##拄
+##担
+##拆
+##拇
+##拈
+##拉
+##拋
+##拌
+##拍
+##拎
+##拐
+##拒
+##拓
+##拔
+##拖
+##拗
+##拘
+##拙
+##拚
+##招
+##拜
+##拟
+##拡
+##拢
+##拣
+##拥
+##拦
+##拧
+##拨
+##择
+##括
+##拭
+##拮
+##拯
+##拱
+##拳
+##拴
+##拷
+##拼
+##拽
+##拾
+##拿
+##持
+##挂
+##指
+##挈
+##按
+##挎
+##挑
+##挖
+##挙
+##挚
+##挛
+##挝
+##挞
+##挟
+##挠
+##挡
+##挣
+##挤
+##挥
+##挨
+##挪
+##挫
+##振
+##挲
+##挹
+##挺
+##挽
+##挾
+##捂
+##捅
+##捆
+##捉
+##捋
+##捌
+##捍
+##捎
+##捏
+##捐
+##捕
+##捞
+##损
+##捡
+##换
+##捣
+##捧
+##捨
+##捩
+##据
+##捱
+##捲
+##捶
+##捷
+##捺
+##捻
+##掀
+##掂
+##掃
+##掇
+##授
+##掉
+##掌
+##掏
+##掐
+##排
+##掖
+##掘
+##掙
+##掛
+##掠
+##採
+##探
+##掣
+##接
+##控
+##推
+##掩
+##措
+##掬
+##掰
+##掲
+##掳
+##掴
+##掷
+##掸
+##掺
+##揀
+##揃
+##揄
+##揆
+##揉
+##揍
+##描
+##提
+##插
+##揖
+##揚
+##換
+##握
+##揣
+##揩
+##揪
+##揭
+##揮
+##援
+##揶
+##揸
+##揹
+##揽
+##搀
+##搁
+##搂
+##搅
+##損
+##搏
+##搐
+##搓
+##搔
+##搖
+##搗
+##搜
+##搞
+##搡
+##搪
+##搬
+##搭
+##搵
+##搶
+##携
+##搽
+##摀
+##摁
+##摄
+##摆
+##摇
+##摈
+##摊
+##摒
+##摔
+##摘
+##摞
+##摟
+##摧
+##摩
+##摯
+##摳
+##摸
+##摹
+##摺
+##摻
+##撂
+##撃
+##撅
+##撇
+##撈
+##撐
+##撑
+##撒
+##撓
+##撕
+##撚
+##撞
+##撤
+##撥
+##撩
+##撫
+##撬
+##播
+##撮
+##撰
+##撲
+##撵
+##撷
+##撸
+##撻
+##撼
+##撿
+##擀
+##擁
+##擂
+##擄
+##擅
+##擇
+##擊
+##擋
+##操
+##擎
+##擒
+##擔
+##擘
+##據
+##擞
+##擠
+##擡
+##擢
+##擦
+##擬
+##擰
+##擱
+##擲
+##擴
+##擷
+##擺
+##擼
+##擾
+##攀
+##攏
+##攒
+##攔
+##攘
+##攙
+##攜
+##攝
+##攞
+##攢
+##攣
+##攤
+##攥
+##攪
+##攫
+##攬
+##支
+##收
+##攸
+##改
+##攻
+##放
+##政
+##故
+##效
+##敌
+##敍
+##敎
+##敏
+##救
+##敕
+##敖
+##敗
+##敘
+##教
+##敛
+##敝
+##敞
+##敢
+##散
+##敦
+##敬
+##数
+##敲
+##整
+##敵
+##敷
+##數
+##斂
+##斃
+##文
+##斋
+##斌
+##斎
+##斐
+##斑
+##斓
+##斗
+##料
+##斛
+##斜
+##斟
+##斡
+##斤
+##斥
+##斧
+##斩
+##斫
+##斬
+##断
+##斯
+##新
+##斷
+##方
+##於
+##施
+##旁
+##旃
+##旅
+##旋
+##旌
+##旎
+##族
+##旖
+##旗
+##无
+##既
+##日
+##旦
+##旧
+##旨
+##早
+##旬
+##旭
+##旮
+##旱
+##时
+##旷
+##旺
+##旻
+##昀
+##昂
+##昆
+##昇
+##昉
+##昊
+##昌
+##明
+##昏
+##易
+##昔
+##昕
+##昙
+##星
+##映
+##春
+##昧
+##昨
+##昭
+##是
+##昱
+##昴
+##昵
+##昶
+##昼
+##显
+##晁
+##時
+##晃
+##晉
+##晋
+##晌
+##晏
+##晒
+##晓
+##晔
+##晕
+##晖
+##晗
+##晚
+##晝
+##晞
+##晟
+##晤
+##晦
+##晨
+##晩
+##普
+##景
+##晰
+##晴
+##晶
+##晷
+##智
+##晾
+##暂
+##暄
+##暇
+##暈
+##暉
+##暌
+##暐
+##暑
+##暖
+##暗
+##暝
+##暢
+##暧
+##暨
+##暫
+##暮
+##暱
+##暴
+##暸
+##暹
+##曄
+##曆
+##曇
+##曉
+##曖
+##曙
+##曜
+##曝
+##曠
+##曦
+##曬
+##曰
+##曲
+##曳
+##更
+##書
+##曹
+##曼
+##曾
+##替
+##最
+##會
+##月
+##有
+##朋
+##服
+##朐
+##朔
+##朕
+##朗
+##望
+##朝
+##期
+##朦
+##朧
+##木
+##未
+##末
+##本
+##札
+##朮
+##术
+##朱
+##朴
+##朵
+##机
+##朽
+##杀
+##杂
+##权
+##杆
+##杈
+##杉
+##李
+##杏
+##材
+##村
+##杓
+##杖
+##杜
+##杞
+##束
+##杠
+##条
+##来
+##杨
+##杭
+##杯
+##杰
+##東
+##杳
+##杵
+##杷
+##杼
+##松
+##板
+##极
+##构
+##枇
+##枉
+##枋
+##析
+##枕
+##林
+##枚
+##果
+##枝
+##枢
+##枣
+##枪
+##枫
+##枭
+##枯
+##枰
+##枱
+##枳
+##架
+##枷
+##枸
+##柄
+##柏
+##某
+##柑
+##柒
+##染
+##柔
+##柘
+##柚
+##柜
+##柞
+##柠
+##柢
+##查
+##柩
+##柬
+##柯
+##柱
+##柳
+##柴
+##柵
+##査
+##柿
+##栀
+##栃
+##栄
+##栅
+##标
+##栈
+##栉
+##栋
+##栎
+##栏
+##树
+##栓
+##栖
+##栗
+##校
+##栩
+##株
+##样
+##核
+##根
+##格
+##栽
+##栾
+##桀
+##桁
+##桂
+##桃
+##桅
+##框
+##案
+##桉
+##桌
+##桎
+##桐
+##桑
+##桓
+##桔
+##桜
+##桠
+##桡
+##桢
+##档
+##桥
+##桦
+##桧
+##桨
+##桩
+##桶
+##桿
+##梁
+##梅
+##梆
+##梏
+##梓
+##梗
+##條
+##梟
+##梢
+##梦
+##梧
+##梨
+##梭
+##梯
+##械
+##梳
+##梵
+##梶
+##检
+##棂
+##棄
+##棉
+##棋
+##棍
+##棒
+##棕
+##棗
+##棘
+##棚
+##棟
+##棠
+##棣
+##棧
+##森
+##棱
+##棲
+##棵
+##棹
+##棺
+##椁
+##椅
+##椋
+##植
+##椎
+##椒
+##検
+##椪
+##椭
+##椰
+##椹
+##椽
+##椿
+##楂
+##楊
+##楓
+##楔
+##楚
+##楝
+##楞
+##楠
+##楣
+##楨
+##楫
+##業
+##楮
+##極
+##楷
+##楸
+##楹
+##楼
+##楽
+##概
+##榄
+##榆
+##榈
+##榉
+##榔
+##榕
+##榖
+##榛
+##榜
+##榨
+##榫
+##榭
+##榮
+##榱
+##榴
+##榷
+##榻
+##槁
+##槃
+##構
+##槌
+##槍
+##槎
+##槐
+##槓
+##様
+##槛
+##槟
+##槤
+##槭
+##槲
+##槳
+##槻
+##槽
+##槿
+##樁
+##樂
+##樊
+##樑
+##樓
+##標
+##樞
+##樟
+##模
+##樣
+##権
+##横
+##樫
+##樯
+##樱
+##樵
+##樸
+##樹
+##樺
+##樽
+##樾
+##橄
+##橇
+##橋
+##橐
+##橘
+##橙
+##機
+##橡
+##橢
+##橫
+##橱
+##橹
+##橼
+##檀
+##檄
+##檎
+##檐
+##檔
+##檗
+##檜
+##檢
+##檬
+##檯
+##檳
+##檸
+##檻
+##櫃
+##櫚
+##櫛
+##櫥
+##櫸
+##櫻
+##欄
+##權
+##欒
+##欖
+##欠
+##次
+##欢
+##欣
+##欧
+##欲
+##欸
+##欺
+##欽
+##款
+##歆
+##歇
+##歉
+##歌
+##歎
+##歐
+##歓
+##歙
+##歛
+##歡
+##止
+##正
+##此
+##步
+##武
+##歧
+##歩
+##歪
+##歯
+##歲
+##歳
+##歴
+##歷
+##歸
+##歹
+##死
+##歼
+##殁
+##殃
+##殆
+##殇
+##殉
+##殊
+##残
+##殒
+##殓
+##殖
+##殘
+##殞
+##殡
+##殤
+##殭
+##殯
+##殲
+##殴
+##段
+##殷
+##殺
+##殼
+##殿
+##毀
+##毁
+##毂
+##毅
+##毆
+##毋
+##母
+##毎
+##每
+##毒
+##毓
+##比
+##毕
+##毗
+##毘
+##毙
+##毛
+##毡
+##毫
+##毯
+##毽
+##氈
+##氏
+##氐
+##民
+##氓
+##气
+##氖
+##気
+##氙
+##氛
+##氟
+##氡
+##氢
+##氣
+##氤
+##氦
+##氧
+##氨
+##氪
+##氫
+##氮
+##氯
+##氰
+##氲
+##水
+##氷
+##永
+##氹
+##氾
+##汀
+##汁
+##求
+##汆
+##汇
+##汉
+##汎
+##汐
+##汕
+##汗
+##汙
+##汛
+##汝
+##汞
+##江
+##池
+##污
+##汤
+##汨
+##汩
+##汪
+##汰
+##汲
+##汴
+##汶
+##汹
+##決
+##汽
+##汾
+##沁
+##沂
+##沃
+##沅
+##沈
+##沉
+##沌
+##沏
+##沐
+##沒
+##沓
+##沖
+##沙
+##沛
+##沟
+##没
+##沢
+##沣
+##沥
+##沦
+##沧
+##沪
+##沫
+##沭
+##沮
+##沱
+##河
+##沸
+##油
+##治
+##沼
+##沽
+##沾
+##沿
+##況
+##泄
+##泉
+##泊
+##泌
+##泓
+##法
+##泗
+##泛
+##泞
+##泠
+##泡
+##波
+##泣
+##泥
+##注
+##泪
+##泫
+##泮
+##泯
+##泰
+##泱
+##泳
+##泵
+##泷
+##泸
+##泻
+##泼
+##泽
+##泾
+##洁
+##洄
+##洋
+##洒
+##洗
+##洙
+##洛
+##洞
+##津
+##洩
+##洪
+##洮
+##洱
+##洲
+##洵
+##洶
+##洸
+##洹
+##活
+##洼
+##洽
+##派
+##流
+##浃
+##浄
+##浅
+##浆
+##浇
+##浊
+##测
+##济
+##浏
+##浑
+##浒
+##浓
+##浔
+##浙
+##浚
+##浜
+##浣
+##浦
+##浩
+##浪
+##浬
+##浮
+##浯
+##浴
+##海
+##浸
+##涂
+##涅
+##涇
+##消
+##涉
+##涌
+##涎
+##涓
+##涔
+##涕
+##涙
+##涛
+##涝
+##涞
+##涟
+##涠
+##涡
+##涣
+##涤
+##润
+##涧
+##涨
+##涩
+##涪
+##涮
+##涯
+##液
+##涵
+##涸
+##涼
+##涿
+##淀
+##淄
+##淅
+##淆
+##淇
+##淋
+##淌
+##淑
+##淒
+##淖
+##淘
+##淙
+##淚
+##淞
+##淡
+##淤
+##淦
+##淨
+##淩
+##淪
+##淫
+##淬
+##淮
+##深
+##淳
+##淵
+##混
+##淹
+##淺
+##添
+##淼
+##清
+##済
+##渉
+##渊
+##渋
+##渍
+##渎
+##渐
+##渔
+##渗
+##渙
+##渚
+##減
+##渝
+##渠
+##渡
+##渣
+##渤
+##渥
+##渦
+##温
+##測
+##渭
+##港
+##渲
+##渴
+##游
+##渺
+##渾
+##湃
+##湄
+##湊
+##湍
+##湖
+##湘
+##湛
+##湟
+##湧
+##湫
+##湮
+##湯
+##湳
+##湾
+##湿
+##満
+##溃
+##溅
+##溉
+##溏
+##源
+##準
+##溜
+##溝
+##溟
+##溢
+##溥
+##溧
+##溪
+##溫
+##溯
+##溱
+##溴
+##溶
+##溺
+##溼
+##滁
+##滂
+##滄
+##滅
+##滇
+##滋
+##滌
+##滑
+##滓
+##滔
+##滕
+##滙
+##滚
+##滝
+##滞
+##滟
+##满
+##滢
+##滤
+##滥
+##滦
+##滨
+##滩
+##滬
+##滯
+##滲
+##滴
+##滷
+##滸
+##滾
+##滿
+##漁
+##漂
+##漆
+##漉
+##漏
+##漓
+##演
+##漕
+##漠
+##漢
+##漣
+##漩
+##漪
+##漫
+##漬
+##漯
+##漱
+##漲
+##漳
+##漸
+##漾
+##漿
+##潆
+##潇
+##潋
+##潍
+##潑
+##潔
+##潘
+##潛
+##潜
+##潞
+##潟
+##潢
+##潤
+##潦
+##潧
+##潭
+##潮
+##潰
+##潴
+##潸
+##潺
+##潼
+##澀
+##澄
+##澆
+##澈
+##澍
+##澎
+##澗
+##澜
+##澡
+##澤
+##澧
+##澱
+##澳
+##澹
+##激
+##濁
+##濂
+##濃
+##濑
+##濒
+##濕
+##濘
+##濛
+##濟
+##濠
+##濡
+##濤
+##濫
+##濬
+##濮
+##濯
+##濱
+##濺
+##濾
+##瀅
+##瀆
+##瀉
+##瀋
+##瀏
+##瀑
+##瀕
+##瀘
+##瀚
+##瀛
+##瀝
+##瀞
+##瀟
+##瀧
+##瀨
+##瀬
+##瀰
+##瀾
+##灌
+##灏
+##灑
+##灘
+##灝
+##灞
+##灣
+##火
+##灬
+##灭
+##灯
+##灰
+##灵
+##灶
+##灸
+##灼
+##災
+##灾
+##灿
+##炀
+##炁
+##炅
+##炉
+##炊
+##炎
+##炒
+##炔
+##炕
+##炖
+##炙
+##炜
+##炫
+##炬
+##炭
+##炮
+##炯
+##炳
+##炷
+##炸
+##点
+##為
+##炼
+##炽
+##烁
+##烂
+##烃
+##烈
+##烊
+##烏
+##烘
+##烙
+##烛
+##烟
+##烤
+##烦
+##烧
+##烨
+##烩
+##烫
+##烬
+##热
+##烯
+##烷
+##烹
+##烽
+##焉
+##焊
+##焕
+##焖
+##焗
+##焘
+##焙
+##焚
+##焜
+##無
+##焦
+##焯
+##焰
+##焱
+##然
+##焼
+##煅
+##煉
+##煊
+##煌
+##煎
+##煒
+##煖
+##煙
+##煜
+##煞
+##煤
+##煥
+##煦
+##照
+##煨
+##煩
+##煮
+##煲
+##煸
+##煽
+##熄
+##熊
+##熏
+##熒
+##熔
+##熙
+##熟
+##熠
+##熨
+##熬
+##熱
+##熵
+##熹
+##熾
+##燁
+##燃
+##燄
+##燈
+##燉
+##燊
+##燎
+##燒
+##燔
+##燕
+##燙
+##燜
+##營
+##燥
+##燦
+##燧
+##燭
+##燮
+##燴
+##燻
+##燼
+##燿
+##爆
+##爍
+##爐
+##爛
+##爪
+##爬
+##爭
+##爰
+##爱
+##爲
+##爵
+##父
+##爷
+##爸
+##爹
+##爺
+##爻
+##爽
+##爾
+##牆
+##片
+##版
+##牌
+##牍
+##牒
+##牙
+##牛
+##牝
+##牟
+##牠
+##牡
+##牢
+##牦
+##牧
+##物
+##牯
+##牲
+##牴
+##牵
+##特
+##牺
+##牽
+##犀
+##犁
+##犄
+##犊
+##犍
+##犒
+##犢
+##犧
+##犬
+##犯
+##状
+##犷
+##犸
+##犹
+##狀
+##狂
+##狄
+##狈
+##狎
+##狐
+##狒
+##狗
+##狙
+##狞
+##狠
+##狡
+##狩
+##独
+##狭
+##狮
+##狰
+##狱
+##狸
+##狹
+##狼
+##狽
+##猎
+##猕
+##猖
+##猗
+##猙
+##猛
+##猜
+##猝
+##猥
+##猩
+##猪
+##猫
+##猬
+##献
+##猴
+##猶
+##猷
+##猾
+##猿
+##獄
+##獅
+##獎
+##獐
+##獒
+##獗
+##獠
+##獣
+##獨
+##獭
+##獰
+##獲
+##獵
+##獷
+##獸
+##獺
+##獻
+##獼
+##獾
+##玄
+##率
+##玉
+##王
+##玑
+##玖
+##玛
+##玟
+##玠
+##玥
+##玩
+##玫
+##玮
+##环
+##现
+##玲
+##玳
+##玷
+##玺
+##玻
+##珀
+##珂
+##珅
+##珈
+##珉
+##珊
+##珍
+##珏
+##珐
+##珑
+##珙
+##珞
+##珠
+##珣
+##珥
+##珩
+##珪
+##班
+##珮
+##珲
+##珺
+##現
+##球
+##琅
+##理
+##琇
+##琉
+##琊
+##琍
+##琏
+##琐
+##琛
+##琢
+##琥
+##琦
+##琨
+##琪
+##琬
+##琮
+##琰
+##琲
+##琳
+##琴
+##琵
+##琶
+##琺
+##琼
+##瑀
+##瑁
+##瑄
+##瑋
+##瑕
+##瑗
+##瑙
+##瑚
+##瑛
+##瑜
+##瑞
+##瑟
+##瑠
+##瑣
+##瑤
+##瑩
+##瑪
+##瑯
+##瑰
+##瑶
+##瑾
+##璀
+##璁
+##璃
+##璇
+##璉
+##璋
+##璎
+##璐
+##璜
+##璞
+##璟
+##璧
+##璨
+##環
+##璽
+##璿
+##瓊
+##瓏
+##瓒
+##瓜
+##瓢
+##瓣
+##瓤
+##瓦
+##瓮
+##瓯
+##瓴
+##瓶
+##瓷
+##甄
+##甌
+##甕
+##甘
+##甙
+##甚
+##甜
+##生
+##產
+##産
+##甥
+##甦
+##用
+##甩
+##甫
+##甬
+##甭
+##甯
+##田
+##由
+##甲
+##申
+##电
+##男
+##甸
+##町
+##画
+##甾
+##畀
+##畅
+##界
+##畏
+##畑
+##畔
+##留
+##畜
+##畝
+##畢
+##略
+##畦
+##番
+##畫
+##異
+##畲
+##畳
+##畴
+##當
+##畸
+##畹
+##畿
+##疆
+##疇
+##疊
+##疏
+##疑
+##疔
+##疖
+##疗
+##疙
+##疚
+##疝
+##疟
+##疡
+##疣
+##疤
+##疥
+##疫
+##疮
+##疯
+##疱
+##疲
+##疳
+##疵
+##疸
+##疹
+##疼
+##疽
+##疾
+##痂
+##病
+##症
+##痈
+##痉
+##痊
+##痍
+##痒
+##痔
+##痕
+##痘
+##痙
+##痛
+##痞
+##痠
+##痢
+##痣
+##痤
+##痧
+##痨
+##痪
+##痫
+##痰
+##痱
+##痴
+##痹
+##痺
+##痼
+##痿
+##瘀
+##瘁
+##瘋
+##瘍
+##瘓
+##瘘
+##瘙
+##瘟
+##瘠
+##瘡
+##瘢
+##瘤
+##瘦
+##瘧
+##瘩
+##瘪
+##瘫
+##瘴
+##瘸
+##瘾
+##療
+##癇
+##癌
+##癒
+##癖
+##癜
+##癞
+##癡
+##癢
+##癣
+##癥
+##癫
+##癬
+##癮
+##癱
+##癲
+##癸
+##発
+##登
+##發
+##白
+##百
+##皂
+##的
+##皆
+##皇
+##皈
+##皋
+##皎
+##皑
+##皓
+##皖
+##皙
+##皚
+##皮
+##皰
+##皱
+##皴
+##皺
+##皿
+##盂
+##盃
+##盅
+##盆
+##盈
+##益
+##盎
+##盏
+##盐
+##监
+##盒
+##盔
+##盖
+##盗
+##盘
+##盛
+##盜
+##盞
+##盟
+##盡
+##監
+##盤
+##盥
+##盧
+##盪
+##目
+##盯
+##盱
+##盲
+##直
+##相
+##盹
+##盼
+##盾
+##省
+##眈
+##眉
+##看
+##県
+##眙
+##眞
+##真
+##眠
+##眦
+##眨
+##眩
+##眯
+##眶
+##眷
+##眸
+##眺
+##眼
+##眾
+##着
+##睁
+##睇
+##睏
+##睐
+##睑
+##睛
+##睜
+##睞
+##睡
+##睢
+##督
+##睥
+##睦
+##睨
+##睪
+##睫
+##睬
+##睹
+##睽
+##睾
+##睿
+##瞄
+##瞅
+##瞇
+##瞋
+##瞌
+##瞎
+##瞑
+##瞒
+##瞓
+##瞞
+##瞟
+##瞠
+##瞥
+##瞧
+##瞩
+##瞪
+##瞬
+##瞭
+##瞰
+##瞳
+##瞻
+##瞼
+##瞿
+##矇
+##矍
+##矗
+##矚
+##矛
+##矜
+##矢
+##矣
+##知
+##矩
+##矫
+##短
+##矮
+##矯
+##石
+##矶
+##矽
+##矾
+##矿
+##码
+##砂
+##砌
+##砍
+##砒
+##研
+##砖
+##砗
+##砚
+##砝
+##砣
+##砥
+##砧
+##砭
+##砰
+##砲
+##破
+##砷
+##砸
+##砺
+##砼
+##砾
+##础
+##硅
+##硐
+##硒
+##硕
+##硝
+##硫
+##硬
+##确
+##硯
+##硼
+##碁
+##碇
+##碉
+##碌
+##碍
+##碎
+##碑
+##碓
+##碗
+##碘
+##碚
+##碛
+##碟
+##碣
+##碧
+##碩
+##碰
+##碱
+##碳
+##碴
+##確
+##碼
+##碾
+##磁
+##磅
+##磊
+##磋
+##磐
+##磕
+##磚
+##磡
+##磨
+##磬
+##磯
+##磲
+##磷
+##磺
+##礁
+##礎
+##礙
+##礡
+##礦
+##礪
+##礫
+##礴
+##示
+##礼
+##社
+##祀
+##祁
+##祂
+##祇
+##祈
+##祉
+##祎
+##祐
+##祕
+##祖
+##祗
+##祚
+##祛
+##祜
+##祝
+##神
+##祟
+##祠
+##祢
+##祥
+##票
+##祭
+##祯
+##祷
+##祸
+##祺
+##祿
+##禀
+##禁
+##禄
+##禅
+##禍
+##禎
+##福
+##禛
+##禦
+##禧
+##禪
+##禮
+##禱
+##禹
+##禺
+##离
+##禽
+##禾
+##禿
+##秀
+##私
+##秃
+##秆
+##秉
+##秋
+##种
+##科
+##秒
+##秘
+##租
+##秣
+##秤
+##秦
+##秧
+##秩
+##秭
+##积
+##称
+##秸
+##移
+##秽
+##稀
+##稅
+##程
+##稍
+##税
+##稔
+##稗
+##稚
+##稜
+##稞
+##稟
+##稠
+##稣
+##種
+##稱
+##稲
+##稳
+##稷
+##稹
+##稻
+##稼
+##稽
+##稿
+##穀
+##穂
+##穆
+##穌
+##積
+##穎
+##穗
+##穢
+##穩
+##穫
+##穴
+##究
+##穷
+##穹
+##空
+##穿
+##突
+##窃
+##窄
+##窈
+##窍
+##窑
+##窒
+##窓
+##窕
+##窖
+##窗
+##窘
+##窜
+##窝
+##窟
+##窠
+##窥
+##窦
+##窨
+##窩
+##窪
+##窮
+##窯
+##窺
+##窿
+##竄
+##竅
+##竇
+##竊
+##立
+##竖
+##站
+##竜
+##竞
+##竟
+##章
+##竣
+##童
+##竭
+##端
+##競
+##竹
+##竺
+##竽
+##竿
+##笃
+##笆
+##笈
+##笋
+##笏
+##笑
+##笔
+##笙
+##笛
+##笞
+##笠
+##符
+##笨
+##第
+##笹
+##笺
+##笼
+##筆
+##等
+##筊
+##筋
+##筍
+##筏
+##筐
+##筑
+##筒
+##答
+##策
+##筛
+##筝
+##筠
+##筱
+##筲
+##筵
+##筷
+##筹
+##签
+##简
+##箇
+##箋
+##箍
+##箏
+##箐
+##箔
+##箕
+##算
+##箝
+##管
+##箩
+##箫
+##箭
+##箱
+##箴
+##箸
+##節
+##篁
+##範
+##篆
+##篇
+##築
+##篑
+##篓
+##篙
+##篝
+##篠
+##篡
+##篤
+##篩
+##篪
+##篮
+##篱
+##篷
+##簇
+##簌
+##簍
+##簡
+##簦
+##簧
+##簪
+##簫
+##簷
+##簸
+##簽
+##簾
+##簿
+##籁
+##籃
+##籌
+##籍
+##籐
+##籟
+##籠
+##籤
+##籬
+##籮
+##籲
+##米
+##类
+##籼
+##籽
+##粄
+##粉
+##粑
+##粒
+##粕
+##粗
+##粘
+##粟
+##粤
+##粥
+##粧
+##粪
+##粮
+##粱
+##粲
+##粳
+##粵
+##粹
+##粼
+##粽
+##精
+##粿
+##糅
+##糊
+##糍
+##糕
+##糖
+##糗
+##糙
+##糜
+##糞
+##糟
+##糠
+##糧
+##糬
+##糯
+##糰
+##糸
+##系
+##糾
+##紀
+##紂
+##約
+##紅
+##紉
+##紊
+##紋
+##納
+##紐
+##紓
+##純
+##紗
+##紘
+##紙
+##級
+##紛
+##紜
+##素
+##紡
+##索
+##紧
+##紫
+##紮
+##累
+##細
+##紳
+##紹
+##紺
+##終
+##絃
+##組
+##絆
+##経
+##結
+##絕
+##絞
+##絡
+##絢
+##給
+##絨
+##絮
+##統
+##絲
+##絳
+##絵
+##絶
+##絹
+##綁
+##綏
+##綑
+##經
+##継
+##続
+##綜
+##綠
+##綢
+##綦
+##綫
+##綬
+##維
+##綱
+##網
+##綴
+##綵
+##綸
+##綺
+##綻
+##綽
+##綾
+##綿
+##緊
+##緋
+##総
+##緑
+##緒
+##緘
+##線
+##緝
+##緞
+##締
+##緣
+##編
+##緩
+##緬
+##緯
+##練
+##緹
+##緻
+##縁
+##縄
+##縈
+##縛
+##縝
+##縣
+##縫
+##縮
+##縱
+##縴
+##縷
+##總
+##績
+##繁
+##繃
+##繆
+##繇
+##繋
+##織
+##繕
+##繚
+##繞
+##繡
+##繩
+##繪
+##繫
+##繭
+##繳
+##繹
+##繼
+##繽
+##纂
+##續
+##纍
+##纏
+##纓
+##纔
+##纖
+##纜
+##纠
+##红
+##纣
+##纤
+##约
+##级
+##纨
+##纪
+##纫
+##纬
+##纭
+##纯
+##纰
+##纱
+##纲
+##纳
+##纵
+##纶
+##纷
+##纸
+##纹
+##纺
+##纽
+##纾
+##线
+##绀
+##练
+##组
+##绅
+##细
+##织
+##终
+##绊
+##绍
+##绎
+##经
+##绑
+##绒
+##结
+##绔
+##绕
+##绘
+##给
+##绚
+##绛
+##络
+##绝
+##绞
+##统
+##绡
+##绢
+##绣
+##绥
+##绦
+##继
+##绩
+##绪
+##绫
+##续
+##绮
+##绯
+##绰
+##绳
+##维
+##绵
+##绶
+##绷
+##绸
+##绻
+##综
+##绽
+##绾
+##绿
+##缀
+##缄
+##缅
+##缆
+##缇
+##缈
+##缉
+##缎
+##缓
+##缔
+##缕
+##编
+##缘
+##缙
+##缚
+##缜
+##缝
+##缠
+##缢
+##缤
+##缥
+##缨
+##缩
+##缪
+##缭
+##缮
+##缰
+##缱
+##缴
+##缸
+##缺
+##缽
+##罂
+##罄
+##罌
+##罐
+##网
+##罔
+##罕
+##罗
+##罚
+##罡
+##罢
+##罩
+##罪
+##置
+##罰
+##署
+##罵
+##罷
+##罹
+##羁
+##羅
+##羈
+##羊
+##羌
+##美
+##羔
+##羚
+##羞
+##羟
+##羡
+##羣
+##群
+##羥
+##羧
+##羨
+##義
+##羯
+##羲
+##羸
+##羹
+##羽
+##羿
+##翁
+##翅
+##翊
+##翌
+##翎
+##習
+##翔
+##翘
+##翟
+##翠
+##翡
+##翦
+##翩
+##翰
+##翱
+##翳
+##翹
+##翻
+##翼
+##耀
+##老
+##考
+##耄
+##者
+##耆
+##耋
+##而
+##耍
+##耐
+##耒
+##耕
+##耗
+##耘
+##耙
+##耦
+##耨
+##耳
+##耶
+##耷
+##耸
+##耻
+##耽
+##耿
+##聂
+##聆
+##聊
+##聋
+##职
+##聒
+##联
+##聖
+##聘
+##聚
+##聞
+##聪
+##聯
+##聰
+##聲
+##聳
+##聴
+##聶
+##職
+##聽
+##聾
+##聿
+##肃
+##肄
+##肅
+##肆
+##肇
+##肉
+##肋
+##肌
+##肏
+##肓
+##肖
+##肘
+##肚
+##肛
+##肝
+##肠
+##股
+##肢
+##肤
+##肥
+##肩
+##肪
+##肮
+##肯
+##肱
+##育
+##肴
+##肺
+##肽
+##肾
+##肿
+##胀
+##胁
+##胃
+##胄
+##胆
+##背
+##胍
+##胎
+##胖
+##胚
+##胛
+##胜
+##胝
+##胞
+##胡
+##胤
+##胥
+##胧
+##胫
+##胭
+##胯
+##胰
+##胱
+##胳
+##胴
+##胶
+##胸
+##胺
+##能
+##脂
+##脅
+##脆
+##脇
+##脈
+##脉
+##脊
+##脍
+##脏
+##脐
+##脑
+##脓
+##脖
+##脘
+##脚
+##脛
+##脣
+##脩
+##脫
+##脯
+##脱
+##脲
+##脳
+##脸
+##脹
+##脾
+##腆
+##腈
+##腊
+##腋
+##腌
+##腎
+##腐
+##腑
+##腓
+##腔
+##腕
+##腥
+##腦
+##腩
+##腫
+##腭
+##腮
+##腰
+##腱
+##腳
+##腴
+##腸
+##腹
+##腺
+##腻
+##腼
+##腾
+##腿
+##膀
+##膈
+##膊
+##膏
+##膑
+##膘
+##膚
+##膛
+##膜
+##膝
+##膠
+##膦
+##膨
+##膩
+##膳
+##膺
+##膻
+##膽
+##膾
+##膿
+##臀
+##臂
+##臃
+##臆
+##臉
+##臊
+##臍
+##臓
+##臘
+##臟
+##臣
+##臥
+##臧
+##臨
+##自
+##臬
+##臭
+##至
+##致
+##臺
+##臻
+##臼
+##臾
+##舀
+##舂
+##舅
+##舆
+##與
+##興
+##舉
+##舊
+##舌
+##舍
+##舎
+##舐
+##舒
+##舔
+##舖
+##舗
+##舛
+##舜
+##舞
+##舟
+##航
+##舫
+##般
+##舰
+##舱
+##舵
+##舶
+##舷
+##舸
+##船
+##舺
+##舾
+##艇
+##艋
+##艘
+##艙
+##艦
+##艮
+##良
+##艰
+##艱
+##色
+##艳
+##艷
+##艹
+##艺
+##艾
+##节
+##芃
+##芈
+##芊
+##芋
+##芍
+##芎
+##芒
+##芙
+##芜
+##芝
+##芡
+##芥
+##芦
+##芩
+##芪
+##芫
+##芬
+##芭
+##芮
+##芯
+##花
+##芳
+##芷
+##芸
+##芹
+##芻
+##芽
+##芾
+##苁
+##苄
+##苇
+##苋
+##苍
+##苏
+##苑
+##苒
+##苓
+##苔
+##苕
+##苗
+##苛
+##苜
+##苞
+##苟
+##苡
+##苣
+##若
+##苦
+##苫
+##苯
+##英
+##苷
+##苹
+##苻
+##茁
+##茂
+##范
+##茄
+##茅
+##茉
+##茎
+##茏
+##茗
+##茜
+##茧
+##茨
+##茫
+##茬
+##茭
+##茯
+##茱
+##茲
+##茴
+##茵
+##茶
+##茸
+##茹
+##茼
+##荀
+##荃
+##荆
+##草
+##荊
+##荏
+##荐
+##荒
+##荔
+##荖
+##荘
+##荚
+##荞
+##荟
+##荠
+##荡
+##荣
+##荤
+##荥
+##荧
+##荨
+##荪
+##荫
+##药
+##荳
+##荷
+##荸
+##荻
+##荼
+##荽
+##莅
+##莆
+##莉
+##莊
+##莎
+##莒
+##莓
+##莖
+##莘
+##莞
+##莠
+##莢
+##莧
+##莪
+##莫
+##莱
+##莲
+##莴
+##获
+##莹
+##莺
+##莽
+##莿
+##菀
+##菁
+##菅
+##菇
+##菈
+##菊
+##菌
+##菏
+##菓
+##菖
+##菘
+##菜
+##菟
+##菠
+##菡
+##菩
+##華
+##菱
+##菲
+##菸
+##菽
+##萁
+##萃
+##萄
+##萊
+##萋
+##萌
+##萍
+##萎
+##萘
+##萝
+##萤
+##营
+##萦
+##萧
+##萨
+##萩
+##萬
+##萱
+##萵
+##萸
+##萼
+##落
+##葆
+##葉
+##著
+##葚
+##葛
+##葡
+##董
+##葦
+##葩
+##葫
+##葬
+##葭
+##葯
+##葱
+##葳
+##葵
+##葷
+##葺
+##蒂
+##蒋
+##蒐
+##蒔
+##蒙
+##蒜
+##蒞
+##蒟
+##蒡
+##蒨
+##蒲
+##蒸
+##蒹
+##蒻
+##蒼
+##蒿
+##蓁
+##蓄
+##蓆
+##蓉
+##蓋
+##蓑
+##蓓
+##蓖
+##蓝
+##蓟
+##蓦
+##蓬
+##蓮
+##蓼
+##蓿
+##蔑
+##蔓
+##蔔
+##蔗
+##蔘
+##蔚
+##蔡
+##蔣
+##蔥
+##蔫
+##蔬
+##蔭
+##蔵
+##蔷
+##蔺
+##蔻
+##蔼
+##蔽
+##蕁
+##蕃
+##蕈
+##蕉
+##蕊
+##蕎
+##蕙
+##蕤
+##蕨
+##蕩
+##蕪
+##蕭
+##蕲
+##蕴
+##蕻
+##蕾
+##薄
+##薅
+##薇
+##薈
+##薊
+##薏
+##薑
+##薔
+##薙
+##薛
+##薦
+##薨
+##薩
+##薪
+##薬
+##薯
+##薰
+##薹
+##藉
+##藍
+##藏
+##藐
+##藓
+##藕
+##藜
+##藝
+##藤
+##藥
+##藩
+##藹
+##藻
+##藿
+##蘆
+##蘇
+##蘊
+##蘋
+##蘑
+##蘚
+##蘭
+##蘸
+##蘼
+##蘿
+##虎
+##虏
+##虐
+##虑
+##虔
+##處
+##虚
+##虛
+##虜
+##虞
+##號
+##虢
+##虧
+##虫
+##虬
+##虱
+##虹
+##虻
+##虽
+##虾
+##蚀
+##蚁
+##蚂
+##蚊
+##蚌
+##蚓
+##蚕
+##蚜
+##蚝
+##蚣
+##蚤
+##蚩
+##蚪
+##蚯
+##蚱
+##蚵
+##蛀
+##蛆
+##蛇
+##蛊
+##蛋
+##蛎
+##蛐
+##蛔
+##蛙
+##蛛
+##蛟
+##蛤
+##蛭
+##蛮
+##蛰
+##蛳
+##蛹
+##蛻
+##蛾
+##蜀
+##蜂
+##蜃
+##蜆
+##蜇
+##蜈
+##蜊
+##蜍
+##蜒
+##蜓
+##蜕
+##蜗
+##蜘
+##蜚
+##蜜
+##蜡
+##蜢
+##蜥
+##蜱
+##蜴
+##蜷
+##蜻
+##蜿
+##蝇
+##蝈
+##蝉
+##蝌
+##蝎
+##蝕
+##蝗
+##蝙
+##蝟
+##蝠
+##蝦
+##蝨
+##蝴
+##蝶
+##蝸
+##蝼
+##螂
+##螃
+##融
+##螞
+##螢
+##螨
+##螯
+##螳
+##螺
+##蟀
+##蟄
+##蟆
+##蟋
+##蟎
+##蟑
+##蟒
+##蟠
+##蟬
+##蟲
+##蟹
+##蟻
+##蟾
+##蠅
+##蠍
+##蠔
+##蠕
+##蠛
+##蠟
+##蠡
+##蠢
+##蠣
+##蠱
+##蠶
+##蠹
+##蠻
+##血
+##衄
+##衅
+##衆
+##行
+##衍
+##術
+##衔
+##街
+##衙
+##衛
+##衝
+##衞
+##衡
+##衢
+##衣
+##补
+##表
+##衩
+##衫
+##衬
+##衮
+##衰
+##衲
+##衷
+##衹
+##衾
+##衿
+##袁
+##袂
+##袄
+##袅
+##袈
+##袋
+##袍
+##袒
+##袖
+##袜
+##袞
+##袤
+##袪
+##被
+##袭
+##袱
+##裁
+##裂
+##装
+##裆
+##裊
+##裏
+##裔
+##裕
+##裘
+##裙
+##補
+##裝
+##裟
+##裡
+##裤
+##裨
+##裱
+##裳
+##裴
+##裸
+##裹
+##製
+##裾
+##褂
+##複
+##褐
+##褒
+##褓
+##褔
+##褚
+##褥
+##褪
+##褫
+##褲
+##褶
+##褻
+##襁
+##襄
+##襟
+##襠
+##襪
+##襬
+##襯
+##襲
+##西
+##要
+##覃
+##覆
+##覇
+##見
+##規
+##覓
+##視
+##覚
+##覦
+##覧
+##親
+##覬
+##観
+##覷
+##覺
+##覽
+##觀
+##见
+##观
+##规
+##觅
+##视
+##览
+##觉
+##觊
+##觎
+##觐
+##觑
+##角
+##觞
+##解
+##觥
+##触
+##觸
+##言
+##訂
+##計
+##訊
+##討
+##訓
+##訕
+##訖
+##託
+##記
+##訛
+##訝
+##訟
+##訣
+##訥
+##訪
+##設
+##許
+##訳
+##訴
+##訶
+##診
+##註
+##証
+##詆
+##詐
+##詔
+##評
+##詛
+##詞
+##詠
+##詡
+##詢
+##詣
+##試
+##詩
+##詫
+##詬
+##詭
+##詮
+##詰
+##話
+##該
+##詳
+##詹
+##詼
+##誅
+##誇
+##誉
+##誌
+##認
+##誓
+##誕
+##誘
+##語
+##誠
+##誡
+##誣
+##誤
+##誥
+##誦
+##誨
+##說
+##説
+##読
+##誰
+##課
+##誹
+##誼
+##調
+##諄
+##談
+##請
+##諏
+##諒
+##論
+##諗
+##諜
+##諡
+##諦
+##諧
+##諫
+##諭
+##諮
+##諱
+##諳
+##諷
+##諸
+##諺
+##諾
+##謀
+##謁
+##謂
+##謄
+##謊
+##謎
+##謐
+##謔
+##謗
+##謙
+##講
+##謝
+##謠
+##謨
+##謬
+##謹
+##謾
+##譁
+##證
+##譎
+##譏
+##識
+##譙
+##譚
+##譜
+##警
+##譬
+##譯
+##議
+##譲
+##譴
+##護
+##譽
+##讀
+##變
+##讓
+##讚
+##讞
+##计
+##订
+##认
+##讥
+##讧
+##讨
+##让
+##讪
+##讫
+##训
+##议
+##讯
+##记
+##讲
+##讳
+##讴
+##讶
+##讷
+##许
+##讹
+##论
+##讼
+##讽
+##设
+##访
+##诀
+##证
+##诃
+##评
+##诅
+##识
+##诈
+##诉
+##诊
+##诋
+##词
+##诏
+##译
+##试
+##诗
+##诘
+##诙
+##诚
+##诛
+##话
+##诞
+##诟
+##诠
+##诡
+##询
+##诣
+##诤
+##该
+##详
+##诧
+##诩
+##诫
+##诬
+##语
+##误
+##诰
+##诱
+##诲
+##说
+##诵
+##诶
+##请
+##诸
+##诺
+##读
+##诽
+##课
+##诿
+##谀
+##谁
+##调
+##谄
+##谅
+##谆
+##谈
+##谊
+##谋
+##谌
+##谍
+##谎
+##谏
+##谐
+##谑
+##谒
+##谓
+##谔
+##谕
+##谗
+##谘
+##谙
+##谚
+##谛
+##谜
+##谟
+##谢
+##谣
+##谤
+##谥
+##谦
+##谧
+##谨
+##谩
+##谪
+##谬
+##谭
+##谯
+##谱
+##谲
+##谴
+##谶
+##谷
+##豁
+##豆
+##豇
+##豈
+##豉
+##豊
+##豌
+##豎
+##豐
+##豔
+##豚
+##象
+##豢
+##豪
+##豫
+##豬
+##豹
+##豺
+##貂
+##貅
+##貌
+##貓
+##貔
+##貘
+##貝
+##貞
+##負
+##財
+##貢
+##貧
+##貨
+##販
+##貪
+##貫
+##責
+##貯
+##貰
+##貳
+##貴
+##貶
+##買
+##貸
+##費
+##貼
+##貽
+##貿
+##賀
+##賁
+##賂
+##賃
+##賄
+##資
+##賈
+##賊
+##賑
+##賓
+##賜
+##賞
+##賠
+##賡
+##賢
+##賣
+##賤
+##賦
+##質
+##賬
+##賭
+##賴
+##賺
+##購
+##賽
+##贅
+##贈
+##贊
+##贍
+##贏
+##贓
+##贖
+##贛
+##贝
+##贞
+##负
+##贡
+##财
+##责
+##贤
+##败
+##账
+##货
+##质
+##贩
+##贪
+##贫
+##贬
+##购
+##贮
+##贯
+##贰
+##贱
+##贲
+##贴
+##贵
+##贷
+##贸
+##费
+##贺
+##贻
+##贼
+##贾
+##贿
+##赁
+##赂
+##赃
+##资
+##赅
+##赈
+##赊
+##赋
+##赌
+##赎
+##赏
+##赐
+##赓
+##赔
+##赖
+##赘
+##赚
+##赛
+##赝
+##赞
+##赠
+##赡
+##赢
+##赣
+##赤
+##赦
+##赧
+##赫
+##赭
+##走
+##赳
+##赴
+##赵
+##赶
+##起
+##趁
+##超
+##越
+##趋
+##趕
+##趙
+##趟
+##趣
+##趨
+##足
+##趴
+##趵
+##趸
+##趺
+##趾
+##跃
+##跄
+##跆
+##跋
+##跌
+##跎
+##跑
+##跖
+##跚
+##跛
+##距
+##跟
+##跡
+##跤
+##跨
+##跩
+##跪
+##路
+##跳
+##践
+##跷
+##跹
+##跺
+##跻
+##踉
+##踊
+##踌
+##踏
+##踐
+##踝
+##踞
+##踟
+##踢
+##踩
+##踪
+##踮
+##踱
+##踴
+##踵
+##踹
+##蹂
+##蹄
+##蹇
+##蹈
+##蹉
+##蹊
+##蹋
+##蹑
+##蹒
+##蹙
+##蹟
+##蹣
+##蹤
+##蹦
+##蹩
+##蹬
+##蹭
+##蹲
+##蹴
+##蹶
+##蹺
+##蹼
+##蹿
+##躁
+##躇
+##躉
+##躊
+##躋
+##躍
+##躏
+##躪
+##身
+##躬
+##躯
+##躲
+##躺
+##軀
+##車
+##軋
+##軌
+##軍
+##軒
+##軟
+##転
+##軸
+##軼
+##軽
+##軾
+##較
+##載
+##輒
+##輓
+##輔
+##輕
+##輛
+##輝
+##輟
+##輩
+##輪
+##輯
+##輸
+##輻
+##輾
+##輿
+##轄
+##轅
+##轆
+##轉
+##轍
+##轎
+##轟
+##车
+##轧
+##轨
+##轩
+##转
+##轭
+##轮
+##软
+##轰
+##轲
+##轴
+##轶
+##轻
+##轼
+##载
+##轿
+##较
+##辄
+##辅
+##辆
+##辇
+##辈
+##辉
+##辊
+##辍
+##辐
+##辑
+##输
+##辕
+##辖
+##辗
+##辘
+##辙
+##辛
+##辜
+##辞
+##辟
+##辣
+##辦
+##辨
+##辩
+##辫
+##辭
+##辮
+##辯
+##辰
+##辱
+##農
+##边
+##辺
+##辻
+##込
+##辽
+##达
+##迁
+##迂
+##迄
+##迅
+##过
+##迈
+##迎
+##运
+##近
+##返
+##还
+##这
+##进
+##远
+##违
+##连
+##迟
+##迢
+##迤
+##迥
+##迦
+##迩
+##迪
+##迫
+##迭
+##述
+##迴
+##迷
+##迸
+##迹
+##迺
+##追
+##退
+##送
+##适
+##逃
+##逅
+##逆
+##选
+##逊
+##逍
+##透
+##逐
+##递
+##途
+##逕
+##逗
+##這
+##通
+##逛
+##逝
+##逞
+##速
+##造
+##逢
+##連
+##逮
+##週
+##進
+##逵
+##逶
+##逸
+##逻
+##逼
+##逾
+##遁
+##遂
+##遅
+##遇
+##遊
+##運
+##遍
+##過
+##遏
+##遐
+##遑
+##遒
+##道
+##達
+##違
+##遗
+##遙
+##遛
+##遜
+##遞
+##遠
+##遢
+##遣
+##遥
+##遨
+##適
+##遭
+##遮
+##遲
+##遴
+##遵
+##遶
+##遷
+##選
+##遺
+##遼
+##遽
+##避
+##邀
+##邁
+##邂
+##邃
+##還
+##邇
+##邈
+##邊
+##邋
+##邏
+##邑
+##邓
+##邕
+##邛
+##邝
+##邢
+##那
+##邦
+##邨
+##邪
+##邬
+##邮
+##邯
+##邰
+##邱
+##邳
+##邵
+##邸
+##邹
+##邺
+##邻
+##郁
+##郅
+##郊
+##郎
+##郑
+##郜
+##郝
+##郡
+##郢
+##郤
+##郦
+##郧
+##部
+##郫
+##郭
+##郴
+##郵
+##郷
+##郸
+##都
+##鄂
+##鄉
+##鄒
+##鄔
+##鄙
+##鄞
+##鄢
+##鄧
+##鄭
+##鄰
+##鄱
+##鄲
+##鄺
+##酉
+##酊
+##酋
+##酌
+##配
+##酐
+##酒
+##酗
+##酚
+##酝
+##酢
+##酣
+##酥
+##酩
+##酪
+##酬
+##酮
+##酯
+##酰
+##酱
+##酵
+##酶
+##酷
+##酸
+##酿
+##醃
+##醇
+##醉
+##醋
+##醍
+##醐
+##醒
+##醚
+##醛
+##醜
+##醞
+##醣
+##醪
+##醫
+##醬
+##醮
+##醯
+##醴
+##醺
+##釀
+##釁
+##采
+##釉
+##释
+##釋
+##里
+##重
+##野
+##量
+##釐
+##金
+##釗
+##釘
+##釜
+##針
+##釣
+##釦
+##釧
+##釵
+##鈀
+##鈉
+##鈍
+##鈎
+##鈔
+##鈕
+##鈞
+##鈣
+##鈦
+##鈪
+##鈴
+##鈺
+##鈾
+##鉀
+##鉄
+##鉅
+##鉉
+##鉑
+##鉗
+##鉚
+##鉛
+##鉤
+##鉴
+##鉻
+##銀
+##銃
+##銅
+##銑
+##銓
+##銖
+##銘
+##銜
+##銬
+##銭
+##銮
+##銳
+##銷
+##銹
+##鋁
+##鋅
+##鋒
+##鋤
+##鋪
+##鋰
+##鋸
+##鋼
+##錄
+##錐
+##錘
+##錚
+##錠
+##錢
+##錦
+##錨
+##錫
+##錮
+##錯
+##録
+##錳
+##錶
+##鍊
+##鍋
+##鍍
+##鍛
+##鍥
+##鍰
+##鍵
+##鍺
+##鍾
+##鎂
+##鎊
+##鎌
+##鎏
+##鎔
+##鎖
+##鎗
+##鎚
+##鎧
+##鎬
+##鎮
+##鎳
+##鏈
+##鏖
+##鏗
+##鏘
+##鏞
+##鏟
+##鏡
+##鏢
+##鏤
+##鏽
+##鐘
+##鐮
+##鐲
+##鐳
+##鐵
+##鐸
+##鐺
+##鑄
+##鑊
+##鑑
+##鑒
+##鑣
+##鑫
+##鑰
+##鑲
+##鑼
+##鑽
+##鑾
+##鑿
+##针
+##钉
+##钊
+##钎
+##钏
+##钒
+##钓
+##钗
+##钙
+##钛
+##钜
+##钝
+##钞
+##钟
+##钠
+##钡
+##钢
+##钣
+##钤
+##钥
+##钦
+##钧
+##钨
+##钩
+##钮
+##钯
+##钰
+##钱
+##钳
+##钴
+##钵
+##钺
+##钻
+##钼
+##钾
+##钿
+##铀
+##铁
+##铂
+##铃
+##铄
+##铅
+##铆
+##铉
+##铎
+##铐
+##铛
+##铜
+##铝
+##铠
+##铡
+##铢
+##铣
+##铤
+##铨
+##铩
+##铬
+##铭
+##铮
+##铰
+##铲
+##铵
+##银
+##铸
+##铺
+##链
+##铿
+##销
+##锁
+##锂
+##锄
+##锅
+##锆
+##锈
+##锉
+##锋
+##锌
+##锏
+##锐
+##锑
+##错
+##锚
+##锟
+##锡
+##锢
+##锣
+##锤
+##锥
+##锦
+##锭
+##键
+##锯
+##锰
+##锲
+##锵
+##锹
+##锺
+##锻
+##镀
+##镁
+##镂
+##镇
+##镉
+##镌
+##镍
+##镐
+##镑
+##镕
+##镖
+##镗
+##镛
+##镜
+##镣
+##镭
+##镯
+##镰
+##镳
+##镶
+##長
+##长
+##門
+##閃
+##閉
+##開
+##閎
+##閏
+##閑
+##閒
+##間
+##閔
+##閘
+##閡
+##関
+##閣
+##閥
+##閨
+##閩
+##閱
+##閲
+##閹
+##閻
+##閾
+##闆
+##闇
+##闊
+##闌
+##闍
+##闔
+##闕
+##闖
+##闘
+##關
+##闡
+##闢
+##门
+##闪
+##闫
+##闭
+##问
+##闯
+##闰
+##闲
+##间
+##闵
+##闷
+##闸
+##闹
+##闺
+##闻
+##闽
+##闾
+##阀
+##阁
+##阂
+##阅
+##阆
+##阇
+##阈
+##阉
+##阎
+##阐
+##阑
+##阔
+##阕
+##阖
+##阙
+##阚
+##阜
+##队
+##阡
+##阪
+##阮
+##阱
+##防
+##阳
+##阴
+##阵
+##阶
+##阻
+##阿
+##陀
+##陂
+##附
+##际
+##陆
+##陇
+##陈
+##陋
+##陌
+##降
+##限
+##陕
+##陛
+##陝
+##陞
+##陟
+##陡
+##院
+##陣
+##除
+##陨
+##险
+##陪
+##陰
+##陲
+##陳
+##陵
+##陶
+##陷
+##陸
+##険
+##陽
+##隅
+##隆
+##隈
+##隊
+##隋
+##隍
+##階
+##随
+##隐
+##隔
+##隕
+##隘
+##隙
+##際
+##障
+##隠
+##隣
+##隧
+##隨
+##險
+##隱
+##隴
+##隶
+##隸
+##隻
+##隼
+##隽
+##难
+##雀
+##雁
+##雄
+##雅
+##集
+##雇
+##雉
+##雋
+##雌
+##雍
+##雎
+##雏
+##雑
+##雒
+##雕
+##雖
+##雙
+##雛
+##雜
+##雞
+##離
+##難
+##雨
+##雪
+##雯
+##雰
+##雲
+##雳
+##零
+##雷
+##雹
+##電
+##雾
+##需
+##霁
+##霄
+##霆
+##震
+##霈
+##霉
+##霊
+##霍
+##霎
+##霏
+##霑
+##霓
+##霖
+##霜
+##霞
+##霧
+##霭
+##霰
+##露
+##霸
+##霹
+##霽
+##霾
+##靂
+##靄
+##靈
+##青
+##靓
+##靖
+##静
+##靚
+##靛
+##靜
+##非
+##靠
+##靡
+##面
+##靥
+##靦
+##革
+##靳
+##靴
+##靶
+##靼
+##鞅
+##鞋
+##鞍
+##鞏
+##鞑
+##鞘
+##鞠
+##鞣
+##鞦
+##鞭
+##韆
+##韋
+##韌
+##韓
+##韜
+##韦
+##韧
+##韩
+##韬
+##韭
+##音
+##韵
+##韶
+##韻
+##響
+##頁
+##頂
+##頃
+##項
+##順
+##須
+##頌
+##預
+##頑
+##頒
+##頓
+##頗
+##領
+##頜
+##頡
+##頤
+##頫
+##頭
+##頰
+##頷
+##頸
+##頹
+##頻
+##頼
+##顆
+##題
+##額
+##顎
+##顏
+##顔
+##願
+##顛
+##類
+##顧
+##顫
+##顯
+##顱
+##顴
+##页
+##顶
+##顷
+##项
+##顺
+##须
+##顼
+##顽
+##顾
+##顿
+##颁
+##颂
+##预
+##颅
+##领
+##颇
+##颈
+##颉
+##颊
+##颌
+##颍
+##颐
+##频
+##颓
+##颔
+##颖
+##颗
+##题
+##颚
+##颛
+##颜
+##额
+##颞
+##颠
+##颡
+##颢
+##颤
+##颦
+##颧
+##風
+##颯
+##颱
+##颳
+##颶
+##颼
+##飄
+##飆
+##风
+##飒
+##飓
+##飕
+##飘
+##飙
+##飚
+##飛
+##飞
+##食
+##飢
+##飨
+##飩
+##飪
+##飯
+##飲
+##飼
+##飽
+##飾
+##餃
+##餅
+##餉
+##養
+##餌
+##餐
+##餒
+##餓
+##餘
+##餚
+##餛
+##餞
+##餡
+##館
+##餮
+##餵
+##餾
+##饅
+##饈
+##饋
+##饌
+##饍
+##饑
+##饒
+##饕
+##饗
+##饞
+##饥
+##饨
+##饪
+##饬
+##饭
+##饮
+##饯
+##饰
+##饱
+##饲
+##饴
+##饵
+##饶
+##饷
+##饺
+##饼
+##饽
+##饿
+##馀
+##馁
+##馄
+##馅
+##馆
+##馈
+##馋
+##馍
+##馏
+##馒
+##馔
+##首
+##馗
+##香
+##馥
+##馨
+##馬
+##馭
+##馮
+##馳
+##馴
+##駁
+##駄
+##駅
+##駆
+##駐
+##駒
+##駕
+##駛
+##駝
+##駭
+##駱
+##駿
+##騁
+##騎
+##騏
+##験
+##騙
+##騨
+##騰
+##騷
+##驀
+##驅
+##驊
+##驍
+##驒
+##驕
+##驗
+##驚
+##驛
+##驟
+##驢
+##驥
+##马
+##驭
+##驮
+##驯
+##驰
+##驱
+##驳
+##驴
+##驶
+##驷
+##驸
+##驹
+##驻
+##驼
+##驾
+##驿
+##骁
+##骂
+##骄
+##骅
+##骆
+##骇
+##骈
+##骊
+##骋
+##验
+##骏
+##骐
+##骑
+##骗
+##骚
+##骛
+##骜
+##骞
+##骠
+##骡
+##骤
+##骥
+##骧
+##骨
+##骯
+##骰
+##骶
+##骷
+##骸
+##骼
+##髂
+##髅
+##髋
+##髏
+##髒
+##髓
+##體
+##髖
+##高
+##髦
+##髪
+##髮
+##髯
+##髻
+##鬃
+##鬆
+##鬍
+##鬓
+##鬚
+##鬟
+##鬢
+##鬣
+##鬥
+##鬧
+##鬱
+##鬼
+##魁
+##魂
+##魄
+##魅
+##魇
+##魍
+##魏
+##魔
+##魘
+##魚
+##魯
+##魷
+##鮑
+##鮨
+##鮪
+##鮭
+##鮮
+##鯉
+##鯊
+##鯖
+##鯛
+##鯨
+##鯰
+##鯽
+##鰍
+##鰓
+##鰭
+##鰲
+##鰻
+##鰾
+##鱈
+##鱉
+##鱔
+##鱗
+##鱷
+##鱸
+##鱼
+##鱿
+##鲁
+##鲈
+##鲍
+##鲑
+##鲛
+##鲜
+##鲟
+##鲢
+##鲤
+##鲨
+##鲫
+##鲱
+##鲲
+##鲶
+##鲷
+##鲸
+##鳃
+##鳄
+##鳅
+##鳌
+##鳍
+##鳕
+##鳖
+##鳗
+##鳝
+##鳞
+##鳥
+##鳩
+##鳳
+##鳴
+##鳶
+##鴉
+##鴕
+##鴛
+##鴦
+##鴨
+##鴻
+##鴿
+##鵑
+##鵜
+##鵝
+##鵡
+##鵬
+##鵰
+##鵲
+##鶘
+##鶩
+##鶯
+##鶴
+##鷗
+##鷲
+##鷹
+##鷺
+##鸚
+##鸞
+##鸟
+##鸠
+##鸡
+##鸢
+##鸣
+##鸥
+##鸦
+##鸨
+##鸪
+##鸭
+##鸯
+##鸳
+##鸵
+##鸽
+##鸾
+##鸿
+##鹂
+##鹃
+##鹄
+##鹅
+##鹈
+##鹉
+##鹊
+##鹌
+##鹏
+##鹑
+##鹕
+##鹘
+##鹜
+##鹞
+##鹤
+##鹦
+##鹧
+##鹫
+##鹭
+##鹰
+##鹳
+##鹵
+##鹹
+##鹼
+##鹽
+##鹿
+##麂
+##麋
+##麒
+##麓
+##麗
+##麝
+##麟
+##麥
+##麦
+##麩
+##麴
+##麵
+##麸
+##麺
+##麻
+##麼
+##麽
+##麾
+##黃
+##黄
+##黍
+##黎
+##黏
+##黑
+##黒
+##黔
+##默
+##黛
+##黜
+##黝
+##點
+##黠
+##黨
+##黯
+##黴
+##鼋
+##鼎
+##鼐
+##鼓
+##鼠
+##鼬
+##鼹
+##鼻
+##鼾
+##齁
+##齊
+##齋
+##齐
+##齒
+##齡
+##齢
+##齣
+##齦
+##齿
+##龄
+##龅
+##龈
+##龊
+##龋
+##龌
+##龍
+##龐
+##龔
+##龕
+##龙
+##龚
+##龛
+##龜
+##龟
+##︰
+##︱
+##︶
+##︿
+##﹁
+##﹂
+##﹍
+##﹏
+##﹐
+##﹑
+##﹒
+##﹔
+##﹕
+##﹖
+##﹗
+##﹙
+##﹚
+##﹝
+##﹞
+##﹡
+##﹣
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##`
+##f
+##h
+##j
+##u
+##w
+##z
+##{
+##}
+##。
+##「
+##」
+##、
+##・
+##ッ
+##ー
+##イ
+##ク
+##シ
+##ス
+##ト
+##ノ
+##フ
+##ラ
+##ル
+##ン
+##゙
+##゚
+## ̄
+##¥
+##👍
+##🔥
+##😂
+##😎
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_large_config.json b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_large_config.json
new file mode 100644
index 000000000..60df298df
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/configs/bert_large_config.json
@@ -0,0 +1,13 @@
+{
+ "attention_probs_dropout_prob": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "max_position_embeddings": 512,
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "type_vocab_size": 2,
+ "vocab_size": 30522
+}
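For reference, the JSON above is the standard BERT-Large configuration (24 layers, 16 attention heads, hidden size 1024, 4096-wide feed-forward). A minimal sketch of loading it with the `BertConfig.from_json_file` helper defined in this patch's `src/modeling.py` (the same call `src/extract_features.py` makes below); the relative path is illustrative:

    import modeling  # src/modeling.py from this patch

    # Path is a placeholder; point it at the file added above.
    config = modeling.BertConfig.from_json_file(
        "configs/bert_large_config.json")
    assert config.num_hidden_layers == 24
    assert config.hidden_size == 1024
    assert config.num_attention_heads == 16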
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/modelzoo_level.txt b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 000000000..31529da2e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,3 @@
+FuncStatus:OK
+PerfStatus:OK
+PrecisionStatus:OK
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/requirements.txt b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/requirements.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/__init__.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/__init__.py
new file mode 100644
index 000000000..effb57b1e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/__init__.py
@@ -0,0 +1,15 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/create_pretraining_data.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/create_pretraining_data.py
new file mode 100644
index 000000000..ec94c765b
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/create_pretraining_data.py
@@ -0,0 +1,457 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Create masked LM/next sentence masked_lm TF examples for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import random
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None,
+ "Input raw text file (or comma-separated list of files).")
+
+flags.DEFINE_string(
+ "output_file", None,
+ "Output TF example file (or comma-separated list of files).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.")
+
+flags.DEFINE_integer("max_predictions_per_seq", 20,
+ "Maximum number of masked LM predictions per sequence.")
+
+flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.")
+
+flags.DEFINE_integer(
+ "dupe_factor", 10,
+ "Number of times to duplicate the input data (with different masks).")
+
+flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.")
+
+flags.DEFINE_float(
+ "short_seq_prob", 0.1,
+ "Probability of creating sequences which are shorter than the "
+ "maximum length.")
+
+
+class TrainingInstance(object):
+ """A single training instance (sentence pair)."""
+
+ def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
+ is_random_next):
+ self.tokens = tokens
+ self.segment_ids = segment_ids
+ self.is_random_next = is_random_next
+ self.masked_lm_positions = masked_lm_positions
+ self.masked_lm_labels = masked_lm_labels
+
+ def __str__(self):
+ s = ""
+ s += "tokens: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.tokens]))
+ s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
+ s += "is_random_next: %s\n" % self.is_random_next
+ s += "masked_lm_positions: %s\n" % (" ".join(
+ [str(x) for x in self.masked_lm_positions]))
+ s += "masked_lm_labels: %s\n" % (" ".join(
+ [tokenization.printable_text(x) for x in self.masked_lm_labels]))
+ s += "\n"
+ return s
+
+ def __repr__(self):
+ return self.__str__()
+
+
+def write_instance_to_example_files(instances, tokenizer, max_seq_length,
+ max_predictions_per_seq, output_files):
+ """Create TF example files from `TrainingInstance`s."""
+ writers = []
+ for output_file in output_files:
+ writers.append(tf.python_io.TFRecordWriter(output_file))
+
+ writer_index = 0
+
+ total_written = 0
+ for (inst_index, instance) in enumerate(instances):
+ input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
+ input_mask = [1] * len(input_ids)
+ segment_ids = list(instance.segment_ids)
+ assert len(input_ids) <= max_seq_length
+
+ while len(input_ids) < max_seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ segment_ids.append(0)
+
+ assert len(input_ids) == max_seq_length
+ assert len(input_mask) == max_seq_length
+ assert len(segment_ids) == max_seq_length
+
+ masked_lm_positions = list(instance.masked_lm_positions)
+ masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
+ masked_lm_weights = [1.0] * len(masked_lm_ids)
+
+ while len(masked_lm_positions) < max_predictions_per_seq:
+ masked_lm_positions.append(0)
+ masked_lm_ids.append(0)
+ masked_lm_weights.append(0.0)
+
+ next_sentence_label = 1 if instance.is_random_next else 0
+
+ features = collections.OrderedDict()
+ features["input_ids"] = create_int_feature(input_ids)
+ features["input_mask"] = create_int_feature(input_mask)
+ features["segment_ids"] = create_int_feature(segment_ids)
+ features["masked_lm_positions"] = create_int_feature(masked_lm_positions)
+ features["masked_lm_ids"] = create_int_feature(masked_lm_ids)
+ features["masked_lm_weights"] = create_float_feature(masked_lm_weights)
+ features["next_sentence_labels"] = create_int_feature([next_sentence_label])
+
+ tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+
+ writers[writer_index].write(tf_example.SerializeToString())
+ writer_index = (writer_index + 1) % len(writers)
+
+ total_written += 1
+
+ if inst_index < 20:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in instance.tokens]))
+
+ for feature_name in features.keys():
+ feature = features[feature_name]
+ values = []
+ if feature.int64_list.value:
+ values = feature.int64_list.value
+ elif feature.float_list.value:
+ values = feature.float_list.value
+ tf.logging.info(
+ "%s: %s" % (feature_name, " ".join([str(x) for x in values])))
+
+ for writer in writers:
+ writer.close()
+
+ tf.logging.info("Wrote %d total instances", total_written)
+
+
+def create_int_feature(values):
+ feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+ return feature
+
+
+def create_float_feature(values):
+ feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
+ return feature
+
+
+def create_training_instances(input_files, tokenizer, max_seq_length,
+ dupe_factor, short_seq_prob, masked_lm_prob,
+ max_predictions_per_seq, rng):
+ """Create `TrainingInstance`s from raw text."""
+ all_documents = [[]]
+
+ # Input file format:
+ # (1) One sentence per line. These should ideally be actual sentences, not
+ # entire paragraphs or arbitrary spans of text. (Because we use the
+ # sentence boundaries for the "next sentence prediction" task).
+ # (2) Blank lines between documents. Document boundaries are needed so
+ # that the "next sentence prediction" task doesn't span between documents.
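+ # For example (contents illustrative):
+ #   This is sentence one of document one.
+ #   This is sentence two of document one.
+ #
+ #   Document two starts after the blank line above.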
+ for input_file in input_files:
+ with tf.gfile.GFile(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+
+ # Empty lines are used as document delimiters
+ if not line:
+ all_documents.append([])
+ tokens = tokenizer.tokenize(line)
+ if tokens:
+ all_documents[-1].append(tokens)
+
+ # Remove empty documents
+ all_documents = [x for x in all_documents if x]
+ rng.shuffle(all_documents)
+
+ vocab_words = list(tokenizer.vocab.keys())
+ instances = []
+ for _ in range(dupe_factor):
+ for document_index in range(len(all_documents)):
+ instances.extend(
+ create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
+
+ rng.shuffle(instances)
+ return instances
+
+
+def create_instances_from_document(
+ all_documents, document_index, max_seq_length, short_seq_prob,
+ masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
+ """Creates `TrainingInstance`s for a single document."""
+ document = all_documents[document_index]
+
+ # Account for [CLS], [SEP], [SEP]
+ max_num_tokens = max_seq_length - 3
+
+ # We *usually* want to fill up the entire sequence since we are padding
+ # to `max_seq_length` anyways, so short sequences are generally wasted
+ # computation. However, we *sometimes*
+ # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+ # sequences to minimize the mismatch between pre-training and fine-tuning.
+ # The `target_seq_length` is just a rough target however, whereas
+ # `max_seq_length` is a hard limit.
+ target_seq_length = max_num_tokens
+ if rng.random() < short_seq_prob:
+ target_seq_length = rng.randint(2, max_num_tokens)
+
+ # We DON'T just concatenate all of the tokens from a document into a long
+ # sequence and choose an arbitrary split point because this would make the
+ # next sentence prediction task too easy. Instead, we split the input into
+ # segments "A" and "B" based on the actual "sentences" provided by the user
+ # input.
+ instances = []
+ current_chunk = []
+ current_length = 0
+ i = 0
+ while i < len(document):
+ segment = document[i]
+ current_chunk.append(segment)
+ current_length += len(segment)
+ if i == len(document) - 1 or current_length >= target_seq_length:
+ if current_chunk:
+ # `a_end` is how many segments from `current_chunk` go into the `A`
+ # (first) sentence.
+ a_end = 1
+ if len(current_chunk) >= 2:
+ a_end = rng.randint(1, len(current_chunk) - 1)
+
+ tokens_a = []
+ for j in range(a_end):
+ tokens_a.extend(current_chunk[j])
+
+ tokens_b = []
+ # Random next
+ is_random_next = False
+ if len(current_chunk) == 1 or rng.random() < 0.5:
+ is_random_next = True
+ target_b_length = target_seq_length - len(tokens_a)
+
+ # This should rarely go for more than one iteration for large
+ # corpora. However, just to be careful, we try to make sure that
+ # the random document is not the same as the document
+ # we're processing.
+ for _ in range(10):
+ random_document_index = rng.randint(0, len(all_documents) - 1)
+ if random_document_index != document_index:
+ break
+
+ random_document = all_documents[random_document_index]
+ random_start = rng.randint(0, len(random_document) - 1)
+ for j in range(random_start, len(random_document)):
+ tokens_b.extend(random_document[j])
+ if len(tokens_b) >= target_b_length:
+ break
+ # We didn't actually use these segments so we "put them back" so
+ # they don't go to waste.
+ num_unused_segments = len(current_chunk) - a_end
+ i -= num_unused_segments
+ # Actual next
+ else:
+ is_random_next = False
+ for j in range(a_end, len(current_chunk)):
+ tokens_b.extend(current_chunk[j])
+ truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
+
+ assert len(tokens_a) >= 1
+ assert len(tokens_b) >= 1
+
+ tokens = []
+ segment_ids = []
+ tokens.append("[CLS]")
+ segment_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ segment_ids.append(0)
+
+ tokens.append("[SEP]")
+ segment_ids.append(0)
+
+ for token in tokens_b:
+ tokens.append(token)
+ segment_ids.append(1)
+ tokens.append("[SEP]")
+ segment_ids.append(1)
+
+ (tokens, masked_lm_positions,
+ masked_lm_labels) = create_masked_lm_predictions(
+ tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
+ instance = TrainingInstance(
+ tokens=tokens,
+ segment_ids=segment_ids,
+ is_random_next=is_random_next,
+ masked_lm_positions=masked_lm_positions,
+ masked_lm_labels=masked_lm_labels)
+ instances.append(instance)
+ current_chunk = []
+ current_length = 0
+ i += 1
+
+ return instances
+
+
+MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
+ ["index", "label"])
+
+
+def create_masked_lm_predictions(tokens, masked_lm_prob,
+ max_predictions_per_seq, vocab_words, rng):
+ """Creates the predictions for the masked LM objective."""
+
+ cand_indexes = []
+ for (i, token) in enumerate(tokens):
+ if token == "[CLS]" or token == "[SEP]":
+ continue
+ cand_indexes.append(i)
+
+ rng.shuffle(cand_indexes)
+
+ output_tokens = list(tokens)
+
+ num_to_predict = min(max_predictions_per_seq,
+ max(1, int(round(len(tokens) * masked_lm_prob))))
+
+ masked_lms = []
+ covered_indexes = set()
+ for index in cand_indexes:
+ if len(masked_lms) >= num_to_predict:
+ break
+ if index in covered_indexes:
+ continue
+ covered_indexes.add(index)
+
+ masked_token = None
+ # 80% of the time, replace with [MASK]
+ if rng.random() < 0.8:
+ masked_token = "[MASK]"
+ else:
+ # 10% of the time, keep original
+ if rng.random() < 0.5:
+ masked_token = tokens[index]
+ # 10% of the time, replace with random word
+ else:
+ masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+
+ output_tokens[index] = masked_token
+
+ masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+
+ masked_lms = sorted(masked_lms, key=lambda x: x.index)
+
+ masked_lm_positions = []
+ masked_lm_labels = []
+ for p in masked_lms:
+ masked_lm_positions.append(p.index)
+ masked_lm_labels.append(p.label)
+
+ return (output_tokens, masked_lm_positions, masked_lm_labels)
+
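As a quick sanity check of the masking budget computed in `create_masked_lm_predictions`, a minimal sketch using only the default flag values defined at the top of this file (pure arithmetic, no model required):

    # Defaults: max_seq_length=128, masked_lm_prob=0.15,
    # max_predictions_per_seq=20. For a full-length 128-token sequence:
    num_to_predict = min(20, max(1, int(round(128 * 0.15))))
    assert num_to_predict == 19
    # Of the selected positions, in expectation ~80% become [MASK],
    # ~10% keep the original token, and ~10% get a random vocab word.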
+
+def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
+ """Truncates a pair of sequences to a maximum sequence length."""
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_num_tokens:
+ break
+
+ trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
+ assert len(trunc_tokens) >= 1
+
+ # We want to sometimes truncate from the front and sometimes from the
+ # back to add more randomness and avoid biases.
+ if rng.random() < 0.5:
+ del trunc_tokens[0]
+ else:
+ trunc_tokens.pop()
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ input_files = []
+ for input_pattern in FLAGS.input_file.split(","):
+ input_files.extend(tf.gfile.Glob(input_pattern))
+
+ tf.logging.info("*** Reading from input files ***")
+ for input_file in input_files:
+ tf.logging.info(" %s", input_file)
+
+ rng = random.Random(FLAGS.random_seed)
+ instances = create_training_instances(
+ input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
+ FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
+ rng)
+
+ output_files = FLAGS.output_file.split(",")
+ tf.logging.info("*** Writing to output files ***")
+ for output_file in output_files:
+ tf.logging.info(" %s", output_file)
+
+ write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
+ FLAGS.max_predictions_per_seq, output_files)
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("output_file")
+ flags.mark_flag_as_required("vocab_file")
+ tf.app.run()
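A hedged invocation example for the script above; all paths are placeholders (the vocab file name matches `configs/bert_base_vocab.txt` added elsewhere in this patch), and the flag values simply make the defaults explicit:

    python src/create_pretraining_data.py \
      --input_file=./sample_text.txt \
      --output_file=./tf_examples.tfrecord \
      --vocab_file=./configs/bert_base_vocab.txt \
      --do_lower_case=True \
      --max_seq_length=128 \
      --max_predictions_per_seq=20 \
      --masked_lm_prob=0.15 \
      --random_seed=12345 \
      --dupe_factor=10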
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/extract_features.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/extract_features.py
new file mode 100644
index 000000000..60e3830a9
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/extract_features.py
@@ -0,0 +1,419 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extract pre-computed feature vectors from BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import codecs
+import collections
+import json
+import re
+
+import modeling
+import tokenization
+import tensorflow as tf
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("input_file", None, "")
+
+flags.DEFINE_string("output_file", None, "")
+
+flags.DEFINE_string("layers", "-1,-2,-3,-4", "")
+
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_integer(
+ "max_seq_length", 128,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded.")
+
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_string("vocab_file", None,
+ "The vocabulary file that the BERT model was trained on.")
+
+flags.DEFINE_bool(
+ "do_lower_case", True,
+ "Whether to lower case the input text. Should be True for uncased "
+ "models and False for cased models.")
+
+flags.DEFINE_integer("batch_size", 32, "Batch size for predictions.")
+
+flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
+
+flags.DEFINE_string("master", None,
+ "If using a TPU, the address of the master.")
+
+flags.DEFINE_integer(
+ "num_tpu_cores", 8,
+ "Only used if `use_tpu` is True. Total number of TPU cores to use.")
+
+flags.DEFINE_bool(
+ "use_one_hot_embeddings", False,
+ "If True, tf.one_hot will be used for embedding lookups, otherwise "
+ "tf.nn.embedding_lookup will be used. On TPUs, this should be True "
+ "since it is much faster.")
+
+
+class InputExample(object):
+
+ def __init__(self, unique_id, text_a, text_b):
+ self.unique_id = unique_id
+ self.text_a = text_a
+ self.text_b = text_b
+
+
+class InputFeatures(object):
+ """A single set of features of data."""
+
+ def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids):
+ self.unique_id = unique_id
+ self.tokens = tokens
+ self.input_ids = input_ids
+ self.input_mask = input_mask
+ self.input_type_ids = input_type_ids
+
+
+def input_fn_builder(features, seq_length):
+ """Creates an `input_fn` closure to be passed to TPUEstimator."""
+
+ all_unique_ids = []
+ all_input_ids = []
+ all_input_mask = []
+ all_input_type_ids = []
+
+ for feature in features:
+ all_unique_ids.append(feature.unique_id)
+ all_input_ids.append(feature.input_ids)
+ all_input_mask.append(feature.input_mask)
+ all_input_type_ids.append(feature.input_type_ids)
+
+ def input_fn(params):
+ """The actual input function."""
+ batch_size = params["batch_size"]
+
+ num_examples = len(features)
+
+ # This is for demo purposes and does NOT scale to large data sets. We do
+ # not use Dataset.from_generator() because that uses tf.py_func which is
+ # not TPU compatible. The right way to load data is with TFRecordReader.
+ d = tf.data.Dataset.from_tensor_slices({
+ "unique_ids":
+ tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32),
+ "input_ids":
+ tf.constant(
+ all_input_ids, shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_mask":
+ tf.constant(
+ all_input_mask,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ "input_type_ids":
+ tf.constant(
+ all_input_type_ids,
+ shape=[num_examples, seq_length],
+ dtype=tf.int32),
+ })
+
+ d = d.batch(batch_size=batch_size, drop_remainder=False)
+ return d
+
+ return input_fn
+
+
+def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu,
+ use_one_hot_embeddings):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ unique_ids = features["unique_ids"]
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ input_type_ids = features["input_type_ids"]
+
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=False,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=input_type_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ if mode != tf.estimator.ModeKeys.PREDICT:
+ raise ValueError("Only PREDICT modes are supported: %s" % (mode))
+
+ tvars = tf.trainable_variables()
+ scaffold_fn = None
+ (assignment_map,
+ initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
+ tvars, init_checkpoint)
+ if use_tpu:
+
+ def tpu_scaffold():
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+ return tf.train.Scaffold()
+
+ scaffold_fn = tpu_scaffold
+ else:
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
+ init_string)
+
+ all_layers = model.get_all_encoder_layers()
+
+ predictions = {
+ "unique_id": unique_ids,
+ }
+
+ for (i, layer_index) in enumerate(layer_indexes):
+ predictions["layer_output_%d" % i] = all_layers[layer_index]
+
+ output_spec = tf.contrib.tpu.TPUEstimatorSpec(
+ mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
+ return output_spec
+
+ return model_fn
+
+
+def convert_examples_to_features(examples, seq_length, tokenizer):
+ """Loads a data file into a list of `InputBatch`s."""
+
+ features = []
+ for (ex_index, example) in enumerate(examples):
+ tokens_a = tokenizer.tokenize(example.text_a)
+
+ tokens_b = None
+ if example.text_b:
+ tokens_b = tokenizer.tokenize(example.text_b)
+
+ if tokens_b:
+ # Modifies `tokens_a` and `tokens_b` in place so that the total
+ # length is less than the specified length.
+ # Account for [CLS], [SEP], [SEP] with "- 3"
+ _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3)
+ else:
+ # Account for [CLS] and [SEP] with "- 2"
+ if len(tokens_a) > seq_length - 2:
+ tokens_a = tokens_a[0:(seq_length - 2)]
+
+ # The convention in BERT is:
+ # (a) For sequence pairs:
+ # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
+ # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
+ # (b) For single sequences:
+ # tokens: [CLS] the dog is hairy . [SEP]
+ # type_ids: 0 0 0 0 0 0 0
+ #
+ # Where "type_ids" are used to indicate whether this is the first
+ # sequence or the second sequence. The embedding vectors for `type=0` and
+ # `type=1` were learned during pre-training and are added to the wordpiece
+ # embedding vector (and position vector). This is not *strictly* necessary
+ # since the [SEP] token unambiguously separates the sequences, but it makes
+ # it easier for the model to learn the concept of sequences.
+ #
+ # For classification tasks, the first vector (corresponding to [CLS]) is
+ # used as the "sentence vector". Note that this only makes sense because
+ # the entire model is fine-tuned.
+ tokens = []
+ input_type_ids = []
+ tokens.append("[CLS]")
+ input_type_ids.append(0)
+ for token in tokens_a:
+ tokens.append(token)
+ input_type_ids.append(0)
+ tokens.append("[SEP]")
+ input_type_ids.append(0)
+
+ if tokens_b:
+ for token in tokens_b:
+ tokens.append(token)
+ input_type_ids.append(1)
+ tokens.append("[SEP]")
+ input_type_ids.append(1)
+
+ input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+ # The mask has 1 for real tokens and 0 for padding tokens. Only real
+ # tokens are attended to.
+ input_mask = [1] * len(input_ids)
+
+ # Zero-pad up to the sequence length.
+ while len(input_ids) < seq_length:
+ input_ids.append(0)
+ input_mask.append(0)
+ input_type_ids.append(0)
+
+ assert len(input_ids) == seq_length
+ assert len(input_mask) == seq_length
+ assert len(input_type_ids) == seq_length
+
+ if ex_index < 5:
+ tf.logging.info("*** Example ***")
+ tf.logging.info("unique_id: %s" % (example.unique_id))
+ tf.logging.info("tokens: %s" % " ".join(
+ [tokenization.printable_text(x) for x in tokens]))
+ tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+ tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+ tf.logging.info(
+ "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids]))
+
+ features.append(
+ InputFeatures(
+ unique_id=example.unique_id,
+ tokens=tokens,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ input_type_ids=input_type_ids))
+ return features
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+ """Truncates a sequence pair in place to the maximum length."""
+
+ # This is a simple heuristic which will always truncate the longer sequence
+ # one token at a time. This makes more sense than truncating an equal percent
+ # of tokens from each, since if one sequence is very short then each token
+ # that's truncated likely contains more information than a longer sequence.
+ while True:
+ total_length = len(tokens_a) + len(tokens_b)
+ if total_length <= max_length:
+ break
+ if len(tokens_a) > len(tokens_b):
+ tokens_a.pop()
+ else:
+ tokens_b.pop()
+
+
+def read_examples(input_file):
+ """Read a list of `InputExample`s from an input file."""
+ examples = []
+ unique_id = 0
+ with tf.gfile.GFile(input_file, "r") as reader:
+ while True:
+ line = tokenization.convert_to_unicode(reader.readline())
+ if not line:
+ break
+ line = line.strip()
+ text_a = None
+ text_b = None
+ m = re.match(r"^(.*) \|\|\| (.*)$", line)
+ if m is None:
+ text_a = line
+ else:
+ text_a = m.group(1)
+ text_b = m.group(2)
+ examples.append(
+ InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
+ unique_id += 1
+ return examples
+
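The ` ||| ` separator parsed above lets one input line carry either a single sentence (`text_a` only) or a sentence pair. A self-contained sketch of the same regex on an illustrative line:

    import re

    line = "first sentence ||| second sentence"  # illustrative contents
    m = re.match(r"^(.*) \|\|\| (.*)$", line)
    assert m.group(1) == "first sentence"
    assert m.group(2) == "second sentence"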
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ layer_indexes = [int(x) for x in FLAGS.layers.split(",")]
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ tokenizer = tokenization.FullTokenizer(
+ vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ run_config = tf.contrib.tpu.RunConfig(
+ master=FLAGS.master,
+ tpu_config=tf.contrib.tpu.TPUConfig(
+ num_shards=FLAGS.num_tpu_cores,
+ per_host_input_for_training=is_per_host))
+
+ examples = read_examples(FLAGS.input_file)
+
+ features = convert_examples_to_features(
+ examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
+
+ unique_id_to_feature = {}
+ for feature in features:
+ unique_id_to_feature[feature.unique_id] = feature
+
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ layer_indexes=layer_indexes,
+ use_tpu=FLAGS.use_tpu,
+ use_one_hot_embeddings=FLAGS.use_one_hot_embeddings)
+
+ # If TPU is not available, this will fall back to normal Estimator on CPU
+ # or GPU.
+ estimator = tf.contrib.tpu.TPUEstimator(
+ use_tpu=FLAGS.use_tpu,
+ model_fn=model_fn,
+ config=run_config,
+ predict_batch_size=FLAGS.batch_size)
+
+ input_fn = input_fn_builder(
+ features=features, seq_length=FLAGS.max_seq_length)
+
+ with codecs.getwriter("utf-8")(tf.gfile.Open(FLAGS.output_file,
+ "w")) as writer:
+ for result in estimator.predict(input_fn, yield_single_examples=True):
+ unique_id = int(result["unique_id"])
+ feature = unique_id_to_feature[unique_id]
+ output_json = collections.OrderedDict()
+ output_json["linex_index"] = unique_id
+ all_features = []
+ for (i, token) in enumerate(feature.tokens):
+ all_layers = []
+ for (j, layer_index) in enumerate(layer_indexes):
+ layer_output = result["layer_output_%d" % j]
+ layers = collections.OrderedDict()
+ layers["index"] = layer_index
+ layers["values"] = [
+ round(float(x), 6) for x in layer_output[i:(i + 1)].flat
+ ]
+ all_layers.append(layers)
+ token_features = collections.OrderedDict()
+ token_features["token"] = token
+ token_features["layers"] = all_layers
+ all_features.append(token_features)
+ output_json["features"] = all_features
+ writer.write(json.dumps(output_json) + "\n")
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_file")
+ flags.mark_flag_as_required("vocab_file")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("init_checkpoint")
+ flags.mark_flag_as_required("output_file")
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fp16_utils.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fp16_utils.py
new file mode 100644
index 000000000..6b8bda985
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fp16_utils.py
@@ -0,0 +1,35 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+
+def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
+ initializer=None, regularizer=None,
+ trainable=True,
+ *args, **kwargs):
+ """Custom variable getter that forces trainable variables to be stored in
+ float32 precision and then casts them to the training precision.
+ """
+ storage_dtype = tf.float32 if trainable else dtype
+ variable = getter(name, shape, dtype=storage_dtype,
+ initializer=initializer, regularizer=regularizer,
+ trainable=trainable,
+ *args, **kwargs)
+ if trainable and dtype != tf.float32:
+ variable = tf.cast(variable, dtype)
+ return variable
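+
+# Minimal usage sketch (illustrative only; `build_fp16_graph` is hypothetical):
+# with tf.variable_scope('model', custom_getter=float32_variable_storage_getter):
+#     predictions = build_fp16_graph()
+# Trainable variables are then stored as fp32 masters and, when a non-fp32
+# dtype is requested, read back as casts to that compute dtype.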
+
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fused_layer_norm.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fused_layer_norm.py
new file mode 100644
index 000000000..ff6e13701
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/fused_layer_norm.py
@@ -0,0 +1,141 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+
+from tensorflow.python.framework import ops
+from tensorflow.contrib.layers.python.layers import utils
+from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.ops import init_ops
+import numpy
+from tensorflow.python.ops import array_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import nn
+
+def fused_layer_norm(inputs,
+ center=True,
+ scale=True,
+ activation_fn=None,
+ reuse=None,
+ variables_collections=None,
+ outputs_collections=None,
+ trainable=True,
+ begin_norm_axis=1,
+ begin_params_axis=-1,
+ scope=None,
+ use_fused_batch_norm=False):
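+ """Layer normalization over the trailing axes of `inputs`.
+
+ With use_fused_batch_norm=True the input is reshaped to NCHW with one
+ normalization group per channel, so tf.nn.fused_batch_norm computes the
+ per-group mean/variance in a single fused kernel and gamma/beta are applied
+ afterwards. Otherwise the standard moments + batch_normalization path runs.
+ """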
+ with tf.variable_scope(
+ scope, 'LayerNorm', [inputs], reuse=reuse) as sc:
+ inputs = ops.convert_to_tensor(inputs)
+ inputs_shape = inputs.shape
+ inputs_rank = inputs_shape.ndims
+ if inputs_rank is None:
+ raise ValueError('Inputs %s has undefined rank.' % inputs.name)
+ dtype = inputs.dtype.base_dtype
+ if begin_norm_axis < 0:
+ begin_norm_axis = inputs_rank + begin_norm_axis
+ if begin_params_axis >= inputs_rank or begin_norm_axis >= inputs_rank:
+ raise ValueError('begin_params_axis (%d) and begin_norm_axis (%d) '
+ 'must be < rank(inputs) (%d)' %
+ (begin_params_axis, begin_norm_axis, inputs_rank))
+ params_shape = inputs_shape[begin_params_axis:]
+ if not params_shape.is_fully_defined():
+ raise ValueError(
+ 'Inputs %s: shape(inputs)[%s:] is not fully defined: %s' %
+ (inputs.name, begin_params_axis, inputs_shape))
+ # Allocate parameters for the beta and gamma of the normalization.
+ beta, gamma = None, None
+ if center:
+ beta_collections = utils.get_variable_collections(variables_collections,
+ 'beta')
+ beta = variables.model_variable(
+ 'beta',
+ shape=params_shape,
+ dtype=dtype,
+ initializer=init_ops.zeros_initializer(),
+ collections=beta_collections,
+ trainable=trainable)
+ if scale:
+ gamma_collections = utils.get_variable_collections(
+ variables_collections, 'gamma')
+ gamma = variables.model_variable(
+ 'gamma',
+ shape=params_shape,
+ dtype=dtype,
+ initializer=init_ops.ones_initializer(),
+ collections=gamma_collections,
+ trainable=trainable)
+ if use_fused_batch_norm:
+ # get static TensorShape if fully defined,
+ # otherwise retrieve shape tensor
+ norm_shape = inputs.shape[begin_norm_axis:]
+ if norm_shape.is_fully_defined():
+ bn_shape = [1, -1, 1, numpy.prod(norm_shape.as_list())]
+ else:
+ norm_shape = tf.shape(inputs)[begin_norm_axis:]
+ bn_shape = [1, -1, 1, tf.reduce_prod(norm_shape)]
+ if inputs.get_shape().is_fully_defined():
+ outputs_shape = inputs.get_shape()
+ else:
+ outputs_shape = tf.shape(inputs)
+ inputs = array_ops.reshape(inputs, bn_shape)
+ if inputs.get_shape().is_fully_defined():
+ # static inputs TensorShape fully defined after reshape.
+ ones = array_ops.ones(inputs.get_shape()[1], dtype=dtypes.float32)
+ zeros = array_ops.zeros(inputs.get_shape()[1], dtype=dtypes.float32)
+ else:
+ # static inputs TensorShape NOT fully defined after reshape.
+ # must use dynamic shape, which means these input tensors
+ # have to be created at runtime, which causes a slowdown.
+ scale_shape = tf.shape(inputs)[1]
+ ones = array_ops.ones(scale_shape, dtype=dtypes.float32)
+ zeros = array_ops.zeros(scale_shape, dtype=dtypes.float32)
+ outputs, mean, variance = nn.fused_batch_norm(
+ inputs,
+ ones, zeros,
+ epsilon=1e-4,
+ data_format="NCHW")
+ outputs = array_ops.reshape(outputs, outputs_shape)
+ if center and scale:
+ outputs = outputs * gamma + beta
+ elif center:
+ outputs = outputs + beta
+ elif scale:
+ outputs = outputs * gamma
+ else:
+ # Calculate the moments on the last axis (layer activations).
+ norm_axes = list(range(begin_norm_axis, inputs_rank))
+ mean, variance = nn.moments(inputs, norm_axes, keep_dims=True)
+ # Compute layer normalization using the batch_normalization function.
+ variance_epsilon = 1e-4
+ outputs = nn.batch_normalization(
+ inputs,
+ mean,
+ variance,
+ offset=beta,
+ scale=gamma,
+ variance_epsilon=variance_epsilon)
+ outputs.set_shape(inputs_shape)
+ if activation_fn is not None:
+ outputs = activation_fn(outputs)
+ return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
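+
+# Call pattern assumed by layer_norm() in src/modeling.py in this patch:
+# fused_layer_norm(inputs=tensor, begin_norm_axis=-1, begin_params_axis=-1,
+#                  scope=name, use_fused_batch_norm=True)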
+
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/gpu_environment.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/gpu_environment.py
new file mode 100644
index 000000000..948c3fa44
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/gpu_environment.py
@@ -0,0 +1,36 @@
+# coding=utf-8
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+
+def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
+ initializer=None, regularizer=None,
+ trainable=True,
+ *args, **kwargs):
+ """Custom variable getter that forces trainable variables to be stored in
+ float32 precision and then casts them to the training precision.
+ """
+ storage_dtype = tf.float32 if trainable else dtype
+ variable = getter(name, shape, dtype=storage_dtype,
+ initializer=initializer, regularizer=regularizer,
+ trainable=trainable,
+ *args, **kwargs)
+ if trainable and dtype != tf.float32:
+ variable = tf.cast(variable, dtype)
+ return variable
+
+def get_custom_getter(compute_type):
+ return float32_variable_storage_getter if compute_type == tf.float16 else None
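+
+# Usage sketch: src/modeling.py selects this getter for the whole model scope,
+# e.g.
+# with tf.variable_scope('bert', custom_getter=get_custom_getter(tf.float16)):
+#     ...  # fp16 compute graph over fp32 master weights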
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
new file mode 100644
index 000000000..95a8eda8d
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
@@ -0,0 +1,1031 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The main BERT model and related functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import json
+import math
+import re
+import numpy as np
+import six
+import tensorflow as tf
+
+from gpu_environment import get_custom_getter
+
+from npu_bridge.estimator.npu_unary_ops import npu_unary_ops
+from npu_bridge.estimator import npu_ops
+
+class BertConfig(object):
+ """Configuration for `BertModel`."""
+
+ def __init__(self,
+ vocab_size,
+ hidden_size=768,
+ num_hidden_layers=12,
+ num_attention_heads=12,
+ intermediate_size=3072,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ initializer_range=0.02):
+ """Constructs BertConfig.
+
+ Args:
+ vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
+ hidden_size: Size of the encoder layers and the pooler layer.
+ num_hidden_layers: Number of hidden layers in the Transformer encoder.
+ num_attention_heads: Number of attention heads for each attention layer in
+ the Transformer encoder.
+ intermediate_size: The size of the "intermediate" (i.e., feed-forward)
+ layer in the Transformer encoder.
+ hidden_act: The non-linear activation function (function or string) in the
+ encoder and pooler.
+ hidden_dropout_prob: The dropout probability for all fully connected
+ layers in the embeddings, encoder, and pooler.
+ attention_probs_dropout_prob: The dropout ratio for the attention
+ probabilities.
+ max_position_embeddings: The maximum sequence length that this model might
+ ever be used with. Typically set this to something large just in case
+ (e.g., 512 or 1024 or 2048).
+ type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+ `BertModel`.
+ initializer_range: The stdev of the truncated_normal_initializer for
+ initializing all weight matrices.
+ """
+ self.vocab_size = vocab_size
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.hidden_act = hidden_act
+ self.intermediate_size = intermediate_size
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.max_position_embeddings = max_position_embeddings
+ self.type_vocab_size = type_vocab_size
+ self.initializer_range = initializer_range
+
+ @classmethod
+ def from_dict(cls, json_object):
+ """Constructs a `BertConfig` from a Python dictionary of parameters."""
+ config = BertConfig(vocab_size=None)
+ for (key, value) in six.iteritems(json_object):
+ config.__dict__[key] = value
+ return config
+
+ @classmethod
+ def from_json_file(cls, json_file):
+ """Constructs a `BertConfig` from a json file of parameters."""
+ with tf.gfile.GFile(json_file, "r") as reader:
+ text = reader.read()
+ return cls.from_dict(json.loads(text))
+
+ def to_dict(self):
+ """Serializes this instance to a Python dictionary."""
+ output = copy.deepcopy(self.__dict__)
+ return output
+
+ def to_json_string(self):
+ """Serializes this instance to a JSON string."""
+ return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
+
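+# Example round trip (the JSON path is illustrative; this repo ships config
+# files under configs/):
+# config = BertConfig.from_json_file('configs/bert_base_config.json')
+# print(config.to_json_string())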
+
+class BertModel(object):
+ """BERT model ("Bidirectional Encoder Representations from Transformers").
+
+ Example usage:
+
+ ```python
+ # Already been converted into WordPiece token ids
+ input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
+ input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
+ token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])
+
+ config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
+ num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
+
+ model = modeling.BertModel(config=config, is_training=True,
+ input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)
+
+ label_embeddings = tf.get_variable(...)
+ pooled_output = model.get_pooled_output()
+ logits = tf.matmul(pooled_output, label_embeddings)
+ ...
+ ```
+ """
+
+ def __init__(self,
+ config,
+ is_training,
+ input_ids,
+ input_mask=None,
+ token_type_ids=None,
+ use_one_hot_embeddings=False,
+ scope=None,
+ compute_type=tf.float32):
+ """Constructor for BertModel.
+
+ Args:
+ config: `BertConfig` instance.
+ is_training: bool. true for training model, false for eval model. Controls
+ whether dropout will be applied.
+ input_ids: int32 Tensor of shape [batch_size, seq_length].
+ input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
+ token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
+ use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
+ embeddings or tf.embedding_lookup() for the word embeddings. On the TPU,
+ it is much faster if this is True; on the CPU or GPU, it is faster if
+ this is False.
+ scope: (optional) variable scope. Defaults to "bert".
+ compute_type: (optional) either float32 or float16. Only applies to GPUs.
+
+ Raises:
+ ValueError: The config is invalid or one of the input tensor shapes
+ is invalid.
+ """
+ config = copy.deepcopy(config)
+ if not is_training:
+ config.hidden_dropout_prob = 0.0
+ config.attention_probs_dropout_prob = 0.0
+
+ input_shape = get_shape_list(input_ids, expected_rank=2)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+
+ if input_mask is None:
+ input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32)
+
+ if token_type_ids is None:
+ token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)
+
+ with tf.variable_scope(scope, default_name="bert", custom_getter=get_custom_getter(compute_type)):
+ with tf.variable_scope("embeddings"):
+ # For good convergence with mixed precision training,
+ # it is important that the embedding computation remains in fp32.
+ # Perform embedding lookup on the word ids.
+ (self.embedding_output, self.embedding_table) = embedding_lookup(
+ input_ids=input_ids,
+ vocab_size=config.vocab_size,
+ embedding_size=config.hidden_size,
+ initializer_range=config.initializer_range,
+ word_embedding_name="word_embeddings",
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ # Add positional embeddings and token type embeddings, then layer
+ # normalize and perform dropout.
+ self.embedding_output = embedding_postprocessor(
+ input_tensor=self.embedding_output,
+ use_token_type=True,
+ token_type_ids=token_type_ids,
+ token_type_vocab_size=config.type_vocab_size,
+ token_type_embedding_name="token_type_embeddings",
+ use_position_embeddings=True,
+ position_embedding_name="position_embeddings",
+ initializer_range=config.initializer_range,
+ max_position_embeddings=config.max_position_embeddings,
+ dropout_prob=config.hidden_dropout_prob,
+ use_one_hot_embeddings=use_one_hot_embeddings)
+
+ with tf.variable_scope("encoder"):
+ # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
+ # mask of shape [batch_size, seq_length, seq_length] which is used
+ # for the attention scores.
+ attention_mask = create_attention_mask_from_input_mask(
+ input_ids, input_mask)
+
+ # Run the stacked transformer.
+ # `sequence_output` shape = [batch_size, seq_length, hidden_size].
+ self.all_encoder_layers = transformer_model(
+ input_tensor=tf.saturate_cast(self.embedding_output, compute_type),
+ attention_mask=attention_mask,
+ hidden_size=config.hidden_size,
+ num_hidden_layers=config.num_hidden_layers,
+ num_attention_heads=config.num_attention_heads,
+ intermediate_size=config.intermediate_size,
+ intermediate_act_fn=get_activation(config.hidden_act),
+ hidden_dropout_prob=config.hidden_dropout_prob,
+ attention_probs_dropout_prob=config.attention_probs_dropout_prob,
+ initializer_range=config.initializer_range,
+ do_return_all_layers=True)
+ self.sequence_output = tf.cast(self.all_encoder_layers[-1], tf.float32)
+ # The "pooler" converts the encoded sequence tensor of shape
+ # [batch_size, seq_length, hidden_size] to a tensor of shape
+ # [batch_size, hidden_size]. This is necessary for segment-level
+ # (or segment-pair-level) classification tasks where we need a fixed
+ # dimensional representation of the segment.
+ with tf.variable_scope("pooler"):
+ # We "pool" the model by simply taking the hidden state corresponding
+ # to the first token. We assume that this has been pre-trained.
+ first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
+ if tf.flags.FLAGS.use_fp16_cls:
+ first_token_tensor = tf.cast(first_token_tensor, tf.float16)
+ pooled_output = tf.layers.dense(
+ first_token_tensor,
+ config.hidden_size,
+ activation=tf.tanh,
+ kernel_initializer=create_initializer(config.initializer_range))
+ self.pooled_output = tf.cast(pooled_output, tf.float32)
+
+ def get_pooled_output(self):
+ return self.pooled_output
+
+ def get_sequence_output(self):
+ """Gets final hidden layer of encoder.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
+ to the final hidden of the transformer encoder.
+ """
+ return self.sequence_output
+
+ def get_all_encoder_layers(self):
+ return self.all_encoder_layers
+
+ def get_embedding_output(self):
+ """Gets output of the embedding lookup (i.e., input to the transformer).
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
+ to the output of the embedding layer, after summing the word
+ embeddings with the positional embeddings and the token type embeddings,
+ then performing layer normalization. This is the input to the transformer.
+ """
+ return self.embedding_output
+
+ def get_embedding_table(self):
+ return self.embedding_table
+
+
+def gelu(x):
+ """Gaussian Error Linear Unit.
+
+ This is a smoother version of the ReLU.
+ Original paper: https://arxiv.org/abs/1606.08415
+ Args:
+ x: float Tensor to perform activation.
+
+ Returns:
+ `x` with the GELU activation applied.
+ """
+
+ if tf.flags.FLAGS.npu_bert_fused_gelu:
+ return npu_unary_ops.gelu(x)
+ else:
+ cdf = 0.5 * (1.0 + tf.tanh(
+ (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
+ return x * cdf
+
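+# Reference: the exact GELU is x * Phi(x) with Phi the standard normal CDF;
+# the tanh expression above is the usual fast approximation (gelu(1.0) is
+# roughly 0.841 under both).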
+
+
+def get_activation(activation_string):
+ """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.
+
+ Args:
+ activation_string: String name of the activation function.
+
+ Returns:
+ A Python function corresponding to the activation function. If
+ `activation_string` is None, empty, or "linear", this will return None.
+ If `activation_string` is not a string, it will return `activation_string`.
+
+ Raises:
+ ValueError: The `activation_string` does not correspond to a known
+ activation.
+ """
+
+ # We assume that anything that's not a string is already an activation
+ # function, so we just return it.
+ if not isinstance(activation_string, six.string_types):
+ return activation_string
+
+ if not activation_string:
+ return None
+
+ act = activation_string.lower()
+ if act == "linear":
+ return None
+ elif act == "relu":
+ return tf.nn.relu
+ elif act == "gelu":
+ return gelu
+ elif act == "tanh":
+ return tf.tanh
+ else:
+ raise ValueError("Unsupported activation: %s" % act)
+
+
+def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
+ """Compute the union of the current variables and checkpoint variables."""
+ assignment_map = {}
+ initialized_variable_names = {}
+
+ name_to_variable = collections.OrderedDict()
+ for var in tvars:
+ name = var.name
+ m = re.match("^(.*):\\d+$", name)
+ if m is not None:
+ name = m.group(1)
+ name_to_variable[name] = var
+
+ init_vars = tf.train.list_variables(init_checkpoint)
+
+ assignment_map = collections.OrderedDict()
+ for x in init_vars:
+ (name, var) = (x[0], x[1])
+ if name not in name_to_variable:
+ continue
+ assignment_map[name] = name
+ initialized_variable_names[name] = 1
+ initialized_variable_names[name + ":0"] = 1
+
+ return (assignment_map, initialized_variable_names)
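+
+# Typical consumption (the checkpoint path is illustrative):
+# tvars = tf.trainable_variables()
+# (assignment_map, _) = get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+# tf.train.init_from_checkpoint(init_checkpoint, assignment_map)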
+
+
+def dropout(input_tensor, dropout_prob):
+ """Perform dropout.
+
+ Args:
+ input_tensor: float Tensor.
+ dropout_prob: Python float. The probability of dropping out a value (NOT of
+ *keeping* a dimension as in `tf.nn.dropout`).
+
+ Returns:
+ A version of `input_tensor` with dropout applied.
+ """
+
+ if tf.flags.FLAGS.npu_bert_debug:
+ return input_tensor
+
+ if dropout_prob is None or dropout_prob == 0.0:
+ return input_tensor
+
+ if tf.flags.FLAGS.npu_bert_npu_dropout:
+ output = npu_ops.dropout(input_tensor, 1.0 - dropout_prob)
+ else:
+ output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob)
+ return output
+
+
+def layer_norm(input_tensor, name=None):
+ """Run layer normalization on the last dimension of the tensor."""
+ if input_tensor.dtype == tf.float16:
+ try:
+ from fused_layer_norm import fused_layer_norm
+ return fused_layer_norm(
+ inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name,
+ use_fused_batch_norm=True)
+ except ImportError:
+ return tf.contrib.layers.layer_norm(
+ inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
+ else:
+ return tf.contrib.layers.layer_norm(
+ inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
+
+
+def layer_norm_and_dropout(input_tensor, dropout_prob, name=None):
+ """Runs layer normalization followed by dropout."""
+ output_tensor = layer_norm(input_tensor, name)
+ output_tensor = dropout(output_tensor, dropout_prob)
+ return output_tensor
+
+
+def create_initializer(initializer_range=0.02):
+ """Creates a `truncated_normal_initializer` with the given range."""
+ return tf.truncated_normal_initializer(stddev=initializer_range)
+
+
+def embedding_lookup(input_ids,
+ vocab_size,
+ embedding_size=128,
+ initializer_range=0.02,
+ word_embedding_name="word_embeddings",
+ use_one_hot_embeddings=False):
+ """Looks up words embeddings for id tensor.
+
+ Args:
+ input_ids: int32 Tensor of shape [batch_size, seq_length] containing word
+ ids.
+ vocab_size: int. Size of the embedding vocabulary.
+ embedding_size: int. Width of the word embeddings.
+ initializer_range: float. Embedding initialization range.
+ word_embedding_name: string. Name of the embedding table.
+ use_one_hot_embeddings: bool. If True, use one-hot method for word
+ embeddings. If False, use `tf.gather()`.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, embedding_size].
+ """
+ # This function assumes that the input is of shape [batch_size, seq_length,
+ # num_inputs].
+ #
+ # If the input is a 2D tensor of shape [batch_size, seq_length], we
+ # reshape to [batch_size, seq_length, 1].
+ if input_ids.shape.ndims == 2:
+ input_ids = tf.expand_dims(input_ids, axis=[-1])
+
+ embedding_table = tf.get_variable(
+ name=word_embedding_name,
+ shape=[vocab_size, embedding_size],
+ initializer=create_initializer(initializer_range))
+
+ flat_input_ids = tf.reshape(input_ids, [-1])
+ if use_one_hot_embeddings:
+ one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
+ output = tf.matmul(one_hot_input_ids, embedding_table)
+ else:
+ if tf.flags.FLAGS.npu_gather:
+ output = gather_npu(embedding_table, flat_input_ids)
+ else:
+ output = tf.gather(embedding_table, flat_input_ids)
+
+ input_shape = get_shape_list(input_ids)
+
+ output = tf.reshape(output,
+ input_shape[0:-1] + [input_shape[-1] * embedding_size])
+ return (output, embedding_table)
+
+
+def embedding_postprocessor(input_tensor,
+ use_token_type=False,
+ token_type_ids=None,
+ token_type_vocab_size=16,
+ token_type_embedding_name="token_type_embeddings",
+ use_position_embeddings=True,
+ position_embedding_name="position_embeddings",
+ initializer_range=0.02,
+ max_position_embeddings=512,
+ dropout_prob=0.1,
+ use_one_hot_embeddings=False):
+ """Performs various post-processing on a word embedding tensor.
+
+ Args:
+ input_tensor: float Tensor of shape [batch_size, seq_length,
+ embedding_size].
+ use_token_type: bool. Whether to add embeddings for `token_type_ids`.
+ token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
+ Must be specified if `use_token_type` is True.
+ token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
+ token_type_embedding_name: string. The name of the embedding table variable
+ for token type ids.
+ use_position_embeddings: bool. Whether to add position embeddings for the
+ position of each token in the sequence.
+ position_embedding_name: string. The name of the embedding table variable
+ for positional embeddings.
+ initializer_range: float. Range of the weight initialization.
+ max_position_embeddings: int. Maximum sequence length that might ever be
+ used with this model. This can be longer than the sequence length of
+ input_tensor, but cannot be shorter.
+ dropout_prob: float. Dropout probability applied to the final output tensor.
+ use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
+ embeddings or tf.embedding_lookup() for the word embeddings.
+
+ Returns:
+ float tensor with same shape as `input_tensor`.
+
+ Raises:
+ ValueError: One of the tensor shapes or input values is invalid.
+ """
+ input_shape = get_shape_list(input_tensor, expected_rank=3)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+ width = input_shape[2]
+
+ output = input_tensor
+
+ if use_token_type:
+ if token_type_ids is None:
+ raise ValueError("`token_type_ids` must be specified if "
+ "`use_token_type` is True.")
+ token_type_table = tf.get_variable(
+ name=token_type_embedding_name,
+ shape=[token_type_vocab_size, width],
+ initializer=create_initializer(initializer_range))
+ flat_token_type_ids = tf.reshape(token_type_ids, [-1])
+ if use_one_hot_embeddings:
+ # This vocab will be small so we always do one-hot here, since it is
+ # always faster for a small vocabulary.
+ one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
+ token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
+ else:
+ if tf.flags.FLAGS.npu_gather:
+ token_type_embeddings = gather_npu(token_type_table, flat_token_type_ids)
+ else:
+ token_type_embeddings = tf.gather(token_type_table, flat_token_type_ids)
+ token_type_embeddings = tf.reshape(token_type_embeddings,
+ [batch_size, seq_length, width])
+ output += token_type_embeddings
+
+ if use_position_embeddings:
+ full_position_embeddings = tf.get_variable(
+ name=position_embedding_name,
+ shape=[max_position_embeddings, width],
+ initializer=create_initializer(initializer_range))
+ # Since the position embedding table is a learned variable, we create it
+ # using a (long) sequence length `max_position_embeddings`. The actual
+ # sequence length might be shorter than this, for faster training of
+ # tasks that do not have long sequences.
+ #
+ # So `full_position_embeddings` is effectively an embedding table
+ # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
+ # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
+ # perform a slice.
+ position_embeddings = tf.slice(full_position_embeddings, [0, 0],
+ [seq_length, width])
+ num_dims = len(output.shape.as_list())
+
+ # Only the last two dimensions are relevant (`seq_length` and `width`), so
+ # we broadcast among the first dimensions, which is typically just
+ # the batch size.
+ position_broadcast_shape = []
+ for _ in range(num_dims - 2):
+ position_broadcast_shape.append(1)
+ position_broadcast_shape.extend([seq_length, width])
+ position_embeddings = tf.reshape(position_embeddings,
+ position_broadcast_shape)
+ output += position_embeddings
+
+ output = layer_norm_and_dropout(output, dropout_prob)
+ return output
+
+
+def create_attention_mask_from_input_mask(from_tensor, to_mask):
+ """Create 3D attention mask from a 2D tensor mask.
+
+ Args:
+ from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...].
+ to_mask: int32 Tensor of shape [batch_size, to_seq_length].
+
+ Returns:
+ float Tensor of shape [batch_size, from_seq_length, to_seq_length].
+ """
+ to_mask = tf.cast(to_mask, dtype=tf.float32)
+
+ from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+ batch_size = from_shape[0]
+
+ to_shape = get_shape_list(to_mask, expected_rank=2)
+ to_seq_length = to_shape[1]
+
+ to_mask = tf.reshape(to_mask, [batch_size, 1, to_seq_length])
+ # The mask will be automatically broadcasted to
+ # [batch_size, from_seq_length, to_seq_length] when it is used in the
+ # attention layer.
+ return to_mask
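+
+# Worked example: to_mask = [[1, 1, 0]] (batch_size=1, to_seq_length=3) becomes
+# [[[1., 1., 0.]]] of shape [1, 1, 3], which broadcasts against attention
+# scores of shape [batch_size, from_seq_length, to_seq_length].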
+
+def attention_layer(from_tensor,
+ to_tensor,
+ attention_mask=None,
+ num_attention_heads=1,
+ size_per_head=512,
+ query_act=None,
+ key_act=None,
+ value_act=None,
+ attention_probs_dropout_prob=0.0,
+ initializer_range=0.02,
+ do_return_2d_tensor=False,
+ batch_size=None,
+ from_seq_length=None,
+ to_seq_length=None):
+ """Performs multi-headed attention from `from_tensor` to `to_tensor`.
+
+ This is an implementation of multi-headed attention based on "Attention
+ Is All You Need". If `from_tensor` and `to_tensor` are the same, then
+ this is self-attention. Each timestep in `from_tensor` attends to the
+ corresponding sequence in `to_tensor`, and returns a fixed-width vector.
+
+ This function first projects `from_tensor` into a "query" tensor and
+ `to_tensor` into "key" and "value" tensors. These are (effectively) a list
+ of tensors of length `num_attention_heads`, where each tensor is of shape
+ [batch_size, seq_length, size_per_head].
+
+ Then, the query and key tensors are dot-producted and scaled. These are
+ softmaxed to obtain attention probabilities. The value tensors are then
+ interpolated by these probabilities, then concatenated back to a single
+ tensor and returned.
+
+ In practice, the multi-headed attention is done with transposes and
+ reshapes rather than actual separate tensors.
+
+ Args:
+ from_tensor: float Tensor of shape [batch_size, from_seq_length,
+ from_width].
+ to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
+ attention_mask: (optional) int32 Tensor of shape [batch_size,
+ from_seq_length, to_seq_length]. The values should be 1 or 0. The
+ attention scores will effectively be set to -infinity for any positions in
+ the mask that are 0, and will be unchanged for positions that are 1.
+ num_attention_heads: int. Number of attention heads.
+ size_per_head: int. Size of each attention head.
+ query_act: (optional) Activation function for the query transform.
+ key_act: (optional) Activation function for the key transform.
+ value_act: (optional) Activation function for the value transform.
+ attention_probs_dropout_prob: (optional) float. Dropout probability of the
+ attention probabilities.
+ initializer_range: float. Range of the weight initializer.
+ do_return_2d_tensor: bool. If True, the output will be of shape [batch_size
+ * from_seq_length, num_attention_heads * size_per_head]. If False, the
+ output will be of shape [batch_size, from_seq_length, num_attention_heads
+ * size_per_head].
+ batch_size: (Optional) int. If the input is 2D, this might be the batch size
+ of the 3D version of the `from_tensor` and `to_tensor`.
+ from_seq_length: (Optional) If the input is 2D, this might be the seq length
+ of the 3D version of the `from_tensor`.
+ to_seq_length: (Optional) If the input is 2D, this might be the seq length
+ of the 3D version of the `to_tensor`.
+
+ Returns:
+ float Tensor of shape [batch_size, from_seq_length,
+ num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
+ true, this will be of shape [batch_size * from_seq_length,
+ num_attention_heads * size_per_head]).
+
+ Raises:
+ ValueError: Any of the arguments or tensor shapes are invalid.
+ """
+
+ def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
+ seq_length, width):
+ output_tensor = tf.reshape(
+ input_tensor, [batch_size, seq_length, num_attention_heads, width])
+
+ output_tensor = tf.transpose(output_tensor, [0, 2, 1, 3])
+ return output_tensor
+
+ from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+ to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])
+
+ if len(from_shape) != len(to_shape):
+ raise ValueError(
+ "The rank of `from_tensor` must match the rank of `to_tensor`.")
+
+ if len(from_shape) == 3:
+ batch_size = from_shape[0]
+ from_seq_length = from_shape[1]
+ to_seq_length = to_shape[1]
+ elif len(from_shape) == 2:
+ if (batch_size is None or from_seq_length is None or to_seq_length is None):
+ raise ValueError(
+ "When passing in rank 2 tensors to attention_layer, the values "
+ "for `batch_size`, `from_seq_length`, and `to_seq_length` "
+ "must all be specified.")
+
+ # Scalar dimensions referenced here:
+ # B = batch size (number of sequences)
+ # F = `from_tensor` sequence length
+ # T = `to_tensor` sequence length
+ # N = `num_attention_heads`
+ # H = `size_per_head`
+
+ from_tensor_2d = reshape_to_matrix(from_tensor)
+ to_tensor_2d = reshape_to_matrix(to_tensor)
+
+ # `query_layer` = [B*F, N*H]
+ query_layer = tf.layers.dense(
+ from_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=query_act,
+ name="query",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `key_layer` = [B*T, N*H]
+ key_layer = tf.layers.dense(
+ to_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=key_act,
+ name="key",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `value_layer` = [B*T, N*H]
+ value_layer = tf.layers.dense(
+ to_tensor_2d,
+ num_attention_heads * size_per_head,
+ activation=value_act,
+ name="value",
+ kernel_initializer=create_initializer(initializer_range))
+
+ # `query_layer` = [B, N, F, H]
+ query_layer = transpose_for_scores(query_layer, batch_size,
+ num_attention_heads, from_seq_length,
+ size_per_head)
+
+ # `key_layer` = [B, N, T, H]
+ key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads,
+ to_seq_length, size_per_head)
+
+ # Take the dot product between "query" and "key" to get the raw
+ # attention scores.
+ # `attention_scores` = [B, N, F, T]
+ attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
+ attention_scores = tf.multiply(attention_scores,
+ 1.0 / math.sqrt(float(size_per_head)))
+
+ if attention_mask is not None:
+ # `attention_mask` = [B, 1, F, T]
+ attention_mask = tf.expand_dims(attention_mask, axis=[1])
+
+ # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+ # masked positions, this operation will create a tensor which is 0.0 for
+ # positions we want to attend and -10000.0 for masked positions.
+ adder = (1.0 - tf.cast(attention_mask, attention_scores.dtype)) * -10000.0
+
+ # Since we are adding it to the raw scores before the softmax, this is
+ # effectively the same as removing these entirely.
+ attention_scores += adder
+
+ # Normalize the attention scores to probabilities.
+ # `attention_probs` = [B, N, F, T]
+ attention_probs = tf.nn.softmax(attention_scores)
+
+ # This is actually dropping out entire tokens to attend to, which might
+ # seem a bit unusual, but is taken from the original Transformer paper.
+ attention_probs = dropout(attention_probs, attention_probs_dropout_prob)
+
+ # `value_layer` = [B, T, N, H]
+ value_layer = tf.reshape(
+ value_layer,
+ [batch_size, to_seq_length, num_attention_heads, size_per_head])
+
+ # `value_layer` = [B, N, T, H]
+ value_layer = tf.transpose(value_layer, [0, 2, 1, 3])
+
+ # `context_layer` = [B, N, F, H]
+ context_layer = tf.matmul(attention_probs, value_layer)
+
+ # `context_layer` = [B, F, N, H]
+ context_layer = tf.transpose(context_layer, [0, 2, 1, 3])
+
+ if do_return_2d_tensor:
+ # `context_layer` = [B*F, N*H]
+ context_layer = tf.reshape(
+ context_layer,
+ [batch_size * from_seq_length, num_attention_heads * size_per_head])
+ else:
+ # `context_layer` = [B, F, N*H]
+ context_layer = tf.reshape(
+ context_layer,
+ [batch_size, from_seq_length, num_attention_heads * size_per_head])
+
+ return context_layer
+
+
+def transformer_model(input_tensor,
+ attention_mask=None,
+ hidden_size=768,
+ num_hidden_layers=12,
+ num_attention_heads=12,
+ intermediate_size=3072,
+ intermediate_act_fn=gelu,
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ initializer_range=0.02,
+ do_return_all_layers=False):
+ """Multi-headed, multi-layer Transformer from "Attention is All You Need".
+
+ This is almost an exact implementation of the original Transformer encoder.
+
+ See the original paper:
+ https://arxiv.org/abs/1706.03762
+
+ Also see:
+ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py
+
+ Args:
+ input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size].
+ attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length,
+ seq_length], with 1 for positions that can be attended to and 0 in
+ positions that should not be.
+ hidden_size: int. Hidden size of the Transformer.
+ num_hidden_layers: int. Number of layers (blocks) in the Transformer.
+ num_attention_heads: int. Number of attention heads in the Transformer.
+ intermediate_size: int. The size of the "intermediate" (a.k.a., feed
+ forward) layer.
+ intermediate_act_fn: function. The non-linear activation function to apply
+ to the output of the intermediate/feed-forward layer.
+ hidden_dropout_prob: float. Dropout probability for the hidden layers.
+ attention_probs_dropout_prob: float. Dropout probability of the attention
+ probabilities.
+ initializer_range: float. Range of the initializer (stddev of truncated
+ normal).
+ do_return_all_layers: Whether to also return all layers or just the final
+ layer.
+
+ Returns:
+ float Tensor of shape [batch_size, seq_length, hidden_size], the final
+ hidden layer of the Transformer.
+
+ Raises:
+ ValueError: A Tensor shape or parameter is invalid.
+ """
+ if hidden_size % num_attention_heads != 0:
+ raise ValueError(
+ "The hidden size (%d) is not a multiple of the number of attention "
+ "heads (%d)" % (hidden_size, num_attention_heads))
+
+ attention_head_size = int(hidden_size / num_attention_heads)
+ input_shape = get_shape_list(input_tensor, expected_rank=3)
+ batch_size = input_shape[0]
+ seq_length = input_shape[1]
+ input_width = input_shape[2]
+
+ # The Transformer performs sum residuals on all layers so the input needs
+ # to be the same as the hidden size.
+ if input_width != hidden_size:
+ raise ValueError("The width of the input tensor (%d) != hidden size (%d)" %
+ (input_width, hidden_size))
+
+ # We keep the representation as a 2D tensor to avoid re-shaping it back and
+ # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on
+ # the GPU/CPU but may not be free on the TPU, so we want to minimize them to
+ # help the optimizer.
+ prev_output = reshape_to_matrix(input_tensor)
+
+ all_layer_outputs = []
+ for layer_idx in range(num_hidden_layers):
+ with tf.variable_scope("layer_%d" % layer_idx):
+ layer_input = prev_output
+
+ with tf.variable_scope("attention"):
+ attention_heads = []
+ with tf.variable_scope("self"):
+ attention_head = attention_layer(
+ from_tensor=layer_input,
+ to_tensor=layer_input,
+ attention_mask=attention_mask,
+ num_attention_heads=num_attention_heads,
+ size_per_head=attention_head_size,
+ attention_probs_dropout_prob=attention_probs_dropout_prob,
+ initializer_range=initializer_range,
+ do_return_2d_tensor=True,
+ batch_size=batch_size,
+ from_seq_length=seq_length,
+ to_seq_length=seq_length)
+ attention_heads.append(attention_head)
+
+ attention_output = None
+ if len(attention_heads) == 1:
+ attention_output = attention_heads[0]
+ else:
+ # In the case where we have other sequences, we just concatenate
+ # them to the self-attention head before the projection.
+ attention_output = tf.concat(attention_heads, axis=-1)
+
+ # Run a linear projection of `hidden_size` then add a residual
+ # with `layer_input`.
+ with tf.variable_scope("output"):
+ attention_output = tf.layers.dense(
+ attention_output,
+ hidden_size,
+ kernel_initializer=create_initializer(initializer_range))
+ attention_output = dropout(attention_output, hidden_dropout_prob)
+ attention_output = layer_norm(attention_output + layer_input)
+
+ # The activation is only applied to the "intermediate" hidden layer.
+ with tf.variable_scope("intermediate"):
+ intermediate_output = tf.layers.dense(
+ attention_output,
+ intermediate_size,
+ activation=intermediate_act_fn,
+ kernel_initializer=create_initializer(initializer_range))
+
+ # Down-project back to `hidden_size` then add the residual.
+ with tf.variable_scope("output"):
+ layer_output = tf.layers.dense(
+ intermediate_output,
+ hidden_size,
+ kernel_initializer=create_initializer(initializer_range))
+ layer_output = dropout(layer_output, hidden_dropout_prob)
+ layer_output = layer_norm(layer_output + attention_output)
+ prev_output = layer_output
+ all_layer_outputs.append(layer_output)
+
+ if do_return_all_layers:
+ final_outputs = []
+ for layer_output in all_layer_outputs:
+ final_output = reshape_from_matrix(layer_output, input_shape)
+ final_outputs.append(final_output)
+ return final_outputs
+ else:
+ final_output = reshape_from_matrix(prev_output, input_shape)
+ return final_output
+
+
+def get_shape_list(tensor, expected_rank=None, name=None):
+ """Returns a list of the shape of tensor, preferring static dimensions.
+
+ Args:
+ tensor: A tf.Tensor object to find the shape of.
+ expected_rank: (optional) int. The expected rank of `tensor`. If this is
+ specified and the `tensor` has a different rank, an exception will be
+ thrown.
+ name: Optional name of the tensor for the error message.
+
+ Returns:
+ A list of dimensions of the shape of tensor. All static dimensions will
+ be returned as python integers, and dynamic dimensions will be returned
+ as tf.Tensor scalars.
+ """
+ if name is None:
+ name = tensor.name
+
+ if expected_rank is not None:
+ assert_rank(tensor, expected_rank, name)
+
+ shape = tensor.shape.as_list()
+
+ non_static_indexes = []
+ for (index, dim) in enumerate(shape):
+ if dim is None:
+ non_static_indexes.append(index)
+
+ if not non_static_indexes:
+ return shape
+
+ dyn_shape = tf.shape(tensor)
+ for index in non_static_indexes:
+ shape[index] = dyn_shape[index]
+ return shape
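+
+# Example: for a tensor of static shape [None, 128, 768], this returns
+# [<int32 scalar Tensor>, 128, 768] -- known dims come back as Python ints,
+# the unknown batch dim as a slice of tf.shape(tensor).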
+
+
+def reshape_to_matrix(input_tensor):
+ """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix)."""
+ ndims = input_tensor.shape.ndims
+ if ndims < 2:
+ raise ValueError("Input tensor must have at least rank 2. Shape = %s" %
+ (input_tensor.shape))
+ if ndims == 2:
+ return input_tensor
+
+ width = input_tensor.shape[-1]
+ output_tensor = tf.reshape(input_tensor, [-1, width])
+ return output_tensor
+
+
+def reshape_from_matrix(output_tensor, orig_shape_list):
+ """Reshapes a rank 2 tensor back to its original rank >= 2 tensor."""
+ if len(orig_shape_list) == 2:
+ return output_tensor
+
+ output_shape = get_shape_list(output_tensor)
+
+ orig_dims = orig_shape_list[0:-1]
+ width = output_shape[-1]
+
+ return tf.reshape(output_tensor, orig_dims + [width])
+
+
+def assert_rank(tensor, expected_rank, name=None):
+ """Raises an exception if the tensor rank is not of the expected rank.
+
+ Args:
+ tensor: A tf.Tensor to check the rank of.
+ expected_rank: Python integer or list of integers, expected rank.
+ name: Optional name of the tensor for the error message.
+
+ Raises:
+ ValueError: If the expected shape doesn't match the actual shape.
+ """
+ if name is None:
+ name = tensor.name
+
+ expected_rank_dict = {}
+ if isinstance(expected_rank, six.integer_types):
+ expected_rank_dict[expected_rank] = True
+ else:
+ for x in expected_rank:
+ expected_rank_dict[x] = True
+
+ actual_rank = tensor.shape.ndims
+ if actual_rank not in expected_rank_dict:
+ scope_name = tf.get_variable_scope().name
+ raise ValueError(
+ "For the tensor `%s` in scope `%s`, the actual rank "
+ "`%d` (shape = %s) is not equal to the expected rank `%s`" %
+ (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank)))
+
+@tf.custom_gradient
+def gather_npu(params, indices):
+ def grad(dy):
+ params_shape = tf.shape(params, out_type=tf.int64)
+ params_shape = tf.cast(params_shape, tf.int32)
+ grad_gather = tf.unsorted_segment_sum(dy, indices, params_shape[0])
+ return grad_gather, None
+ return tf.gather(params, indices), grad
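+
+# Forward pass matches tf.gather; the custom gradient scatters dy back with
+# tf.unsorted_segment_sum, yielding a dense gradient rather than
+# tf.IndexedSlices (presumably friendlier to the NPU executor). Sketch:
+# embeddings = gather_npu(embedding_table, tf.reshape(input_ids, [-1]))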
+
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py
new file mode 100644
index 000000000..71244aeb1
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py
@@ -0,0 +1,441 @@
+# coding=utf-8
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Functions and classes related to optimization (weight updates)."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import re
+import tensorflow as tf
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+
+from npu_bridge.estimator.npu.npu_optimizer import NPUOptimizer
+from npu_bridge.estimator.npu import npu_loss_scale_manager as lsm_lib
+
+def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None, manual_fp16=False, use_fp16=False, num_accumulation_steps=1,
+ optimizer_type="adam", allreduce_post_accumulation=False):
+ """Creates an optimizer training op."""
+ global_step = tf.train.get_or_create_global_step()
+
+ # avoid step change in learning rate at end of warmup phase
+ if optimizer_type == "adam":
+ power = 1.0
+ decayed_learning_rate_at_crossover_point = init_lr * (
+ (1.0 - float(num_warmup_steps) / float(num_train_steps)) ** power)
+ else:
+ power = 0.5
+ decayed_learning_rate_at_crossover_point = init_lr
+
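+ # For the "adam" branch (power=1.0), polynomial decay evaluated at step
+ # num_warmup_steps equals adjusted_init_lr * (1 - num_warmup_steps / num_train_steps).
+ # Scaling init_lr by init_lr / decayed_learning_rate_at_crossover_point makes
+ # that value exactly init_lr, so the warmup ramp (which ends at init_lr) meets
+ # the decay curve without a step. For "lamb" the crossover value is init_lr,
+ # so adjusted_init_lr == init_lr and nothing changes.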
+ adjusted_init_lr = init_lr * (init_lr / decayed_learning_rate_at_crossover_point)
+ print('decayed_learning_rate_at_crossover_point = %e, adjusted_init_lr = %e' % (decayed_learning_rate_at_crossover_point, adjusted_init_lr))
+
+ learning_rate = tf.constant(value=adjusted_init_lr, shape=[], dtype=tf.float32)
+
+ # Implements linear decay of the learning rate.
+ learning_rate = tf.train.polynomial_decay(
+ learning_rate,
+ global_step,
+ num_train_steps,
+ end_learning_rate=0.0,
+ power=power,
+ cycle=False)
+
+ # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
+ # learning rate will be `global_step/num_warmup_steps * init_lr`.
+ if num_warmup_steps:
+ global_steps_int = tf.cast(global_step, tf.int32)
+ warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)
+
+ global_steps_float = tf.cast(global_steps_int, tf.float32)
+ warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
+
+ warmup_percent_done = global_steps_float / warmup_steps_float
+ warmup_learning_rate = init_lr * warmup_percent_done
+
+ is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
+ learning_rate = (
+ (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)
+
+ if optimizer_type == "lamb":
+ print("Initializing LAMB Optimizer")
+ optimizer = LAMBOptimizer(
+ learning_rate=learning_rate,
+ weight_decay_rate=0.01,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-6,
+ exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
+ else:
+ print("Initializing ADAM Weight Decay Optimizer")
+ # It is recommended that you use this optimizer for fine tuning, since this
+ # is how the model was trained (note that the Adam m/v variables are NOT
+ # loaded from init_checkpoint.)
+ optimizer = AdamWeightDecayOptimizer(
+ learning_rate=learning_rate,
+ weight_decay_rate=0.01,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-4,
+ exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
+
+ # if hvd is not None and (num_accumulation_steps == 1 or (not allreduce_post_accumulation)):
+ # optimizer = hvd.DistributedOptimizer(optimizer, sparse_as_dense=True, compression=Compression.fp16 if use_fp16 or manual_fp16 else Compression.none)
+
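+ # Flag semantics (from the branches below): npu_bert_loss_scale == 0 selects
+ # dynamic (exponential-update) loss scaling seeded by init_loss_scale_value;
+ # values >= 1 select a fixed loss scale; None or -1 disables loss scaling.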
+ if tf.flags.FLAGS.npu_bert_loss_scale not in [None, -1]:
+ if tf.flags.FLAGS.npu_bert_loss_scale == 0:
+ loss_scale_manager = lsm_lib.ExponentialUpdateLossScaleManager(init_loss_scale=tf.flags.FLAGS.init_loss_scale_value, incr_every_n_steps=1000, decr_every_n_nan_or_inf=1, decr_ratio=0.5)
+ elif tf.flags.FLAGS.npu_bert_loss_scale >= 1:
+ loss_scale_manager = lsm_lib.FixedLossScaleManager(loss_scale=tf.flags.FLAGS.npu_bert_loss_scale)
+ else:
+ raise ValueError("Invalid loss scale: %d" % tf.flags.FLAGS.npu_bert_loss_scale)
+ optimizer = NPUOptimizer(optimizer, loss_scale_manager, is_distributed=tf.flags.FLAGS.distributed,
+ is_loss_scale=True, is_tailing_optimization=tf.flags.FLAGS.npu_bert_tail_optimize)
+ else:
+ optimizer = NPUOptimizer(optimizer, is_distributed=tf.flags.FLAGS.distributed)
+
+ tvars = tf.trainable_variables()
+ grads_and_vars = optimizer.compute_gradients(loss * 1.0 / num_accumulation_steps, tvars)
+
+ if num_accumulation_steps > 1:
+ local_step = tf.get_variable(name="local_step", shape=[], dtype=tf.int32, trainable=False,
+ initializer=tf.zeros_initializer)
+ # batch_finite = tf.get_variable(name="batch_finite", shape=[], dtype=tf.bool, trainable=False,
+ # initializer=tf.ones_initializer)
+ accum_vars = [tf.get_variable(
+ name=tvar.name.split(":")[0] + "/accum",
+ shape=tvar.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer()) for tvar in tf.trainable_variables()]
+
+ reset_step = tf.cast(tf.math.equal(local_step % num_accumulation_steps, 0), dtype=tf.bool)
+ local_step = tf.cond(reset_step, lambda: local_step.assign(tf.ones_like(local_step)), lambda: local_step.assign_add(1))
+
+ with tf.name_scope("accumulate_step"):
+ grads_and_vars_and_accums = [(gv[0], gv[1], accum_vars[i]) for i, gv in enumerate(grads_and_vars) if gv[0] is not None]
+ grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums))
+
+ all_are_finite = tf.constant(True, dtype=tf.bool)
+ # all_are_finite = tf.reduce_all([tf.reduce_all(tf.is_finite(g)) for g in grads]) if (tf.flags.FLAGS.npu_bert_loss_scale not in [None, -1]) and (manual_fp16 or use_fp16) else tf.constant(True, dtype=tf.bool)
+ # batch_finite = tf.cond(reset_step,
+ # lambda: batch_finite.assign(tf.math.logical_and(tf.constant(True, dtype=tf.bool), all_are_finite)),
+ # lambda:batch_finite.assign(tf.math.logical_and(batch_finite, all_are_finite)))
+
+ # This is how the model was pre-trained.
+ # ensure global norm is a finite number
+ # to prevent clip_by_global_norm from having a hissy fit.
+ if tf.flags.FLAGS.npu_bert_clip_by_global_norm:
+ (clipped_grads, _) = tf.clip_by_global_norm(
+ grads, clip_norm=1.0,
+ use_norm=tf.cond(
+ all_are_finite,
+ lambda: tf.global_norm(grads),
+ lambda: tf.constant(1.0)))
+ else:
+ with tf.name_scope("clip_grads"):
+ clipped_grads = [
+ (tf.clip_by_norm(grad, clip_norm=1.0))
+ if grad is not None else grad for grad in grads
+ ]
+
+ accum_vars = tf.cond(reset_step,
+ lambda: [accum_vars[i].assign(grad) for i, grad in enumerate(clipped_grads)],
+ lambda: [accum_vars[i].assign_add(grad) for i, grad in enumerate(clipped_grads)])
+
+ def update(accum_vars):
+ with tf.name_scope("opt_update"):
+ # if allreduce_post_accumulation and hvd is not None:
+ # accum_vars = [hvd.allreduce(tf.convert_to_tensor(accum_var), compression=Compression.fp16 if use_fp16 or manual_fp16 else Compression.none) if isinstance(accum_var, tf.IndexedSlices)
+ # else hvd.allreduce(accum_var, compression=Compression.fp16 if use_fp16 or manual_fp16 else Compression.none) for accum_var in accum_vars]
+ return optimizer.apply_gradients(list(zip(accum_vars, tvars)), global_step=global_step)
+
+ update_step = tf.identity(tf.cast(tf.math.equal(local_step % num_accumulation_steps, 0), dtype=tf.bool), name="update_step")
+ update_op = tf.cond(update_step,
+ lambda: update(accum_vars), lambda: tf.no_op())
+
+ train_op = update_op
+ # new_global_step = tf.cond(tf.math.logical_and(update_step, tf.cast(hvd.allreduce(tf.cast(batch_finite, tf.int32)), tf.bool)), lambda: global_step+1, lambda: global_step)
+ # new_global_step = tf.identity(new_global_step, name='step_update')
+ # train_op = tf.group(update_op, [global_step.assign(new_global_step)])
+ else:
+ grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
+ grads, tvars = list(zip(*grads_and_vars))
+
+ # This is how the model was pre-trained.
+ # ensure global norm is a finite number
+ # to prevent clip_by_global_norm from having a hissy fit.
+ if tf.flags.FLAGS.npu_bert_clip_by_global_norm:
+ # all_are_finite = tf.reduce_all(
+ # [tf.reduce_all(tf.is_finite(g)) for g in grads]) if (tf.flags.FLAGS.npu_bert_loss_scale not in [None, -1]) and (use_fp16 or manual_fp16) else tf.constant(True, dtype=tf.bool)
+ # DTS2020061815268
+ all_are_finite = tf.constant(True, dtype=tf.bool)
+ (clipped_grads, _) = tf.clip_by_global_norm(
+ grads, clip_norm=1.0,
+ use_norm=tf.cond(
+ all_are_finite,
+ lambda: tf.global_norm(grads),
+ lambda: tf.constant(1.0)))
+ else:
+ with tf.name_scope("clip_grads"):
+ clipped_grads = [
+ (tf.clip_by_norm(grad, clip_norm=1.0))
+ if grad is not None else grad for grad in grads
+ ]
+
+ with tf.name_scope("apply_grads"):
+ train_op = optimizer.apply_gradients(
+ list(zip(clipped_grads, tvars)), global_step=global_step)
+
+ #if tf.flags.FLAGS.npu_bert_clip_by_global_norm:
+ # new_global_step = tf.cond(all_are_finite, lambda: global_step + 1, lambda: global_step)
+ #else:
+ # new_global_step = global_step + 1
+ #new_global_step = tf.identity(new_global_step, name='step_update')
+ #train_op = tf.group(train_op, [global_step.assign(new_global_step)])
+ return train_op
+
+
+class AdamWeightDecayOptimizer(tf.train.Optimizer):
+ """A basic Adam optimizer that includes "correct" L2 weight decay."""
+
+ def __init__(self,
+ learning_rate,
+ weight_decay_rate=0.0,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-4,
+ exclude_from_weight_decay=None,
+ name="AdamWeightDecayOptimizer"):
+ """Constructs a AdamWeightDecayOptimizer."""
+ super(AdamWeightDecayOptimizer, self).__init__(False, name)
+
+ self.learning_rate = tf.identity(learning_rate, name='learning_rate')
+ self.weight_decay_rate = weight_decay_rate
+ self.beta_1 = beta_1
+ self.beta_2 = beta_2
+ self.epsilon = epsilon
+ self.exclude_from_weight_decay = exclude_from_weight_decay
+
+ def apply_gradients(self, grads_and_vars, global_step=None, name=None,
+ manual_fp16=False):
+ """See base class."""
+ assignments = []
+ for (grad, param) in grads_and_vars:
+ with tf.name_scope("apply_one_adam"):
+ if grad is None or param is None:
+ continue
+
+ param_name = self._get_variable_name(param.name)
+ has_shadow = manual_fp16 and param.dtype.base_dtype != tf.float32
+ if has_shadow:
+ # create shadow fp32 weights for fp16 variable
+ param_fp32 = tf.get_variable(
+ name=param_name + "/shadow",
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.cast(param.initialized_value(),tf.float32))
+ else:
+ param_fp32 = param
+
+ m = tf.get_variable(
+ name=param_name + "/adam_m",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+ v = tf.get_variable(
+ name=param_name + "/adam_v",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+
+ # Standard Adam update.
+ next_m = (
+ tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
+ next_v = (
+ tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
+ tf.square(grad)))
+
+ update = next_m / (tf.sqrt(next_v) + self.epsilon)
+
+ # Just adding the square of the weights to the loss function is *not*
+ # the correct way of using L2 regularization/weight decay with Adam,
+ # since that will interact with the m and v parameters in strange ways.
+ #
+ # Instead we want to decay the weights in a manner that doesn't interact
+ # with the m/v parameters. This is equivalent to adding the square
+ # of the weights to the loss with plain (non-momentum) SGD.
+ if self._do_use_weight_decay(param_name):
+ update += self.weight_decay_rate * param_fp32
+
+ update_with_lr = self.learning_rate * update
+
+ next_param = param_fp32 - update_with_lr
+
+ if has_shadow:
+ # cast shadow fp32 weights to fp16 and assign to trainable variable
+ param.assign(tf.cast(next_param, param.dtype.base_dtype))
+ assignments.extend(
+ [param_fp32.assign(next_param),
+ m.assign(next_m),
+ v.assign(next_v)])
+ new_global_step = global_step + 1
+ new_global_step = tf.identity(new_global_step, name='step_update')
+ assignments.extend([global_step.assign(new_global_step)])
+ return tf.group(*assignments, name=name)
+
+ def _do_use_weight_decay(self, param_name):
+ """Whether to use L2 weight decay for `param_name`."""
+ if not self.weight_decay_rate:
+ return False
+ if self.exclude_from_weight_decay:
+ for r in self.exclude_from_weight_decay:
+ if re.search(r, param_name) is not None:
+ return False
+ return True
+
+ def _get_variable_name(self, param_name):
+ """Get the variable name from the tensor name."""
+ m = re.match("^(.*):\\d+$", param_name)
+ if m is not None:
+ param_name = m.group(1)
+ return param_name
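+
+# A minimal sketch of the decoupled update above, in NumPy-style pseudo-code
+# (names are illustrative, not part of this module):
+#
+#   m = beta_1 * m + (1 - beta_1) * g
+#   v = beta_2 * v + (1 - beta_2) * g * g
+#   update = m / (sqrt(v) + epsilon) + weight_decay_rate * w
+#   w = w - learning_rate * update
+#
+# The decay term is added to the Adam step itself rather than to the gradient,
+# so it never flows through the m/v moment estimates.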
+
+
+class LAMBOptimizer(tf.train.Optimizer):
+ """A LAMB optimizer that includes "correct" L2 weight decay."""
+
+ def __init__(self,
+ learning_rate,
+ weight_decay_rate=0.0,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-6,
+ exclude_from_weight_decay=None,
+ name="LAMBOptimizer"):
+ """Constructs a LAMBOptimizer."""
+ super(LAMBOptimizer, self).__init__(False, name)
+
+ self.learning_rate = tf.identity(learning_rate, name='learning_rate')
+ self.weight_decay_rate = weight_decay_rate
+ self.beta_1 = beta_1
+ self.beta_2 = beta_2
+ self.epsilon = epsilon
+ self.exclude_from_weight_decay = exclude_from_weight_decay
+ self.steps = 0
+
+ def apply_gradients(self, grads_and_vars, global_step=None, name=None,
+ manual_fp16=False):
+ """See base class."""
+ assignments = []
+ for (grad, param) in grads_and_vars:
+ with tf.name_scope("apply_one_lamb"):
+ if grad is None or param is None:
+ continue
+
+ param_name = self._get_variable_name(param.name)
+ has_shadow = manual_fp16 and param.dtype.base_dtype != tf.float32
+ if has_shadow:
+ # create shadow fp32 weights for fp16 variable
+ param_fp32 = tf.get_variable(
+ name=param_name + "/shadow",
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.cast(param.initialized_value(),tf.float32))
+ else:
+ param_fp32 = param
+
+ m = tf.get_variable(
+ name=param_name + "/adam_m",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+ v = tf.get_variable(
+ name=param_name + "/adam_v",
+ shape=param.shape.as_list(),
+ dtype=tf.float32,
+ trainable=False,
+ initializer=tf.zeros_initializer())
+
+ # LAMB update
+ next_m = (
+ tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
+ next_v = (
+ tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
+ tf.square(grad)))
+
+ self.steps += 1
+ beta1_correction = (1 - self.beta_1 ** self.steps)
+ beta2_correction = (1 - self.beta_2 ** self.steps)
+
+ next_m_unbiased = next_m / beta1_correction
+ next_v_unbiased = next_v / beta2_correction
+
+ update = next_m_unbiased / (tf.sqrt(next_v_unbiased) + self.epsilon)
+
+ # Just adding the square of the weights to the loss function is *not*
+ # the correct way of using L2 regularization/weight decay with Adam,
+ # since that will interact with the m and v parameters in strange ways.
+ #
+ # Instead we want to decay the weights in a manner that doesn't interact
+ # with the m/v parameters. This is equivalent to adding the square
+ # of the weights to the loss with plain (non-momentum) SGD.
+ if self._do_use_weight_decay(param_name):
+ update += self.weight_decay_rate * param_fp32
+
+ w_norm = linalg_ops.norm(param, ord=2)
+ g_norm = linalg_ops.norm(update, ord=2)
+ ratio = array_ops.where(math_ops.greater(w_norm, 0), array_ops.where(
+ math_ops.greater(g_norm, 0), (w_norm / g_norm), 1.0), 1.0)
+
+ update_with_lr = ratio * self.learning_rate * update
+
+ next_param = param_fp32 - update_with_lr
+
+ if has_shadow:
+ # cast shadow fp32 weights to fp16 and assign to trainable variable
+ param.assign(tf.cast(next_param, param.dtype.base_dtype))
+ assignments.extend(
+ [param_fp32.assign(next_param),
+ m.assign(next_m),
+ v.assign(next_v)])
+ new_global_step = global_step + 1
+ new_global_step = tf.identity(new_global_step, name='step_update')
+ assignments.extend([global_step.assign(new_global_step)])
+ return tf.group(*assignments, name=name)
+
+ def _do_use_weight_decay(self, param_name):
+ """Whether to use L2 weight decay for `param_name`."""
+ if not self.weight_decay_rate:
+ return False
+ if self.exclude_from_weight_decay:
+ for r in self.exclude_from_weight_decay:
+ if re.search(r, param_name) is not None:
+ return False
+ return True
+
+ def _get_variable_name(self, param_name):
+ """Get the variable name from the tensor name."""
+ m = re.match("^(.*):\\d+$", param_name)
+ if m is not None:
+ param_name = m.group(1)
+ return param_name
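+
+# LAMB's layer-wise adaptation in brief: for a parameter w and Adam-style
+# update u (weight decay included), the applied step is
+#
+#   ratio = ||w||_2 / ||u||_2   (1.0 when either norm is zero)
+#   w = w - ratio * learning_rate * u
+#
+# e.g. ||w||_2 = 4.0 and ||u||_2 = 0.5 give ratio = 8.0, so large layers are
+# not starved by a learning rate tuned for small ones. The worked numbers are
+# illustrative only.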
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
new file mode 100644
index 000000000..3250f0a6f
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
@@ -0,0 +1,748 @@
+# coding=utf-8
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Run masked LM/next sentence masked_lm pre-training for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import time
+import modeling
+import optimization
+import tensorflow as tf
+import glob
+from utils import LogEvalRunHook
+from tensorflow.core.protobuf import rewriter_config_pb2
+from gpu_environment import get_custom_getter
+
+from npu_bridge.estimator.npu.npu_config import NPURunConfig, DumpConfig
+from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
+
+os.environ['WHICH_OP'] = 'GEOP'
+os.environ['NEW_GE_FE_ID'] = '1'
+os.environ['GE_AICPU_FLAG'] = '1'
+os.environ['GE_USE_STATIC_MEMORY'] = '1'
+os.environ['OPTION_EXEC_HCCL_FLAG'] = '1'
+os.environ['HCCL_CONNECT_TIMEOUT'] = '600'
+
+flags = tf.flags
+
+FLAGS = flags.FLAGS
+
+## Required parameters
+flags.DEFINE_string(
+ "bert_config_file", None,
+ "The config json file corresponding to the pre-trained BERT model. "
+ "This specifies the model architecture.")
+
+flags.DEFINE_string(
+ "input_files_dir", "./data",
+ "Directory with input files, comma separated or single directory.")
+
+flags.DEFINE_string(
+ "eval_files_dir", None,
+ "Directory with eval files, comma separated or single directory. ")
+
+flags.DEFINE_string(
+ "output_dir", "./models",
+ "The output directory where the model checkpoints will be written.")
+
+## Other parameters
+flags.DEFINE_string(
+ "init_checkpoint", None,
+ "Initial checkpoint (usually from a pre-trained BERT model).")
+
+flags.DEFINE_string(
+ "optimizer_type", "lamb",
+ "Optimizer used for training - LAMB or ADAM")
+
+flags.DEFINE_integer(
+ "max_seq_length", 512,
+ "The maximum total input sequence length after WordPiece tokenization. "
+ "Sequences longer than this will be truncated, and sequences shorter "
+ "than this will be padded. Must match data generation.")
+
+flags.DEFINE_integer(
+ "max_predictions_per_seq", 80,
+ "Maximum number of masked LM predictions per sequence. "
+ "Must match data generation.")
+
+flags.DEFINE_bool("do_train", True, "Whether to run training.")
+
+flags.DEFINE_bool("do_eval", False, "Whether to run eval on the dev set.")
+
+flags.DEFINE_integer("train_batch_size", 32, "Total batch size for training.")
+
+flags.DEFINE_integer("eval_batch_size", 8, "Total batch size for eval.")
+
+flags.DEFINE_float("learning_rate", 5e-5, "The initial learning rate for Adam.")
+
+flags.DEFINE_integer("num_train_steps", 100000, "Number of training steps.")
+
+flags.DEFINE_integer("num_warmup_steps", 10000, "Number of warmup steps.")
+
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
+ "How often to save the model checkpoint.")
+
+flags.DEFINE_integer("display_loss_steps", 10,
+ "How often to print loss")
+
+flags.DEFINE_integer("iterations_per_loop", 1000,
+ "How many steps to make in each estimator call.")
+
+flags.DEFINE_integer("max_eval_steps", 100, "Maximum number of eval steps.")
+
+flags.DEFINE_integer("num_accumulation_steps", 1,
+ "Number of accumulation steps before gradient update."
+ "Global batch size = num_accumulation_steps * train_batch_size")
+
+flags.DEFINE_bool("allreduce_post_accumulation", False, "Whether to all reduce after accumulation of N steps or after each step")
+
+flags.DEFINE_bool(
+ "verbose_logging", False,
+ "If true, all of the trainable parameters are printed")
+
+flags.DEFINE_bool("horovod", False, "Whether to use Horovod for multi-gpu runs")
+
+flags.DEFINE_bool("report_loss", True, "Whether to report total loss during training.")
+
+flags.DEFINE_bool("manual_fp16", False, "Whether to use fp32 or fp16 arithmetic on GPU. "
+ "Manual casting is done instead of using AMP")
+
+flags.DEFINE_bool("use_xla", False, "Whether to enable XLA JIT compilation.")
+
+flags.DEFINE_integer('init_loss_scale_value', 2**32, 'Initial loss scale value for loss scale optimizer')
+
+flags.DEFINE_bool("over_dump", False, "Whether to enable overflow.")
+
+flags.DEFINE_string("over_dump_path", None, "path to save overflow dump files.")
+
+flags.DEFINE_bool("use_fp16", False, "Whether to enable AMP ops.")
+
+flags.DEFINE_bool("use_fp16_cls", False, "Whether to use fp16 in cls and pooler.")
+
+flags.DEFINE_bool("distributed", False, "Whether to use multi-npu")
+
+flags.DEFINE_bool('npu_bert_fused_gelu', True, 'Whether to use npu defined gelu op')
+
+flags.DEFINE_bool('npu_bert_debug', False, 'If True, dropout and shuffle is disabled.')
+
+flags.DEFINE_bool('npu_bert_use_tdt', True, 'Whether to use TDT as the dataset channel')
+
+flags.DEFINE_string("npu_bert_job_start_file", None, "CSA job start file path.")
+
+flags.DEFINE_integer("npu_bert_loss_scale", 0, "Whether to use loss scale, -1 is disable, 0 is dynamic loss scale, >=1 is static loss scale")
+
+flags.DEFINE_bool("npu_bert_clip_by_global_norm", False, "Use clip_by_global_norm if True, or use clip_by_norm for each gradient")
+
+flags.DEFINE_bool('npu_bert_npu_dropout', True, 'Whether to use npu defined dropout op')
+
+flags.DEFINE_bool('npu_bert_tail_optimize', False, 'Whether to use npu allreduce tail optimization')
+
+flags.DEFINE_bool('npu_gather', True, 'Whether to use gather_npu whose backward propagation avoids IndexedSlices')
+
+flags.DEFINE_bool('hcom_parallel', True, 'Whether to use parallel allreduce')
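+
+# Example invocation covering the required flags (paths are illustrative):
+#   python run_pretraining.py --bert_config_file=bert_config.json \
+#       --input_files_dir=./data --eval_files_dir=./eval_data \
+#       --output_dir=./models --npu_bert_job_start_file=./job_start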
+
+# report samples/sec, total loss and learning rate during training
+class _LogSessionRunHook(tf.train.SessionRunHook):
+ def __init__(self, global_batch_size, num_accumulation_steps, display_every=10, hvd_rank=-1):
+ self.global_batch_size = global_batch_size
+ self.display_every = display_every
+ self.hvd_rank = hvd_rank
+ self.num_accumulation_steps = num_accumulation_steps
+ def after_create_session(self, session, coord):
+ self.elapsed_secs = 0.
+ self.count = 0
+ self.all_count = 0
+ self.avg_loss = 0.0
+
+ def before_run(self, run_context):
+ self.t0 = time.time()
+ if self.num_accumulation_steps <= 1:
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ return tf.train.SessionRunArgs(
+ fetches=['global_step:0', 'total_loss:0',
+ 'learning_rate:0', 'nsp_loss:0',
+ 'mlm_loss:0', 'loss_scale:0', 'apply_grads/All:0'])
+ else:
+ return tf.train.SessionRunArgs(
+ fetches=['global_step:0', 'total_loss:0',
+ 'learning_rate:0', 'nsp_loss:0',
+ 'mlm_loss:0'])
+ else:
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ return tf.train.SessionRunArgs(
+ fetches=['global_step:0', 'update_step:0', 'total_loss:0',
+ 'learning_rate:0', 'nsp_loss:0',
+ 'mlm_loss:0', 'loss_scale:0'])
+ else:
+ return tf.train.SessionRunArgs(
+ fetches=['global_step:0', 'update_step:0', 'total_loss:0',
+ 'learning_rate:0', 'nsp_loss:0',
+ 'mlm_loss:0'])
+ def after_run(self, run_context, run_values):
+ self.elapsed_secs += time.time() - self.t0
+ if self.num_accumulation_steps <=1:
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ global_step, total_loss, lr, nsp_loss, mlm_loss, loss_scaler, custom_arg = run_values.results
+ else:
+ global_step, total_loss, lr, nsp_loss, mlm_loss = run_values.results
+ update_step = True
+ else:
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ global_step, update_step, total_loss, lr, nsp_loss, mlm_loss, loss_scaler = run_values.results
+ else:
+ global_step, update_step, total_loss, lr, nsp_loss, mlm_loss = run_values.results
+ print_step = global_step + 1 # One-based index for printing.
+ self.avg_loss += total_loss
+ self.all_count += 1
+ if update_step:
+ self.count += 1
+ dt = self.elapsed_secs / self.count
+ sent_per_sec = self.global_batch_size / dt * FLAGS.iterations_per_loop
+ avg_loss_step = self.avg_loss / self.all_count
+ if self.hvd_rank >= 0:
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ print('Rank = %2d :: Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e isFinite = %2i' %
+ (self.hvd_rank, print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler, custom_arg), flush=True)
+ else:
+ print('Rank = %2d :: Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e' %
+ (self.hvd_rank, print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr), flush=True)
+ else:
+ if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e isFinite = %2i' %
+ (print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler, custom_arg), flush=True)
+ else:
+ print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e' %
+ (print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr), flush=True)
+ self.elapsed_secs = 0.
+ self.count = 0
+ self.avg_loss = 0.0
+ self.all_count = 0
+
+def model_fn_builder(bert_config, init_checkpoint, learning_rate,
+ num_train_steps, num_warmup_steps,
+ use_one_hot_embeddings, hvd=None):
+ """Returns `model_fn` closure for TPUEstimator."""
+
+ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
+ """The `model_fn` for TPUEstimator."""
+
+ tf.logging.info("*** Features ***")
+ for name in sorted(features.keys()):
+ tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
+
+ input_ids = features["input_ids"]
+ input_mask = features["input_mask"]
+ segment_ids = features["segment_ids"]
+ masked_lm_positions = features["masked_lm_positions"]
+ masked_lm_ids = features["masked_lm_ids"]
+ masked_lm_weights = features["masked_lm_weights"]
+ next_sentence_labels = features["next_sentence_labels"]
+
+ is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+
+ model = modeling.BertModel(
+ config=bert_config,
+ is_training=is_training,
+ input_ids=input_ids,
+ input_mask=input_mask,
+ token_type_ids=segment_ids,
+ use_one_hot_embeddings=use_one_hot_embeddings,
+ compute_type=tf.float16 if FLAGS.manual_fp16 else tf.float32)
+
+ (masked_lm_loss,
+ masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
+ bert_config, model.get_sequence_output(), model.get_embedding_table(),
+ masked_lm_positions, masked_lm_ids,
+ masked_lm_weights)
+
+ (next_sentence_loss, next_sentence_example_loss,
+ next_sentence_log_probs) = get_next_sentence_output(
+ bert_config, model.get_pooled_output(), next_sentence_labels)
+
+ masked_lm_loss = tf.identity(masked_lm_loss, name="mlm_loss")
+ next_sentence_loss = tf.identity(next_sentence_loss, name="nsp_loss")
+ total_loss = masked_lm_loss + next_sentence_loss
+ total_loss = tf.identity(total_loss, name='total_loss')
+
+ tvars = tf.trainable_variables()
+
+ initialized_variable_names = {}
+ if init_checkpoint and (hvd is None or hvd.rank() == 0):
+ print("Loading checkpoint", init_checkpoint)
+ (assignment_map, initialized_variable_names
+ ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
+
+ tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
+
+ if FLAGS.verbose_logging:
+ tf.logging.info("**** Trainable Variables ****")
+ for var in tvars:
+ init_string = ""
+ if var.name in initialized_variable_names:
+ init_string = ", *INIT_FROM_CKPT*"
+ tf.logging.info(" %d :: name = %s, shape = %s%s", 0 if hvd is None else hvd.rank(), var.name, var.shape,
+ init_string)
+
+ output_spec = None
+ if mode == tf.estimator.ModeKeys.TRAIN:
+ train_op = optimization.create_optimizer(
+ total_loss, learning_rate, num_train_steps, num_warmup_steps,
+ hvd, FLAGS.manual_fp16, FLAGS.use_fp16, FLAGS.num_accumulation_steps, FLAGS.optimizer_type, FLAGS.allreduce_post_accumulation)
+
+ output_spec = tf.estimator.EstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ train_op=train_op)
+ elif mode == tf.estimator.ModeKeys.EVAL:
+
+ def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
+ masked_lm_weights, next_sentence_example_loss,
+ next_sentence_log_probs, next_sentence_labels):
+ """Computes the loss and accuracy of the model."""
+ masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
+ [-1, masked_lm_log_probs.shape[-1]])
+ masked_lm_predictions = tf.argmax(
+ masked_lm_log_probs, axis=-1, output_type=tf.int32)
+ masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
+ masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
+ masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
+ masked_lm_accuracy = tf.metrics.accuracy(
+ labels=masked_lm_ids,
+ predictions=masked_lm_predictions,
+ weights=masked_lm_weights)
+ masked_lm_mean_loss = tf.metrics.mean(
+ values=masked_lm_example_loss, weights=masked_lm_weights)
+
+ next_sentence_log_probs = tf.reshape(
+ next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
+ next_sentence_predictions = tf.argmax(
+ next_sentence_log_probs, axis=-1, output_type=tf.int32)
+ next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
+ next_sentence_accuracy = tf.metrics.accuracy(
+ labels=next_sentence_labels, predictions=next_sentence_predictions)
+ next_sentence_mean_loss = tf.metrics.mean(
+ values=next_sentence_example_loss)
+
+ return {
+ "masked_lm_accuracy": masked_lm_accuracy,
+ "masked_lm_loss": masked_lm_mean_loss,
+ "next_sentence_accuracy": next_sentence_accuracy,
+ "next_sentence_loss": next_sentence_mean_loss,
+ }
+
+ eval_metric_ops = metric_fn(
+ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
+ masked_lm_weights, next_sentence_example_loss,
+ next_sentence_log_probs, next_sentence_labels
+ )
+ output_spec = tf.estimator.EstimatorSpec(
+ mode=mode,
+ loss=total_loss,
+ eval_metric_ops=eval_metric_ops)
+ else:
+ raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))
+
+ return output_spec
+
+ return model_fn
+
+
+def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
+ label_ids, label_weights):
+ """Get loss and log probs for the masked LM."""
+ input_tensor = gather_indexes(input_tensor, positions)
+
+ with tf.variable_scope("cls/predictions"):
+ # We apply one more non-linear transformation before the output layer.
+ # This matrix is not used after pre-training.
+ with tf.variable_scope("transform", custom_getter=get_custom_getter(compute_type=tf.float16 if FLAGS.use_fp16_cls else tf.float32)):
+ if FLAGS.use_fp16_cls:
+ input_tensor = tf.cast(input_tensor, tf.float16)
+ input_tensor = tf.layers.dense(
+ input_tensor,
+ units=bert_config.hidden_size,
+ activation=modeling.get_activation(bert_config.hidden_act),
+ kernel_initializer=modeling.create_initializer(
+ bert_config.initializer_range))
+ input_tensor = tf.cast(input_tensor, tf.float32)
+ input_tensor = modeling.layer_norm(input_tensor)
+
+ # The output weights are the same as the input embeddings, but there is
+ # an output-only bias for each token.
+ output_bias = tf.get_variable(
+ "output_bias",
+ shape=[bert_config.vocab_size],
+ initializer=tf.zeros_initializer())
+ if FLAGS.use_fp16_cls:
+ input_tensor = tf.cast(input_tensor, tf.float16)
+ logits = tf.matmul(input_tensor, tf.cast(output_weights, tf.float16), transpose_b=True)
+ logits = tf.cast(logits, tf.float32)
+ else:
+ logits = tf.matmul(tf.cast(input_tensor, tf.float32), output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+
+ label_ids = tf.reshape(label_ids, [-1])
+ label_weights = tf.reshape(label_weights, [-1])
+
+ one_hot_labels = tf.one_hot(
+ label_ids, depth=bert_config.vocab_size, dtype=tf.float32)
+
+ # The `positions` tensor might be zero-padded (if the sequence is too
+ # short to have the maximum number of predictions). The `label_weights`
+ # tensor has a value of 1.0 for every real prediction and 0.0 for the
+ # padding predictions.
+ per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
+ numerator = tf.reduce_sum(label_weights * per_example_loss)
+ denominator = tf.reduce_sum(label_weights) + 1e-5
+ loss = numerator / denominator
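+ # e.g. per_example_loss = [2.0, 4.0, 6.0] with label_weights = [1, 1, 0]
+ # gives loss = (2.0 + 4.0) / (2 + 1e-5), roughly 3.0; padded predictions
+ # are excluded from the average.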
+
+ return (loss, per_example_loss, log_probs)
+
+
+def get_next_sentence_output(bert_config, input_tensor, labels):
+ """Get loss and log probs for the next sentence prediction."""
+
+ # Simple binary classification. Note that 0 is "next sentence" and 1 is
+ # "random sentence". This weight matrix is not used after pre-training.
+ with tf.variable_scope("cls/seq_relationship"):
+ output_weights = tf.get_variable(
+ "output_weights",
+ shape=[2, bert_config.hidden_size],
+ initializer=modeling.create_initializer(bert_config.initializer_range))
+ output_bias = tf.get_variable(
+ "output_bias", shape=[2], initializer=tf.zeros_initializer())
+
+ if FLAGS.use_fp16_cls:
+ input_tensor = tf.cast(input_tensor, tf.float16)
+ logits = tf.matmul(input_tensor, tf.cast(output_weights, tf.float16), transpose_b=True)
+ logits = tf.cast(logits, tf.float32)
+ else:
+ logits = tf.matmul(tf.cast(input_tensor, tf.float32), output_weights, transpose_b=True)
+ logits = tf.nn.bias_add(logits, output_bias)
+ log_probs = tf.nn.log_softmax(logits, axis=-1)
+ labels = tf.reshape(labels, [-1])
+ one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
+ per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
+ loss = tf.reduce_mean(per_example_loss)
+ return (loss, per_example_loss, log_probs)
+
+
+def gather_indexes(sequence_tensor, positions):
+ """Gathers the vectors at the specific positions over a minibatch."""
+ sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3)
+ batch_size = sequence_shape[0]
+ seq_length = sequence_shape[1]
+ width = sequence_shape[2]
+
+ flat_offsets = tf.reshape(
+ tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
+ flat_positions = tf.reshape(positions + flat_offsets, [-1])
+ flat_sequence_tensor = tf.reshape(sequence_tensor,
+ [batch_size * seq_length, width])
+ output_tensor = tf.gather(flat_sequence_tensor, flat_positions)
+ return output_tensor
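+
+# Shape sketch (illustrative): with sequence_tensor of shape
+# [batch_size=2, seq_length=3, width=4] and positions = [[0, 2], [1, 1]],
+# flat_positions is [0, 2, 4, 4] and the output has shape [4, 4]
+# (one width-4 vector per requested position).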
+
+
+def input_fn_builder(input_files,
+ batch_size,
+ max_seq_length,
+ max_predictions_per_seq,
+ is_training,
+ num_cpu_threads=4,
+ hvd=None):
+ """Creates an `input_fn` closure to be passed to Estimator."""
+
+ def input_fn():
+ """The actual input function."""
+
+ name_to_features = {
+ "input_ids":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "input_mask":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "segment_ids":
+ tf.FixedLenFeature([max_seq_length], tf.int64),
+ "masked_lm_positions":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
+ "masked_lm_ids":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.int64),
+ "masked_lm_weights":
+ tf.FixedLenFeature([max_predictions_per_seq], tf.float32),
+ "next_sentence_labels":
+ tf.FixedLenFeature([1], tf.int64),
+ }
+
+ # For training, we want a lot of parallel reading and shuffling.
+ # For eval, we want no shuffling and parallel reading doesn't matter.
+ if is_training:
+ d = tf.data.Dataset.from_tensor_slices(tf.constant(input_files))
+ if FLAGS.distributed:
+ rank_size = int(os.getenv('RANK_SIZE'))
+ rank_id = int(os.getenv('RANK_INDEX'))
+ device_id = int(os.getenv('DEVICE_ID'))
+ local_rank = rank_id * 8 + device_id
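+ # assumes 8 devices per server node, so e.g. RANK_INDEX=1 and
+ # DEVICE_ID=3 select shard 11 of the RANK_SIZE shards below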
+ print('RANK_SIZE=', rank_size, ' RANK_ID=', local_rank)
+ d = d.shard(rank_size, local_rank)
+ d = d.repeat()
+ if not FLAGS.npu_bert_debug:
+ d = d.shuffle(buffer_size=len(input_files))
+
+ # `cycle_length` is the number of parallel files that get read.
+ if not FLAGS.npu_bert_debug:
+ #cycle_length = min(num_cpu_threads, len(input_files))
+ cycle_length = min(num_cpu_threads, int(len(input_files)/int(os.getenv('RANK_SIZE'))))
+ else:
+ cycle_length = 1
+
+ # `sloppy` mode means that the interleaving is not exact. This adds
+ # even more randomness to the training pipeline.
+ #d = d.apply(
+ # tf.contrib.data.parallel_interleave(
+ # tf.data.TFRecordDataset,
+ # sloppy=(not FLAGS.npu_bert_debug),
+ # cycle_length=cycle_length))
+ d = d.interleave(
+ tf.data.TFRecordDataset,
+ cycle_length=cycle_length,
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+ if not FLAGS.npu_bert_debug:
+ d = d.shuffle(buffer_size=100)
+ else:
+ d = tf.data.TFRecordDataset(input_files)
+ # Since we evaluate for a fixed number of steps we don't want to encounter
+ # out-of-range exceptions.
+ d = d.repeat()
+
+ # We must `drop_remainder` on training because the device graph requires
+ # fixed-size dimensions. For eval, we assume we are evaluating on the CPU or
+ # GPU and we *don't* want to drop the remainder, otherwise we won't cover
+ # every sample.
+ d = d.apply(
+ tf.contrib.data.map_and_batch(
+ lambda record: _decode_record(record, name_to_features),
+ batch_size=batch_size,
+ num_parallel_batches=num_cpu_threads,
+ drop_remainder=True))
+ return d
+
+ return input_fn
+
+
+def _decode_record(record, name_to_features):
+ """Decodes a record to a TensorFlow example."""
+ example = tf.parse_single_example(record, name_to_features)
+
+ # tf.Example only supports tf.int64, but downstream ops expect tf.int32,
+ # so cast all int64 features to int32.
+ for name in list(example.keys()):
+ t = example[name]
+ if t.dtype == tf.int64:
+ t = tf.to_int32(t)
+ example[name] = t
+
+ return example
+
+
+def main(_):
+ for name, value in FLAGS.__flags.items():
+ print("name:", name, " ", FLAGS[name].value)
+
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ if not FLAGS.do_train and not FLAGS.do_eval:
+ raise ValueError("At least one of `do_train` or `do_eval` must be True.")
+
+ if FLAGS.use_fp16:
+ os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"
+
+ if FLAGS.horovod:
+ import horovod.tensorflow as hvd
+ hvd.init()
+
+ bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
+
+ if FLAGS.npu_gather:
+ if FLAGS.distributed and bert_config.num_hidden_layers == 24:
+ from hccl.split.api import set_split_strategy_by_idx
+ set_split_strategy_by_idx([49, 113, 177, 241, 305, 353, 385, 397])
+ if FLAGS.distributed and bert_config.num_hidden_layers == 12:
+ from hccl.split.api import set_split_strategy_by_idx
+ set_split_strategy_by_idx([8, 56, 104, 152, 200, 205])
+ if FLAGS.distributed and bert_config.num_hidden_layers == 6:
+ from hccl.split.api import set_split_strategy_by_idx
+ set_split_strategy_by_idx([8, 40, 72, 104, 109])
+
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+
+ input_files = []
+ for input_file_dir in FLAGS.input_files_dir.split(","):
+ input_files.extend(tf.gfile.Glob(os.path.join(input_file_dir, "*")))
+
+ input_files.sort()
+ print("Input Files:", input_files)
+
+ if FLAGS.horovod and len(input_files) < hvd.size():
+ raise ValueError("Input Files must be sharded")
+ if FLAGS.use_fp16 and FLAGS.manual_fp16:
+ raise ValueError("AMP and Manual Mixed Precision Training are both activated! Error")
+
+ is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+ dump_config = DumpConfig(enable_dump_debug=FLAGS.over_dump, dump_path=FLAGS.over_dump_path, dump_debug_mode="all")
+ config = tf.ConfigProto()
+ if FLAGS.horovod:
+ config.gpu_options.visible_device_list = str(hvd.local_rank())
+ if hvd.rank() == 0:
+ tf.logging.info("***** Configuaration *****")
+ for key in FLAGS.__flags.keys():
+ tf.logging.info(' {}: {}'.format(key, getattr(FLAGS, key)))
+ tf.logging.info("**************************")
+
+# config.gpu_options.per_process_gpu_memory_fraction = 0.7
+ if FLAGS.use_xla:
+ config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
+ config.graph_options.rewrite_options.memory_optimization = rewriter_config_pb2.RewriterConfig.NO_MEM_OPT
+
+ #run_config = tf.estimator.RunConfig(
+ run_config = NPURunConfig(
+ dump_config=dump_config,
+ model_dir=FLAGS.output_dir,
+ save_summary_steps=0,
+ session_config=config,
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps if not FLAGS.horovod or hvd.rank() == 0 else None,
+ # This variable controls how often estimator reports examples/sec.
+ # Default value is every 100 steps.
+ # When --report_loss is True, we set to very large value to prevent
+ # default info reporting from estimator.
+ # Ideally we should set it to None, but that does not work.
+ log_step_count_steps=1 if FLAGS.report_loss else 100,
+ enable_data_pre_proc=FLAGS.npu_bert_use_tdt,
+ iterations_per_loop=FLAGS.iterations_per_loop,
+ is_tailing_optimization=FLAGS.npu_bert_tail_optimize,
+ hcom_parallel=FLAGS.hcom_parallel)
+
+ if FLAGS.distributed:
+ rank_size = int(os.getenv('RANK_SIZE'))
+ model_fn = model_fn_builder(
+ bert_config=bert_config,
+ init_checkpoint=FLAGS.init_checkpoint,
+ learning_rate=FLAGS.learning_rate * rank_size if FLAGS.distributed else FLAGS.learning_rate,
+ num_train_steps=FLAGS.num_train_steps,
+ num_warmup_steps=FLAGS.num_warmup_steps,
+ use_one_hot_embeddings=False,
+ hvd=None if not FLAGS.horovod else hvd)
+
+ training_hooks = []
+ """
+ if FLAGS.report_loss and (not FLAGS.horovod or hvd.rank() == 0):
+ global_batch_size = FLAGS.train_batch_size * FLAGS.num_accumulation_steps if not FLAGS.horovod else FLAGS.train_batch_size * FLAGS.num_accumulation_steps * hvd.size()
+ training_hooks.append(_LogSessionRunHook(global_batch_size, FLAGS.num_accumulation_steps, FLAGS.display_loss_steps))
+ if FLAGS.horovod and hvd.size() > 1:
+ training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
+ """
+ if FLAGS.report_loss:
+ global_batch_size = FLAGS.train_batch_size * FLAGS.num_accumulation_steps if not FLAGS.distributed else FLAGS.train_batch_size * FLAGS.num_accumulation_steps * rank_size
+ training_hooks.append(_LogSessionRunHook(global_batch_size, FLAGS.num_accumulation_steps, FLAGS.display_loss_steps))
+
+
+ #estimator = tf.estimator.Estimator(
+ estimator = NPUEstimator(
+ model_fn=model_fn,
+ config=run_config,
+ job_start_file=FLAGS.npu_bert_job_start_file)
+
+ if FLAGS.do_train:
+ tf.logging.info("***** Running training *****")
+ tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
+ train_input_fn = input_fn_builder(
+ input_files=input_files,
+ batch_size=FLAGS.train_batch_size,
+ max_seq_length=FLAGS.max_seq_length,
+ max_predictions_per_seq=FLAGS.max_predictions_per_seq,
+ is_training=True,
+ hvd=None if not FLAGS.horovod else hvd)
+
+ estimator.train(input_fn=train_input_fn, hooks=training_hooks, max_steps=FLAGS.num_train_steps)
+
+ if FLAGS.do_eval and (not FLAGS.horovod or hvd.rank() == 0):
+ tf.logging.info("***** Running evaluation *****")
+ tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
+
+ eval_files = []
+ for eval_file_dir in FLAGS.eval_files_dir.split(","):
+ eval_files.extend(tf.gfile.Glob(os.path.join(eval_file_dir, "*")))
+
+ eval_input_fn = input_fn_builder(
+ input_files=eval_files,
+ batch_size=FLAGS.eval_batch_size,
+ max_seq_length=FLAGS.max_seq_length,
+ max_predictions_per_seq=FLAGS.max_predictions_per_seq,
+ is_training=False,
+ hvd=None if not FLAGS.horovod else hvd)
+
+ eval_hooks = [LogEvalRunHook(FLAGS.eval_batch_size)]
+ eval_start_time = time.time()
+ result = estimator.evaluate(
+ input_fn=eval_input_fn, steps=FLAGS.max_eval_steps, hooks=eval_hooks)
+
+ eval_time_elapsed = time.time() - eval_start_time
+ eval_time_wo_overhead = eval_hooks[-1].total_time
+
+ num_sentences = (eval_hooks[-1].count - eval_hooks[-1].skipped) * FLAGS.eval_batch_size
+
+ ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead
+
+ tf.logging.info("-----------------------------")
+ tf.logging.info("Total Inference Time = %0.2f for Sentences = %d", eval_time_elapsed,
+ eval_hooks[-1].count * FLAGS.eval_batch_size)
+ tf.logging.info("Total Inference Time W/O Overhead = %0.2f for Sentences = %d", eval_time_wo_overhead,
+ (eval_hooks[-1].count - eval_hooks[-1].skipped) * FLAGS.eval_batch_size)
+ tf.logging.info("Summary Inference Statistics on EVAL set")
+ tf.logging.info("Batch size = %d", FLAGS.eval_batch_size)
+ tf.logging.info("Sequence Length = %d", FLAGS.max_seq_length)
+ tf.logging.info("Precision = %s", "fp16" if FLAGS.use_fp16 else "fp32")
+ tf.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second)
+ tf.logging.info("-----------------------------")
+
+ output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
+ with tf.gfile.GFile(output_eval_file, "w") as writer:
+ tf.logging.info("***** Eval results *****")
+ for key in sorted(result.keys()):
+ tf.logging.info(" %s = %s", key, str(result[key]))
+ writer.write("%s = %s\n" % (key, str(result[key])))
+
+
+if __name__ == "__main__":
+ flags.mark_flag_as_required("input_files_dir")
+ flags.mark_flag_as_required("eval_files_dir")
+ flags.mark_flag_as_required("bert_config_file")
+ flags.mark_flag_as_required("output_dir")
+ flags.mark_flag_as_required("npu_bert_job_start_file")
+ if FLAGS.use_xla and FLAGS.manual_fp16:
+ print('WARNING! Combining --use_xla with --manual_fp16 may prevent convergence.')
+ print(' This warning message will be removed when the underlying')
+ print(' issues have been fixed and you are running a TF version')
+ print(' that has that fix.')
+ tf.app.run()
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tf_metrics.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tf_metrics.py
new file mode 100644
index 000000000..c27cead9e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tf_metrics.py
@@ -0,0 +1,230 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Multiclass
+from:
+https://github.com/guillaumegenthial/tf_metrics/blob/master/tf_metrics/__init__.py
+
+"""
+
+__author__ = "Guillaume Genthial"
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops.metrics_impl import _streaming_confusion_matrix
+
+
+def precision(labels, predictions, num_classes, pos_indices=None,
+ weights=None, average='micro'):
+ """Multi-class precision metric for Tensorflow
+ Parameters
+ ----------
+ labels : Tensor of tf.int32 or tf.int64
+ The true labels
+ predictions : Tensor of tf.int32 or tf.int64
+ The predictions, same shape as labels
+ num_classes : int
+ The number of classes
+ pos_indices : list of int, optional
+ The indices of the positive classes, default is all
+ weights : Tensor of tf.int32, optional
+ Mask, must be of compatible shape with labels
+ average : str, optional
+ 'micro': counts the total number of true positives, false
+ positives, and false negatives for the classes in
+ `pos_indices` and infers the metric from them.
+ 'macro': will compute the metric separately for each class in
+ `pos_indices` and average. Will not account for class
+ imbalance.
+ 'weighted': will compute the metric separately for each class in
+ `pos_indices` and perform a weighted average by the total
+ number of true labels for each class.
+ Returns
+ -------
+ tuple of (scalar float Tensor, update_op)
+ """
+ cm, op = _streaming_confusion_matrix(
+ labels, predictions, num_classes, weights)
+ pr, _, _ = metrics_from_confusion_matrix(
+ cm, pos_indices, average=average)
+ op, _, _ = metrics_from_confusion_matrix(
+ op, pos_indices, average=average)
+ return (pr, op)
+
+
+def recall(labels, predictions, num_classes, pos_indices=None, weights=None,
+ average='micro'):
+ """Multi-class recall metric for Tensorflow
+ Parameters
+ ----------
+ labels : Tensor of tf.int32 or tf.int64
+ The true labels
+ predictions : Tensor of tf.int32 or tf.int64
+ The predictions, same shape as labels
+ num_classes : int
+ The number of classes
+ pos_indices : list of int, optional
+ The indices of the positive classes, default is all
+ weights : Tensor of tf.int32, optional
+ Mask, must be of compatible shape with labels
+ average : str, optional
+ 'micro': counts the total number of true positives, false
+ positives, and false negatives for the classes in
+ `pos_indices` and infers the metric from them.
+ 'macro': will compute the metric separately for each class in
+ `pos_indices` and average. Will not account for class
+ imbalance.
+ 'weighted': will compute the metric separately for each class in
+ `pos_indices` and perform a weighted average by the total
+ number of true labels for each class.
+ Returns
+ -------
+ tuple of (scalar float Tensor, update_op)
+ """
+ cm, op = _streaming_confusion_matrix(
+ labels, predictions, num_classes, weights)
+ _, re, _ = metrics_from_confusion_matrix(
+ cm, pos_indices, average=average)
+ _, op, _ = metrics_from_confusion_matrix(
+ op, pos_indices, average=average)
+ return (re, op)
+
+
+def f1(labels, predictions, num_classes, pos_indices=None, weights=None,
+ average='micro'):
+ return fbeta(labels, predictions, num_classes, pos_indices, weights,
+ average)
+
+
+def fbeta(labels, predictions, num_classes, pos_indices=None, weights=None,
+ average='micro', beta=1):
+ """Multi-class fbeta metric for Tensorflow
+ Parameters
+ ----------
+ labels : Tensor of tf.int32 or tf.int64
+ The true labels
+ predictions : Tensor of tf.int32 or tf.int64
+ The predictions, same shape as labels
+ num_classes : int
+ The number of classes
+ pos_indices : list of int, optional
+ The indices of the positive classes, default is all
+ weights : Tensor of tf.int32, optional
+ Mask, must be of compatible shape with labels
+ average : str, optional
+ 'micro': counts the total number of true positives, false
+ positives, and false negatives for the classes in
+ `pos_indices` and infers the metric from them.
+ 'macro': will compute the metric separately for each class in
+ `pos_indices` and average. Will not account for class
+ imbalance.
+ 'weighted': will compute the metric separately for each class in
+ `pos_indices` and perform a weighted average by the total
+ number of true labels for each class.
+ beta : int, optional
+ Weight of precision in harmonic mean
+ Returns
+ -------
+ tuple of (scalar float Tensor, update_op)
+ """
+ cm, op = _streaming_confusion_matrix(
+ labels, predictions, num_classes, weights)
+ _, _, fbeta = metrics_from_confusion_matrix(
+ cm, pos_indices, average=average, beta=beta)
+ _, _, op = metrics_from_confusion_matrix(
+ op, pos_indices, average=average, beta=beta)
+ return (fbeta, op)
+
+
+def safe_div(numerator, denominator):
+ """Safe division, return 0 if denominator is 0"""
+ numerator, denominator = tf.to_float(numerator), tf.to_float(denominator)
+ zeros = tf.zeros_like(numerator, dtype=numerator.dtype)
+ denominator_is_zero = tf.equal(denominator, zeros)
+ return tf.where(denominator_is_zero, zeros, numerator / denominator)
+
+
+def pr_re_fbeta(cm, pos_indices, beta=1):
+ """Uses a confusion matrix to compute precision, recall and fbeta"""
+ num_classes = cm.shape[0]
+ neg_indices = [i for i in range(num_classes) if i not in pos_indices]
+ cm_mask = np.ones([num_classes, num_classes])
+ cm_mask[neg_indices, neg_indices] = 0
+ diag_sum = tf.reduce_sum(tf.diag_part(cm * cm_mask))
+
+ cm_mask = np.ones([num_classes, num_classes])
+ cm_mask[:, neg_indices] = 0
+ tot_pred = tf.reduce_sum(cm * cm_mask)
+
+ cm_mask = np.ones([num_classes, num_classes])
+ cm_mask[neg_indices, :] = 0
+ tot_gold = tf.reduce_sum(cm * cm_mask)
+
+ pr = safe_div(diag_sum, tot_pred)
+ re = safe_div(diag_sum, tot_gold)
+ fbeta = safe_div((1. + beta**2) * pr * re, beta**2 * pr + re)
+
+ return pr, re, fbeta
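+
+# Worked example (illustrative): for cm = [[5, 1], [2, 4]] and
+# pos_indices = [1]: diag_sum = 4, tot_pred = 5, tot_gold = 6, so
+# pr = 0.8, re = 4/6 and fbeta (beta=1) is about 0.727.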
+
+
+def metrics_from_confusion_matrix(cm, pos_indices=None, average='micro',
+ beta=1):
+ """Precision, Recall and F1 from the confusion matrix
+ Parameters
+ ----------
+ cm : tf.Tensor of type tf.int32, of shape (num_classes, num_classes)
+ The streaming confusion matrix.
+ pos_indices : list of int, optional
+ The indices of the positive classes
+ beta : int, optional
+ Weight of precision in harmonic mean
+ average : str, optional
+ 'micro', 'macro' or 'weighted'
+ """
+ num_classes = cm.shape[0]
+ if pos_indices is None:
+ pos_indices = [i for i in range(num_classes)]
+
+ if average == 'micro':
+ return pr_re_fbeta(cm, pos_indices, beta)
+ elif average in {'macro', 'weighted'}:
+ precisions, recalls, fbetas, n_golds = [], [], [], []
+ for idx in pos_indices:
+ pr, re, fbeta = pr_re_fbeta(cm, [idx], beta)
+ precisions.append(pr)
+ recalls.append(re)
+ fbetas.append(fbeta)
+ cm_mask = np.zeros([num_classes, num_classes])
+ cm_mask[idx, :] = 1
+ n_golds.append(tf.to_float(tf.reduce_sum(cm * cm_mask)))
+
+ if average == 'macro':
+ pr = tf.reduce_mean(precisions)
+ re = tf.reduce_mean(recalls)
+ fbeta = tf.reduce_mean(fbetas)
+ return pr, re, fbeta
+ if average == 'weighted':
+ n_gold = tf.reduce_sum(n_golds)
+ pr_sum = sum(p * n for p, n in zip(precisions, n_golds))
+ pr = safe_div(pr_sum, n_gold)
+ re_sum = sum(r * n for r, n in zip(recalls, n_golds))
+ re = safe_div(re_sum, n_gold)
+ fbeta_sum = sum(f * n for f, n in zip(fbetas, n_golds))
+ fbeta = safe_div(fbeta_sum, n_gold)
+ return pr, re, fbeta
+
+ else:
+ raise NotImplementedError()
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tokenization.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tokenization.py
new file mode 100644
index 000000000..6e53ce767
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/tokenization.py
@@ -0,0 +1,451 @@
+# coding=utf-8
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import unicodedata
+import six
+import tensorflow as tf
+import re
+import os
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+PRETRAINED_VOCAB_ARCHIVE_MAP = {
+ 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
+ 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
+ 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
+ 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
+ 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
+ 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
+ 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
+}
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+ """Checks whether the casing config is consistent with the checkpoint name."""
+
+ # The casing has to be passed in by the user and there is no explicit check
+ # as to whether it matches the checkpoint. The casing information probably
+ # should have been stored in the bert_config.json file, but it's not, so
+ # we have to heuristically detect it to validate.
+
+ if not init_checkpoint:
+ return
+
+ m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+ if m is None:
+ return
+
+ model_name = m.group(1)
+
+ lower_models = [
+ "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+ "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
+ ]
+
+ cased_models = [
+ "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+ "multi_cased_L-12_H-768_A-12"
+ ]
+
+ is_bad_config = False
+ if model_name in lower_models and not do_lower_case:
+ is_bad_config = True
+ actual_flag = "False"
+ case_name = "lowercased"
+ opposite_flag = "True"
+
+ if model_name in cased_models and do_lower_case:
+ is_bad_config = True
+ actual_flag = "True"
+ case_name = "cased"
+ opposite_flag = "False"
+
+ if is_bad_config:
+ raise ValueError(
+ "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+ "However, `%s` seems to be a %s model, so you "
+ "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+ "how the model was pre-training. If this error is wrong, please "
+ "just comment out this check." % (actual_flag, init_checkpoint,
+ model_name, case_name, opposite_flag))
+
+
+
+def convert_to_unicode(text):
+ """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, bytes):
+ return text.decode("utf-8", "ignore")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+
+
+def printable_text(text):
+ """Returns text encoded in a way suitable for print or `tf.logging`."""
+
+ # These functions want `str` for both Python2 and Python3, but in one case
+ # it's a Unicode string and in the other it's a byte string.
+ if isinstance(text, str):
+ return text
+ elif isinstance(text, bytes):
+ return text.decode("utf-8", "ignore")
+ else:
+ raise ValueError("Unsupported string type: %s" % (type(text)))
+
+
+def load_vocab(vocab_file):
+ """Loads a vocabulary file into a dictionary."""
+ vocab = collections.OrderedDict()
+ index = 0
+ with open(vocab_file, "r") as reader:
+ while True:
+ token = convert_to_unicode(reader.readline())
+ if not token:
+ break
+ token = token.strip()
+ vocab[token] = index
+ index += 1
+ return vocab
+
+
+def convert_by_vocab(vocab, items):
+ """Converts a sequence of [tokens|ids] using the vocab."""
+ output = []
+ for item in items:
+ output.append(vocab[item])
+ return output
+
+
+def whitespace_tokenize(text):
+ """Runs basic whitespace cleaning and splitting on a peice of text."""
+ text = text.strip()
+ if not text:
+ return []
+ tokens = text.split()
+ return tokens
+
+
+class FullTokenizer(object):
+ """Runs end-to-end tokenziation."""
+
+ def __init__(self, vocab_file, do_lower_case=True):
+ self.vocab = load_vocab(vocab_file)
+ self.inv_vocab = {v: k for k, v in self.vocab.items()}
+ self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+ self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+ def tokenize(self, text):
+ split_tokens = []
+ for token in self.basic_tokenizer.tokenize(text):
+ for sub_token in self.wordpiece_tokenizer.tokenize(token):
+ split_tokens.append(sub_token)
+
+ return split_tokens
+
+ def convert_tokens_to_ids(self, tokens):
+ return convert_by_vocab(self.vocab, tokens)
+
+ def convert_ids_to_tokens(self, ids):
+ return convert_by_vocab(self.inv_vocab, ids)
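+
+# Usage sketch (vocab path and output tokens are illustrative):
+#   tokenizer = FullTokenizer(vocab_file="vocab.txt", do_lower_case=True)
+#   tokens = tokenizer.tokenize("unaffable")  # e.g. ["un", "##aff", "##able"]
+#   ids = tokenizer.convert_tokens_to_ids(tokens)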
+
+
+class BertTokenizer(object):
+ """Runs end-to-end tokenization: punctuation splitting + wordpiece"""
+
+ def __init__(self, vocab_file, do_lower_case=True):
+ if not os.path.isfile(vocab_file):
+ raise ValueError(
+ "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
+ "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
+ self.vocab = load_vocab(vocab_file)
+ self.ids_to_tokens = collections.OrderedDict(
+ [(ids, tok) for tok, ids in self.vocab.items()])
+ self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+ self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+ def tokenize(self, text):
+ split_tokens = []
+ for token in self.basic_tokenizer.tokenize(text):
+ for sub_token in self.wordpiece_tokenizer.tokenize(token):
+ split_tokens.append(sub_token)
+ return split_tokens
+
+ def convert_tokens_to_ids(self, tokens):
+ """Converts a sequence of tokens into ids using the vocab."""
+ ids = []
+ for token in tokens:
+ ids.append(self.vocab[token])
+ return ids
+
+ def convert_ids_to_tokens(self, ids):
+ """Converts a sequence of ids in wordpiece tokens using the vocab."""
+ tokens = []
+ for i in ids:
+ tokens.append(self.ids_to_tokens[i])
+ return tokens
+
+ @classmethod
+ def from_pretrained(cls, pretrained_model_name, do_lower_case=True):
+ """
+ Instantiate a PreTrainedBertModel from a pre-trained model file.
+ Download and cache the pre-trained model file if needed.
+ """
+ if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP:
+ vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name]
+ else:
+ vocab_file = pretrained_model_name
+ # redirect to the cache, if necessary
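+ # NOTE: cached_path is not defined in this module; it is expected to be
+ # supplied by a file-caching helper as in the upstream HuggingFace code.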
+ try:
+ resolved_vocab_file = cached_path(vocab_file)
+ if resolved_vocab_file == vocab_file:
+ logger.info("loading vocabulary file {}".format(vocab_file))
+ else:
+ logger.info("loading vocabulary file {} from cache at {}".format(
+ vocab_file, resolved_vocab_file))
+ # Instantiate tokenizer.
+ tokenizer = cls(resolved_vocab_file, do_lower_case)
+ except FileNotFoundError:
+ logger.error(
+ "Model name '{}' was not found in model name list ({}). "
+ "We assumed '{}' was a path or url but couldn't find any file "
+ "associated to this path or url.".format(
+ pretrained_model_name,
+ ', '.join(PRETRAINED_VOCAB_ARCHIVE_MAP.keys()),
+ pretrained_model_name))
+ tokenizer = None
+ return tokenizer
+
+
+class BasicTokenizer(object):
+ """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+ def __init__(self, do_lower_case=True):
+ """Constructs a BasicTokenizer.
+
+ Args:
+ do_lower_case: Whether to lower case the input.
+ """
+ self.do_lower_case = do_lower_case
+
+ def tokenize(self, text):
+ """Tokenizes a piece of text."""
+ text = convert_to_unicode(text)
+ text = self._clean_text(text)
+ # This was added on November 1st, 2018 for the multilingual and Chinese
+ # models. This is also applied to the English models now, but it doesn't
+ # matter since the English models were not trained on any Chinese data
+ # and generally don't have any Chinese data in them (there are Chinese
+ # characters in the vocabulary because the English Wikipedia does contain
+ # some Chinese words).
+ text = self._tokenize_chinese_chars(text)
+ orig_tokens = whitespace_tokenize(text)
+ split_tokens = []
+ for token in orig_tokens:
+ if self.do_lower_case:
+ token = token.lower()
+ token = self._run_strip_accents(token)
+ split_tokens.extend(self._run_split_on_punc(token))
+
+ output_tokens = whitespace_tokenize(" ".join(split_tokens))
+ return output_tokens
+
+ def _run_strip_accents(self, text):
+ """Strips accents from a piece of text."""
+ text = unicodedata.normalize("NFD", text)
+ output = []
+ for char in text:
+ cat = unicodedata.category(char)
+ if cat == "Mn":
+ continue
+ output.append(char)
+ return "".join(output)
+
+ def _run_split_on_punc(self, text):
+ """Splits punctuation on a piece of text."""
+ chars = list(text)
+ i = 0
+ start_new_word = True
+ output = []
+ while i < len(chars):
+ char = chars[i]
+ if _is_punctuation(char):
+ output.append([char])
+ start_new_word = True
+ else:
+ if start_new_word:
+ output.append([])
+ start_new_word = False
+ output[-1].append(char)
+ i += 1
+
+ return ["".join(x) for x in output]
+
+ def _tokenize_chinese_chars(self, text):
+ """Adds whitespace around any CJK character."""
+ output = []
+ for char in text:
+ cp = ord(char)
+ if self._is_chinese_char(cp):
+ output.append(" ")
+ output.append(char)
+ output.append(" ")
+ else:
+ output.append(char)
+ return "".join(output)
+
+ def _is_chinese_char(self, cp):
+ """Checks whether CP is the codepoint of a CJK character."""
+ # This defines a "chinese character" as anything in the CJK Unicode block:
+ # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+ #
+ # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+ # despite its name. The modern Korean Hangul alphabet is a different block,
+ # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+ # space-separated words, so they are not treated specially and handled
+ # like all of the other languages.
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
+ (cp >= 0x3400 and cp <= 0x4DBF) or #
+ (cp >= 0x20000 and cp <= 0x2A6DF) or #
+ (cp >= 0x2A700 and cp <= 0x2B73F) or #
+ (cp >= 0x2B740 and cp <= 0x2B81F) or #
+ (cp >= 0x2B820 and cp <= 0x2CEAF) or
+ (cp >= 0xF900 and cp <= 0xFAFF) or #
+ (cp >= 0x2F800 and cp <= 0x2FA1F)): #
+ return True
+
+ return False
+
+ def _clean_text(self, text):
+ """Performs invalid character removal and whitespace cleanup on text."""
+ output = []
+ for char in text:
+ cp = ord(char)
+ if cp == 0 or cp == 0xfffd or _is_control(char):
+ continue
+ if _is_whitespace(char):
+ output.append(" ")
+ else:
+ output.append(char)
+ return "".join(output)
+
+
+class WordpieceTokenizer(object):
+ """Runs WordPiece tokenization."""
+
+ def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
+ self.vocab = vocab
+ self.unk_token = unk_token
+ self.max_input_chars_per_word = max_input_chars_per_word
+
+ def tokenize(self, text):
+ """Tokenizes a piece of text into its word pieces.
+
+ This uses a greedy longest-match-first algorithm to perform tokenization
+ using the given vocabulary.
+
+ For example:
+ input = "unaffable"
+ output = ["un", "##aff", "##able"]
+
+ Args:
+ text: A single token or whitespace separated tokens. This should have
+ already been passed through `BasicTokenizer`.
+
+ Returns:
+ A list of wordpiece tokens.
+ """
+
+ text = convert_to_unicode(text)
+
+ output_tokens = []
+ for token in whitespace_tokenize(text):
+ chars = list(token)
+ if len(chars) > self.max_input_chars_per_word:
+ output_tokens.append(self.unk_token)
+ continue
+
+ is_bad = False
+ start = 0
+ sub_tokens = []
+ while start < len(chars):
+ end = len(chars)
+ cur_substr = None
+ while start < end:
+ substr = "".join(chars[start:end])
+ if start > 0:
+ substr = "##" + substr
+ if substr in self.vocab:
+ cur_substr = substr
+ break
+ end -= 1
+ if cur_substr is None:
+ is_bad = True
+ break
+ sub_tokens.append(cur_substr)
+ start = end
+
+ if is_bad:
+ output_tokens.append(self.unk_token)
+ else:
+ output_tokens.extend(sub_tokens)
+ return output_tokens
+
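+# A minimal usage sketch (editorial note) of the greedy longest-match-first
+# algorithm above, assuming a toy vocabulary that contains these pieces:
+#
+#   vocab = {"un": 0, "##aff": 1, "##able": 2, "[UNK]": 3}
+#   WordpieceTokenizer(vocab=vocab).tokenize("unaffable")
+#   # -> ["un", "##aff", "##able"]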
+
+def _is_whitespace(char):
+ """Checks whether `char` is a whitespace character."""
+ # \t, \n, and \r are technically control characters but we treat them
+ # as whitespace since they are generally considered as such.
+ if char == " " or char == "\t" or char == "\n" or char == "\r":
+ return True
+ cat = unicodedata.category(char)
+ if cat == "Zs":
+ return True
+ return False
+
+
+def _is_control(char):
+ """Checks whether `char` is a control character."""
+ # These are technically control characters but we count them as whitespace
+ # characters.
+ if char == "\t" or char == "\n" or char == "\r":
+ return False
+ cat = unicodedata.category(char)
+ if cat.startswith("C"):
+ return True
+ return False
+
+
+def _is_punctuation(char):
+ """Checks whether `char` is a punctuation character."""
+ cp = ord(char)
+ # We treat all non-letter/number ASCII as punctuation.
+ # Characters such as "^", "$", and "`" are not in the Unicode
+ # Punctuation class but we treat them as punctuation anyways, for
+ # consistency.
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+ (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+ return True
+ cat = unicodedata.category(char)
+ if cat.startswith("P"):
+ return True
+ return False
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/utils.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/utils.py
new file mode 100644
index 000000000..56aa173a4
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/utils.py
@@ -0,0 +1,76 @@
+# coding=utf-8
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+import time
+
+# report latency and throughput during eval
+class LogEvalRunHook(tf.train.SessionRunHook):
+ def __init__(self, global_batch_size, hvd_rank=-1):
+ self.global_batch_size = global_batch_size
+ self.hvd_rank = hvd_rank
+ self.total_time = 0.0
+ self.count = 0
+ self.skipped = 0
+ self.time_list = []
+
+ def before_run(self, run_context):
+ self.t0 = time.time()
+
+ def after_run(self, run_context, run_values):
+ elapsed_secs = time.time() - self.t0
+ self.count += 1
+
+ # Remove the first 2 (arbitrary) startup iterations from the perf evaluation
+ if self.count <= 2:
+ print("Skipping time record for ", self.count, " due to overhead")
+ self.skipped += 1
+ else:
+ self.time_list.append(elapsed_secs)
+ self.total_time += elapsed_secs
+
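+# A minimal helper sketch (editorial addition, not used by the original
+# scripts): one way to summarize the per-step timings gathered by
+# LogEvalRunHook after evaluation finishes.
+def summarize_eval_hook(hook):
+    """Returns (throughput in samples/sec, mean latency in ms)."""
+    if not hook.time_list:
+        return 0.0, 0.0
+    mean_secs = sum(hook.time_list) / len(hook.time_list)
+    return hook.global_batch_size / mean_secs, mean_secs * 1000.0
+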
+# report throughput during training
+class LogTrainRunHook(tf.train.SessionRunHook):
+ def __init__(self, global_batch_size, hvd_rank=-1, save_checkpoints_steps=1000):
+ self.global_batch_size = global_batch_size
+ self.hvd_rank = hvd_rank
+ self.save_checkpoints_steps = save_checkpoints_steps
+
+ self.total_time = 0.0
+ self.count = 0 # Holds number of iterations, including skipped iterations for fp16 loss scaling
+
+ def after_create_session(self, session, coord):
+ self.init_global_step = session.run(tf.train.get_global_step())
+
+ def before_run(self, run_context):
+ self.t0 = time.time()
+ return tf.train.SessionRunArgs(
+ fetches=['step_update:0'])
+
+ def after_run(self, run_context, run_values):
+ elapsed_secs = time.time() - self.t0
+ self.global_step = run_values.results[0]
+ self.count += 1
+
+ # Removing first step + first two steps after every checkpoint save
+ if (self.global_step - self.init_global_step) % self.save_checkpoints_steps <= 1:
+ print("Skipping time record for ", self.global_step, " due to checkpoint-saving/warmup overhead")
+ else:
+ self.total_time += elapsed_secs
+
+ def end(self, session):
+ num_global_steps = self.global_step - self.init_global_step
+
+ self.skipped = (num_global_steps // self.save_checkpoints_steps) * 2 + \
+ min(2, num_global_steps % self.save_checkpoints_steps) - 1
\ No newline at end of file
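+
+# A minimal usage sketch (editorial addition, not called by the original
+# scripts): combining the fields set by LogTrainRunHook into a throughput
+# figure. For example, num_global_steps=2500 with save_checkpoints_steps=1000
+# gives skipped = 2*2 + min(2, 500) - 1 = 5 excluded steps.
+def train_throughput(hook, num_global_steps):
+    """Sequences/sec over the steps that were actually timed."""
+    timed_steps = num_global_steps - hook.skipped
+    return timed_steps * hook.global_batch_size / hook.total_time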
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh
new file mode 100644
index 000000000..4cd9fbd51
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bert-base_ID0060_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=128
+# Training steps
+train_steps=1000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID0060_BertBase_full_1p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
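+# Example invocation (editorial note): data_path points at the eval dataset
+# directory; training records are read from its sibling wikipedia_128 dir.
+#   bash test/train_ID0060_BertBase_full_1p.sh --data_path=/path/to/eval_data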
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $ASCEND_DEVICE_ID"
+ export RANK_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/../wikipedia_128 \
+ --eval_files_dir=${data_path} \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`echo "scale=3; $step_per_sec * $batch_size"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+## Collect performance data
+# Throughput; no modification needed
+ActualFPS=${FPS}
+# Training time per iteration; no modification needed
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
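+# Worked example (editorial note): at 8 global_step/sec with batch_size=128,
+# FPS = 8*128 = 1024 images/sec and TrainingTime = 128*1*1000/1024 = 125.00 ms per step.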
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh
new file mode 100644
index 000000000..7af34ce6f
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bert-base_ID0060_for_TensorFlow"
+# Training epochs
+train_epochs=
+# Training batch_size
+batch_size=128
+# Training steps
+train_steps=500000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID0060_BertBase_full_8p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Bind CPU cores; remove this for models that do not need core binding, review per model
+ corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+ let a=RANK_ID*${corenum}/${RANK_SIZE}
+ let b=RANK_ID+1
+ let c=b*${corenum}/${RANK_SIZE}-1
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+ nohup python3.7 ${cur_path}/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/../wikipedia_128 \
+ --eval_files_dir=${data_path} \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`echo "scale=3; $step_per_sec * $batch_size * 8"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+train_accuracy=`grep "] loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," 'END {print $1}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+## Collect performance data
+# Throughput; no modification needed
+ActualFPS=${FPS}
+# Training time per iteration; no modification needed
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "] loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+TrainAccuracy=${train_accuracy}
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh
new file mode 100644
index 000000000..4ced05dec
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bert-base_ID0060_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=128
+# Training steps
+train_steps=1000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID0060_BertBase_performance_1p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $ASCEND_DEVICE_ID"
+ export RANK_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/../wikipedia_128 \
+ --eval_files_dir=${data_path} \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`echo "scale=3; $step_per_sec * $batch_size"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data
+# Throughput; no modification needed
+ActualFPS=${FPS}
+# Training time per iteration; no modification needed
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh
new file mode 100644
index 000000000..8da58f498
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Bert-base_ID0060_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=128
+# Training steps
+train_steps=1000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID0060_BertBase_performance_8p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Bind CPU cores; remove this for models that do not need core binding, review per model
+ corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+ let a=RANK_ID*${corenum}/${RANK_SIZE}
+ let b=RANK_ID+1
+ let c=b*${corenum}/${RANK_SIZE}-1
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+ nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/../wikipedia_128 \
+ --eval_files_dir=${data_path} \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`echo "scale=3; $step_per_sec * $batch_size * 8"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data
+# Throughput; no modification needed
+ActualFPS=${FPS}
+# Training time per iteration; no modification needed
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "] loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh
new file mode 100644
index 000000000..30f574bbc
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-128_ID3067_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=24
+# Training steps
+train_steps=100000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID3067_BertLarge-128_full_1p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $ASCEND_DEVICE_ID"
+ export RANK_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=10000 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/wikipedia_128 \
+ --eval_files_dir=${data_path}/cn-wiki-128 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=1000 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $ActualFPS"
+
+# Output training accuracy; review and modify per model
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh
new file mode 100644
index 000000000..1a13b90d1
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-128_ID3067_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=24
+# Training steps
+train_steps=32000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID3067_BertLarge-128_full_8p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Bind CPU cores; remove this for models that do not need core binding, review per model
+ corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+ let a=RANK_ID*${corenum}/${RANK_SIZE}
+ let b=RANK_ID+1
+ let c=b*${corenum}/${RANK_SIZE}-1
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+ nohup python3.7 ${cur_path}/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=1000 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/wikipedia_128 \
+ --eval_files_dir=${data_path}/cn-wiki-128 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=1000 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $ActualFPS"
+
+# Output training accuracy; review and modify per model
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
new file mode 100644
index 000000000..343ee0ffc
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-128_ID3067_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=24
+# Training steps
+train_steps=100
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID3067_BertLarge-128_performance_1p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $ASCEND_DEVICE_ID"
+ export RANK_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/wikipedia_128 \
+ --eval_files_dir=${data_path}/cn-wiki-128 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=10 \
+ --save_checkpoints_steps=100 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $ActualFPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh
new file mode 100644
index 000000000..aed3d0c4f
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="BertLarge-128_ID3067_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=24
+# Training steps
+train_steps=1000
+# Learning rate
+learning_rate=
+
+# Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage: ./train_ID3067_BertLarge-128_performance_8p.sh --data_path=<path>"
+ echo " "
+ echo "parameter explanation:
+ --precision_mode precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump whether to enable overflow detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 10
+ --profiling whether to enable profiling for performance debugging, default is False
+ --autotune whether to enable autotune, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables; no modification needed
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=$RANK_ID
+
+ # Create the DeviceID output directory; no modification needed
+ if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+
+ # Bind CPU cores; remove this for models that do not need core binding, review per model
+ corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+ let a=RANK_ID*${corenum}/${RANK_SIZE}
+ let b=RANK_ID+1
+ let c=b*${corenum}/${RANK_SIZE}-1
+
+ # Run the training script; the following arguments need no modification, others should be reviewed per model
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+ nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+ --max_seq_length=128 \
+ --max_predictions_per_seq=20 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/wikipedia_128 \
+ --eval_files_dir=${data_path}/cn-wiki-128 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+# Print; no modification needed
+echo "Final Performance images/sec : $ActualFPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability and accuracy monitoring results
+# Training case info; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log to train_${CaseName}_loss.txt; review per model
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key info to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
new file mode 100644
index 000000000..386935e94
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertLarge-512_ID3068_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=24
+#Training steps
+train_steps=100000
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3068_BertLarge-512_full_1p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
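+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.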
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=1000 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=1000 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
new file mode 100644
index 000000000..9736babc1
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertLarge-512_ID3068_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=24
+#Training steps
+train_steps=32000
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3068_BertLarge-512_full_8p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Core binding; remove for models that do not need it; adjust per model review
+    corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+    let a=RANK_ID*${corenum}/${RANK_SIZE}
+    let b=RANK_ID+1
+    let c=b*${corenum}/${RANK_SIZE}-1
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
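+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.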
+    nohup ${bind_core} python3.7 ${cur_path}/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=1000 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=1000 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
new file mode 100644
index 000000000..234a4d1f8
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertLarge-512_ID3068_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=24
+#Training steps
+train_steps=100
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3068_BertLarge-512_performance_1p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
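+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.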
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=0 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=10 \
+ --save_checkpoints_steps=100 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
new file mode 100644
index 000000000..6940fac80
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertLarge-512_ID3068_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=24
+#Training steps
+train_steps=100
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3068_BertLarge-512_performance_8p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Core binding; remove for models that do not need it; adjust per model review
+    corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+    let a=RANK_ID*${corenum}/${RANK_SIZE}
+    let b=RANK_ID+1
+    let c=b*${corenum}/${RANK_SIZE}-1
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
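+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.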
+    nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=10 \
+ --save_checkpoints_steps=100 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --init_loss_scale_value=1 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
new file mode 100644
index 000000000..1553eaeca
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertBase-512_ID3069_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=64
+#Training steps
+train_steps=100000
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3069_BertBase-512_full_1p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
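+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.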
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=1000 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=1000 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
new file mode 100644
index 000000000..ce65192e1
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertBase-512_ID3069_for_TensorFlow"
+#Training epochs
+train_epochs=
+#Training batch_size
+batch_size=64
+#Training steps
+train_steps=32000
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3069_BertBase-512_full_8p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Core binding; remove for models that do not need it; adjust per model review
+    corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+    let a=RANK_ID*${corenum}/${RANK_SIZE}
+    let b=RANK_ID+1
+    let c=b*${corenum}/${RANK_SIZE}-1
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
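+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.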
+    nohup ${bind_core} python3.7 ${cur_path}/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=1000 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=1000 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
new file mode 100644
index 000000000..8d0092bb7
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertBase-512_ID3069_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=64
+#Training steps
+train_steps=1000
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3069_BertBase-512_performance_1p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
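+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.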
+ nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=0 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=False \
+ --npu_bert_loss_scale=0 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+TrainAccuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+echo "Final Train Accuracy : ${TrainAccuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
new file mode 100644
index 000000000..7a8c35f35
--- /dev/null
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+#Current path; no change needed
+cur_path=`pwd`
+
+#Collective communication parameters; no change needed
+export RANK_SIZE=8
+export JOB_ID=99990001
+export RANK_TABLE_FILE=${cur_path}/../configs/8p.json
+RANK_ID_START=0
+
+# Dataset path; keep empty, no change needed
+data_path=""
+
+#Basic parameters; adjust per model review
+#Network name, same as the directory name
+Network="BertBase-512_ID3069_for_TensorFlow"
+#Training epochs
+train_epochs=1
+#Training batch_size
+batch_size=64
+#Training steps
+train_steps=1000
+#Learning rate
+learning_rate=
+
+#Debug parameters; precision_mode needs review per model
+#precision_mode="allow_mix_precision"
+#Fixed parameters; no change needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no change needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_ID3069_BertBase-512_performance_8p.sh <args>"
+    echo " "
+    echo "parameter explanation:
+    --precision_mode         precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path of training
+    -h/--help                show this help message
+    "
+    exit 1
+fi
+
+#Parameter validation; no change needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#Check that data_path was passed in; no change needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+#Training start time; no change needed
+start_time=$(date +%s)
+#Enter the training script directory; adjust per model review
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no change needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+
+    #Create the DeviceID output directory; no change needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Core binding; remove for models that do not need it; adjust per model review
+    corenum=`cat /proc/cpuinfo |grep "processor"|wc -l`
+    let a=RANK_ID*${corenum}/${RANK_SIZE}
+    let b=RANK_ID+1
+    let c=b*${corenum}/${RANK_SIZE}-1
+
+    #Run the training script; the arguments below need no change, adjust the others per model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
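+    # NOTE: 512/80 below are assumed to match how the en_wiki_len512 TFRecords were created
+    # (max_predictions_per_seq=80 follows the usual 0.15*512 convention); verify against the dataset.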
+ nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+    --max_seq_length=512 \
+    --max_predictions_per_seq=80 \
+ --train_batch_size=${batch_size} \
+ --learning_rate=1e-4 \
+ --num_warmup_steps=100 \
+ --num_train_steps=${train_steps} \
+ --optimizer_type=adam \
+ --manual_fp16=True \
+ --use_fp16_cls=True \
+ --input_files_dir=${data_path}/en_wiki_len512 \
+ --eval_files_dir=${data_path}/en_wiki_len512 \
+ --npu_bert_debug=False \
+ --npu_bert_use_tdt=True \
+ --do_train=True \
+ --num_accumulation_steps=1 \
+ --npu_bert_job_start_file= \
+ --iterations_per_loop=100 \
+ --save_checkpoints_steps=1000 \
+ --npu_bert_clip_by_global_norm=False \
+ --distributed=True \
+ --npu_bert_tail_optimize=True \
+ --npu_bert_loss_scale=0 \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no change needed
+echo "------------------ Final result ------------------"
+#Output performance FPS; adjust per model review
+ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
+TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+#Print; no change needed
+echo "Final Performance sequences/sec : $ActualFPS"
+
+#Output training accuracy; adjust per model review
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+#Print; no change needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Summary of results for stability/accuracy monitoring
+#Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model review
+grep "tensorflow:loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss = " '{print $2}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss value of the last iteration; no change needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
--
Gitee
From 1006d1a3ee449e38cbdbf540bbd7aa701dc73779 Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Wed, 6 Apr 2022 16:12:19 +0800
Subject: [PATCH 02/11] Merge performance modifications
---
.../run_squad.py | 2 +-
.../test/train_ID0495_Bert-Squad_full_8p.sh | 6 +-
.../train_ID3082_BertLarge-Squad_full_8p.sh | 6 +-
.../src/modeling.py | 20 +-
.../src/optimization.py | 194 +++++++++++-------
.../src/run_pretraining.py | 10 +-
...ain_ID3067_BertLarge-128_performance_1p.sh | 2 +-
.../train_ID3068_BertLarge-512_full_1p.sh | 8 +-
.../train_ID3068_BertLarge-512_full_8p.sh | 8 +-
...ain_ID3068_BertLarge-512_performance_1p.sh | 8 +-
...ain_ID3068_BertLarge-512_performance_8p.sh | 10 +-
.../test/train_ID3069_BertBase-512_full_1p.sh | 8 +-
.../test/train_ID3069_BertBase-512_full_8p.sh | 8 +-
...rain_ID3069_BertBase-512_performance_1p.sh | 10 +-
...rain_ID3069_BertBase-512_performance_8p.sh | 8 +-
15 files changed, 182 insertions(+), 126 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
index 11126636d..7a056a808 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
@@ -122,7 +122,7 @@ flags.DEFINE_integer("save_checkpoints_steps", 1000,
"How often to save the model checkpoint.")
flags.DEFINE_integer("num_train_steps", 0,
- "How often to save the model checkpoint.")
+ "How many steps to train.")
flags.DEFINE_integer("iterations_per_loop", 100,
"How many steps to make in each estimator call.")
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
index 7feb417ff..aad9ee57c 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
@@ -113,10 +113,10 @@ do
--max_seq_length=384 \
--doc_stride=128 \
--output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
- wait
- python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
- ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
done
+wait
+ python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
+ ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
wait
#训练结束时间,不需要修改
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
index b86be9a9f..86c54483f 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
@@ -112,10 +112,10 @@ do
--max_seq_length=384 \
--doc_stride=128 \
--output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
- wait
- python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
- ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
done
+wait
+ python3 ${parent_path}/evaluate-v1.1.py $data_path/dataset/dev-v1.1.json \
+ ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt/predictions.json >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
wait
#训练结束时间,不需要修改
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
index 95a8eda8d..c32e2e67e 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/modeling.py
@@ -31,6 +31,7 @@ from gpu_environment import get_custom_getter
from npu_bridge.estimator.npu_unary_ops import npu_unary_ops
from npu_bridge.estimator import npu_ops
+from npu_bridge.estimator.npu_aicore_ops import npu_aicore_ops
class BertConfig(object):
"""Configuration for `BertModel`."""
@@ -287,11 +288,18 @@ def gelu(x):
"""
if tf.flags.FLAGS.npu_bert_fused_gelu:
- return npu_unary_ops.gelu(x)
+ if tf.flags.FLAGS.use_fast_gelu:
+ return npu_aicore_ops.fast_gelu(x)
+ else:
+ return npu_unary_ops.gelu(x)
else:
- cdf = 0.5 * (1.0 + tf.tanh(
- (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
- return x * cdf
+ if tf.flags.FLAGS.use_fast_gelu:
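+      # Numerically stable fast-GELU, equivalent to x * sigmoid(1.702 * x):
+      # the factor f1 = exp(0.851 * (x - |x|)) equals 1 for x >= 0 and rewrites the
+      # sigmoid for x < 0 so that exp() only ever sees non-positive arguments.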
+ f1 = tf.math.exp(0.851 * (x - tf.math.abs(x)))
+ return x / (1 + tf.math.exp(-1.702 * tf.math.abs(x))) * f1
+ else:
+ cdf = 0.5 * (1.0 + tf.tanh(
+ (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
+ return x * cdf
@@ -379,6 +387,8 @@ def dropout(input_tensor, dropout_prob):
if tf.flags.FLAGS.npu_bert_npu_dropout:
output = npu_ops.dropout(input_tensor, 1.0 - dropout_prob)
+ elif tf.flags.FLAGS.npu_bert_npu_dropout_v3:
+ output = npu_aicore_ops.dropout_v3(input_tensor, 1.0 - dropout_prob)
else:
output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob)
return output
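A note on the branch order above: npu_bert_npu_dropout and npu_bert_npu_dropout_v3 both default to True (see the flag definitions added to run_pretraining.py below), so the first branch wins and dropout_v3 only runs when --npu_bert_npu_dropout=False is passed explicitly. A minimal sketch of that precedence:

    def pick_dropout(npu_dropout=True, npu_dropout_v3=True):
        # mirrors the if/elif/else above; returns the op that would run
        if npu_dropout:
            return 'npu_ops.dropout'
        elif npu_dropout_v3:
            return 'npu_aicore_ops.dropout_v3'
        return 'tf.nn.dropout'

    assert pick_dropout() == 'npu_ops.dropout'
    assert pick_dropout(npu_dropout=False) == 'npu_aicore_ops.dropout_v3'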
@@ -391,7 +401,7 @@ def layer_norm(input_tensor, name=None):
from fused_layer_norm import fused_layer_norm
return fused_layer_norm(
inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name,
- use_fused_batch_norm=True)
+ use_fused_batch_norm=False)
except ImportError:
return tf.contrib.layers.layer_norm(
inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py
index 71244aeb1..d4d3d3ed2 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/optimization.py
@@ -28,8 +28,11 @@ from tensorflow.python.ops import math_ops
from npu_bridge.estimator.npu.npu_optimizer import NPUOptimizer
from npu_bridge.estimator.npu import npu_loss_scale_manager as lsm_lib
+from npu_bridge.estimator import npu_ops
+from npu_bridge.tbe import npu_vector_ops
-def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None, manual_fp16=False, use_fp16=False, num_accumulation_steps=1,
+def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None, manual_fp16=False, use_fp16=False,
+ num_accumulation_steps=1,
optimizer_type="adam", allreduce_post_accumulation=False):
"""Creates an optimizer training op."""
global_step = tf.train.get_or_create_global_step()
@@ -40,7 +43,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None,
decayed_learning_rate_at_crossover_point = init_lr * (
(1.0 - float(num_warmup_steps) / float(num_train_steps)) ** power)
else:
- power = 0.5
+ power = 2.0
decayed_learning_rate_at_crossover_point = init_lr
adjusted_init_lr = init_lr * (init_lr / decayed_learning_rate_at_crossover_point)
@@ -92,7 +95,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None,
weight_decay_rate=0.01,
beta_1=0.9,
beta_2=0.999,
- epsilon=1e-4,
+ epsilon=1e-6,
exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
# if hvd is not None and (num_accumulation_steps == 1 or (not allreduce_post_accumulation)):
@@ -267,36 +270,49 @@ class AdamWeightDecayOptimizer(tf.train.Optimizer):
trainable=False,
initializer=tf.zeros_initializer())
- # Standard Adam update.
- next_m = (
- tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
- next_v = (
- tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
- tf.square(grad)))
-
- update = next_m / (tf.sqrt(next_v) + self.epsilon)
-
- # Just adding the square of the weights to the loss function is *not*
- # the correct way of using L2 regularization/weight decay with Adam,
- # since that will interact with the m and v parameters in strange ways.
- #
- # Instead we want to decay the weights in a manner that doesn't interact
- # with the m/v parameters. This is equivalent to adding the square
- # of the weights to the loss with plain (non-momentum) SGD.
- if self._do_use_weight_decay(param_name):
- update += self.weight_decay_rate * param_fp32
-
- update_with_lr = self.learning_rate * update
-
- next_param = param_fp32 - update_with_lr
-
- if has_shadow:
- # cast shadow fp32 weights to fp16 and assign to trainable variable
- param.assign(tf.cast(next_param, param.dtype.base_dtype))
- assignments.extend(
- [param_fp32.assign(next_param),
- m.assign(next_m),
- v.assign(next_v)])
+ if tf.flags.FLAGS.npu_bert_use_fused_adam_momentum:
+ if self._do_use_weight_decay(param_name):
+ assignments.extend(
+ [npu_ops.adam_apply_one_with_decay_assign(grad, v, m, param_fp32, self.learning_rate,
+ self.beta_1, 1.0 - self.beta_1, self.beta_2,
+ 1.0 - self.beta_2,
+ self.weight_decay_rate, self.epsilon)])
+ else:
+ assignments.extend(
+ [npu_ops.adam_apply_one_assign(grad, v, m, param_fp32, self.learning_rate, self.beta_1,
+ 1.0 - self.beta_1, self.beta_2, 1.0 - self.beta_2,
+ self.epsilon)])
+ else:
+ # Standard Adam update.
+ next_m = (
+ tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
+ next_v = (
+ tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
+ tf.square(grad)))
+
+ update = next_m / (tf.sqrt(next_v) + self.epsilon)
+
+ # Just adding the square of the weights to the loss function is *not*
+ # the correct way of using L2 regularization/weight decay with Adam,
+ # since that will interact with the m and v parameters in strange ways.
+ #
+ # Instead we want to decay the weights in a manner that doesn't interact
+ # with the m/v parameters. This is equivalent to adding the square
+ # of the weights to the loss with plain (non-momentum) SGD.
+ if self._do_use_weight_decay(param_name):
+ update += self.weight_decay_rate * param_fp32
+
+ update_with_lr = self.learning_rate * update
+
+ next_param = param_fp32 - update_with_lr
+
+ if has_shadow:
+ # cast shadow fp32 weights to fp16 and assign to trainable variable
+ param.assign(tf.cast(next_param, param.dtype.base_dtype))
+ assignments.extend(
+ [param_fp32.assign(next_param),
+ m.assign(next_m),
+ v.assign(next_v)])
new_global_step = global_step + 1
new_global_step = tf.identity(new_global_step, name='step_update')
assignments.extend([global_step.assign(new_global_step)])
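For orientation, a minimal pure-NumPy sketch of one step of the standard branch, which is presumably what the fused adam_apply_one*_assign kernels are expected to match (names and defaults here are illustrative, not taken from the NPU API):

    import numpy as np

    def adamw_step(param, m, v, grad, lr, b1=0.9, b2=0.999, eps=1e-6, wd=0.01):
        # moment updates, identical to the next_m / next_v lines above
        m = b1 * m + (1.0 - b1) * grad
        v = b2 * v + (1.0 - b2) * grad ** 2
        update = m / (np.sqrt(v) + eps)
        update += wd * param  # decoupled decay; the real code skips LayerNorm/bias params
        return param - lr * update, m, v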
@@ -340,12 +356,17 @@ class LAMBOptimizer(tf.train.Optimizer):
self.beta_2 = beta_2
self.epsilon = epsilon
self.exclude_from_weight_decay = exclude_from_weight_decay
- self.steps = 0
+ # self.steps = 0
def apply_gradients(self, grads_and_vars, global_step=None, name=None,
manual_fp16=False):
"""See base class."""
assignments = []
+ new_global_step = global_step + 1
+ new_global_step = tf.identity(new_global_step, name='step_update')
+ assignments.extend([global_step.assign(new_global_step)])
+ steps = tf.cast(new_global_step, tf.float32)
+
for (grad, param) in grads_and_vars:
with tf.name_scope("apply_one_lamb"):
if grad is None or param is None:
@@ -376,51 +397,68 @@ class LAMBOptimizer(tf.train.Optimizer):
trainable=False,
initializer=tf.zeros_initializer())
- # LAMB update
- next_m = (
- tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
- next_v = (
- tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
- tf.square(grad)))
+ if tf.flags.FLAGS.npu_bert_use_fused_lamb_momentum:
+ with tf.name_scope("npu_bert_use_fused_lamb_momentum"):
+ do_use_weight = self._do_use_weight_decay(param_name)
+ do_use_weight = tf.cast(do_use_weight, tf.float32)
+ update, next_v, next_m = npu_vector_ops.lamb_apply_optimizer_assign(grad, v, m, param_fp32, self.beta_1,
+ 1.0 - self.beta_1, self.beta_2,
+ 1.0 - self.beta_2, self.epsilon,
+ steps, do_use_weight,
+ self.weight_decay_rate)
+ w_norm = linalg_ops.norm(param, ord=2)
+ g_norm = linalg_ops.norm(update, ord=2)
+ next_param = npu_vector_ops.lamb_apply_weight_assign(w_norm, g_norm, self.learning_rate, update, param_fp32)
+ assignments.extend([next_param,])
- self.steps += 1
- beta1_correction = (1 - self.beta_1 ** self.steps)
- beta2_correction = (1 - self.beta_2 ** self.steps)
-
- next_m_unbiased = next_m / beta1_correction
- next_v_unbiased = next_v / beta2_correction
-
- update = next_m_unbiased / (tf.sqrt(next_v_unbiased) + self.epsilon)
-
- # Just adding the square of the weights to the loss function is *not*
- # the correct way of using L2 regularization/weight decay with Adam,
- # since that will interact with the m and v parameters in strange ways.
- #
- # Instead we want to decay the weights in a manner that doesn't interact
- # with the m/v parameters. This is equivalent to adding the square
- # of the weights to the loss with plain (non-momentum) SGD.
- if self._do_use_weight_decay(param_name):
- update += self.weight_decay_rate * param_fp32
-
- w_norm = linalg_ops.norm(param, ord=2)
- g_norm = linalg_ops.norm(update, ord=2)
- ratio = array_ops.where(math_ops.greater(w_norm, 0), array_ops.where(
- math_ops.greater(g_norm, 0), (w_norm / g_norm), 1.0), 1.0)
-
- update_with_lr = ratio * self.learning_rate * update
-
- next_param = param_fp32 - update_with_lr
-
- if has_shadow:
- # cast shadow fp32 weights to fp16 and assign to trainable variable
- param.assign(tf.cast(next_param, param.dtype.base_dtype))
- assignments.extend(
- [param_fp32.assign(next_param),
- m.assign(next_m),
- v.assign(next_v)])
- new_global_step = global_step + 1
- new_global_step = tf.identity(new_global_step, name='step_update')
- assignments.extend([global_step.assign(new_global_step)])
+ else:
+ # LAMB update
+ next_m = (
+ tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
+ next_v = (
+ tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
+ tf.square(grad)))
+
+ # self.steps += 1
+ # beta1_correction = (1 - self.beta_1 ** self.steps)
+ # beta2_correction = (1 - self.beta_2 ** self.steps)
+ beta1_correction = (1 - self.beta_1 ** steps)
+ beta2_correction = (1 - self.beta_2 ** steps)
+
+ next_m_unbiased = next_m / beta1_correction
+ next_v_unbiased = next_v / beta2_correction
+
+ update = next_m_unbiased / (tf.sqrt(next_v_unbiased) + self.epsilon)
+
+ # Just adding the square of the weights to the loss function is *not*
+ # the correct way of using L2 regularization/weight decay with Adam,
+ # since that will interact with the m and v parameters in strange ways.
+ #
+ # Instead we want to decay the weights in a manner that doesn't interact
+ # with the m/v parameters. This is equivalent to adding the square
+ # of the weights to the loss with plain (non-momentum) SGD.
+ if self._do_use_weight_decay(param_name):
+ update += self.weight_decay_rate * param_fp32
+
+ w_norm = linalg_ops.norm(param, ord=2)
+ g_norm = linalg_ops.norm(update, ord=2)
+ ratio = array_ops.where(math_ops.greater(w_norm, 0), array_ops.where(
+ math_ops.greater(g_norm, 0), (w_norm / g_norm), 1.0), 1.0)
+
+ update_with_lr = ratio * self.learning_rate * update
+
+ next_param = param_fp32 - update_with_lr
+
+ if has_shadow:
+ # cast shadow fp32 weights to fp16 and assign to trainable variable
+ param.assign(tf.cast(next_param, param.dtype.base_dtype))
+ assignments.extend(
+ [param_fp32.assign(next_param),
+ m.assign(next_m),
+ v.assign(next_v)])
+ # new_global_step = global_step + 1
+ # new_global_step = tf.identity(new_global_step, name='step_update')
+ # assignments.extend([global_step.assign(new_global_step)])
return tf.group(*assignments, name=name)
def _do_use_weight_decay(self, param_name):
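The substantive fix in this hunk is the bias correction: self.steps was a Python-side counter that only advanced while the graph was being built (once per variable), so at run time the correction terms were constants; deriving steps from the global-step tensor makes them advance once per training step. A minimal NumPy sketch of the step the else-branch now implements:

    import numpy as np

    def lamb_step(param, m, v, grad, lr, step, b1=0.9, b2=0.999, eps=1e-6, wd=0.01):
        m = b1 * m + (1.0 - b1) * grad
        v = b2 * v + (1.0 - b2) * grad ** 2
        m_hat = m / (1.0 - b1 ** step)  # correction driven by the graph step
        v_hat = v / (1.0 - b2 ** step)
        update = m_hat / (np.sqrt(v_hat) + eps)
        update += wd * param  # the real code applies this only where decay is allowed
        w_norm = np.linalg.norm(param)
        g_norm = np.linalg.norm(update)
        ratio = (w_norm / g_norm) if (w_norm > 0 and g_norm > 0) else 1.0
        return param - ratio * lr * update, m, v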
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
index 3250f0a6f..40ede5f5a 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
@@ -152,7 +152,9 @@ flags.DEFINE_integer("npu_bert_loss_scale", 0, "Whether to use loss scale, -1 is
flags.DEFINE_bool("npu_bert_clip_by_global_norm", False, "Use clip_by_global_norm if True, or use clip_by_norm for each gradient")
-flags.DEFINE_bool('npu_bert_npu_dropout', True, 'Whether to use npu defined gelu op')
+flags.DEFINE_bool('npu_bert_npu_dropout', True, 'Whether to use npu defined dropout op')
+
+flags.DEFINE_bool('npu_bert_npu_dropout_v3', True, 'Whether to use npu defined dropout_v3 op')
flags.DEFINE_bool('npu_bert_tail_optimize', False, 'Whether to use npu allreduce tail optimization')
@@ -160,6 +162,12 @@ flags.DEFINE_bool('npu_gather', True, 'Whether to use gather_npu whose backward
flags.DEFINE_bool('hcom_parallel', True, 'Whether to use parallel allreduce')
+flags.DEFINE_bool('use_fast_gelu', True, 'Whether to use fast_gelu instead of gelu')
+
+flags.DEFINE_bool('npu_bert_use_fused_adam_momentum', True, 'Whether to use fused apply and assign in adam')
+
+flags.DEFINE_bool('npu_bert_use_fused_lamb_momentum', True, 'Whether to use fused apply and assign in lamb')
+
# report samples/sec, total loss and learning rate during training
class _LogSessionRunHook(tf.train.SessionRunHook):
def __init__(self, global_batch_size, num_accumulation_steps, display_every=10, hvd_rank=-1):
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
index 343ee0ffc..24659e3dc 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
@@ -103,7 +103,7 @@ do
--max_predictions_per_seq=20 \
--train_batch_size=${batch_size} \
--learning_rate=1e-4 \
- --num_warmup_steps=100 \
+ --num_warmup_steps=0 \
--num_train_steps=${train_steps} \
--optimizer_type=adam \
--manual_fp16=True \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
index 386935e94..bca768465 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
@@ -99,13 +99,13 @@ do
#Run the training script; the arguments below need no modification, the rest should be reviewed and adjusted per model
#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
+ --learning_rate=5e-5 \
--num_warmup_steps=1000 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
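A side note on the 512-token settings here and in the hunks that follow: assuming the usual 15% masking probability from BERT pretraining data generation, max_predictions_per_seq tracks masked_lm_prob * max_seq_length, which is where 76 comes from:

    masked_lm_prob = 0.15
    max_seq_length = 512
    print(int(masked_lm_prob * max_seq_length))  # 76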
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
index 9736babc1..d6e289e65 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
@@ -110,13 +110,13 @@ do
bind_core="taskset -c $a-$c"
fi
nohup python3.7 ${cur_path}/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
+ --learning_rate=5e-5 \
--num_warmup_steps=1000 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
index 234a4d1f8..08292d028 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
@@ -99,13 +99,13 @@ do
#Run the training script; the arguments below need no modification, the rest should be reviewed and adjusted per model
#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
+ --learning_rate=5e-5 \
--num_warmup_steps=0 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
index 6940fac80..543e83acc 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
@@ -110,13 +110,13 @@ do
bind_core="taskset -c $a-$c"
fi
nohup ${bind_core} python3.7 $cur_path/../src/pretrain/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
- --num_warmup_steps=100 \
+ --learning_rate=5e-5 \
+ --num_warmup_steps=0 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
index 1553eaeca..f92ede8b7 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
@@ -99,13 +99,13 @@ do
#Run the training script; the arguments below need no modification, the rest should be reviewed and adjusted per model
#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
+ --learning_rate=5e-5 \
--num_warmup_steps=1000 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
index ce65192e1..ab38d1cbb 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
@@ -110,13 +110,13 @@ do
bind_core="taskset -c $a-$c"
fi
nohup python3.7 ${cur_path}/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
+ --learning_rate=5e-5 \
--num_warmup_steps=1000 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
index 8d0092bb7..e47baac94 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
@@ -99,13 +99,13 @@ do
#Run the training script; the arguments below need no modification, the rest should be reviewed and adjusted per model
#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
nohup python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
- --num_warmup_steps=0 \
+ --learning_rate=5e-5 \
+ --num_warmup_steps=100 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
index 7a8c35f35..9e1197847 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
@@ -110,13 +110,13 @@ do
bind_core="taskset -c $a-$c"
fi
nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
+ --max_seq_length=512 \
+ --max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
- --learning_rate=1e-4 \
+ --learning_rate=5e-5 \
--num_warmup_steps=100 \
--num_train_steps=${train_steps} \
- --optimizer_type=adam \
+ --optimizer_type=lamb \
--manual_fp16=True \
--use_fp16_cls=True \
--input_files_dir=${data_path}/en_wiki_len512 \
--
Gitee
From ffa4476a499627ff184dfd423f1b8ea27104878d Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Wed, 6 Apr 2022 17:04:26 +0800
Subject: [PATCH 03/11] remove bc command
---
.../test/train_ID0495_Bert-Squad_full_1p.sh | 7 +++----
.../test/train_ID0495_Bert-Squad_full_8p.sh | 9 ++++-----
.../test/train_ID0495_Bert-Squad_performance_1p.sh | 7 +++----
.../test/train_ID0495_Bert-Squad_performance_8p.sh | 8 +++-----
.../test/train_ID3082_BertLarge-Squad_full_1p.sh | 7 +++----
.../test/train_ID3082_BertLarge-Squad_full_8p.sh | 8 +++-----
.../test/train_ID3082_BertLarge-Squad_performance_1p.sh | 7 +++----
.../test/train_ID3082_BertLarge-Squad_performance_8p.sh | 8 +++-----
.../test/train_ID0060_BertBase_full_1p.sh | 2 +-
.../test/train_ID0060_BertBase_full_8p.sh | 2 +-
.../test/train_ID0060_BertBase_performance_1p.sh | 2 +-
.../test/train_ID0060_BertBase_performance_8p.sh | 2 +-
.../test/train_ID3067_BertLarge-128_full_1p.sh | 2 +-
.../test/train_ID3067_BertLarge-128_full_8p.sh | 2 +-
.../test/train_ID3067_BertLarge-128_performance_1p.sh | 2 +-
.../test/train_ID3067_BertLarge-128_performance_8p.sh | 2 +-
.../test/train_ID3068_BertLarge-512_full_1p.sh | 2 +-
.../test/train_ID3068_BertLarge-512_full_8p.sh | 2 +-
.../test/train_ID3068_BertLarge-512_performance_1p.sh | 2 +-
.../test/train_ID3068_BertLarge-512_performance_8p.sh | 2 +-
.../test/train_ID3069_BertBase-512_full_1p.sh | 2 +-
.../test/train_ID3069_BertBase-512_full_8p.sh | 2 +-
.../test/train_ID3069_BertBase-512_performance_1p.sh | 2 +-
.../test/train_ID3069_BertBase-512_performance_8p.sh | 2 +-
24 files changed, 41 insertions(+), 52 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
index 9cb721078..0c2f45206 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
@@ -133,11 +133,10 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
-temp1=`echo "1000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '1000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
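A minimal sketch of the substitution (with hypothetical values): awk's BEGIN block performs the floating-point arithmetic that bc used to, so the test scripts no longer depend on bc being installed:

    step_per_sec=2.5
    batch_size=32
    ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
    TrainingTime=`awk 'BEGIN {printf "%.2f\n", 1000 * '${batch_size}' / '${ActualFPS}'}'`
    echo "${ActualFPS} ${TrainingTime}"  # 80.00 400.00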
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
index aad9ee57c..ee8ae74e8 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_8p.sh
@@ -135,11 +135,10 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
-ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
-temp1=`echo "8000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}' * '${RANK_SIZE}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '8000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
index ce142ad5e..6cb6aed51 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
@@ -128,11 +128,10 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
-temp1=`echo "1000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '1000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
index ab3954948..7f29f5090 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
@@ -130,11 +130,9 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
-ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
-temp1=`echo "8000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}' * '${RANK_SIZE}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '8000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh
index 7814192ab..a41c04a3c 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_1p.sh
@@ -132,11 +132,10 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
-temp1=`echo "1000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '1000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
index 86c54483f..943c52c52 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_full_8p.sh
@@ -134,11 +134,9 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
-ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
-temp1=`echo "8000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}' * '${RANK_SIZE}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '8000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
index e04e3c410..f9f076470 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
@@ -128,11 +128,10 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
-temp1=`echo "1000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '1000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
index e5efad108..c50df201f 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
@@ -129,11 +129,9 @@ DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
#Collect performance data
-fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
-temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
-ActualFPS=`echo "scale=2;${temp0} * ${RANK_SIZE}"|bc`
-temp1=`echo "8000 * ${batch_size}"|bc`
-TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
+step_per_sec=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
+ActualFPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}' * '${RANK_SIZE}'}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n", '8000' * '${batch_size}' / '${ActualFPS}'}'`
ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh
index 4cd9fbd51..91919cd22 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_1p.sh
@@ -134,7 +134,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
-FPS=`echo "scale=3; $step_per_sec * $batch_size"|bc`
+FPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $FPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh
index 7af34ce6f..d6bbeba3f 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_full_8p.sh
@@ -145,7 +145,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
-FPS=`echo "scale=3; $step_per_sec * $batch_size * 8"|bc`
+FPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}' * '${RANK_SIZE}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $FPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh
index 4ced05dec..0eb931a2c 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_1p.sh
@@ -134,7 +134,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
-FPS=`echo "scale=3; $step_per_sec * $batch_size"|bc`
+FPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $FPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh
index 8da58f498..bb08a352e 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID0060_BertBase_performance_8p.sh
@@ -145,7 +145,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
step_per_sec=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "global_step/sec:" | awk -F "global_step/sec: " '{print $2}' | awk '{sum+=$1} END {print sum/NR}'`
-FPS=`echo "scale=3; $step_per_sec * $batch_size * 8"|bc`
+FPS=`awk 'BEGIN {printf "%.2f\n", '${step_per_sec}' * '${batch_size}' * '${RANK_SIZE}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $FPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh
index 30f574bbc..d4515ba22 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_1p.sh
@@ -135,7 +135,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh
index 1a13b90d1..1899db4c4 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_full_8p.sh
@@ -147,7 +147,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
index 24659e3dc..4371bcf45 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_1p.sh
@@ -135,7 +135,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh
index aed3d0c4f..a38566d74 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3067_BertLarge-128_performance_8p.sh
@@ -147,7 +147,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
index bca768465..3dc76e13b 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_1p.sh
@@ -135,7 +135,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
index d6e289e65..fe951416a 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_full_8p.sh
@@ -147,7 +147,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
index 08292d028..640275d0d 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_1p.sh
@@ -135,7 +135,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
index 543e83acc..9c87f8b31 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
@@ -147,7 +147,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
index f92ede8b7..352bfabaa 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_1p.sh
@@ -134,7 +134,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
index ab38d1cbb..c53adce96 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_full_8p.sh
@@ -146,7 +146,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
index e47baac94..bb1b497c9 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_1p.sh
@@ -134,7 +134,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
index 9e1197847..431e81aa2 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3069_BertBase-512_performance_8p.sh
@@ -146,7 +146,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
#Output performance FPS; review and adjust per model
ActualFPS=`grep Throughput ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $6}'`
-TrainingTime=`echo "scale=4; ${RANK_SIZE} * ${batch_size} / ${ActualFPS}"|bc`
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}' * '${RANK_SIZE}' / '${ActualFPS}'}'`
#Print, no modification needed
echo "Final Performance images/sec : $ActualFPS"
--
Gitee
From 603684a0a5fea58a24f68bd0a37fe24ee084ce6a Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Thu, 7 Apr 2022 11:20:20 +0800
Subject: [PATCH 04/11] remove unneeded environment variables
---
.../nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py | 5 -----
1 file changed, 5 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
index 40ede5f5a..5a2060119 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
@@ -35,12 +35,7 @@ from npu_bridge.estimator.npu.npu_estimator import *
from npu_bridge.estimator.npu.npu_config import NPURunConfig
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
-os.environ['WHICH_OP'] = 'GEOP'
-os.environ['NEW_GE_FE_ID'] = '1'
-os.environ['GE_AICPU_FLAG'] = '1'
os.environ['GE_USE_STATIC_MEMORY'] = '1'
-os.environ['OPTION_EXEC_HCCL_FLAG'] = '1'
-os.environ['HCCL_CONNECT_TIMEOUT'] = '600'
flags = tf.flags
--
Gitee
From 86df20877db453c2026d11c2c836179d4b3aaebf Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Thu, 7 Apr 2022 17:29:47 +0800
Subject: [PATCH 05/11] read rank_id from RANK_ID env for dataset sharding
---
.../nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
index 5a2060119..18a6bfda0 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
@@ -501,10 +501,8 @@ def input_fn_builder(input_files,
d = tf.data.Dataset.from_tensor_slices(tf.constant(input_files))
if FLAGS.distributed:
rank_size = int(os.getenv('RANK_SIZE'))
- rank_id = int(os.getenv('RANK_INDEX'))
- device_id = int(os.getenv('DEVICE_ID'))
- local_rank = rank_id * 8 + device_id
- print('RANK_SIZE=', rank_size, ' RANK_ID=', local_rank)
+ rank_id = int(os.getenv('RANK_ID'))
+ print('RANK_SIZE=', rank_size, ' RANK_ID=', rank_id)
d = d.shard(rank_size, local_rank)
d = d.repeat()
if not FLAGS.npu_bert_debug:
--
Gitee
From f7794b89c6896a685134a1674bfd2a8921f6f4f5 Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Thu, 7 Apr 2022 18:23:40 +0800
Subject: [PATCH 06/11] fix leftover local_rank reference in dataset shard
---
.../nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
index 18a6bfda0..69a2ada9d 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
@@ -503,7 +503,7 @@ def input_fn_builder(input_files,
rank_size = int(os.getenv('RANK_SIZE'))
rank_id = int(os.getenv('RANK_ID'))
print('RANK_SIZE=', rank_size, ' RANK_ID=', rank_id)
- d = d.shard(rank_size, local_rank)
+ d = d.shard(rank_size, rank_id)
d = d.repeat()
if not FLAGS.npu_bert_debug:
d = d.shuffle(buffer_size=len(input_files))
--
Gitee
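Taken together, patches 05 and 06 make each worker shard the input file list by its global rank. A minimal sketch of the resulting input path (TF 1.x style to match the sources; the file list is hypothetical, and RANK_SIZE / RANK_ID come from the Ascend launch environment):

    import os
    import tensorflow as tf

    rank_size = int(os.getenv('RANK_SIZE', '1'))
    rank_id = int(os.getenv('RANK_ID', '0'))
    d = tf.data.Dataset.from_tensor_slices(tf.constant(['f0', 'f1', 'f2', 'f3']))
    d = d.shard(rank_size, rank_id)  # worker i keeps files i, i + rank_size, ...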
From e5be28cae24b850e428ab1912afabe67887bb72a Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Thu, 7 Apr 2022 18:37:37 +0800
Subject: [PATCH 07/11] stop fetching isFinite graph node in log hook
---
.../src/run_pretraining.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
index 69a2ada9d..fdb3eda48 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/src/run_pretraining.py
@@ -180,10 +180,11 @@ class _LogSessionRunHook(tf.train.SessionRunHook):
self.t0 = time.time()
if self.num_accumulation_steps <= 1:
if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
+ # on 1p the isFinite node was 'apply_grads/All:0'; it is no longer fetched
return tf.train.SessionRunArgs(
fetches=['global_step:0', 'total_loss:0',
'learning_rate:0', 'nsp_loss:0',
- 'mlm_loss:0', 'loss_scale:0', 'apply_grads/All:0'])
+ 'mlm_loss:0', 'loss_scale:0'])
else:
return tf.train.SessionRunArgs(
fetches=['global_step:0', 'total_loss:0',
@@ -204,7 +205,7 @@ class _LogSessionRunHook(tf.train.SessionRunHook):
self.elapsed_secs += time.time() - self.t0
if self.num_accumulation_steps <=1:
if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
- global_step, total_loss, lr, nsp_loss, mlm_loss, loss_scaler, custom_arg = run_values.results
+ global_step, total_loss, lr, nsp_loss, mlm_loss, loss_scaler = run_values.results
else:
global_step, total_loss, lr, nsp_loss, mlm_loss = run_values.results
update_step = True
@@ -223,15 +224,15 @@ class _LogSessionRunHook(tf.train.SessionRunHook):
avg_loss_step = self.avg_loss / self.all_count
if self.hvd_rank >= 0:
if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
- print('Rank = %2d :: Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e isFinite = %2i' %
- (self.hvd_rank, print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler, custom_arg), flush=True)
+ print('Rank = %2d :: Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e' %
+ (self.hvd_rank, print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler), flush=True)
else:
print('Rank = %2d :: Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e' %
(self.hvd_rank, print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr), flush=True)
else:
if (tf.flags.FLAGS.npu_bert_loss_scale == 0) and (FLAGS.manual_fp16 or FLAGS.use_fp16):
- print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e isFinite = %2i' %
- (print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler, custom_arg), flush=True)
+ print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e Loss scale = %6.4e' %
+ (print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr, loss_scaler), flush=True)
else:
print('Step = %6i Throughput = %11.1f MLM Loss = %10.4e NSP Loss = %10.4e Loss = %9.6f Average Loss = %9.6f LR = %6.4e' %
(print_step, sent_per_sec, mlm_loss, nsp_loss, total_loss, avg_loss_step, lr), flush=True)
--
Gitee
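For context on the change above: a TF1.x `tf.train.SessionRunHook` may only list fetches that actually exist in the built graph, and the overflow-check node `apply_grads/All:0` is present in some graph builds but not others, which is why the fetch is dropped. Below is a minimal sketch of that pattern, not part of the patch; the hook name and log format are illustrative, and probing for an optional node is one defensive way to handle it.

```
import tensorflow as tf

class LossLogHook(tf.train.SessionRunHook):
    """Logs step/loss; requests the finite-check node only if the graph has it."""

    def before_run(self, run_context):
        fetches = ['global_step:0', 'total_loss:0']
        graph = run_context.session.graph
        try:
            # 'apply_grads/All:0' exists only in some graph builds;
            # listing a missing tensor name in the fetches would fail.
            graph.get_tensor_by_name('apply_grads/All:0')
            fetches.append('apply_grads/All:0')
        except KeyError:
            pass
        return tf.train.SessionRunArgs(fetches=fetches)

    def after_run(self, run_context, run_values):
        global_step, total_loss = run_values.results[:2]
        print('Step = %6i Loss = %9.6f' % (global_step, total_loss), flush=True)
```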
From b1cf850baca39a10613dafcc8eba1115de14bf2f Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Thu, 7 Apr 2022 19:21:37 +0800
Subject: [PATCH 08/11] modify for shell
---
.../test/train_ID3068_BertLarge-512_performance_8p.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
index 9c87f8b31..1c0ea9603 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
@@ -109,7 +109,7 @@ do
if [ "x${bind_core}" != x ];then
bind_core="taskset -c $a-$c"
fi
- nohup ${bind_core} python3.7 $cur_path/../src/pretrain/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+ nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
--max_seq_length=512 \
--max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
--
Gitee
From c95e89d8670182531d783961c077756de78dd8ee Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Thu, 7 Apr 2022 19:56:16 +0800
Subject: [PATCH 09/11] modify for shell
---
.../test/train_ID3068_BertLarge-512_performance_8p.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
index 1c0ea9603..4ac5f39a8 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/test/train_ID3068_BertLarge-512_performance_8p.sh
@@ -109,7 +109,7 @@ do
if [ "x${bind_core}" != x ];then
bind_core="taskset -c $a-$c"
fi
- nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_base_config.json \
+ nohup ${bind_core} python3.7 $cur_path/../src/run_pretraining.py --bert_config_file=${cur_path}/../configs/bert_large_config.json \
--max_seq_length=512 \
--max_predictions_per_seq=76 \
--train_batch_size=${batch_size} \
--
Gitee
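The patch above points the BertLarge-512 test at bert_large_config.json instead of the base config. A quick guard against this class of mismatch is to assert the config's dimensions before launching; the helper below is a hypothetical sketch (not shipped with the patches), assuming only the canonical BERT-large sizes.

```
import json

def assert_bert_large(config_path):
    """Fail fast if the config describes BERT-base rather than BERT-large."""
    with open(config_path) as f:
        cfg = json.load(f)
    # Canonical BERT-large dimensions; BERT-base would be 768 / 12 / 12.
    expected = {'hidden_size': 1024,
                'num_hidden_layers': 24,
                'num_attention_heads': 16}
    for key, value in expected.items():
        if cfg.get(key) != value:
            raise ValueError('%s: %s=%r, expected %r'
                             % (config_path, key, cfg.get(key), value))

assert_bert_large('../configs/bert_large_config.json')
```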
From 853d4e220ccd09d5b09b41e01f1e57f487f66edf Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Fri, 8 Apr 2022 11:35:53 +0800
Subject: [PATCH 10/11] modify for small dataset
---
.../test/train_ID0495_Bert-Squad_performance_1p.sh | 2 +-
.../test/train_ID0495_Bert-Squad_performance_8p.sh | 2 +-
.../test/train_ID3082_BertLarge-Squad_performance_1p.sh | 2 +-
.../test/train_ID3082_BertLarge-Squad_performance_8p.sh | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
index 6cb6aed51..b11e96d6a 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_1p.sh
@@ -75,7 +75,7 @@ fi
vocab_file=${data_path}/model/vocab.txt
bert_config_file=${data_path}/model/bert_config.json
init_checkpoint=${data_path}/model/bert_model.ckpt
-train_file=${data_path}/dataset/train-v1.1.json
+train_file=${data_path}/dataset/train-v1.1_small.json
predict_file=${data_path}/dataset/dev-v1.1.json
#Training start time; no need to modify
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
index 7f29f5090..55f784e24 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_performance_8p.sh
@@ -76,7 +76,7 @@ fi
vocab_file=${data_path}/model/vocab.txt
bert_config_file=${data_path}/model/bert_config.json
init_checkpoint=${data_path}/model/bert_model.ckpt
-train_file=${data_path}/dataset/train-v1.1.json
+train_file=${data_path}/dataset/train-v1.1_small.json
predict_file=${data_path}/dataset/dev-v1.1.json
#Training start time; no need to modify
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
index f9f076470..ef213866b 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_1p.sh
@@ -75,7 +75,7 @@ fi
vocab_file=${data_path}/uncased_L-24_H-1024_A-16/vocab.txt
bert_config_file=${data_path}/uncased_L-24_H-1024_A-16/bert_config.json
init_checkpoint=${data_path}/uncased_L-24_H-1024_A-16/bert_model.ckpt
-train_file=${data_path}/dataset/train-v1.1.json
+train_file=${data_path}/dataset/train-v1.1_small.json
predict_file=${data_path}/dataset/dev-v1.1.json
#Training start time; no need to modify
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
index c50df201f..28c3e9bf5 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID3082_BertLarge-Squad_performance_8p.sh
@@ -75,7 +75,7 @@ fi
vocab_file=${data_path}/uncased_L-24_H-1024_A-16/vocab.txt
bert_config_file=${data_path}/uncased_L-24_H-1024_A-16/bert_config.json
init_checkpoint=${data_path}/uncased_L-24_H-1024_A-16/bert_model.ckpt
-train_file=${data_path}/dataset/train-v1.1.json
+train_file=${data_path}/dataset/train-v1.1_small.json
predict_file=${data_path}/dataset/dev-v1.1.json
#Training start time; no need to modify
--
Gitee
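The patch above shortens the performance tests by training on train-v1.1_small.json rather than the full SQuAD v1.1 training set. The patches do not include the script that produces that file; a plausible sketch is below, assuming the standard SQuAD v1.1 layout ({"version", "data": [articles...]}) and an arbitrary article count.

```
import json

def make_small_squad(src, dst, num_articles=5):
    """Keep the first few articles of a SQuAD v1.1 file for quick perf runs."""
    with open(src, encoding='utf-8') as f:
        squad = json.load(f)
    # Truncating the article list keeps the file a valid SQuAD v1.1 dataset.
    squad['data'] = squad['data'][:num_articles]
    with open(dst, 'w', encoding='utf-8') as f:
        json.dump(squad, f, ensure_ascii=False)

make_small_squad('dataset/train-v1.1.json', 'dataset/train-v1.1_small.json')
```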
From e5c0165f8c11e4f380afd02abac6c29f48a8d4ff Mon Sep 17 00:00:00 2001
From: hxxhl88 <736544296@qq.com>
Date: Fri, 8 Apr 2022 14:27:26 +0800
Subject: [PATCH 11/11] modify for readme
---
.../README.md | 37 +++----
.../BertNV_Series_for_TensorFlow/README.md | 96 ++++---------------
2 files changed, 41 insertions(+), 92 deletions(-)
diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md
index 8dac5fc50..bcd449fd2 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/README.md
@@ -140,28 +140,28 @@ BERT is a method of pre-training language representations, meaning that we train on large text
[Ascend 910训练平台环境变量设置](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
- Configure the environment variables in scripts/run_*.sh
+ Configure the environment variables in test/train_*.sh
- Single-card training
  Start single-card training
- Modify BERT_BASE_DIR and SQUAD_DIR in scripts/run_1p.sh, where BERT_BASE_DIR is the pre-trained model path and SQUAD_DIR is the SQuAD v1.1 dataset path
+ Set data_path in test/train_ID0495_Bert-Squad_performance_1p.sh to the path of the SQuAD v1.1 dataset and the pre-trained model, then run:
```
- cd scripts
- bash run_1p.sh
+ cd test
+ bash train_ID0495_Bert-Squad_performance_1p.sh
```
- 8-card training
  Start 8-card training
- Modify BERT_BASE_DIR and SQUAD_DIR in scripts/run_8p.sh, where BERT_BASE_DIR is the pre-trained model path and SQUAD_DIR is the SQuAD v1.1 dataset path
+ Set data_path in test/train_ID0495_Bert-Squad_performance_8p.sh to the path of the SQuAD v1.1 dataset and the pre-trained model, then run:
```
- cd scripts
- bash run_8p.sh
+ cd test
+ bash train_ID0495_Bert-Squad_performance_8p.sh
```
@@ -172,14 +172,17 @@ BERT is a method of pre-training language representations, meaning that we train on large text
```
└─Bertsquad_for_TensorFlow
- ├─scripts
- | ├─8p.json
- | ├─docker_start.sh
- | ├─run_1p.sh
- | ├─run_8p.sh
- | ├─test.sh
- | ├─train_1p.sh
- | └─train_8p.sh
+ ├─configs
+ | └─rank_table_8p.json
+ ├─test
+ | ├─train_ID0495_Bert-Squad_performance_1p.sh
+ | ├─train_ID0495_Bert-Squad_performance_8p.sh
+ | ├─train_ID0495_Bert-Squad_full_1p.sh
+ | ├─train_ID0495_Bert-Squad_full_8p.sh
+ | ├─train_ID3082_BertLarge-Squad_performance_1p.sh
+ | ├─train_ID3082_BertLarge-Squad_performance_8p.sh
+ | ├─train_ID3082_BertLarge-Squad_full_1p.sh
+ | └─train_ID3082_BertLarge-Squad_full_8p.sh
├─utils
| ├─create_glue_data.py
| ├─create_pretraining_data.py
@@ -229,14 +232,14 @@ python3 run_squad.py \
1. 通过“模型训练”中的训练指令启动单卡训练和8卡训练。
-2. Training logs and results: see scripts/result/1p/train_*.log.
+2. Training logs and results: see test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log.
## Inference/validation
```
-python3 evaluate-v1.1.py dataset/dev-v1.1.json .scripts/result/1p/0/output/predictions.json
+python3 evaluate-v1.1.py dataset/dev-v1.1.json ./test/output/${ASCEND_DEVICE_ID}/predictions.json
##predict result for 1p:
{"exact_match": 79.87701040681173, "f1": 87.42429097480438}
diff --git a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md
index 16c43a5cc..4fa93d301 100644
--- a/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md
+++ b/TensorFlow/built-in/nlp/BertNV_Series_for_TensorFlow/README.md
@@ -157,7 +157,7 @@
Run the following command to convert the dataset to tfrecord format.
```
- python src/pretrain/create_pretraining_data.py \
+ python src/create_pretraining_data.py \
--input_file= \
--output_file=/some_output_data.tfrecord \
--vocab_file= \
@@ -170,96 +170,41 @@
```
- Model training
-- Before starting training, first configure the environment variables required to run the program. For environment variable configuration, see:
+- Before starting training, first configure the environment variables required to run the program.
-[Ascend 910 training platform environment variable setup](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+ For environment variable configuration, see:
-- Script modification:
- For versions earlier than Atlas Data Center Solution V100R020C30:
-
- Modify src/pretrain/run_pretraining.py, changing apply_grads/overflow_status_reduce_all to apply_grads/All
+ [Ascend 910 training platform environment variable setup](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819)
+ Configure the environment variables in test/train_*.sh
- Single-card training
- 1. Configure the parameters in `run_pretraining.sh` under the `scripts` directory, making sure `--input_files_dir` and `--eval_files_dir` point to the actual dataset paths, as follows:
-
-```
- --input_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \ # training dataset path
- --eval_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \ # evaluation dataset path
-```
-
- 2. Single-card training command; in the ModelZoo_BertBase_TF directory, run:
-
- bash scripts/run_pretraining.sh
-
-
+ 1. Set data_path in test/train_ID0060_BertBase_performance_1p.sh to the actual dataset path
+
+ 2. Single-card training command; in the test directory, run:
+ ```
+ bash train_ID0060_BertBase_performance_1p.sh
+ ```
- 8-card training
- 1. Configure the parameters in `train_8p.sh` under the `scripts` directory, making sure `--input_files_dir` and `--eval_files_dir` point to the actual dataset paths, as follows:
- ```
- --input_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \ # training dataset path
- --eval_files_dir=/autotest/CI_daily/ModelZoo_BertBase_TF/data/wikipedia_128 \ # evaluation dataset path
- ```
- 2. 8-card training command; in the ModelZoo_BertBase_TF directory, run:
+ 1. Set data_path in test/train_ID0060_BertBase_performance_8p.sh to the actual dataset path
+ 2. 8-card training command; in the test directory, run:
```
- bash scripts/run_8p.sh
+ bash train_ID0060_BertBase_performance_8p.sh
```
-- Note: the downstream tasks of the Bert-base network (src/downstream) have not yet been adapted and tested
-
Advanced reference
Scripts and sample code
├── configs
- │ ├──BERT_base_64p_poc.json //8*8p rank table config file
- │ ├──nezha_large_config.json //NEZHA large model config file
- │ ├──nezha_large_vocab.txt //NEZHA large Chinese vocabulary
- ├── scripts
- │ ├──npu_set_env.sh //cluster configuration
- │ ├──run_downstream_classifier.sh //run the downstream classification task
- │ ├──run_downstream_ner.sh //run the downstream sequence labeling task
- │ ├──run_downstream_reading.sh //run the downstream reading comprehension task
- │ ├──run_pretraining.sh //single-card pretraining script
- │ ├──run_8p.sh //8-card pretraining entry script
- │ ├──train_8p.sh //8-card pretraining script
- ├── src/downstream
- │ ├──gpu_environment.py //original gpu_environment settings
- │ ├──metrics_impl.py //metrics_impl.py adapted for NPU
- │ ├──modeling.py //NEZHA model script
- │ ├──optimization.py //optimizer script
- │ ├──reading_evaluate.py //reading comprehension evaluation script
- │ ├──run_classifier.py //downstream classification script
- │ ├──run_ner.py //downstream sequence labeling script
- │ ├──run_reading.py //downstream reading comprehension script
- │ ├──tf_metrics.py //tf metrics script
- │ ├──tokenization.py //tokenizer script
- ├── src/pretrain
- │ ├──gpu_environment.py //original gpu_environment settings
- │ ├──create_pretraining_data.py //pre-training data generation script
- │ ├──modeling.py //NEZHA model script
- │ ├──optimization.py //optimizer script
- │ ├──extract_features.py //feature extraction script
- │ ├──fp16_utils.py //fp16 utils script
- │ ├──fused_layer_norm.py //fused layer norm script
- │ ├──run_pretraining.py //pretraining launch script
- │ ├──tf_metrics.py //tf metrics script
- │ ├──tokenization.py //tokenizer script
- │ ├──utils.py //utils script├── CONTRIBUTING.md //CONTRIBUTING.md
- ├── src/downstream
- │ ├──gpu_environment.py //original gpu_environment settings
- │ ├──metrics_impl.py //metrics_impl.py adapted for NPU
- │ ├──modeling.py //NEZHA model script
- │ ├──optimization.py //optimizer script
- │ ├──reading_evaluate.py //reading comprehension evaluation script
- │ ├──run_classifier.py //downstream classification script
- │ ├──run_ner.py //downstream sequence labeling script
- │ ├──run_reading.py //downstream reading comprehension script
- │ ├──tf_metrics.py //tf metrics script
- │ ├──tokenization.py //tokenizer script
- ├── src/pretrain
+ │ ├──8p.json //8p rank table config file
+ │ ├──bert_base_config.json //bert base model config file
+ │ ├──bert_large_config.json //bert large model config file
+ │ ├──bert_base_vocab.txt //bert base Chinese vocabulary
+ ├── src
│ ├──gpu_environment.py //original gpu_environment settings
│ ├──create_pretraining_data.py //pre-training data generation script
│ ├──modeling.py //NEZHA model script
@@ -273,7 +218,8 @@
│ ├──utils.py //utils script
├── CONTRIBUTING.md //CONTRIBUTING.md
├── LICENCE //LICENCE
- ├── NOTICE //NOTICE├── README.md //documentation
+ ├── NOTICE //NOTICE
+ ├── README.md //documentation
## Script parameters
--
Gitee
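The updated README points readers at test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log for results. Since _LogSessionRunHook (patch 07) prints lines of the form "Step = ... Throughput = ...", the average throughput can be recovered from that log with a small helper like the sketch below; this is a hypothetical script, not shipped with the patches.

```
import re
import sys

# Matches the 'Step = %6i Throughput = %11.1f ...' lines printed by
# _LogSessionRunHook in src/run_pretraining.py.
PATTERN = re.compile(r'Step =\s*(\d+)\s+Throughput =\s*([\d.]+)')

def mean_throughput(log_path):
    values = []
    with open(log_path) as f:
        for line in f:
            match = PATTERN.search(line)
            if match:
                values.append(float(match.group(2)))
    return sum(values) / len(values) if values else 0.0

if __name__ == '__main__':
    print('average sentences/sec: %.1f' % mean_throughput(sys.argv[1]))
```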