diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/00-official-bug-report-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/00-official-bug-report-issue.md deleted file mode 100644 index 51e08c26db66114de0b604bf0cc5c461311a0b4f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/00-official-bug-report-issue.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -name: "[Official Model] Bug Report" -about: Use this template for reporting a bug for the “official” directory -labels: type:bug,models:official - ---- - -# Prerequisites - -Please answer the following questions for yourself before submitting an issue. - -- [ ] I am using the latest TensorFlow Model Garden release and TensorFlow 2. -- [ ] I am reporting the issue to the correct repository. (Model Garden official or research directory) -- [ ] I checked to make sure that this issue has not been filed already. - -## 1. The entire URL of the file you are using - -https://github.com/tensorflow/models/tree/master/official/... - -## 2. Describe the bug - -A clear and concise description of what the bug is. - -## 3. Steps to reproduce - -Steps to reproduce the behavior. - -## 4. Expected behavior - -A clear and concise description of what you expected to happen. - -## 5. Additional context - -Include any logs that would be helpful to diagnose the problem. - -## 6. System information - -- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): -- Mobile device name if the issue happens on a mobile device: -- TensorFlow installed from (source or binary): -- TensorFlow version (use command below): -- Python version: -- Bazel version (if compiling from source): -- GCC/Compiler version (if compiling from source): -- CUDA/cuDNN version: -- GPU model and memory: - - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/10-official-documentation-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/10-official-documentation-issue.md deleted file mode 100644 index 00d79a16916c327d2d8a729791db7d7d3d96b735..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/10-official-documentation-issue.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: "[Official Model] Documentation Issue" -about: Use this template for reporting a documentation issue for the “official” directory -labels: type:docs,models:official - ---- - -# Prerequisites - -Please answer the following question for yourself before submitting an issue. - -- [ ] I checked to make sure that this issue has not been filed already. - -## 1. The entire URL of the documentation with the issue - -https://github.com/tensorflow/models/tree/master/official/... - -## 2. Describe the issue - -A clear and concise description of what needs to be changed. 
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/20-official-feature-request-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/20-official-feature-request-issue.md deleted file mode 100644 index 02d8cab52218202707646345a4ab2570519660dd..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/20-official-feature-request-issue.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "[Official Model] Feature request" -about: Use this template for raising a feature request for the “official” directory -labels: type:feature,models:official - ---- - -# Prerequisites - -Please answer the following question for yourself before submitting an issue. - -- [ ] I checked to make sure that this feature has not been requested already. - -## 1. The entire URL of the file you are using - -https://github.com/tensorflow/models/tree/master/official/... - -## 2. Describe the feature you request - -A clear and concise description of what you want to happen. - -## 3. Additional context - -Add any other context about the feature request here. - -## 4. Are you willing to contribute it? (Yes or No) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/30-research-bug-report-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/30-research-bug-report-issue.md deleted file mode 100644 index 4448ed9e40d6a089b84881635c2ee0f53524ae61..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/30-research-bug-report-issue.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: "[Research Model] Bug Report" -about: Use this template for reporting a bug for the “research” directory -labels: type:bug,models:research - ---- -# Prerequisites - -Please answer the following questions for yourself before submitting an issue. - -- [ ] I am using the latest TensorFlow Model Garden release and TensorFlow 2. -- [ ] I am reporting the issue to the correct repository. (Model Garden official or research directory) -- [ ] I checked to make sure that this issue has not already been filed. - -## 1. The entire URL of the file you are using - -https://github.com/tensorflow/models/tree/master/research/... - -## 2. Describe the bug - -A clear and concise description of what the bug is. - -## 3. Steps to reproduce - -Steps to reproduce the behavior. - -## 4. Expected behavior - -A clear and concise description of what you expected to happen. - -## 5. Additional context - -Include any logs that would be helpful to diagnose the problem. - -## 6. 
System information - -- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): -- Mobile device name if the issue happens on a mobile device: -- TensorFlow installed from (source or binary): -- TensorFlow version (use command below): -- Python version: -- Bazel version (if compiling from source): -- GCC/Compiler version (if compiling from source): -- CUDA/cuDNN version: -- GPU model and memory: - - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/40-research-documentation-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/40-research-documentation-issue.md deleted file mode 100644 index 26adfd83e1fbe27d045ecd8dfccef91bbd27fcf1..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/40-research-documentation-issue.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: "[Research Model] Documentation Issue" -about: Use this template for reporting a documentation issue for the “research” directory -labels: type:docs,models:research - ---- - -# Prerequisites - -Please answer the following question for yourself before submitting an issue. - -- [ ] I checked to make sure that this issue has not been filed already. - -## 1. The entire URL of the documentation with the issue - -https://github.com/tensorflow/models/tree/master/research/... - -## 2. Describe the issue - -A clear and concise description of what needs to be changed. diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/50-research-feature-request-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/50-research-feature-request-issue.md deleted file mode 100644 index 412942a31be9cc4c2935dcd38ecb059a8a4ec18c..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/50-research-feature-request-issue.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "[Research Model] Feature Request" -about: Use this template for raising a feature request for the “research” directory -labels: type:feature,models:research - ---- - -# Prerequisites - -Please answer the following question for yourself before submitting an issue. - -- [ ] I checked to make sure that this feature has not been requested already. - -## 1. The entire URL of the file you are using - -https://github.com/tensorflow/models/tree/master/research/... - -## 2. Describe the feature you request - -A clear and concise description of what you want to happen. - -## 3. Additional context - -Add any other context about the feature request here. - -## 4. Are you willing to contribute it? (Yes or No) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/60-questions-help-issue.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/60-questions-help-issue.md deleted file mode 100644 index bc85e0bb019fd2d5960b822c18358f906d5264b7..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/60-questions-help-issue.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: Questions and Help -about: Use this template for Questions and Help. 
-labels: type:support - ---- - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/config.yml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 3ba13e0cec6cbbfd462e9ebf529dd2093148cd69..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1 +0,0 @@ -blank_issues_enabled: false diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/PULL_REQUEST_TEMPLATE.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 379b31c57c118a174d4e787e03099288957f9fe2..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,41 +0,0 @@ -# Description - -> :memo: Please include a summary of the change. -> -> * Please also include relevant motivation and context. -> * List any dependencies that are required for this change. - -## Type of change - -For a new feature or function, please create an issue first to discuss it -with us before submitting a pull request. - -Note: Please delete options that are not relevant. - -- [ ] Bug fix (non-breaking change which fixes an issue) -- [ ] Documentation update -- [ ] TensorFlow 2 migration -- [ ] New feature (non-breaking change which adds functionality) -- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) -- [ ] A new research paper code implementation -- [ ] Other (Specify) - -## Tests - -> :memo: Please describe the tests that you ran to verify your changes. -> -> * Provide instructions so we can reproduce. -> * Please also list any relevant details for your test configuration. - -**Test Configuration**: - -## Checklist - -- [ ] I have signed the [Contributor License Agreement](https://github.com/tensorflow/models/wiki/Contributor-License-Agreements). -- [ ] I have read [guidelines for pull request](https://github.com/tensorflow/models/wiki/Submitting-a-pull-request). -- [ ] My code follows the [coding guidelines](https://github.com/tensorflow/models/wiki/Coding-guidelines). -- [ ] I have performed a self [code review](https://github.com/tensorflow/models/wiki/Code-review) of my own code. -- [ ] I have commented my code, particularly in hard-to-understand areas. -- [ ] I have made corresponding changes to the documentation. -- [ ] My changes generate no new warnings. -- [ ] I have added tests that prove my fix is effective or that my feature works. diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/README_TEMPLATE.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/README_TEMPLATE.md deleted file mode 100644 index 43dba40f59684df0f79faa341c8de67916313210..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/README_TEMPLATE.md +++ /dev/null @@ -1,124 +0,0 @@ -> :memo: A README.md template for releasing a paper code implementation to a GitHub repository. -> -> * Template version: 1.0.2020.170 -> * Please modify sections depending on needs. - -# Model name, Paper title, or Project Name - -> :memo: Add a badge for the ArXiv identifier of your paper (arXiv:YYMM.NNNNN) - -[![Paper](http://img.shields.io/badge/Paper-arXiv.YYMM.NNNNN-B3181B?logo=arXiv)](https://arxiv.org/abs/...) 
- -This repository is the official or unofficial implementation of the following paper. - -* Paper title: [Paper Title](https://arxiv.org/abs/YYMM.NNNNN) - -## Description - -> :memo: Provide description of the model. -> -> * Provide brief information of the algorithms used. -> * Provide links for demos, blog posts, etc. - -## History - -> :memo: Provide a changelog. - -## Authors or Maintainers - -> :memo: Provide maintainer information. - -* Full name ([@GitHub username](https://github.com/username)) -* Full name ([@GitHub username](https://github.com/username)) - -## Table of Contents - -> :memo: Provide a table of contents to help readers navigate a lengthy README document. - -## Requirements - -[![TensorFlow 2.1](https://img.shields.io/badge/TensorFlow-2.1-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.1.0) -[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/) - -> :memo: Provide details of the software required. -> -> * Add a `requirements.txt` file to the root directory for installing the necessary dependencies. -> * Describe how to install requirements using pip. -> * Alternatively, create INSTALL.md. - -To install requirements: - -```setup -pip install -r requirements.txt -``` - -## Results - -[![TensorFlow Hub](https://img.shields.io/badge/TF%20Hub-Models-FF6F00?logo=tensorflow)](https://tfhub.dev/...) - -> :memo: Provide a table with results. (e.g., accuracy, latency) -> -> * Provide links to the pre-trained models (checkpoint, SavedModel files). -> * Publish TensorFlow SavedModel files on TensorFlow Hub (tfhub.dev) if possible. -> * Add links to [TensorBoard.dev](https://tensorboard.dev/) for visualizing metrics. -> -> An example table for image classification results -> -> ### Image Classification -> -> | Model name | Download | Top 1 Accuracy | Top 5 Accuracy | -> |------------|----------|----------------|----------------| -> | Model name | [Checkpoint](https://drive.google.com/...), [SavedModel](https://tfhub.dev/...) | xx% | xx% | - -## Dataset - -> :memo: Provide information of the dataset used. - -## Training - -> :memo: Provide training information. -> -> * Provide details for preprocessing, hyperparameters, random seeds, and environment. -> * Provide a command line example for training. - -Please run this command line for training. - -```shell -python3 ... -``` - -## Evaluation - -> :memo: Provide an evaluation script with details of how to reproduce results. -> -> * Describe data preprocessing / postprocessing steps. -> * Provide a command line example for evaluation. - -Please run this command line for evaluation. - -```shell -python3 ... -``` - -## References - -> :memo: Provide links to references. - -## License - -[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) - -> :memo: Place your license text in a file named LICENSE in the root of the repository. -> -> * Include information about your license. -> * Reference: [Adding a license to a repository](https://help.github.com/en/github/building-a-strong-community/adding-a-license-to-a-repository) - -This project is licensed under the terms of the **Apache License 2.0**. - -## Citation - -> :memo: Make your repository citable. -> -> * Reference: [Making Your Code Citable](https://guides.github.com/activities/citable-code/) - -If you want to cite this repository in your research paper, please use the following information. 
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/bot_config.yml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/bot_config.yml deleted file mode 100644 index 952afc316e78d823f865ef651981fda1dde32097..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/bot_config.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# -# THIS IS A GENERATED DOCKERFILE. -# -# This file was assembled from multiple pieces, whose use is documented -# throughout. Please refer to the TensorFlow dockerfiles documentation -# for more information. - -# A list of assignees -assignees: - - saikumarchalla diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/scripts/pylint.sh b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/scripts/pylint.sh deleted file mode 100644 index bb2ebebd8a87199a2138ef513cfd930af5b822bf..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/scripts/pylint.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/bin/bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Pylint wrapper extracted from main TensorFlow, sharing same exceptions. -# Specify --incremental to only check files touched since last commit on master, -# otherwise will recursively check current directory (full repo takes long!). - -set -euo pipefail - -# Download latest configs from main TensorFlow repo. -wget -q -O /tmp/pylintrc https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/tools/ci_build/pylintrc - -SCRIPT_DIR=/tmp - -num_cpus() { - # Get the number of CPUs - if [[ -f /proc/cpuinfo ]]; then - N_CPUS=$(grep -c ^processor /proc/cpuinfo) - else - # Fallback method - N_CPUS=`getconf _NPROCESSORS_ONLN` - fi - if [[ -z ${N_CPUS} ]]; then - die "ERROR: Unable to determine the number of CPUs" - fi - - echo ${N_CPUS} -} - -get_changed_files_in_last_non_merge_git_commit() { - git diff --name-only $(git merge-base master $(git branch --show-current)) -} - -# List Python files changed in the last non-merge git commit that still exist, -# i.e., not removed. 
-# Usage: get_py_files_to_check [--incremental]
-get_py_files_to_check() {
-  if [[ "$1" == "--incremental" ]]; then
-    CHANGED_PY_FILES=$(get_changed_files_in_last_non_merge_git_commit | \
-                       grep '.*\.py$')
-
-    # Do not include files removed in the last non-merge commit.
-    PY_FILES=""
-    for PY_FILE in ${CHANGED_PY_FILES}; do
-      if [[ -f "${PY_FILE}" ]]; then
-        PY_FILES="${PY_FILES} ${PY_FILE}"
-      fi
-    done
-
-    echo "${PY_FILES}"
-  else
-    find . -name '*.py'
-  fi
-}
-
-do_pylint() {
-  if [[ $# == 1 ]] && [[ "$1" == "--incremental" ]]; then
-    PYTHON_SRC_FILES=$(get_py_files_to_check --incremental)
-
-    if [[ -z "${PYTHON_SRC_FILES}" ]]; then
-      echo "do_pylint will NOT run due to --incremental flag and due to the "\
-"absence of Python code changes in the last commit."
-      return 0
-    fi
-  elif [[ $# != 0 ]]; then
-    echo "Invalid syntax for invoking do_pylint"
-    echo "Usage: do_pylint [--incremental]"
-    return 1
-  else
-    PYTHON_SRC_FILES=$(get_py_files_to_check)
-  fi
-
-  # Something happened. TF no longer has Python code if this branch is taken
-  if [[ -z ${PYTHON_SRC_FILES} ]]; then
-    echo "do_pylint found no Python files to check. Returning."
-    return 0
-  fi
-
-  # Now that we know we have to do work, check if `pylint` is installed.
-  # Run the version check as the `if` condition: under `set -e`, a bare
-  # command followed by a `$?` test would abort the script before the
-  # "pylint not available" branch could ever run.
-  PYLINT_BIN="python3.8 -m pylint"
-
-  echo ""
-  echo "check whether pylint is available or not."
-  echo ""
-  if ${PYLINT_BIN} --version
-  then
-    echo ""
-    echo "pylint available, proceeding with pylint sanity check."
-    echo ""
-  else
-    echo ""
-    echo "pylint not available."
-    echo ""
-    return 1
-  fi
-
-  # Configure pylint using the following file
-  PYLINTRC_FILE="${SCRIPT_DIR}/pylintrc"
-
-  if [[ ! -f "${PYLINTRC_FILE}" ]]; then
-    die "ERROR: Cannot find pylint rc file at ${PYLINTRC_FILE}"
-  fi
-
-  # Run pylint in parallel, after some disk setup
-  NUM_SRC_FILES=$(echo ${PYTHON_SRC_FILES} | wc -w)
-  NUM_CPUS=$(num_cpus)
-
-  echo "Running pylint on ${NUM_SRC_FILES} files with ${NUM_CPUS} "\
-"parallel jobs..."
-  echo ""
-
-  PYLINT_START_TIME=$(date +'%s')
-  OUTPUT_FILE="$(mktemp)_pylint_output.log"
-  ERRORS_FILE="$(mktemp)_pylint_errors.log"
-
-  rm -rf ${OUTPUT_FILE}
-  rm -rf ${ERRORS_FILE}
-
-  set +e
-  # When running, filter to only contain the error code lines. Removes module
-  # header, removes lines of context that show up from some lines.
-  # Also, don't redirect stderr as this would hide pylint fatal errors.
- ${PYLINT_BIN} --rcfile="${PYLINTRC_FILE}" --output-format=parseable \ - --jobs=${NUM_CPUS} ${PYTHON_SRC_FILES} | grep '\[[CEFW]' > ${OUTPUT_FILE} - PYLINT_END_TIME=$(date +'%s') - - echo "" - echo "pylint took $((PYLINT_END_TIME - PYLINT_START_TIME)) s" - echo "" - - # Report only what we care about - # Ref https://pylint.readthedocs.io/en/latest/technical_reference/features.html - # E: all errors - # W0311 bad-indentation - # W0312 mixed-indentation - # C0330 bad-continuation - # C0301 line-too-long - # C0326 bad-whitespace - # W0611 unused-import - # W0622 redefined-builtin - grep -E '(\[E|\[W0311|\[W0312|\[C0330|\[C0301|\[C0326|\[W0611|\[W0622)' ${OUTPUT_FILE} > ${ERRORS_FILE} - - # Determine counts of errors - N_FORBID_ERRORS=$(wc -l ${ERRORS_FILE} | cut -d' ' -f1) - set -e - - # Now, print the errors we should fix - echo "" - if [[ ${N_FORBID_ERRORS} != 0 ]]; then - echo "Found ${N_FORBID_ERRORS} pylint errors:" - cat ${ERRORS_FILE} - fi - - echo "" - if [[ ${N_FORBID_ERRORS} != 0 ]]; then - echo "FAIL: Found ${N_FORBID_ERRORS} errors" - return 1 - else - echo "PASS: Found no errors" - fi -} - -do_pylint "$@" - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/stale.yml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/stale.yml deleted file mode 100644 index 7eef5309ecdf53125eb976f90c3b62f1a31a55d4..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/stale.yml +++ /dev/null @@ -1,39 +0,0 @@ - # Copyright 2019 The TensorFlow Authors. All Rights Reserved. - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # ============================================================================ - # - # THIS IS A GENERATED DOCKERFILE. - # - # This file was assembled from multiple pieces, whose use is documented - # throughout. Please refer to the TensorFlow dockerfiles documentation - # for more information. - -# Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 7 -# Number of days of inactivity before a stale Issue or Pull Request is closed -daysUntilClose: 7 -# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled) -onlyLabels: - - stat:awaiting response -# Comment to post when marking as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. It will be closed if no further activity occurs. Thank you. -# Comment to post when removing the stale label. Set to `false` to disable -unmarkComment: false -closeComment: > - Closing as stale. Please reopen if you'd like to work on this further. 
-limitPerRun: 30 -# Limit to only `issues` or `pulls` -only: issues diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/workflows/ci.yml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/workflows/ci.yml deleted file mode 100644 index 744f440b053ddb5391a827b5406ddb9ad94eccef..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.github/workflows/ci.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: CI -on: pull_request - -jobs: - pylint: - runs-on: ubuntu-latest - - steps: - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - - name: Install pylint 2.4.4 - run: | - python -m pip install --upgrade pip - pip install pylint==2.4.4 - - - name: Checkout code - uses: actions/checkout@v2 - with: - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 0 - - - name: Fetch master for diff - run: git fetch origin master:master - - - name: Run pylint script - run: bash ./.github/scripts/pylint.sh --incremental diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.gitignore b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.gitignore deleted file mode 100644 index cbc8846d64152b8a933f4bd2727877a94f98f92a..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/.gitignore +++ /dev/null @@ -1,98 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# IPython Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# mypy -.mypy_cache - -# celery beat schedule file -celerybeat-schedule - -# dotenv -.env - -# virtualenv -venv/ -ENV/ - -# Spyder project settings -.spyderproject - -# Rope project settings -.ropeproject - -# PyCharm -.idea/ - -# For mac -.DS_Store diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/AUTHORS b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/AUTHORS deleted file mode 100644 index 0fa85c98ffeb38c6d6d0ef2bddb790b75b90f3dc..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/AUTHORS +++ /dev/null @@ -1,10 +0,0 @@ -# This is the official list of authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. - -# Names should be added to this file as: -# Name or Organization -# The email address is not required for organizations. - -Google Inc. 
-David Dao diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/CODEOWNERS b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/CODEOWNERS deleted file mode 100644 index 9dd84ad290b1f3d4c071a73c51eca5dd5af448dd..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/CODEOWNERS +++ /dev/null @@ -1,27 +0,0 @@ -* @tensorflow/tf-garden-team @tensorflow/tf-model-garden-team -/official/ @rachellj218 @saberkun @jaeyounkim -/official/nlp/ @saberkun @lehougoogle @rachellj218 @jaeyounkim -/official/vision/ @xianzhidu @yeqingli @arashwan @saberkun @rachellj218 @jaeyounkim -/official/vision/beta/projects/assemblenet/ @mryoo -/official/vision/beta/projects/deepmac_maskrcnn/ @vighneshbirodkar -/official/vision/beta/projects/movinet/ @hyperparticle @yuanliangzhe @yeqingli -/official/vision/beta/projects/simclr/ @luotigerlsx @chentingpc @saxenasaurabh -/research/adversarial_text/ @rsepassi @a-dai -/research/attention_ocr/ @xavigibert -/research/audioset/ @plakal @dpwe -/research/autoaugment/ @barretzoph -/research/cognitive_planning/ @s-gupta -/research/cvt_text/ @clarkkev @lmthang -/research/deep_speech/ @yhliang2018 -/research/deeplab/ @aquariusjay @yknzhu -/research/delf/ @andrefaraujo -/research/efficient-hrl/ @ofirnachum -/research/lfads/ @jazcollins @sussillo -/research/lstm_object_detection/ @yinxiaoli @yongzhe2160 -/research/marco/ @vincentvanhoucke -/research/object_detection/ @jch1 @tombstone @pkulzc -/research/pcl_rl/ @ofirnachum -/research/rebar/ @gjtucker -/research/seq_flow_lite/ @thunderfyc -/research/slim/ @sguada @marksandler2 -/research/vid2depth/ @rezama diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/CONTRIBUTING.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/CONTRIBUTING.md deleted file mode 100644 index f909461ae7b9c75264e0915ecb37228314933e4a..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/CONTRIBUTING.md +++ /dev/null @@ -1,10 +0,0 @@ -# How to contribute - -![Contributors](https://img.shields.io/github/contributors/tensorflow/models) - -We encourage you to contribute to the TensorFlow Model Garden. - -Please read our [guidelines](../../wiki/How-to-contribute) for details. - -**NOTE**: Only [code owners](./CODEOWNERS) are allowed to merge a pull request. -Please contact the code owners of each model to merge your pull request. diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/ISSUES.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/ISSUES.md deleted file mode 100644 index b23d6daa1654188d640beb67e6614bd0743f919f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/ISSUES.md +++ /dev/null @@ -1,24 +0,0 @@ -# If you open a GitHub issue, here is our policy. - -* It must be a **bug**, a **feature request**, or a significant problem -with **documentation**. - * Please send a pull request instead for small documentation fixes. -* The required form must be filled out. -* The issue should be related to the repository it is created in. - -General help and support should be sought on [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow-model-garden) or other non-GitHub channels. - -[![](https://img.shields.io/stackexchange/stackoverflow/t/tensorflow-model-garden)](https://stackoverflow.com/questions/tagged/tensorflow-model-garden) - -TensorFlow developers respond to issues. 
-We want to focus on work that benefits the whole community such as fixing bugs -and adding new features. -It helps us to address bugs and feature requests in a timely manner. - ---- - -Please understand that research models in the [research directory](https://github.com/tensorflow/models/tree/master/research) -included in this repository are experimental and research-style code. -They are not officially supported by the TensorFlow team. - - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/LICENSE b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/LICENSE deleted file mode 100644 index 43fcf7bf1f1f9f824a1debf05d6ced45bf5810aa..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/LICENSE +++ /dev/null @@ -1,203 +0,0 @@ -Copyright 2016 The TensorFlow Authors. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2016, The Authors.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/README.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/README.md
deleted file mode 100644
index 7f4a29fb9123c7edc2d627c10fa6c7b0f21f652e..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/README.md
+++ /dev/null
@@ -1,217 +0,0 @@
-- [Basic Information](#基本信息.md)
-- [Overview](#概述.md)
-- [Training Environment Preparation](#训练环境准备.md)
-- [Quick Start](#快速上手.md)
-- [Transfer Learning Guide](#迁移学习指导.md)
-- [Advanced Reference](#高级参考.md)
-
-<h2 id="基本信息.md">Basic Information</h2>
-
-**Publisher: Huawei**
-
-**Application Domain: Natural Language Processing**
-
-**Version: 1.1**
-
-**Modified: 2022.6.11**
-
-**Size: 44KB**
-
-**Framework: TensorFlow_2.6.2**
-
-**Model Format: ckpt**
-
-**Precision: Mixed**
-
-**Processor: Ascend 910**
-
-**Categories: Official**
-
-**Description: Training code for the BERT-Large natural language processing network, based on the TensorFlow framework**
-
-<h2 id="概述.md">Overview</h2>
-
-## Summary
-
-BERT is a method of pre-training language representations and the first unsupervised, deeply bidirectional system for pre-training NLP. This repository covers BERT's fine-tuning task: the pre-trained model is fine-tuned to perform prediction and question answering on the SQuAD dataset.
-- Reference paper:
-
-  [https://arxiv.org/abs/1810.04805](https://gitee.com/link?target=https%3A%2F%2Farxiv.org%2Fabs%2F1810.04805)
-
-- Reference implementation:
-
-  https://github.com/tensorflow/models/tree/r2.6.0/official/nlp/bert
-
-- Implementation adapted for the Ascend AI Processor:
-
-  https://gitee.com/ascend/ModelZoo-TensorFlow/tree/master/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X
-
-- To fetch the code at a specific commit_id via Git:
-
-  git clone {repository_url}        # clone the repository
-  cd {repository_name}              # enter the model's code directory
-  git checkout {branch}             # switch to the corresponding branch
-  git reset --hard {commit_id}      # reset the code to the corresponding commit_id
-  cd {code_path}                    # enter the model code path; skip if the repository contains only this model
-
-
-## Default Configuration
-
-- Network architecture
-  - 24-layer, 1024-hidden, 16-heads, 340M parameters
-- Training hyperparameters (single card):
-  - Batch size: 24
-  - max_predictions_per_seq: 76
-  - max_seq_length: 384
-  - Learning rate (LR): 8e-5
-  - Weight decay: 0.01
-  - Train epochs: 2
-
-
-## Supported Features
-
-| Feature              | Supported |
-| -------------------- | --------- |
-| Distributed training | No        |
-| Mixed precision      | Yes       |
-| Data parallelism     | Yes       |
-
-
-## Mixed Precision Training
-
-The Ascend 910 AI Processor provides automatic mixed precision: following a built-in optimization strategy, it automatically lowers selected float32 operators across the network to float16, improving performance and reducing memory usage with very little accuracy loss.
-
-## Enabling Mixed Precision
-
-Pass --precision_mode='allow_mix_precision' to the launch script.
-
-```
- ./train_performance_squad1.1_large_1p.sh --help
-
-parameter explain:
-    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
-    --over_dump              if or not over detection, default is False
-    --data_dump_flag         data dump flag, default is False
-    --data_dump_step         data dump step, default is 10
-    --profiling              if or not profiling for performance debug, default is False
-    --data_path              source data of training
-    -h/--help                show help message
-```
-
-Related code example:
-
-```
-flags.DEFINE_string(name='precision_mode', default= 'allow_fp32_to_fp16',
-                    help='allow_fp32_to_fp16/force_fp16/ '
-                         'must_keep_origin_dtype/allow_mix_precision.')
-
-npu_device.global_options().precision_mode=FLAGS.precision_mode
-```
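The flag above is consumed when the NPU device is initialized. A minimal sketch of how the pieces fit together, assuming the `npu_device` API as used in the snippet above; the `modify_mixlist` option name is an assumption inferred from the `--use_mixlist`/`--mixlist_file` flags documented under Advanced Reference and from `configs/ops_info.json` in this repository, not a verified API:

```python
# Sketch: NPU initialization with mixed precision, following the flag
# plumbing shown above.
import npu_device
from absl import flags

FLAGS = flags.FLAGS

def npu_config():
    # Documented option: set the precision mode from the command-line flag.
    npu_device.global_options().precision_mode = FLAGS.precision_mode
    if FLAGS.use_mixlist and FLAGS.precision_mode == 'allow_mix_precision':
        # Assumed option name: configs/ops_info.json black-lists Cast and
        # SoftmaxV2 so they stay in float32 under mixed precision.
        npu_device.global_options().modify_mixlist = FLAGS.mixlist_file
    npu_device.open().as_default()
```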
-<h2 id="训练环境准备.md">Training Environment Preparation</h2>
-
-- For hardware environment and runtime environment setup, see the [CANN Software Installation Guide](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=installation-update).
-- Run the following command to install the dependencies.
-```
-pip3 install -r requirements.txt
-```
-Note: the dependency file requirements.txt is located in the root directory of the model.
-
-<h2 id="快速上手.md">Quick Start</h2>
-
-## Dataset Preparation
-
-1. Prepare the dataset yourself; this network covers only BERT's fine-tuning task.
-
-2. The datasets used are SQuAD 1.1 and SQuAD 2.0.
-
-3. For the BERT pre-trained models and datasets, refer to "Summary -> Reference implementation".
-
-
-
-## Model Training
-
-- Click "Download Now" and choose a suitable download method to obtain the source package.
-- Start training.
-
-   1. Before launching training, first configure the environment variables required by the program.
-
-      For environment variable configuration, see:
-
-      [Ascend 910 training platform environment variable setup](https://gitee.com/ascend/ModelZoo-TensorFlow/wikis/01.%E8%AE%AD%E7%BB%83%E8%84%9A%E6%9C%AC%E8%BF%81%E7%A7%BB%E6%A1%88%E4%BE%8B/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE)
-
-   2. Single-card training
-
-      2.1 Fine-tuning single-card training command (the script is located at BERT_ID2478_for_TensorFlow2.X/test/train_performance_squad1.1_large_1p.sh). First cd into the test directory, then launch training with the commands below. Make sure to change "--data_path" in the examples to your own data path; here the data folder is placed under the home directory.
-
-        - SQuAD1.1
-
-          ```
-          # BERT-Base model
-          bash train_performance_squad1.1_base_1p.sh --data_path=/home
-
-          # BERT-Large model
-          bash train_performance_squad1.1_large_1p.sh --data_path=/home
-          ```
-
-        - SQuAD2.0
-
-          ```
-          # BERT-Base model
-          bash train_performance_squad2.0_base_1p.sh --data_path=/home
-
-          # BERT-Large model
-          bash train_performance_squad2.0_large_1p.sh --data_path=/home
-          ```
-
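The test directory also ships a full-accuracy counterpart of the performance scripts (see the script list under Advanced Reference below). A hedged example invocation, assuming the same `--data_path` convention as the performance commands above:

```
cd test
# Full (accuracy) training run; the script name is taken from the script
# list under Advanced Reference, and /home follows the example data path
# used above.
bash train_full_squad1.1_large_1p.sh --data_path=/home
```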
-<h2 id="高级参考.md">Advanced Reference</h2>
-
-## Scripts and Sample Code
-
-```
-|--LICENSE
-|--README.md                                    # documentation
-|--model_training_utils.py
-|--squad_evaluate_v2_0.py
-|--squad_evaluate_v1_1.py
-|--run_squad.py                                 # training code
-|--requirements.txt                             # required dependencies
-|--utils.py
-|--test                                         # training script directory
-|    |--train_full_squad1.1_large_1p.sh         # full (accuracy) training script
-|    |--train_performance_squad1.1_large_1p.sh  # performance training script
-```
-
-## Script Parameters
-
-```
---data_path              # the path to train data
---epochs                 # epochs of training
---input_meta_data_path
---train_data_path
---predict_file
---vocab_file
---ckpt_save_path         # directory to ckpt
---batch_size             # batch size for 1p
---log_steps              # log frequency
---precision_mode         # precision mode (allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
---over_dump              # if or not over detection, default is False
---data_dump_flag         # data dump flag, default is False
---data_dump_step         # data dump step, default is 10
---profiling              # if or not profiling for performance debug, default is False
---profiling_dump_path    # the path to save profiling data
---over_dump_path         # the path to save over dump data
---data_dump_path         # the path to save dump data
---use_mixlist            # use_mixlist flag, default is False
---fusion_off_flag        # fusion_off flag, default is False
---mixlist_file           # mixlist file name, default is ops_info.json
---fusion_off_file        # fusion_off file name, default is fusion_switch.cfg
-```
-
-## Training Process
-
-Launch single-card or multi-card training with the commands given in "Model Training". Single-card and multi-card training run different scripts; single-card and 8-card network training are supported. The model is stored under ${cur_path}/output/$ASCEND_DEVICE_ID, including training logs and checkpoint files. Taking 8-card training as an example, loss information is written to ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log.
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/README_ORI.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/README_ORI.md
deleted file mode 100644
index 9e4a1641386f0f8d0ce0de9d6ef59e23a35475ab..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/README_ORI.md
+++ /dev/null
@@ -1,25 +0,0 @@
-![Logo](https://storage.googleapis.com/tf_model_garden/tf_model_garden_logo.png)
-
-# Welcome to the Model Garden for TensorFlow
-
-The TensorFlow Model Garden is a repository with a number of different implementations of state-of-the-art (SOTA) models and modeling solutions for TensorFlow users. We aim to demonstrate the best practices for modeling so that TensorFlow users
-can take full advantage of TensorFlow for their research and product development.
-
-| Directory | Description |
-|-----------|-------------|
-| [official](official) | • A collection of example implementations for SOTA models using the latest TensorFlow 2's high-level APIs<br />• Officially maintained, supported, and kept up to date with the latest TensorFlow 2 APIs by TensorFlow<br />• Reasonably optimized for fast performance while still being easy to read |
-| [research](research) | • A collection of research model implementations in TensorFlow 1 or 2 by researchers<br />• Maintained and supported by researchers |
-| [community](community) | • A curated list of the GitHub repositories with machine learning models and implementations powered by TensorFlow 2 |
-| [orbit](orbit) | • A flexible and lightweight library that users can easily use or fork when writing customized training loop code in TensorFlow 2.x. It seamlessly integrates with `tf.distribute` and supports running on different device types (CPU, GPU, and TPU). |
-
-## [Announcements](https://github.com/tensorflow/models/wiki/Announcements)
-
-## Contributions
-
-[![help wanted:paper implementation](https://img.shields.io/github/issues/tensorflow/models/help%20wanted%3Apaper%20implementation)](https://github.com/tensorflow/models/labels/help%20wanted%3Apaper%20implementation)
-
-If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
-
-## License
-
-[Apache License 2.0](LICENSE)
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/community/README.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/community/README.md
deleted file mode 100644
index b3f2bac74897d440d7e16efadcae45b8b5e46249..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/community/README.md
+++ /dev/null
@@ -1,60 +0,0 @@
-![Logo](https://storage.googleapis.com/tf_model_garden/tf_model_garden_logo.png)
-
-# TensorFlow Community Models
-
-This repository provides a curated list of the GitHub repositories with machine learning models and implementations powered by TensorFlow 2.
-
-**Note**: Contributing companies or individuals are responsible for maintaining their repositories.
-
-## Computer Vision
-
-### Image Recognition
-
-| Model | Paper | Features | Maintainer |
-|-------|-------|----------|------------|
-| [DenseNet 169](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/densenet169) | [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
-| [Inception V3](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/inceptionv3) | [Rethinking the Inception Architecture<br />for Computer Vision](https://arxiv.org/pdf/1512.00567.pdf) | • Int8 Inference<br />• FP32 Inference | [Intel](https://github.com/IntelAI) |
for Computer Vision](https://arxiv.org/pdf/1512.00567.pdf) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [Inception V4](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/inceptionv4) | [Inception-v4, Inception-ResNet and the Impact
of Residual Connections on Learning](https://arxiv.org/pdf/1602.07261) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [MobileNet V1](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/mobilenet_v1) | [MobileNets: Efficient Convolutional Neural Networks
for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [ResNet 101](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet101) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [ResNet 50](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [ResNet 50v1.5](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50v1_5) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference
• FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | -| [EfficientNet](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Classification/ConvNets/efficientnet) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/pdf/1905.11946.pdf) | • Automatic mixed precision
• Horovod Multi-GPU training (NCCL)
• Multi-node training on a Pyxis/Enroot Slurm cluster
• XLA | [NVIDIA](https://github.com/NVIDIA) | - -### Object Detection - -| Model | Paper | Features | Maintainer | -|-------|-------|----------|------------| -| [R-FCN](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/rfcn) | [R-FCN: Object Detection
via Region-based Fully Convolutional Networks](https://arxiv.org/pdf/1605.06409) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [SSD-MobileNet](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-mobilenet) | [MobileNets: Efficient Convolutional Neural Networks
for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | -| [SSD-ResNet34](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-resnet34) | [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325) | • Int8 Inference
• FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | - -### Segmentation - -| Model | Paper | Features | Maintainer | -|-------|-------|----------|------------| -| [Mask R-CNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) | • Automatic Mixed Precision
• Multi-GPU training support with Horovod
• TensorRT | [NVIDIA](https://github.com/NVIDIA) | -| [U-Net Medical Image Segmentation](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) | • Automatic Mixed Precision
• Multi-GPU training support with Horovod
• TensorRT | [NVIDIA](https://github.com/NVIDIA) | - -## Natural Language Processing - -| Model | Paper | Features | Maintainer | -|-------|-------|----------|------------| -| [BERT](https://github.com/IntelAI/models/tree/master/benchmarks/language_modeling/tensorflow/bert_large) | [BERT: Pre-training of Deep Bidirectional Transformers
for Language Understanding](https://arxiv.org/pdf/1810.04805) | • FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | -| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/BERT) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/pdf/1810.04805) | • Horovod Multi-GPU
• Multi-node with Horovod and Pyxis/Enroot Slurm cluster
• XLA
• Automatic mixed precision
• LAMB | [NVIDIA](https://github.com/NVIDIA) | -| [ELECTRA](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/ELECTRA) | [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/forum?id=r1xMH1BtvB) | • Automatic Mixed Precision
• Multi-GPU training support with Horovod
• Multi-node training on a Pyxis/Enroot Slurm cluster | [NVIDIA](https://github.com/NVIDIA) | -| [GNMT](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/mlperf_gnmt) | [Google’s Neural Machine Translation System:
Bridging the Gap between Human and Machine Translation](https://arxiv.org/pdf/1609.08144) | • FP32 Inference | [Intel](https://github.com/IntelAI) | -| [Transformer-LT (Official)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_lt_official) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Inference | [Intel](https://github.com/IntelAI) | -| [Transformer-LT (MLPerf)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_mlperf) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Training | [Intel](https://github.com/IntelAI) | - -## Recommendation Systems - -| Model | Paper | Features | Maintainer | -|-------|-------|----------|------------| -| [Wide & Deep](https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | -| [Wide & Deep](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/WideAndDeep) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • Automatic mixed precision
• Multi-GPU training support with Horovod
• XLA | [NVIDIA](https://github.com/NVIDIA) | -| [DLRM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/DLRM) | [Deep Learning Recommendation Model for Personalization and Recommendation Systems](https://arxiv.org/pdf/1906.00091.pdf) | • Automatic Mixed Precision
• Hybrid-parallel multi-GPU training using Horovod all-to-all
• Multi-node training on Pyxis/Enroot Slurm clusters
• XLA
• Criteo dataset preprocessing with Spark on GPU | [NVIDIA](https://github.com/NVIDIA) | - -## Contributions - -If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute). diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/configs/ops_info.json b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/configs/ops_info.json deleted file mode 100644 index 5dc3564522f36d625db1cc85f8cb1fd72967bbe6..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/configs/ops_info.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "black-list": { - "to-add": [ - "Cast", - "SoftmaxV2" - ] - } -} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/modelzoo_level.txt b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/modelzoo_level.txt deleted file mode 100644 index 31529da2e68f25b61e2a3e698a07537281443c03..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/modelzoo_level.txt +++ /dev/null @@ -1,3 +0,0 @@ -FuncStatus:OK -PerfStatus:OK -PrecisionStatus:OK \ No newline at end of file diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/LICENSE b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/LICENSE deleted file mode 100644 index d3da228420e973edaf4123d5eeb42210f4450b0c..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/LICENSE +++ /dev/null @@ -1,203 +0,0 @@ -Copyright 2015 The TensorFlow Authors. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2015, The TensorFlow Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/README-TPU.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/README-TPU.md deleted file mode 100644 index 28a5a0a73d210e9fe6e00db38d0e911e3d771ddf..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/README-TPU.md +++ /dev/null @@ -1,29 +0,0 @@ -# Officially Supported TensorFlow 2.1+ Models on Cloud TPU - -## Natural Language Processing - -* [bert](nlp/bert): A powerful pre-trained language representation model: - BERT, which stands for Bidirectional Encoder Representations from - Transformers. - [BERT FineTuning with Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/bert-2.x) provides step-by-step instructions on Cloud TPU training. See the [BERT MNLI Tensorboard.dev metrics](https://tensorboard.dev/experiment/LijZ1IrERxKALQfr76gndA) for the MNLI fine-tuning task.
-* [transformer](nlp/transformer): A transformer model to translate the WMT - English to German dataset. - See [Training transformer on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/transformer-2.x) for step-by-step instructions on Cloud TPU training. - -## Computer Vision - -* [efficientnet](vision/image_classification): A family of convolutional - neural networks that scale by balancing network depth, width, and - resolution and can be used to classify ImageNet's dataset of 1000 classes. - See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/KnaWjrq5TXGfv0NW5m7rpg/#scalars). -* [mnist](vision/image_classification): A basic model to classify digits - from the MNIST dataset. See the [Running MNIST on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/mnist-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/mIah5lppTASvrHqWrdr6NA). -* [mask-rcnn](vision/detection): An object detection and instance segmentation model. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/LH7k0fMsRwqUAcE09o9kPA). -* [resnet](vision/image_classification): A deep residual network that can - be used to classify ImageNet's dataset of 1000 classes. - See the [Training ResNet on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/resnet-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/CxlDK8YMRrSpYEGtBRpOhg). -* [retinanet](vision/detection): A fast and powerful object detector. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/b8NRnWU3TqG6Rw0UxueU6Q). -* [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA). - -## Recommendation -* [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ). diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/README.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/README.md deleted file mode 100644 index c53decf083e302896fc4a7a92525cb2128ef6352..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/README.md +++ /dev/null @@ -1,177 +0,0 @@ -![Logo](https://storage.googleapis.com/model_garden_artifacts/TF_Model_Garden.png) - -# TensorFlow Official Models - -The TensorFlow official models are a collection of models -that use TensorFlow’s high-level APIs. -They are intended to be well-maintained, tested, and kept up to date -with the latest TensorFlow API. - -They should also be reasonably optimized for fast performance while still -being easy to read. -These models are used as end-to-end tests, ensuring that the models run -with the same or improved speed and performance with each new TensorFlow build. - -## More models to come! - -The team is actively developing new models. -In the near future, we will add: - -* State-of-the-art language understanding models. -* State-of-the-art image classification models. -* State-of-the-art object detection and instance segmentation models.
- -## Table of Contents - -- [Models and Implementations](#models-and-implementations) - * [Computer Vision](#computer-vision) - + [Image Classification](#image-classification) - + [Object Detection and Segmentation](#object-detection-and-segmentation) - * [Natural Language Processing](#natural-language-processing) - * [Recommendation](#recommendation) -- [How to get started with the official models](#how-to-get-started-with-the-official-models) - -## Models and Implementations - -### Computer Vision - -#### Image Classification - -| Model | Reference (Paper) | -|-------|-------------------| -| [MNIST](vision/image_classification) | A basic model to classify digits from the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) | -| [ResNet](vision/image_classification) | [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) | -| [ResNet-RS](vision/beta/MODEL_GARDEN.md) | [Revisiting ResNets: Improved Training and Scaling Strategies](https://arxiv.org/abs/2103.07579) | -| [EfficientNet](vision/image_classification) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) | - -#### Object Detection and Segmentation - -| Model | Reference (Paper) | -|-------|-------------------| -| [RetinaNet](vision/detection) | [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) | -| [Mask R-CNN](vision/detection) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) | -| [ShapeMask](vision/detection) | [ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors](https://arxiv.org/abs/1904.03239) | -| [SpineNet](vision/detection) | [SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization](https://arxiv.org/abs/1912.05027) | - -### Natural Language Processing - -| Model | Reference (Paper) | -|-------|-------------------| -| [ALBERT (A Lite BERT)](nlp/albert) | [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942) | -| [BERT (Bidirectional Encoder Representations from Transformers)](nlp/bert) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) | -| [NHNet (News Headline generation model)](nlp/nhnet) | [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386) | -| [Transformer](nlp/transformer) | [Attention Is All You Need](https://arxiv.org/abs/1706.03762) | -| [XLNet](nlp/xlnet) | [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) | -| [MobileBERT](nlp/projects/mobilebert) | [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) | - -### Recommendation - -| Model | Reference (Paper) | -|-------|-------------------| -| [NCF](recommendation) | [Neural Collaborative Filtering](https://arxiv.org/abs/1708.05031) | - -## How to get started with the official models - -* The models in the master branch are developed using TensorFlow 2, -and they target the TensorFlow [nightly binaries](https://github.com/tensorflow/tensorflow#installation) -built from the -[master branch of TensorFlow](https://github.com/tensorflow/tensorflow/tree/master). -* The stable versions targeting releases of TensorFlow are available -as tagged branches or [downloadable releases](https://github.com/tensorflow/models/releases). 
-* Model repository version numbers match the target TensorFlow release, -such that -[release v2.2.0](https://github.com/tensorflow/models/releases/tag/v2.2.0) -is compatible with -[TensorFlow v2.2.0](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0). - -Please follow the steps below before running models in this repository. - -### Requirements - -* The latest TensorFlow Model Garden release and TensorFlow 2 - * If you are on a version of TensorFlow earlier than 2.2, please -upgrade your TensorFlow to [the latest TensorFlow 2](https://www.tensorflow.org/install/). - -```shell -pip3 install tf-nightly -``` - -### Installation - -#### Method 1: Install the TensorFlow Model Garden pip package - -**tf-models-official** is the stable Model Garden package. -pip will install all models and dependencies automatically. - -```shell -pip install tf-models-official -``` - -If you are using nlp packages, please also install **tensorflow-text**: - -```shell -pip install tensorflow-text -``` - -Please check out our [example](colab/fine_tuning_bert.ipynb) -to learn how to use a PIP package. - -Note that **tf-models-official** may not include the latest changes in this -GitHub repo. To include the latest changes, you may install **tf-models-nightly**, -which is the nightly Model Garden package created daily automatically. - -```shell -pip install tf-models-nightly -``` - -#### Method 2: Clone the source - -1. Clone the GitHub repository: - -```shell -git clone https://github.com/tensorflow/models.git -``` - -2. Add the top-level ***/models*** folder to the Python path. - -```shell -export PYTHONPATH=$PYTHONPATH:/path/to/models -``` - -If you are using a Colab notebook, please set the Python path with os.environ. - -```python -import os -os.environ['PYTHONPATH'] += ":/path/to/models" -``` - -3. Install other dependencies - -```shell -pip3 install --user -r official/requirements.txt -``` - -Finally, if you are using nlp packages, please also install -**tensorflow-text-nightly**: - -```shell -pip3 install tensorflow-text-nightly -``` - -## Contributions - -If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute). - -## Citing TF Official Model Garden - -To cite this repository: - -``` -@software{tfmodels2020github, - author = {Chen Chen and Xianzhi Du and Le Hou and Jaeyoun Kim and Jing Li and - Yeqing Li and Abdullah Rashwan and Fan Yang and Hongkun Yu}, - title = {TensorFlow Official Model Garden}, - url = {https://github.com/tensorflow/models/tree/master/official}, - year = {2020}, -} -``` diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/__init__.py deleted file mode 100644 index 9772d6bd74cf0348a137ea4bce7fe8bd29ac9ca1..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/decoding_api_in_tf_nlp.ipynb b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/decoding_api_in_tf_nlp.ipynb deleted file mode 100644 index 726b382e228265fa1e19c2af3150e7cc32a0ec56..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/decoding_api_in_tf_nlp.ipynb +++ /dev/null @@ -1,492 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "vXLA5InzXydn" - }, - "source": [ - "##### Copyright 2021 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "RuRlpLL-X0R_" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fsACVQpVSifi" - }, - "source": [ - "### Install the TensorFlow Model Garden pip package\n", - "\n", - "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` GitHub repo. To include the latest changes, you may install `tf-models-nightly`,\n", - "which is the nightly Model Garden package created daily automatically.\n", - "* pip will install all models and dependencies automatically."
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hYEwGTeCXnnX" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/tutorials/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2j-xhrsVQOQT" - }, - "outputs": [], - "source": [ - "pip install tf-models-nightly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BjP7zwxmskpY" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from official import nlp\n", - "from official.nlp.modeling.ops import sampling_module\n", - "from official.nlp.modeling.ops import beam_search" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0AWgyo-IQ5sP" - }, - "source": [ - "# Decoding API\n", - "This API provides an interface to experiment with different decoding strategies used for auto-regressive models.\n", - "\n", - "1. The following sampling strategies are provided in sampling_module.py, which inherits from the base Decoding class:\n", - " * [top_p](https://arxiv.org/abs/1904.09751) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L65) \n", - "\n", - " This implementation chooses the most probable logits with cumulative probability up to top_p.\n", - "\n", - " * [top_k](https://arxiv.org/pdf/1805.04833.pdf) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L48)\n", - "\n", - " At each timestep, this implementation samples from the top-k logits based on their probability distribution.\n", - "\n", - " * Greedy : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L26)\n", - "\n", - " This implementation returns the top logits based on probabilities.\n", - "\n", - "2. Beam search is provided in beam_search.py. [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search.py)\n", - "\n", - " This implementation reduces the risk of missing hidden high-probability logits by keeping the most likely num_beams of logits at each time step and eventually choosing the logits that have the overall highest probability."
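Before these strategies are wired into `SamplingModule` below, a minimal self-contained sketch may help build intuition for what each one does to a next-token distribution. This is plain TensorFlow on toy logits, not the `sampling_module` implementation; the helper names `apply_temperature`, `top_k_filter`, and `top_p_filter` are invented for illustration.

```python
import tensorflow as tf

logits = tf.constant([2.0, 1.0, 0.5, -1.0])  # toy next-token logits over a 4-token vocab

def apply_temperature(logits, temperature):
    # temperature < 1 sharpens the distribution; temperature > 1 flattens it.
    return logits / temperature

def top_k_filter(logits, k):
    # Keep the k largest logits; set the rest to -inf so softmax gives them zero mass.
    kth_largest = tf.math.top_k(logits, k=k).values[-1]
    neg_inf = tf.fill(tf.shape(logits), float('-inf'))
    return tf.where(logits < kth_largest, neg_inf, logits)

def top_p_filter(logits, p):
    # Keep the smallest set of tokens whose cumulative probability exceeds p.
    order = tf.argsort(logits, direction='DESCENDING')
    sorted_logits = tf.gather(logits, order)
    probs = tf.nn.softmax(sorted_logits)
    keep = tf.cumsum(probs) - probs < p  # cumulative mass *before* each token
    neg_inf = tf.fill(tf.shape(logits), float('-inf'))
    filtered = tf.where(keep, sorted_logits, neg_inf)
    return tf.gather(filtered, tf.argsort(order))  # undo the descending sort

print(tf.nn.softmax(apply_temperature(logits, 0.5)).numpy())  # sharper than softmax(logits)
print(tf.nn.softmax(top_k_filter(logits, k=2)).numpy())       # mass only on the top 2 ids
print(tf.nn.softmax(top_p_filter(logits, p=0.8)).numpy())     # smallest set covering 80%
```

A sampler would then draw the next id from the filtered logits (for example with `tf.random.categorical` on a batched version), which is the role `SamplingModule` plays in the cells that follow.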
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfOj7oaBRQnS" - }, - "source": [ - "## Initialize Sampling Module in TF-NLP.\n", - "\n", - "\n", - "\u003e **symbols_to_logits_fn** : This is a closure implemented by the users of the API. The input to this closure will be \n", - "```\n", - "Args:\n", - " 1] ids [batch_size, .. (index + 1 or 1 if padded_decode is True)],\n", - " 2] index [scalar] : current decoded step,\n", - " 3] cache [nested dictionary of tensors].\n", - "Returns:\n", - " 1] tensor for next-step logits [batch_size, vocab]\n", - " 2] the updated_cache [nested dictionary of tensors].\n", - "```\n", - "This closure calls the model to predict the logits for the 'index+1' step. The cache is used for faster decoding.\n", - "Here is a [reference](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search_test.py#L88) implementation for the above closure.\n", - "\n", - "\n", - "\u003e **length_normalization_fn** : Closure for returning length normalization parameter.\n", - "```\n", - "Args: \n", - " 1] length : scalar for decoded step index.\n", - " 2] dtype : data-type of output tensor\n", - "Returns:\n", - " 1] value of length normalization factor.\n", - "Example :\n", - " def _length_norm(length, dtype):\n", - " return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)\n", - "```\n", - "\n", - "\u003e **vocab_size** : Output vocabulary size.\n", - "\n", - "\u003e **max_decode_length** : Scalar for total number of decoding steps.\n", - "\n", - "\u003e **eos_id** : Decoding will stop if all output decoded ids in the batch have this ID.\n", - "\n", - "\u003e **padded_decode** : Set this to True if running on TPU. Tensors are padded to max_decoding_length if this is True.\n", - "\n", - "\u003e **top_k** : top_k is enabled if this value is \u003e 1.\n", - "\n", - "\u003e **top_p** : top_p is enabled if this value is \u003e 0 and \u003c 1.0\n", - "\n", - "\u003e **sampling_temperature** : This is used to re-estimate the softmax output. Temperature skews the distribution towards high probability tokens and lowers the mass in tail distribution. Value has to be positive. 
Low temperature is equivalent to greedy and makes the distribution sharper, while high temperature makes it more flat.\n", - "\n", - "\u003e **enable_greedy** : By default, this is true and greedy decoding is enabled.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lV1RRp6ihnGX" - }, - "source": [ - "# Initialize the Model Hyper-parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eTsGp2gaKLdE" - }, - "outputs": [], - "source": [ - "params = {}\n", - "params['num_heads'] = 2\n", - "params['num_layers'] = 2\n", - "params['batch_size'] = 2\n", - "params['n_dims'] = 256\n", - "params['max_decode_length'] = 4" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UGvmd0_dRFYI" - }, - "source": [ - "## What is a Cache?\n", - "In auto-regressive architectures like Transformer based [Encoder-Decoder](https://arxiv.org/abs/1706.03762) models, \n", - "Cache is used for fast sequential decoding.\n", - "It is a nested dictionary storing pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention blocks) for every layer.\n", - "\n", - "```\n", - "{\n", - " 'layer_%d' % layer: {\n", - " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32),\n", - " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32)\n", - " } for layer in range(params['num_layers']),\n", - " 'model_specific_item' : Model specific tensor shape,\n", - "}\n", - "\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CYXkoplAij01" - }, - "source": [ - "# Initialize cache. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "D6kfZOOKgkm1" - }, - "outputs": [], - "source": [ - "cache = {\n", - " 'layer_%d' % layer: {\n", - " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32),\n", - " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32)\n", - " } for layer in range(params['num_layers'])\n", - " }\n", - "print(\"cache key shape for layer 1 :\", cache['layer_1']['k'].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nNY3Xn8SiblP" - }, - "source": [ - "# Define closure for length normalization. **optional.**\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "T92ccAzlnGqh" - }, - "outputs": [], - "source": [ - "def length_norm(length, dtype):\n", - " \"\"\"Return length normalization factor.\"\"\"\n", - " return tf.pow(((5. 
+ tf.cast(length, dtype)) / 6.), 0.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "syl7I5nURPgW" - }, - "source": [ - "# Create model_fn\n", - " In practice, this will be replaced by an actual model implementation such as [here](https://github.com/tensorflow/models/blob/master/official/nlp/transformer/transformer.py#L236)\n", - "```\n", - "Args:\n", - "i : Step that is being decoded.\n", - "Returns:\n", - " logit probabilities of size [batch_size, 1, vocab_size]\n", - "```\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AhzSkRisRdB6" - }, - "outputs": [], - "source": [ - "probabilities = tf.constant([[[0.3, 0.4, 0.3], [0.3, 0.3, 0.4],\n", - " [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],\n", - " [[0.2, 0.5, 0.3], [0.2, 0.7, 0.1],\n", - " [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]])\n", - "def model_fn(i):\n", - " return probabilities[:, i, :]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DBMUkaVmVZBg" - }, - "source": [ - "# Initialize symbols_to_logits_fn\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FAJ4CpbfVdjr" - }, - "outputs": [], - "source": [ - "def _symbols_to_logits_fn():\n", - " \"\"\"Calculates logits of the next tokens.\"\"\"\n", - " def symbols_to_logits_fn(ids, i, temp_cache):\n", - " del ids\n", - " logits = tf.cast(tf.math.log(model_fn(i)), tf.float32)\n", - " return logits, temp_cache\n", - " return symbols_to_logits_fn" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R_tV3jyWVL47" - }, - "source": [ - "# Greedy \n", - "Greedy decoding selects the token id with the highest probability as its next id: $id_t = argmax_{w}P(id | id_{1:t-1})$ at each timestep $t$. The following sketch shows greedy decoding. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aGt9idSkVQEJ" - }, - "outputs": [], - "source": [ - "greedy_obj = sampling_module.SamplingModule(\n", - " length_normalization_fn=None,\n", - " dtype=tf.float32,\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " vocab_size=3,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " padded_decode=False)\n", - "ids, _ = greedy_obj.generate(\n", - " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", - "print(\"Greedy Decoded Ids:\", ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s4pTTsQXVz5O" - }, - "source": [ - "# top_k sampling\n", - "In *Top-K* sampling, the *K* most likely next token ids are filtered and the probability mass is redistributed among only those *K* ids. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pCLWIn6GV5_G" - }, - "outputs": [], - "source": [ - "top_k_obj = sampling_module.SamplingModule(\n", - " length_normalization_fn=length_norm,\n", - " dtype=tf.float32,\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " vocab_size=3,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " sample_temperature=tf.constant(1.0),\n", - " top_k=tf.constant(3),\n", - " padded_decode=False,\n", - " enable_greedy=False)\n", - "ids, _ = top_k_obj.generate(\n", - " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", - "print(\"top-k sampled Ids:\", ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Jp3G-eE_WI4Y" - }, - "source": [ - "# top_p sampling\n", - "Instead of sampling only from the most likely *K* token ids, in *Top-p* sampling chooses from the smallest possible set of ids whose cumulative probability exceeds the probability *p*." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rEGdIWcuWILO" - }, - "outputs": [], - "source": [ - "top_p_obj = sampling_module.SamplingModule(\n", - " length_normalization_fn=length_norm,\n", - " dtype=tf.float32,\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " vocab_size=3,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " sample_temperature=tf.constant(1.0),\n", - " top_p=tf.constant(0.9),\n", - " padded_decode=False,\n", - " enable_greedy=False)\n", - "ids, _ = top_p_obj.generate(\n", - " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", - "print(\"top-p sampled Ids:\", ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2hcuyJ2VWjDz" - }, - "source": [ - "# Beam search decoding\n", - "Beam search reduces the risk of missing hidden high probability token ids by keeping the most likely num_beams of hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cJ3WzvSrWmSA" - }, - "outputs": [], - "source": [ - "beam_size = 2\n", - "params['batch_size'] = 1\n", - "beam_cache = {\n", - " 'layer_%d' % layer: {\n", - " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32),\n", - " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32)\n", - " } for layer in range(params['num_layers'])\n", - " }\n", - "print(\"cache key shape for layer 1 :\", beam_cache['layer_1']['k'].shape)\n", - "ids, _ = beam_search.sequence_beam_search(\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " initial_ids=tf.constant([9], tf.int32),\n", - " initial_cache=beam_cache,\n", - " vocab_size=3,\n", - " beam_size=beam_size,\n", - " alpha=0.6,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " padded_decode=False,\n", - " dtype=tf.float32)\n", - "print(\"Beam search ids:\", ids)" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "decoding_api_in_tf_nlp.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/fine_tuning_bert.ipynb b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/fine_tuning_bert.ipynb deleted file mode 100644 index ad34d68d66770273a055cbaf345c52df734bfa79..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/fine_tuning_bert.ipynb +++ /dev/null @@ -1,1678 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "vXLA5InzXydn" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "RuRlpLL-X0R_" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1mLJmVotXs64" - }, - "source": [ - "# Fine-tuning a BERT model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hYEwGTeCXnnX" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/tutorials/fine_tune_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/fine_tuning_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YN2ACivEPxgD" - }, - "source": [ - "In this example, we will work through fine-tuning a BERT model using the tensorflow-models PIP package.\n", - "\n", - "The pretrained BERT model this tutorial is based on is also available on [TensorFlow Hub](https://tensorflow.org/hub); to see how to use it, refer to the [Hub Appendix](#hub_bert)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s2d9S2CSSO1z" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fsACVQpVSifi" - }, - "source": [ - "### Install the TensorFlow Model Garden pip package\n", - "\n", - "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` GitHub repo. To include the latest changes, you may install `tf-models-nightly`,\n", - "which is the nightly Model Garden package created daily automatically.\n", - "* pip will install all models and dependencies automatically."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NvNr2svBM-p3" - }, - "outputs": [], - "source": [ - "!pip install -q tf-models-official==2.4.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U-7qPCjWUAyy" - }, - "source": [ - "### Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lXsXev5MNr20" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import tensorflow as tf\n", - "\n", - "import tensorflow_hub as hub\n", - "import tensorflow_datasets as tfds\n", - "tfds.disable_progress_bar()\n", - "\n", - "from official.modeling import tf_utils\n", - "from official import nlp\n", - "from official.nlp import bert\n", - "\n", - "# Load the required submodules\n", - "import official.nlp.optimization\n", - "import official.nlp.bert.bert_models\n", - "import official.nlp.bert.configs\n", - "import official.nlp.bert.run_classifier\n", - "import official.nlp.bert.tokenization\n", - "import official.nlp.data.classifier_data_lib\n", - "import official.nlp.modeling.losses\n", - "import official.nlp.modeling.models\n", - "import official.nlp.modeling.networks\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mbanlzTvJBsz" - }, - "source": [ - "### Resources" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PpW0x8TpR8DT" - }, - "source": [ - "This directory contains the configuration, vocabulary, and a pre-trained checkpoint used in this tutorial:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vzRHOLciR8eq" - }, - "outputs": [], - "source": [ - "gs_folder_bert = \"gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12\"\n", - "tf.io.gfile.listdir(gs_folder_bert)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9uFskufsR2LT" - }, - "source": [ - "You can get a pre-trained BERT encoder from [TensorFlow Hub](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e0dAkUttJAzj" - }, - "outputs": [], - "source": [ - "hub_url_bert = \"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qv6abtRvH4xO" - }, - "source": [ - "## The data\n", - "For this example we used the [GLUE MRPC dataset from TFDS](https://www.tensorflow.org/datasets/catalog/glue#gluemrpc).\n", - "\n", - "This dataset is not set up so that it can be directly fed into the BERT model, so this section also handles the necessary preprocessing." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "28DvUhC1YUiB" - }, - "source": [ - "### Get the dataset from TensorFlow Datasets\n", - "\n", - "The Microsoft Research Paraphrase Corpus (Dolan \u0026 Brockett, 2005) is a corpus of sentence pairs automatically extracted from online news sources, with human annotations for whether the sentences in the pair are semantically equivalent.\n", - "\n", - "* Number of labels: 2.\n", - "* Size of training dataset: 3668.\n", - "* Size of evaluation dataset: 408.\n", - "* Maximum sequence length of training and evaluation dataset: 128.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ijikx5OsH9AT" - }, - "outputs": [], - "source": [ - "glue, info = tfds.load('glue/mrpc', with_info=True,\n", - " # It's small, load the whole dataset\n", - " batch_size=-1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xf9zz4vLYXjr" - }, - "outputs": [], - "source": [ - "list(glue.keys())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZgBg2r2nYT-K" - }, - "source": [ - "The `info` object describes the dataset and its features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IQrHxv7W7jH5" - }, - "outputs": [], - "source": [ - "info.features" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vhsVWYNxazz5" - }, - "source": [ - "The two classes are:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n0gfc_VTayfQ" - }, - "outputs": [], - "source": [ - "info.features['label'].names" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "38zJcap6xkbC" - }, - "source": [ - "Here is one example from the training set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xON_i6SkwApW" - }, - "outputs": [], - "source": [ - "glue_train = glue['train']\n", - "\n", - "for key, value in glue_train.items():\n", - " print(f\"{key:9s}: {value[0].numpy()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9fbTyfJpNr7x" - }, - "source": [ - "### The BERT tokenizer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wqeN54S61ZKQ" - }, - "source": [ - "To fine-tune a pre-trained model you need to be sure that you're using exactly the same tokenization, vocabulary, and index mapping as you used during training.\n", - "\n", - "The BERT tokenizer used in this tutorial is written in pure Python (it's not built out of TensorFlow ops). 
So you can't just plug it into your model as a `keras.layer` like you can with `preprocessing.TextVectorization`.\n", - "\n", - "The following code rebuilds the tokenizer that was used by the base model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "idxyhmrCQcw5" - }, - "outputs": [], - "source": [ - "# Set up tokenizer to generate Tensorflow dataset\n", - "tokenizer = bert.tokenization.FullTokenizer(\n", - " vocab_file=os.path.join(gs_folder_bert, \"vocab.txt\"),\n", - " do_lower_case=True)\n", - "\n", - "print(\"Vocab size:\", len(tokenizer.vocab))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zYHDSquU2lDU" - }, - "source": [ - "Tokenize a sentence:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L_OfOYPg853R" - }, - "outputs": [], - "source": [ - "tokens = tokenizer.tokenize(\"Hello TensorFlow!\")\n", - "print(tokens)\n", - "ids = tokenizer.convert_tokens_to_ids(tokens)\n", - "print(ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kkAXLtuyWWDI" - }, - "source": [ - "### Preprocess the data\n", - "\n", - "This section manually preprocesses the dataset into the format expected by the model.\n", - "\n", - "This dataset is small, so preprocessing can be done quickly and easily in memory. For larger datasets the `tf_models` library includes some tools for preprocessing and re-serializing a dataset. See [Appendix: Re-encoding a large dataset](#re_encoding_tools) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "62UTWLQd9-LB" - }, - "source": [ - "#### Encode the sentences\n", - "\n", - "The model expects its two input sentences to be concatenated together. This input is expected to start with a `[CLS]` \"This is a classification problem\" token, and each sentence should end with a `[SEP]` \"Separator\" token:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bdL-dRNRBRJT" - }, - "outputs": [], - "source": [ - "tokenizer.convert_tokens_to_ids(['[CLS]', '[SEP]'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UrPktnqpwqie" - }, - "source": [ - "Start by encoding all the sentences while appending a `[SEP]` token, and packing them into ragged tensors:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BR7BmtU498Bh" - }, - "outputs": [], - "source": [ - "def encode_sentence(s):\n", - " tokens = list(tokenizer.tokenize(s.numpy()))\n", - " tokens.append('[SEP]')\n", - " return tokenizer.convert_tokens_to_ids(tokens)\n", - "\n", - "sentence1 = tf.ragged.constant([\n", - " encode_sentence(s) for s in glue_train[\"sentence1\"]])\n", - "sentence2 = tf.ragged.constant([\n", - " encode_sentence(s) for s in glue_train[\"sentence2\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "has42aUdfky-" - }, - "outputs": [], - "source": [ - "print(\"Sentence1 shape:\", sentence1.shape.as_list())\n", - "print(\"Sentence2 shape:\", sentence2.shape.as_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MU9lTWy_xXbb" - }, - "source": [ - "Now prepend a `[CLS]` token, and concatenate the ragged tensors to form a single `input_word_ids` tensor for each example. `RaggedTensor.to_tensor()` zero-pads to the longest sequence."
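As a toy illustration of that zero-padding behavior (not a cell from the original notebook; the token ids here are made up):

```python
import tensorflow as tf

# Two "sentences" of different lengths, as a RaggedTensor of token ids.
rt = tf.ragged.constant([[101, 7592, 102],
                         [101, 102]])
print(rt.to_tensor())
# [[ 101 7592  102]
#  [ 101  102    0]]  <- the shorter row is zero-padded to the longest sequence
```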
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "USD8uihw-g4J" - }, - "outputs": [], - "source": [ - "cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*sentence1.shape[0]\n", - "input_word_ids = tf.concat([cls, sentence1, sentence2], axis=-1)\n", - "_ = plt.pcolormesh(input_word_ids.to_tensor())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xmNv4l4k-dBZ" - }, - "source": [ - "#### Mask and input type" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DIWjNIKq-ldh" - }, - "source": [ - "The model expects two additional inputs:\n", - "\n", - "* The input mask\n", - "* The input type" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ulNZ4U96-8JZ" - }, - "source": [ - "The mask allows the model to cleanly differentiate between the content and the padding. The mask has the same shape as the `input_word_ids`, and contains a `1` anywhere the `input_word_ids` is not padding." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EezOO9qj91kP" - }, - "outputs": [], - "source": [ - "input_mask = tf.ones_like(input_word_ids).to_tensor()\n", - "\n", - "plt.pcolormesh(input_mask)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rxLenwAvCkBf" - }, - "source": [ - "The \"input type\" also has the same shape, but inside the non-padded region, contains a `0` or a `1` indicating which sentence the token is a part of. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2CetH_5C9P2m" - }, - "outputs": [], - "source": [ - "type_cls = tf.zeros_like(cls)\n", - "type_s1 = tf.zeros_like(sentence1)\n", - "type_s2 = tf.ones_like(sentence2)\n", - "input_type_ids = tf.concat([type_cls, type_s1, type_s2], axis=-1).to_tensor()\n", - "\n", - "plt.pcolormesh(input_type_ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P5UBnCn8Ii6s" - }, - "source": [ - "#### Put it all together\n", - "\n", - "Collect the above text parsing code into a single function, and apply it to each split of the `glue/mrpc` dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sDGiWYPLEd5a" - }, - "outputs": [], - "source": [ - "def encode_sentence(s, tokenizer):\n", - " tokens = list(tokenizer.tokenize(s))\n", - " tokens.append('[SEP]')\n", - " return tokenizer.convert_tokens_to_ids(tokens)\n", - "\n", - "def bert_encode(glue_dict, tokenizer):\n", - " num_examples = len(glue_dict[\"sentence1\"])\n", - " \n", - " sentence1 = tf.ragged.constant([\n", - " encode_sentence(s, tokenizer)\n", - " for s in np.array(glue_dict[\"sentence1\"])])\n", - " sentence2 = tf.ragged.constant([\n", - " encode_sentence(s, tokenizer)\n", - " for s in np.array(glue_dict[\"sentence2\"])])\n", - "\n", - " cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*sentence1.shape[0]\n", - " input_word_ids = tf.concat([cls, sentence1, sentence2], axis=-1)\n", - "\n", - " input_mask = tf.ones_like(input_word_ids).to_tensor()\n", - "\n", - " type_cls = tf.zeros_like(cls)\n", - " type_s1 = tf.zeros_like(sentence1)\n", - " type_s2 = tf.ones_like(sentence2)\n", - " input_type_ids = tf.concat(\n", - " [type_cls, type_s1, type_s2], axis=-1).to_tensor()\n", - "\n", - " inputs = {\n", - " 'input_word_ids': input_word_ids.to_tensor(),\n", - " 'input_mask': input_mask,\n", - " 'input_type_ids': input_type_ids}\n", - "\n", - " return inputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yuLKxf6zHxw-" - }, - "outputs": [], - "source": [ - "glue_train = bert_encode(glue['train'], tokenizer)\n", - "glue_train_labels = glue['train']['label']\n", - "\n", - "glue_validation = bert_encode(glue['validation'], tokenizer)\n", - "glue_validation_labels = glue['validation']['label']\n", - "\n", - "glue_test = bert_encode(glue['test'], tokenizer)\n", - "glue_test_labels = glue['test']['label']" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7FC5aLVxKVKK" - }, - "source": [ - "Each subset of the data has been converted to a dictionary of features, and a set of labels. 
Each feature in the input dictionary has the same shape, and the number of labels should match:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jyjTdGpFhO_1" - }, - "outputs": [], - "source": [ - "for key, value in glue_train.items():\n", - " print(f'{key:15s} shape: {value.shape}')\n", - "\n", - "print(f'glue_train_labels shape: {glue_train_labels.shape}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FSwymsbkbLDA" - }, - "source": [ - "## The model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Efrj3Cn1kLAp" - }, - "source": [ - "### Build the model\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xxpOY5r2Ayq6" - }, - "source": [ - "The first step is to download the configuration for the pre-trained model.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ujapVfZ_AKW7" - }, - "outputs": [], - "source": [ - "import json\n", - "\n", - "bert_config_file = os.path.join(gs_folder_bert, \"bert_config.json\")\n", - "config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())\n", - "\n", - "bert_config = bert.configs.BertConfig.from_dict(config_dict)\n", - "\n", - "config_dict" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "96ldxDSwkVkj" - }, - "source": [ - "The `config` defines the core BERT model, which is a Keras model that predicts the outputs of `num_classes` classes from inputs with maximum sequence length `max_seq_length`.\n", - "\n", - "The `classifier_model` function below returns both the encoder and the classifier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cH682__U0FBv" - }, - "outputs": [], - "source": [ - "bert_classifier, bert_encoder = bert.bert_models.classifier_model(\n", - " bert_config, num_labels=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XqKp3-5GIZlw" - }, - "source": [ - "The classifier has three inputs and one output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bAQblMIjwkvx" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_classifier, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sFmVG4SKZAw8" - }, - "source": [ - "Run it on a test batch of 10 examples from the training set. The output is the logits for the two classes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VTjgPbp4ZDKo" - }, - "outputs": [], - "source": [ - "glue_batch = {key: val[:10] for key, val in glue_train.items()}\n", - "\n", - "bert_classifier(\n", - " glue_batch, training=True\n", - ").numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q0NTdwZsQK8n" - }, - "source": [ - "The `TransformerEncoder` in the center of the classifier above **is** the `bert_encoder`.\n", - "\n", - "Inspecting the encoder, we see its stack of `Transformer` layers connected to those same three inputs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8L__-erBwLIQ" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_encoder, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mKAvkQc3heSy" - }, - "source": [ - "### Restore the encoder weights\n", - "\n", - "When built, the encoder is randomly initialized. 
Restore the encoder's weights from the checkpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "97Ll2Gichd_Y" - }, - "outputs": [], - "source": [ - "checkpoint = tf.train.Checkpoint(encoder=bert_encoder)\n", - "checkpoint.read(\n", - " os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2oHOql35k3Dd" - }, - "source": [ - "Note: The pretrained `TransformerEncoder` is also available on [TensorFlow Hub](https://tensorflow.org/hub). See the [Hub appendix](#hub_bert) for details. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "115caFLMk-_l" - }, - "source": [ - "### Set up the optimizer\n", - "\n", - "BERT adopts the Adam optimizer with weight decay (aka \"[AdamW](https://arxiv.org/abs/1711.05101)\").\n", - "It also employs a learning rate schedule that first warms up from 0 and then decays to 0." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w8qXKRZuCwW4" - }, - "outputs": [], - "source": [ - "# Set up epochs and steps\n", - "epochs = 3\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "train_data_size = len(glue_train_labels)\n", - "steps_per_epoch = int(train_data_size / batch_size)\n", - "num_train_steps = steps_per_epoch * epochs\n", - "warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)\n", - "\n", - "# Creates an optimizer with a learning rate schedule\n", - "optimizer = nlp.optimization.create_optimizer(\n", - " 2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pXRGxiRNEHS2" - }, - "source": [ - "This returns an `AdamWeightDecay` optimizer with the learning rate schedule set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eQNA16bhDpky" - }, - "outputs": [], - "source": [ - "type(optimizer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xqu_K71fJQB8" - }, - "source": [ - "To see an example of how to customize the optimizer and its schedule, see the [Optimizer schedule appendix](#optimizer_schedule)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "78FEUOOEkoP0" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OTNcA0O0nSq9" - }, - "source": [ - "The metric is accuracy, and the loss is sparse categorical cross-entropy."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nzi8hjeTQTRs" - }, - "outputs": [], - "source": [ - "metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy', dtype=tf.float32)]\n", - "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - "bert_classifier.compile(\n", - " optimizer=optimizer,\n", - " loss=loss,\n", - " metrics=metrics)\n", - "\n", - "bert_classifier.fit(\n", - " glue_train, glue_train_labels,\n", - " validation_data=(glue_validation, glue_validation_labels),\n", - " batch_size=32,\n", - " epochs=epochs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IFtKFWbNKb0u" - }, - "source": [ - "Now run the fine-tuned model on a custom example to see that it works.\n", - "\n", - "Start by encoding some sentence pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9ZoUgDUNJPz3" - }, - "outputs": [], - "source": [ - "my_examples = bert_encode(\n", - " glue_dict = {\n", - " 'sentence1':[\n", - " 'The rain in Spain falls mainly on the plain.',\n", - " 'Look I fine tuned BERT.'],\n", - " 'sentence2':[\n", - " 'It mostly rains on the flat lands of Spain.',\n", - " 'Is it working? This does not match.']\n", - " },\n", - " tokenizer=tokenizer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7ynJibkBRTJF" - }, - "source": [ - "The model should report class `1` \"match\" for the first example and class `0` \"no-match\" for the second:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "umo0ttrgRYIM" - }, - "outputs": [], - "source": [ - "result = bert_classifier(my_examples, training=False)\n", - "\n", - "result = tf.argmax(result, axis=-1).numpy()\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "utGl0M3aZCE4" - }, - "outputs": [], - "source": [ - "np.array(info.features['label'].names)[result]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fVo_AnT0l26j" - }, - "source": [ - "### Save the model\n", - "\n", - "Often the goal of training a model is to _use_ it for something, so export the model and then restore it to be sure that it works." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Nl5x6nElZqkP" - }, - "outputs": [], - "source": [ - "export_dir='./saved_model'\n", - "tf.saved_model.save(bert_classifier, export_dir=export_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "id": "y_ACvKPsVUXC" - }, - "outputs": [], - "source": [ - "reloaded = tf.saved_model.load(export_dir)\n", - "reloaded_result = reloaded([my_examples['input_word_ids'],\n", - " my_examples['input_mask'],\n", - " my_examples['input_type_ids']], training=False)\n", - "\n", - "original_result = bert_classifier(my_examples, training=False)\n", - "\n", - "# The results are (nearly) identical:\n", - "print(original_result.numpy())\n", - "print()\n", - "print(reloaded_result.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eQceYqRFT_Eg" - }, - "source": [ - "## Appendix" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SaC1RlFawUpc" - }, - "source": [ - "\u003ca id=re_encoding_tools\u003e\u003c/a\u003e\n", - "### Re-encoding a large dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CwUdjFBkzUgh" - }, - "source": [ - "In this tutorial, you re-encoded the dataset in memory for clarity.\n", - "\n", - "This was only possible because `glue/mrpc` is a very small dataset. To deal with larger datasets, the `tf_models` library includes some tools for processing and re-encoding a dataset for efficient training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2UTQrkyOT5wD" - }, - "source": [ - "The first step is to describe which features of the dataset should be transformed:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XQeDFOzYR9Z9" - }, - "outputs": [], - "source": [ - "processor = nlp.data.classifier_data_lib.TfdsProcessor(\n", - " tfds_params=\"dataset=glue/mrpc,text_key=sentence1,text_b_key=sentence2\",\n", - " process_text_fn=bert.tokenization.convert_to_unicode)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XrFQbfErUWxa" - }, - "source": [ - "Then apply the transformation to generate new TFRecord files."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ymw7GOHpSHKU" - }, - "outputs": [], - "source": [ - "# Set up output paths for the training and evaluation TensorFlow datasets\n", - "train_data_output_path=\"./mrpc_train.tf_record\"\n", - "eval_data_output_path=\"./mrpc_eval.tf_record\"\n", - "\n", - "max_seq_length = 128\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "# Generate and save training data into a tf record file\n", - "input_meta_data = (\n", - " nlp.data.classifier_data_lib.generate_tf_record_from_data_file(\n", - " processor=processor,\n", - " data_dir=None, # It is `None` because data is from tfds, not local dir.\n", - " tokenizer=tokenizer,\n", - " train_data_output_path=train_data_output_path,\n", - " eval_data_output_path=eval_data_output_path,\n", - " max_seq_length=max_seq_length))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uX_Sp-wTUoRm" - }, - "source": [ - "Finally, create `tf.data` input pipelines from those TFRecord files:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rkHxIK57SQ_r" - }, - "outputs": [], - "source": [ - "training_dataset = bert.run_classifier.get_dataset_fn(\n", - " train_data_output_path,\n", - " max_seq_length,\n", - " batch_size,\n", - " is_training=True)()\n", - "\n", - "evaluation_dataset = bert.run_classifier.get_dataset_fn(\n", - " eval_data_output_path,\n", - " max_seq_length,\n", - " eval_batch_size,\n", - " is_training=False)()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "stbaVouogvzS" - }, - "source": [ - "The resulting `tf.data.Datasets` return `(features, labels)` pairs, as expected by `keras.Model.fit`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gwhrlQl4gxVF" - }, - "outputs": [], - "source": [ - "training_dataset.element_spec" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dbJ76vSJj77j" - }, - "source": [ - "#### Create tf.data.Dataset for training and evaluation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9J95LFRohiYw" - }, - "source": [ - "If you need to modify the data loading, here is some code to get you started:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gCvaLLAxPuMc" - }, - "outputs": [], - "source": [ - "def create_classifier_dataset(file_path, seq_length, batch_size, is_training):\n", - " \"\"\"Creates input dataset from (tf)records files for train/eval.\"\"\"\n", - " dataset = tf.data.TFRecordDataset(file_path)\n", - " if is_training:\n", - " dataset = dataset.shuffle(100)\n", - " dataset = dataset.repeat()\n", - "\n", - " def decode_record(record):\n", - " name_to_features = {\n", - " 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),\n", - " 'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),\n", - " 'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),\n", - " 'label_ids': tf.io.FixedLenFeature([], tf.int64),\n", - " }\n", - " return tf.io.parse_single_example(record, name_to_features)\n", - "\n", - " def _select_data_from_record(record):\n", - " x = {\n", - " 'input_word_ids': record['input_ids'],\n", - " 'input_mask': record['input_mask'],\n", - " 'input_type_ids': record['segment_ids']\n", - " }\n", - " y = record['label_ids']\n", - " return (x, y)\n", - "\n", - " dataset = dataset.map(decode_record,\n", - " num_parallel_calls=tf.data.experimental.AUTOTUNE)\n", - " dataset = dataset.map(\n", - " _select_data_from_record,\n", - " 
num_parallel_calls=tf.data.experimental.AUTOTUNE)\n", - " dataset = dataset.batch(batch_size, drop_remainder=is_training)\n", - " dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)\n", - " return dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rutkBadrhzdR" - }, - "outputs": [], - "source": [ - "# Set up batch sizes\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "# Return TensorFlow dataset\n", - "training_dataset = create_classifier_dataset(\n", - " train_data_output_path,\n", - " input_meta_data['max_seq_length'],\n", - " batch_size,\n", - " is_training=True)\n", - "\n", - "evaluation_dataset = create_classifier_dataset(\n", - " eval_data_output_path,\n", - " input_meta_data['max_seq_length'],\n", - " eval_batch_size,\n", - " is_training=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "59TVgt4Z7fuU" - }, - "outputs": [], - "source": [ - "training_dataset.element_spec" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QbklKt-w_CiI" - }, - "source": [ - "\u003ca id=\"hub_bert\"\u003e\u003c/a\u003e\n", - "\n", - "### TFModels BERT on TFHub\n", - "\n", - "You can get [the BERT model](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2) off the shelf from [TFHub](https://tensorflow.org/hub). It would not be hard to add a classification head on top of this `hub.KerasLayer`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GDWrHm0BGpbX" - }, - "outputs": [], - "source": [ - "# Note: 350MB download.\n", - "import tensorflow_hub as hub" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "Y29meH0qGq_5" - }, - "outputs": [], - "source": [ - "hub_model_name = \"bert_en_uncased_L-12_H-768_A-12\" #@param [\"bert_en_uncased_L-24_H-1024_A-16\", \"bert_en_wwm_cased_L-24_H-1024_A-16\", \"bert_en_uncased_L-12_H-768_A-12\", \"bert_en_wwm_uncased_L-24_H-1024_A-16\", \"bert_en_cased_L-24_H-1024_A-16\", \"bert_en_cased_L-12_H-768_A-12\", \"bert_zh_L-12_H-768_A-12\", \"bert_multi_cased_L-12_H-768_A-12\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lo6479At4sP1" - }, - "outputs": [], - "source": [ - "hub_encoder = hub.KerasLayer(f\"https://tfhub.dev/tensorflow/{hub_model_name}/3\",\n", - " trainable=True)\n", - "\n", - "print(f\"The Hub encoder has {len(hub_encoder.trainable_variables)} trainable variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iTzF574wivQv" - }, - "source": [ - "Test run it on a batch of data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XEcYrCR45Uwo" - }, - "outputs": [], - "source": [ - "result = hub_encoder(\n", - " inputs=dict(\n", - " input_word_ids=glue_train['input_word_ids'][:10],\n", - " input_mask=glue_train['input_mask'][:10],\n", - " input_type_ids=glue_train['input_type_ids'][:10],),\n", - " training=False,\n", - ")\n", - "\n", - "print(\"Pooled output shape:\", result['pooled_output'].shape)\n", - "print(\"Sequence output shape:\", result['sequence_output'].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cjojn8SmLSRI" - }, - "source": [ - "At this point it would be simple to add a classification head yourself.\n", - "\n", - "The `nlp.modeling.models.BertClassifier` class can also build a classifier onto the encoder from TensorFlow Hub:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "id": "9nTDaApyLR70" - }, - "outputs": [], - "source": [ - "hub_classifier = nlp.modeling.models.BertClassifier(\n", - " bert_encoder,\n", - " num_classes=2,\n", - " dropout_rate=0.1,\n", - " initializer=tf.keras.initializers.TruncatedNormal(\n", - " stddev=0.02))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xMJX3wV0_v7I" - }, - "source": [ - "The one downside to loading this model from TFHub is that the structure of internal keras layers is not restored. So it's more difficult to inspect or modify the model. The `BertEncoder` model is now a single layer:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pD71dnvhM2QS" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(hub_classifier, show_shapes=True, dpi=64)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nLZD-isBzNKi" - }, - "outputs": [], - "source": [ - "try:\n", - " tf.keras.utils.plot_model(hub_encoder, show_shapes=True, dpi=64)\n", - " assert False\n", - "except Exception as e:\n", - " print(f\"{type(e).__name__}: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZxSqH0dNAgXV" - }, - "source": [ - "\u003ca id=\"model_builder_functions\"\u003e\u003c/a\u003e\n", - "\n", - "### Low level model building\n", - "\n", - "If you need a more control over the construction of the model it's worth noting that the `classifier_model` function used earlier is really just a thin wrapper over the `nlp.modeling.networks.BertEncoder` and `nlp.modeling.models.BertClassifier` classes. Just remember that if you start modifying the architecture it may not be correct or possible to reload the pre-trained checkpoint so you'll need to retrain from scratch." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0cgABEwDj06P" - }, - "source": [ - "Build the encoder:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5r_yqhBFSVEM" - }, - "outputs": [], - "source": [ - "bert_encoder_config = config_dict.copy()\n", - "\n", - "# You need to rename a few fields to make this work:\n", - "bert_encoder_config['attention_dropout_rate'] = bert_encoder_config.pop('attention_probs_dropout_prob')\n", - "bert_encoder_config['activation'] = tf_utils.get_activation(bert_encoder_config.pop('hidden_act'))\n", - "bert_encoder_config['dropout_rate'] = bert_encoder_config.pop('hidden_dropout_prob')\n", - "bert_encoder_config['initializer'] = tf.keras.initializers.TruncatedNormal(\n", - " stddev=bert_encoder_config.pop('initializer_range'))\n", - "bert_encoder_config['max_sequence_length'] = bert_encoder_config.pop('max_position_embeddings')\n", - "bert_encoder_config['num_layers'] = bert_encoder_config.pop('num_hidden_layers')\n", - "\n", - "bert_encoder_config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rIO8MI7LLijh" - }, - "outputs": [], - "source": [ - "manual_encoder = nlp.modeling.networks.BertEncoder(**bert_encoder_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4a4tFSg9krRi" - }, - "source": [ - "Restore the weights:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "X6N9NEqfXJCx" - }, - "outputs": [], - "source": [ - "checkpoint = tf.train.Checkpoint(encoder=manual_encoder)\n", - "checkpoint.read(\n", - " os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1BPiPO4ykuwM" - }, - "source": 
[ - "Test run it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hlVdgJKmj389" - }, - "outputs": [], - "source": [ - "result = manual_encoder(my_examples, training=True)\n", - "\n", - "print(\"Sequence output shape:\", result[0].shape)\n", - "print(\"Pooled output shape:\", result[1].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nJMXvVgJkyBv" - }, - "source": [ - "Wrap it in a classifier:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tQX57GJ6wkAb" - }, - "outputs": [], - "source": [ - "manual_classifier = nlp.modeling.models.BertClassifier(\n", - " bert_encoder,\n", - " num_classes=2,\n", - " dropout_rate=bert_encoder_config['dropout_rate'],\n", - " initializer=bert_encoder_config['initializer'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kB-nBWhQk0dS" - }, - "outputs": [], - "source": [ - "manual_classifier(my_examples, training=True).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E6AJlOSyIO1L" - }, - "source": [ - "\u003ca id=\"optiizer_schedule\"\u003e\u003c/a\u003e\n", - "\n", - "### Optimizers and schedules\n", - "\n", - "The optimizer used to train the model was created using the `nlp.optimization.create_optimizer` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "28Dv3BPRlFTD" - }, - "outputs": [], - "source": [ - "optimizer = nlp.optimization.create_optimizer(\n", - " 2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LRjcHr0UlT8c" - }, - "source": [ - "That high level wrapper sets up the learning rate schedules and the optimizer.\n", - "\n", - "The base learning rate schedule used here is a linear decay to zero over the training run:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MHY8K6kDngQn" - }, - "outputs": [], - "source": [ - "epochs = 3\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "train_data_size = len(glue_train_labels)\n", - "steps_per_epoch = int(train_data_size / batch_size)\n", - "num_train_steps = steps_per_epoch * epochs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "id": "wKIcSprulu3P" - }, - "outputs": [], - "source": [ - "decay_schedule = tf.keras.optimizers.schedules.PolynomialDecay(\n", - " initial_learning_rate=2e-5,\n", - " decay_steps=num_train_steps,\n", - " end_learning_rate=0)\n", - "\n", - "plt.plot([decay_schedule(n) for n in range(num_train_steps)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IMTC_gfAl_PZ" - }, - "source": [ - "This, in turn is wrapped in a `WarmUp` schedule that linearly increases the learning rate to the target value over the first 10% of training:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YRt3VTmBmCBY" - }, - "outputs": [], - "source": [ - "warmup_steps = num_train_steps * 0.1\n", - "\n", - "warmup_schedule = nlp.optimization.WarmUp(\n", - " initial_learning_rate=2e-5,\n", - " decay_schedule_fn=decay_schedule,\n", - " warmup_steps=warmup_steps)\n", - "\n", - "# The warmup overshoots, because it warms up to the `initial_learning_rate`\n", - "# following the original implementation. 
You can set\n", - "# `initial_learning_rate=decay_schedule(warmup_steps)` if you don't like the\n", - "# overshoot.\n", - "plt.plot([warmup_schedule(n) for n in range(num_train_steps)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l8D9Lv3Bn740" - }, - "source": [ - "Then create the `nlp.optimization.AdamWeightDecay` using that schedule, configured for the BERT model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2Hf2rpRXk89N" - }, - "outputs": [], - "source": [ - "optimizer = nlp.optimization.AdamWeightDecay(\n", - " learning_rate=warmup_schedule,\n", - " weight_decay_rate=0.01,\n", - " epsilon=1e-6,\n", - " exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias'])" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "fine_tuning_bert.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/nlp/customize_encoder.ipynb b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/nlp/customize_encoder.ipynb deleted file mode 100644 index aeddb29f96352fbd4c8df3540e6bd4b8fe70bb8b..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/nlp/customize_encoder.ipynb +++ /dev/null @@ -1,575 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Customizing a Transformer Encoder", - "private_outputs": true, - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Bp8t2AI8i7uP" - }, - "source": [ - "##### Copyright 2020 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "metadata": { - "cellView": "form", - "id": "rxPj2Lsni9O4" - }, - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6xS-9i5DrRvO" - }, - "source": [ - "# Customizing a Transformer Encoder" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Mwb9uw1cDXsa" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iLrcV4IyrcGX" - }, - "source": [ - "## Learning objectives\n", - "\n", - "The [TensorFlow Models NLP library](https://github.com/tensorflow/models/tree/master/official/nlp/modeling) is a collection of tools for building and training modern high performance natural language models.\n", - "\n", - "The [TransformEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py) is the core of this library, and lots of new network architectures are proposed to improve the encoder. In this Colab notebook, we will learn how to customize the encoder to employ new network architectures." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YYxdyoWgsl8t" - }, - "source": [ - "## Install and import" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fEJSFutUsn_h" - }, - "source": [ - "### Install the TensorFlow Model Garden pip package\n", - "\n", - "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n", - "which is the nightly Model Garden package created daily automatically.\n", - "* `pip` will install all models and dependencies automatically." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "thsKZDjhswhR" - }, - "source": [ - "!pip install -q tf-models-official==2.4.0" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hpf7JPCVsqtv" - }, - "source": [ - "### Import Tensorflow and other libraries" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "my4dp-RMssQe" - }, - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "from official.modeling import activations\n", - "from official.nlp import modeling\n", - "from official.nlp.modeling import layers, losses, models, networks" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vjDmVsFfs85n" - }, - "source": [ - "## Canonical BERT encoder\n", - "\n", - "Before learning how to customize the encoder, let's firstly create a canonical BERT enoder and use it to instantiate a `BertClassifier` for classification task." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Oav8sbgstWc-" - }, - "source": [ - "cfg = {\n", - " \"vocab_size\": 100,\n", - " \"hidden_size\": 32,\n", - " \"num_layers\": 3,\n", - " \"num_attention_heads\": 4,\n", - " \"intermediate_size\": 64,\n", - " \"activation\": activations.gelu,\n", - " \"dropout_rate\": 0.1,\n", - " \"attention_dropout_rate\": 0.1,\n", - " \"max_sequence_length\": 16,\n", - " \"type_vocab_size\": 2,\n", - " \"initializer\": tf.keras.initializers.TruncatedNormal(stddev=0.02),\n", - "}\n", - "bert_encoder = modeling.networks.BertEncoder(**cfg)\n", - "\n", - "def build_classifier(bert_encoder):\n", - " return modeling.models.BertClassifier(bert_encoder, num_classes=2)\n", - "\n", - "canonical_classifier_model = build_classifier(bert_encoder)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qe2UWI6_tsHo" - }, - "source": [ - "`canonical_classifier_model` can be trained using the training data. For details about how to train the model, please see the colab [fine_tuning_bert.ipynb](https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb). 
We skip the code that trains the model here.\n", - "\n", - "After training, we can apply the model to do prediction.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "csED2d-Yt5h6" - }, - "source": [ - "def predict(model):\n", - " batch_size = 3\n", - " np.random.seed(0)\n", - " word_ids = np.random.randint(\n", - " cfg[\"vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n", - " mask = np.random.randint(2, size=(batch_size, cfg[\"max_sequence_length\"]))\n", - " type_ids = np.random.randint(\n", - " cfg[\"type_vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n", - " print(model([word_ids, mask, type_ids], training=False))\n", - "\n", - "predict(canonical_classifier_model)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PzKStEK9t_Pb" - }, - "source": [ - "## Customize BERT encoder\n", - "\n", - "One BERT encoder consists of an embedding network and multiple transformer blocks, and each transformer block contains an attention layer and a feedforward layer." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rmwQfhj6fmKz" - }, - "source": [ - "We provide easy ways to customize each of those components via (1)\n", - "[EncoderScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py) and (2) [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xsMgEVHAui11" - }, - "source": [ - "### Use EncoderScaffold\n", - "\n", - "`EncoderScaffold` allows users to provide a custom embedding subnetwork\n", - " (which will replace the standard embedding logic) and/or a custom hidden layer class (which will replace the `Transformer` instantiation in the encoder)." 
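To make those two customization points concrete, here is a hedged, illustrative sketch of how the scaffold composes them; the names and signatures below are simplified stand-ins, not the library's actual internals:

```python
# Illustrative pseudocode only -- simplified, not the real EncoderScaffold code.
def scaffold_forward(inputs, embedding_network, hidden_cls, hidden_cfg,
                     num_hidden_instances):
    # The (replaceable) embedding subnetwork turns the raw inputs into
    # embeddings plus an attention mask.
    embeddings, attention_mask = embedding_network(inputs)

    # The (replaceable) hidden layer class is instantiated once per layer
    # and stacked to form the transformer body.
    output = embeddings
    for _ in range(num_hidden_instances):
        output = hidden_cls(**hidden_cfg)([output, attention_mask])
    return output
```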
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-JBabpa2AOz8" - }, - "source": [ - "#### Without Customization\n", - "\n", - "Without any customization, `EncoderScaffold` behaves the same as the canonical `BertEncoder`.\n", - "\n", - "As shown in the following example, `EncoderScaffold` can load `BertEncoder`'s weights and output the same values:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ktNzKuVByZQf" - }, - "source": [ - "default_hidden_cfg = dict(\n", - " num_attention_heads=cfg[\"num_attention_heads\"],\n", - " intermediate_size=cfg[\"intermediate_size\"],\n", - " intermediate_activation=activations.gelu,\n", - " dropout_rate=cfg[\"dropout_rate\"],\n", - " attention_dropout_rate=cfg[\"attention_dropout_rate\"],\n", - " kernel_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n", - ")\n", - "default_embedding_cfg = dict(\n", - " vocab_size=cfg[\"vocab_size\"],\n", - " type_vocab_size=cfg[\"type_vocab_size\"],\n", - " hidden_size=cfg[\"hidden_size\"],\n", - " initializer=tf.keras.initializers.TruncatedNormal(0.02),\n", - " dropout_rate=cfg[\"dropout_rate\"],\n", - " max_seq_length=cfg[\"max_sequence_length\"]\n", - ")\n", - "default_kwargs = dict(\n", - " hidden_cfg=default_hidden_cfg,\n", - " embedding_cfg=default_embedding_cfg,\n", - " num_hidden_instances=cfg[\"num_layers\"],\n", - " pooled_output_dim=cfg[\"hidden_size\"],\n", - " return_all_layer_outputs=True,\n", - " pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n", - ")\n", - "\n", - "encoder_scaffold = modeling.networks.EncoderScaffold(**default_kwargs)\n", - "classifier_model_from_encoder_scaffold = build_classifier(encoder_scaffold)\n", - "classifier_model_from_encoder_scaffold.set_weights(\n", - " canonical_classifier_model.get_weights())\n", - "predict(classifier_model_from_encoder_scaffold)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sMaUmLyIuwcs" - }, - "source": [ - "#### Customize Embedding\n", - "\n", - "Next, we show how to use a customized embedding network.\n", - "\n", - "We first build an embedding network that will replace the default network. This one will have 2 inputs (`mask` and `word_ids`) instead of 3, and won't use positional embeddings." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LTinnaG6vcsw" - }, - "source": [ - "word_ids = tf.keras.layers.Input(\n", - " shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_word_ids\")\n", - "mask = tf.keras.layers.Input(\n", - " shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_mask\")\n", - "embedding_layer = modeling.layers.OnDeviceEmbedding(\n", - " vocab_size=cfg['vocab_size'],\n", - " embedding_width=cfg['hidden_size'],\n", - " initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),\n", - " name=\"word_embeddings\")\n", - "word_embeddings = embedding_layer(word_ids)\n", - "attention_mask = layers.SelfAttentionMask()([word_embeddings, mask])\n", - "new_embedding_network = tf.keras.Model([word_ids, mask],\n", - " [word_embeddings, attention_mask])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HN7_yu-6O3qI" - }, - "source": [ - "Inspecting `new_embedding_network`, we can see it takes two inputs:\n", - "`input_word_ids` and `input_mask`."
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fO9zKFE4OpHp" - }, - "source": [ - "tf.keras.utils.plot_model(new_embedding_network, show_shapes=True, dpi=48)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9cOaGQHLv12W" - }, - "source": [ - "We can then build a new encoder using the above `new_embedding_network`." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "mtFDMNf2vIl9" - }, - "source": [ - "kwargs = dict(default_kwargs)\n", - "\n", - "# Use new embedding network.\n", - "kwargs['embedding_cls'] = new_embedding_network\n", - "kwargs['embedding_data'] = embedding_layer.embeddings\n", - "\n", - "encoder_with_customized_embedding = modeling.networks.EncoderScaffold(**kwargs)\n", - "classifier_model = build_classifier(encoder_with_customized_embedding)\n", - "# ... Train the model ...\n", - "print(classifier_model.inputs)\n", - "\n", - "# Assert that there are only two inputs.\n", - "assert len(classifier_model.inputs) == 2" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z73ZQDtmwg9K" - }, - "source": [ - "#### Customize Transformer\n", - "\n", - "A user can also override the [hidden_cls](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py#L103) argument in `EncoderScaffold`'s constructor to employ a customized Transformer layer.\n", - "\n", - "See [ReZeroTransformer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/rezero_transformer.py) for how to implement a customized Transformer layer.\n", - "\n", - "The following is an example of using `ReZeroTransformer`:\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "uAIarLZgw6pA" - }, - "source": [ - "kwargs = dict(default_kwargs)\n", - "\n", - "# Use ReZeroTransformer.\n", - "kwargs['hidden_cls'] = modeling.layers.ReZeroTransformer\n", - "\n", - "encoder_with_rezero_transformer = modeling.networks.EncoderScaffold(**kwargs)\n", - "classifier_model = build_classifier(encoder_with_rezero_transformer)\n", - "# ... Train the model ...\n", - "predict(classifier_model)\n", - "\n", - "# Assert that the variable `rezero_alpha` from ReZeroTransformer exists.\n", - "assert 'rezero_alpha' in ''.join([x.name for x in classifier_model.trainable_weights])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6PMHFdvnxvR0" - }, - "source": [ - "### Use [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py)\n", - "\n", - "The above method of customizing `Transformer` requires rewriting the whole `Transformer` layer, while sometimes you may only want to customize either the attention layer or the feedforward block. 
In this case, [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py) can be used.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "D6FejlgwyAy_" - }, - "source": [ - "#### Customize Attention Layer\n", - "\n", - "A user can also override the [attention_cls](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py#L45) argument in `TransformerScaffold`'s constructor to employ a customized Attention layer.\n", - "\n", - "See [TalkingHeadsAttention](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/talking_heads_attention.py) for how to implement a customized `Attention` layer.\n", - "\n", - "The following is an example of using [TalkingHeadsAttention](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/talking_heads_attention.py):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nFrSMrZuyNeQ" - }, - "source": [ - "# Use TalkingHeadsAttention\n", - "hidden_cfg = dict(default_hidden_cfg)\n", - "hidden_cfg['attention_cls'] = modeling.layers.TalkingHeadsAttention\n", - "\n", - "kwargs = dict(default_kwargs)\n", - "kwargs['hidden_cls'] = modeling.layers.TransformerScaffold\n", - "kwargs['hidden_cfg'] = hidden_cfg\n", - "\n", - "encoder = modeling.networks.EncoderScaffold(**kwargs)\n", - "classifier_model = build_classifier(encoder)\n", - "# ... Train the model ...\n", - "predict(classifier_model)\n", - "\n", - "# Assert that the variable `pre_softmax_weight` from TalkingHeadsAttention exists.\n", - "assert 'pre_softmax_weight' in ''.join([x.name for x in classifier_model.trainable_weights])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kuEJcTyByVvI" - }, - "source": [ - "#### Customize Feedforward Layer\n", - "\n", - "Similarly, one could also customize the feedforward layer.\n", - "\n", - "See [GatedFeedforward](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/gated_feedforward.py) for how to implement a customized feedforward layer.\n", - "\n", - "The following is an example of using [GatedFeedforward](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/gated_feedforward.py)." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "XAbKy_l4y_-i" - }, - "source": [ - "# Use GatedFeedforward\n", - "hidden_cfg = dict(default_hidden_cfg)\n", - "hidden_cfg['feedforward_cls'] = modeling.layers.GatedFeedforward\n", - "\n", - "kwargs = dict(default_kwargs)\n", - "kwargs['hidden_cls'] = modeling.layers.TransformerScaffold\n", - "kwargs['hidden_cfg'] = hidden_cfg\n", - "\n", - "encoder_with_gated_feedforward = modeling.networks.EncoderScaffold(**kwargs)\n", - "classifier_model = build_classifier(encoder_with_gated_feedforward)\n", - "# ... 
Train the model ...\n", - "predict(classifier_model)\n", - "\n", - "# Assert that the variable `gate` from GatedFeedforward exists.\n", - "assert 'gate' in ''.join([x.name for x in classifier_model.trainable_weights])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a_8NWUhkzeAq" - }, - "source": [ - "### Build a new Encoder using building blocks from KerasBERT.\n", - "\n", - "Finally, you could also build a new encoder using building blocks in the modeling library.\n", - "\n", - "See [AlbertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/albert_encoder.py) as an example:\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "xsiA3RzUzmUM" - }, - "source": [ - "albert_encoder = modeling.networks.AlbertEncoder(**cfg)\n", - "classifier_model = build_classifier(albert_encoder)\n", - "# ... Train the model ...\n", - "predict(classifier_model)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MeidDfhlHKSO" - }, - "source": [ - "Inspecting the `albert_encoder`, we see it stacks the same `Transformer` layer multiple times." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Uv_juT22HERW" - }, - "source": [ - "tf.keras.utils.plot_model(albert_encoder, show_shapes=True, dpi=48)" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/nlp/nlp_modeling_library_intro.ipynb b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/nlp/nlp_modeling_library_intro.ipynb deleted file mode 100644 index e4ce780c96bfbf679c91891f38b08ac3b0bb983e..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/colab/nlp/nlp_modeling_library_intro.ipynb +++ /dev/null @@ -1,544 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "80xnUmoI7fBX" - }, - "source": [ - "##### Copyright 2020 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "8nvTnfs6Q692" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WmfcMK5P5C1G" - }, - "source": [ - "# Introduction to the TensorFlow Models NLP library" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cH-oJ8R6AHMK" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/nlp/nlp_modeling_library_intro\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/nlp/nlp_modeling_library_intro.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0H_EFIhq4-MJ" - }, - "source": [ - "## Learning objectives\n", - "\n", - "In this Colab notebook, you will learn how to build transformer-based models for common NLP tasks including pretraining, span labelling and classification using the building blocks from [NLP modeling library](https://github.com/tensorflow/models/tree/master/official/nlp/modeling)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2N97-dps_nUk" - }, - "source": [ - "## Install and import" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "459ygAVl_rg0" - }, - "source": [ - "### Install the TensorFlow Model Garden pip package\n", - "\n", - "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n", - "which is the nightly Model Garden package created daily automatically.\n", - "* `pip` will install all models and dependencies automatically." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Y-qGkdh6_sZc" - }, - "outputs": [], - "source": [ - "!pip install -q tf-models-official==2.4.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e4huSSwyAG_5" - }, - "source": [ - "### Import TensorFlow and other libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jqYXqtjBAJd9" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "from official.nlp import modeling\n", - "from official.nlp.modeling import layers, losses, models, networks" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "djBQWjvy-60Y" - }, - "source": [ - "## BERT pretraining model\n", - "\n", - "BERT ([Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)) introduced the method of pre-training language representations on a large text corpus and then using that model for downstream NLP tasks.\n", - "\n", - "In this section, we will learn how to build a model to pretrain BERT on the masked language modeling task and the next sentence prediction task. For simplicity, we only show a minimal example and use dummy data." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKuHVlsCHmiq" - }, - "source": [ - "### Build a `BertPretrainer` model wrapping `BertEncoder`\n", - "\n", - "The [BertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/bert_encoder.py) implements the Transformer-based encoder as described in the [BERT paper](https://arxiv.org/abs/1810.04805). It includes the embedding lookups and transformer layers, but not the masked language model or classification task networks.\n", - "\n", - "The [BertPretrainer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_pretrainer.py) allows a user to pass in a transformer stack, and instantiates the masked language model and classification networks that are used to create the training objectives." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EXkcXz-9BwB3" - }, - "outputs": [], - "source": [ - "# Build a small transformer network.\n", - "vocab_size = 100\n", - "sequence_length = 16\n", - "network = modeling.networks.BertEncoder(\n", - " vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0NH5irV5KTMS" - }, - "source": [ - "Inspecting the encoder, we see that it contains a few embedding layers and a stack of `Transformer` layers, connected to three input layers:\n", - "\n", - "`input_word_ids`, `input_type_ids` and `input_mask`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lZNoZkBrIoff" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(network, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "o7eFOZXiIl-b" - }, - "outputs": [], - "source": [ - "# Create a BERT pretrainer with the created network.\n", - "num_token_predictions = 8\n", - "bert_pretrainer = modeling.models.BertPretrainer(\n", - " network, num_classes=2, num_token_predictions=num_token_predictions, output='predictions')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d5h5HT7gNHx_" - }, - "source": [ - "Inspecting the `bert_pretrainer`, we see it wraps the `encoder` with additional `MaskedLM` and `Classification` heads."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2tcNfm03IBF7" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_pretrainer, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "F2oHrXGUIS0M" - }, - "outputs": [], - "source": [ - "# We can feed some dummy data to get masked language model and sentence output.\n", - "batch_size = 2\n", - "word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n", - "mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n", - "type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n", - "masked_lm_positions_data = np.random.randint(2, size=(batch_size, num_token_predictions))\n", - "\n", - "outputs = bert_pretrainer(\n", - " [word_id_data, mask_data, type_id_data, masked_lm_positions_data])\n", - "lm_output = outputs[\"masked_lm\"]\n", - "sentence_output = outputs[\"classification\"]\n", - "print(lm_output)\n", - "print(sentence_output)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bnx3UCHniCS5" - }, - "source": [ - "### Compute loss\n", - "Next, we can use `lm_output` and `sentence_output` to compute `loss`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "k30H4Q86f52x" - }, - "outputs": [], - "source": [ - "masked_lm_ids_data = np.random.randint(vocab_size, size=(batch_size, num_token_predictions))\n", - "masked_lm_weights_data = np.random.randint(2, size=(batch_size, num_token_predictions))\n", - "next_sentence_labels_data = np.random.randint(2, size=(batch_size))\n", - "\n", - "mlm_loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n", - " labels=masked_lm_ids_data,\n", - " predictions=lm_output,\n", - " weights=masked_lm_weights_data)\n", - "sentence_loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n", - " labels=next_sentence_labels_data,\n", - " predictions=sentence_output)\n", - "loss = mlm_loss + sentence_loss\n", - "print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wrmSs8GjHxVw" - }, - "source": [ - "With the loss, you can optimize the model.\n", - "After training, we can save the weights of the `TransformerEncoder` for downstream fine-tuning tasks. Please see [run_pretraining.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_pretraining.py) for the full example.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "k8cQVFvBCV4s" - }, - "source": [ - "## Span labeling model\n", - "\n", - "Span labeling is the task of assigning labels to a span of text, for example, labeling a span of text as the answer to a given question.\n", - "\n", - "In this section, we will learn how to build a span labeling model. Again, we use dummy data for simplicity."
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xrLLEWpfknUW" - }, - "source": [ - "### Build a BertSpanLabeler wrapping BertEncoder\n", - "\n", - "[BertSpanLabeler](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_span_labeler.py) implements a simple single-span start-end predictor (that is, a model that predicts two values: a start token index and an end token index), suitable for SQuAD-style tasks.\n", - "\n", - "Note that `BertSpanLabeler` wraps a `BertEncoder`, the weights of which can be restored from the above pretraining model.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "B941M4iUCejO" - }, - "outputs": [], - "source": [ - "network = modeling.networks.BertEncoder(\n", - " vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n", - "\n", - "# Create a BERT trainer with the created network.\n", - "bert_span_labeler = modeling.models.BertSpanLabeler(network)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QpB9pgj4PpMg" - }, - "source": [ - "Inspecting the `bert_span_labeler`, we see it wraps the encoder with an additional `SpanLabeling` head that outputs `start_position` and `end_position`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RbqRNJCLJu4H" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_span_labeler, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fUf1vRxZJwio" - }, - "outputs": [], - "source": [ - "# Create a set of 2-dimensional data tensors to feed into the model.\n", - "word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n", - "mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n", - "type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n", - "\n", - "# Feed the data to the model.\n", - "start_logits, end_logits = bert_span_labeler([word_id_data, mask_data, type_id_data])\n", - "print(start_logits)\n", - "print(end_logits)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WqhgQaN1lt-G" - }, - "source": [ - "### Compute loss\n", - "With `start_logits` and `end_logits`, we can compute loss:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "waqs6azNl3Nn" - }, - "outputs": [], - "source": [ - "start_positions = np.random.randint(sequence_length, size=(batch_size))\n", - "end_positions = np.random.randint(sequence_length, size=(batch_size))\n", - "\n", - "start_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", - " start_positions, start_logits, from_logits=True)\n", - "end_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", - " end_positions, end_logits, from_logits=True)\n", - "\n", - "total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2\n", - "print(total_loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zdf03YtZmd_d" - }, - "source": [ - "With the `loss`, you can optimize the model. Please see [run_squad.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_squad.py) for the full example."
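As a follow-up not shown in the library example, here is one hedged sketch of turning the logits into an actual span prediction by taking the argmax of each head per example:

```python
# Illustrative follow-up: decode a predicted (start, end) token span from the
# logits produced above by taking the argmax of each head.
predicted_starts = tf.argmax(start_logits, axis=-1).numpy()
predicted_ends = tf.argmax(end_logits, axis=-1).numpy()
print(list(zip(predicted_starts, predicted_ends)))
# Note: a production decoder (like the one in run_squad.py) additionally
# enforces start <= end and scores the top-k start/end combinations.
```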
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0A1XnGSTChg9" - }, - "source": [ - "## Classification model\n", - "\n", - "In this final section, we show how to build a text classification model.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MSK8OpZgnQa9" - }, - "source": [ - "### Build a BertClassifier model wrapping BertEncoder\n", - "\n", - "[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a [CLS] token classification model containing a single classification head." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cXXCsffkCphk" - }, - "outputs": [], - "source": [ - "network = modeling.networks.BertEncoder(\n", - " vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n", - "\n", - "# Create a BERT trainer with the created network.\n", - "num_classes = 2\n", - "bert_classifier = modeling.models.BertClassifier(\n", - " network, num_classes=num_classes)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8tZKueKYP4bB" - }, - "source": [ - "Inspecting the `bert_classifier`, we see it wraps the `encoder` with an additional `Classification` head." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "snlutm9ZJgEZ" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_classifier, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yyHPHsqBJkCz" - }, - "outputs": [], - "source": [ - "# Create a set of 2-dimensional data tensors to feed into the model.\n", - "word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n", - "mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n", - "type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n", - "\n", - "# Feed the data to the model.\n", - "logits = bert_classifier([word_id_data, mask_data, type_id_data])\n", - "print(logits)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "w--a2mg4nzKm" - }, - "source": [ - "### Compute loss\n", - "\n", - "With `logits`, we can compute `loss`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9X0S1DoFn_5Q" - }, - "outputs": [], - "source": [ - "labels = np.random.randint(num_classes, size=(batch_size))\n", - "\n", - "loss = tf.keras.losses.sparse_categorical_crossentropy(\n", - " labels, logits, from_logits=True)\n", - "print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mzBqOylZo3og" - }, - "source": [ - "With the `loss`, you can optimize the model. Please see [run_classifier.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py) or the colab [fine_tuning_bert.ipynb](https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb) for the full example."
- ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "Introduction to the TensorFlow Models NLP library", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/__init__.py deleted file mode 100644 index 3ef7bb85ba5f722a4f34e90623470d5a45af3aa4..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/dataset_fn.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/dataset_fn.py deleted file mode 100644 index fb6a5b42d034e8fdb1a2c2027def08cf65f35afe..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/dataset_fn.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Utility library for picking an appropriate dataset function.""" - -from typing import Any, Callable, Union, Type - -import tensorflow as tf - -PossibleDatasetType = Union[Type[tf.data.Dataset], Callable[[tf.Tensor], Any]] - - -def pick_dataset_fn(file_type: str) -> PossibleDatasetType: - if file_type == 'tfrecord': - return tf.data.TFRecordDataset - - raise ValueError('Unrecognized file_type: {}'.format(file_type)) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/distribute_utils.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/distribute_utils.py deleted file mode 100644 index c484e0bfa2704481db8dab695bd4d2426c1ebbce..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/distribute_utils.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Helper functions for running models in a distributed setting.""" - -import json -import os -import tensorflow as tf - - -def _collective_communication(all_reduce_alg): - """Return a CollectiveCommunication based on all_reduce_alg. - - Args: - all_reduce_alg: a string specifying which collective communication to pick, - or None. - - Returns: - tf.distribute.experimental.CollectiveCommunication object - - Raises: - ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"] - """ - collective_communication_options = { - None: tf.distribute.experimental.CollectiveCommunication.AUTO, - "ring": tf.distribute.experimental.CollectiveCommunication.RING, - "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL - } - if all_reduce_alg not in collective_communication_options: - raise ValueError( - "When used with `multi_worker_mirrored`, valid values for " - "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format( - all_reduce_alg)) - return collective_communication_options[all_reduce_alg] - - -def _mirrored_cross_device_ops(all_reduce_alg, num_packs): - """Return a CrossDeviceOps based on all_reduce_alg and num_packs. - - Args: - all_reduce_alg: a string specifying which cross device op to pick, or None. - num_packs: an integer specifying number of packs for the cross device op. - - Returns: - tf.distribute.CrossDeviceOps object or None. - - Raises: - ValueError: if `all_reduce_alg` not in [None, "nccl", "hierarchical_copy"]. - """ - if all_reduce_alg is None: - return None - mirrored_all_reduce_options = { - "nccl": tf.distribute.NcclAllReduce, - "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce - } - if all_reduce_alg not in mirrored_all_reduce_options: - raise ValueError( - "When used with `mirrored`, valid values for all_reduce_alg are " - "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format( - all_reduce_alg)) - cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg] - return cross_device_ops_class(num_packs=num_packs) - - -def tpu_initialize(tpu_address): - """Initializes TPU for TF 2.x training. - - Args: - tpu_address: string, bns address of master TPU worker. - - Returns: - A TPUClusterResolver. - """ - cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( - tpu=tpu_address) - if tpu_address not in ("", "local"): - tf.config.experimental_connect_to_cluster(cluster_resolver) - tf.tpu.experimental.initialize_tpu_system(cluster_resolver) - return cluster_resolver - - -def get_distribution_strategy(distribution_strategy="mirrored", - num_gpus=0, - all_reduce_alg=None, - num_packs=1, - tpu_address=None, - **kwargs): - """Return a DistributionStrategy for running the model. - - Args: - distribution_strategy: a string specifying which distribution strategy to - use. Accepted values are "off", "one_device", "mirrored", - "parameter_server", "multi_worker_mirrored", and "tpu" -- case - insensitive. "off" means not to use Distribution Strategy; "tpu" means to - use TPUStrategy using `tpu_address`. - num_gpus: Number of GPUs to run this model. - all_reduce_alg: Optional. Specifies which algorithm to use when performing - all-reduce. For `MirroredStrategy`, valid values are "nccl" and - "hierarchical_copy". For `MultiWorkerMirroredStrategy`, valid values are - "ring" and "nccl". If None, DistributionStrategy will choose based on - device topology. - num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce` - or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`. - tpu_address: Optional. 
String that represents TPU to connect to. Must not be - None if `distribution_strategy` is set to `tpu`. - **kwargs: Additional kwargs for internal usages. - - Returns: - tf.distribute.DistributionStrategy object. - Raises: - ValueError: if `distribution_strategy` is "off" or "one_device" and - `num_gpus` is larger than 1; if `num_gpus` is negative; or if - `distribution_strategy` is `tpu` but `tpu_address` is not specified. - """ - del kwargs - if num_gpus < 0: - raise ValueError("`num_gpus` cannot be negative.") - - if not isinstance(distribution_strategy, str): - msg = ("distribution_strategy must be a string but got: %s." % - (distribution_strategy,)) - if distribution_strategy == False: # pylint: disable=singleton-comparison,g-explicit-bool-comparison - msg += (" If you meant to pass the string 'off', make sure you add " - "quotes around 'off' so that yaml interprets it as a string " - "instead of a bool.") - raise ValueError(msg) - - distribution_strategy = distribution_strategy.lower() - if distribution_strategy == "off": - if num_gpus > 1: - raise ValueError("When {} GPUs are specified, distribution_strategy " - "flag cannot be set to `off`.".format(num_gpus)) - return None - - if distribution_strategy == "tpu": - # When tpu_address is an empty string, we communicate with local TPUs. - cluster_resolver = tpu_initialize(tpu_address) - return tf.distribute.TPUStrategy(cluster_resolver) - - if distribution_strategy == "multi_worker_mirrored": - return tf.distribute.experimental.MultiWorkerMirroredStrategy( - communication=_collective_communication(all_reduce_alg)) - - if distribution_strategy == "one_device": - if num_gpus == 0: - return tf.distribute.OneDeviceStrategy("device:CPU:0") - if num_gpus > 1: - raise ValueError("`OneDeviceStrategy` cannot be used for more than " - "one device.") - return tf.distribute.OneDeviceStrategy("device:GPU:0") - - if distribution_strategy == "mirrored": - if num_gpus == 0: - devices = ["device:CPU:0"] - else: - devices = ["device:GPU:%d" % i for i in range(num_gpus)] - return tf.distribute.MirroredStrategy( - devices=devices, - cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs)) - - if distribution_strategy == "parameter_server": - cluster_resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver() - return tf.distribute.experimental.ParameterServerStrategy(cluster_resolver) - - raise ValueError("Unrecognized Distribution Strategy: %r" % - distribution_strategy) - - def configure_cluster(worker_hosts=None, task_index=-1): - """Sets the multi-worker cluster spec in the TF_CONFIG environment variable. - - Args: - worker_hosts: comma-separated list of worker ip:port pairs. - task_index: index of the worker. - - Returns: - Number of workers in the cluster.
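-
-  Example: `worker_hosts="host1:port1,host2:port2"` with `task_index=0`
-  marks this process as the first of two workers (the host:port values
-  here are placeholders).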
- """ - tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) - if tf_config: - num_workers = ( - len(tf_config["cluster"].get("chief", [])) + - len(tf_config["cluster"].get("worker", []))) - elif worker_hosts: - workers = worker_hosts.split(",") - num_workers = len(workers) - if num_workers > 1 and task_index < 0: - raise ValueError("Must specify task_index when number of workers > 1") - task_index = 0 if num_workers == 1 else task_index - os.environ["TF_CONFIG"] = json.dumps({ - "cluster": { - "worker": workers - }, - "task": { - "type": "worker", - "index": task_index - } - }) - else: - num_workers = 1 - return num_workers - - -def get_strategy_scope(strategy): - if strategy: - strategy_scope = strategy.scope() - else: - strategy_scope = DummyContextManager() - - return strategy_scope - - -class DummyContextManager(object): - - def __enter__(self): - pass - - def __exit__(self, *args): - pass diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/distribute_utils_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/distribute_utils_test.py deleted file mode 100644 index a8c3bfc1775d2c895eed0837dcfa36440b803ee2..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/distribute_utils_test.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for distribution util functions.""" - -import tensorflow as tf - -from official.common import distribute_utils - - -class GetDistributionStrategyTest(tf.test.TestCase): - """Tests for get_distribution_strategy.""" - - def test_one_device_strategy_cpu(self): - ds = distribute_utils.get_distribution_strategy(num_gpus=0) - self.assertEqual(ds.num_replicas_in_sync, 1) - self.assertEqual(len(ds.extended.worker_devices), 1) - self.assertIn('CPU', ds.extended.worker_devices[0]) - - def test_one_device_strategy_gpu(self): - ds = distribute_utils.get_distribution_strategy(num_gpus=1) - self.assertEqual(ds.num_replicas_in_sync, 1) - self.assertEqual(len(ds.extended.worker_devices), 1) - self.assertIn('GPU', ds.extended.worker_devices[0]) - - def test_mirrored_strategy(self): - ds = distribute_utils.get_distribution_strategy(num_gpus=5) - self.assertEqual(ds.num_replicas_in_sync, 5) - self.assertEqual(len(ds.extended.worker_devices), 5) - for device in ds.extended.worker_devices: - self.assertIn('GPU', device) - - def test_no_strategy(self): - ds = distribute_utils.get_distribution_strategy('off') - self.assertIsNone(ds) - - def test_invalid_strategy(self): - with self.assertRaisesRegex( - ValueError, - 'distribution_strategy must be a string but got: False. If'): - distribute_utils.get_distribution_strategy(False) - with self.assertRaisesRegex( - ValueError, 'distribution_strategy must be a string but got: 1'): - distribute_utils.get_distribution_strategy(1) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/flags.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/flags.py deleted file mode 100644 index d7216ee0946b7275c3515630871b188d1d464adb..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/flags.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# - -"""The central place to define flags.""" - -from absl import flags - - -def define_flags(): - """Defines flags.""" - flags.DEFINE_string( - 'experiment', default=None, help='The experiment type registered.') - - flags.DEFINE_enum( - 'mode', - default=None, - enum_values=[ - 'train', 'eval', 'train_and_eval', 'continuous_eval', - 'continuous_train_and_eval', 'train_and_validate' - ], - help='Mode to run: `train`, `eval`, `train_and_eval`, ' - '`continuous_eval`, `continuous_train_and_eval` and ' - '`train_and_validate` (which is not implemented in ' - 'the open source version).') - - flags.DEFINE_string( - 'model_dir', - default=None, - help='The directory where the model and training/evaluation summaries ' - 'are stored.') - - flags.DEFINE_multi_string( - 'config_file', - default=None, - help='YAML/JSON files which specify overrides. The override order ' - 'follows the order of args. Note that each file ' - 'can be used as an override template to override the default parameters ' - 'specified in Python. If the same parameter is specified in both ' - '`--config_file` and `--params_override`, `config_file` will be used ' - 'first, followed by params_override.') - - flags.DEFINE_string( - 'params_override', - default=None, - help='A YAML/JSON string or a YAML file which specifies additional ' - 'overrides over the default parameters and those specified in ' - '`--config_file`. Note that this is supposed to be used only to override ' - 'the model parameters, but not the parameters like TPU specific flags. ' - 'One canonical use case of `--config_file` and `--params_override` is that ' - 'users first define a template config file using `--config_file`, then ' - 'use `--params_override` to adjust the minimal set of tuning parameters, ' - 'for example setting up different `train_batch_size`. The final override ' - 'order of parameters: default_model_params --> params from config_file ' - '--> params in params_override. See also the help message of ' - '`--config_file`.') - - # Libraries that rely on gin often make the mistake of defining flags inside - # library files, which causes conflicts. - try: - flags.DEFINE_multi_string( - 'gin_file', default=None, help='List of paths to the config files.') - except flags.DuplicateFlagError: - pass - - try: - flags.DEFINE_multi_string( - 'gin_params', - default=None, - help='Newline separated list of Gin parameter bindings.') - except flags.DuplicateFlagError: - pass - - flags.DEFINE_string( - 'tpu', - default=None, - help='The Cloud TPU to use for training. This should be either the name ' - 'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 ' - 'url.') - - flags.DEFINE_string( - 'tf_data_service', default=None, help='The tf.data service address.') diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/registry_imports.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/registry_imports.py deleted file mode 100644 index 653bedaac206c04d1856c43a7bbbc7ba4bfb8534..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/common/registry_imports.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""All necessary imports for registration.""" -# pylint: disable=unused-import -from official.nlp import tasks -from official.nlp.configs import experiment_configs -from official.utils.testing import mock_task -from official.vision import beta diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/__init__.py deleted file mode 100644 index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_task.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_task.py deleted file mode 100644 index 95558edf8696bee9a84c4f3c8339bc78dd8f30e3..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_task.py +++ /dev/null @@ -1,320 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Defines the base task abstraction.""" -import abc -from typing import Optional - -from absl import logging -import tensorflow as tf - -from official.core import config_definitions -from official.modeling import optimization -from official.modeling import performance - -OptimizationConfig = optimization.OptimizationConfig -RuntimeConfig = config_definitions.RuntimeConfig - - -class Task(tf.Module, metaclass=abc.ABCMeta): - """A single-replica view of the training procedure. - - Tasks provide artifacts for training/validation procedures, including - loading/iterating over Datasets, training/validation steps, calculating the - loss and customized metrics with reduction. - """ - - # Special keys in train/validate step returned logs. - loss = "loss" - - def __init__(self, params, logging_dir: str = None, name: str = None): - """Task initialization. - - Args: - params: the task configuration instance, which can be any of dataclass, - ConfigDict, namedtuple, etc. - logging_dir: a string pointing to where the model, summaries etc. will be - saved. You can also write additional files to this directory. - name: the task name. - """ - super().__init__(name=name) - self._task_config = params - self._logging_dir = logging_dir - - @property - def task_config(self): - return self._task_config - - @property - def logging_dir(self) -> str: - return self._logging_dir - - @classmethod - def create_optimizer(cls, optimizer_config: OptimizationConfig, - runtime_config: Optional[RuntimeConfig] = None): - """Creates a TF optimizer from configurations. - - Args: - optimizer_config: the parameters of the Optimization settings. - runtime_config: the parameters of the runtime. - - Returns: - A tf.optimizers.Optimizer object. - """ - opt_factory = optimization.OptimizerFactory(optimizer_config) - optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) - # Configuring optimizer when loss_scale is set in runtime config. This helps - # avoid overflow/underflow for float16 computations.
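- # (Loss scaling multiplies the loss by a constant factor so that float16
- # gradients stay above the smallest positive float16 value, roughly 6e-8;
- # the LossScaleOptimizer divides the gradients by the same factor before
- # they are applied.)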
- if runtime_config and runtime_config.loss_scale: - optimizer = performance.configure_optimizer( - optimizer, - use_float16=runtime_config.mixed_precision_dtype == "float16", - loss_scale=runtime_config.loss_scale) - - return optimizer - - def initialize(self, model: tf.keras.Model): - """[Optional] A callback function used as CheckpointManager's init_fn. - - This function will be called when no checkpoint is found for the model. - If there is a checkpoint, the checkpoint will be loaded and this function - will not be called. You can use this callback function to load a pretrained - checkpoint, saved under a directory other than the model_dir. - - Args: - model: The keras.Model built or used by this task. - """ - ckpt_dir_or_file = self.task_config.init_checkpoint - logging.info("Trying to load pretrained checkpoint from %s", - ckpt_dir_or_file) - if tf.io.gfile.isdir(ckpt_dir_or_file): - ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) - if not ckpt_dir_or_file: - return - - if hasattr(model, "checkpoint_items"): - checkpoint_items = model.checkpoint_items - else: - checkpoint_items = dict(model=model) - ckpt = tf.train.Checkpoint(**checkpoint_items) - status = ckpt.read(ckpt_dir_or_file) - status.expect_partial().assert_existing_objects_matched() - logging.info("Finished loading pretrained checkpoint from %s", - ckpt_dir_or_file) - - def build_model(self) -> tf.keras.Model: - """[Optional] Creates the model architecture. - - Returns: - A model instance. - """ - - @abc.abstractmethod - def build_inputs(self, - params, - input_context: Optional[tf.distribute.InputContext] = None): - """Returns a dataset or a nested structure of dataset functions. - - Dataset functions define per-host datasets with the per-replica batch size. - With distributed training, this method runs on remote hosts. - - Args: - params: hyperparams to create input pipelines, which can be any of - dataclass, ConfigDict, namedtuple, etc. - input_context: optional distribution input pipeline context. - - Returns: - A nested structure of per-replica input functions. - """ - - def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: - """Standard interface to compute losses. - - Args: - labels: optional label tensors. - model_outputs: a nested structure of output tensors. - aux_losses: auxiliary loss tensors, i.e. `losses` in keras.Model. - - Returns: - The total loss tensor. - """ - del model_outputs, labels - - if aux_losses is None: - losses = [tf.constant(0.0, dtype=tf.float32)] - else: - losses = aux_losses - total_loss = tf.add_n(losses) - return total_loss - - def build_metrics(self, training: bool = True): - """Gets streaming metrics for training/validation.""" - del training - return [] - - def process_metrics(self, metrics, labels, model_outputs): - """Process and update metrics. - - Called when using the custom training loop API. - - Args: - metrics: a nested structure of metrics objects. The return of function - self.build_metrics. - labels: a tensor or a nested structure of tensors. - model_outputs: a tensor or a nested structure of tensors. For example, - output of the keras model built by self.build_model. - """ - for metric in metrics: - metric.update_state(labels, model_outputs) - - def process_compiled_metrics(self, compiled_metrics, labels, model_outputs): - """Process and update compiled_metrics. - - Called when using the compile/fit API. - - Args: - compiled_metrics: the compiled metrics (model.compiled_metrics). - labels: a tensor or a nested structure of tensors.
- model_outputs: a tensor or a nested structure of tensors. For example, - output of the keras model built by self.build_model. - """ - compiled_metrics.update_state(labels, model_outputs) - - def train_step(self, - inputs, - model: tf.keras.Model, - optimizer: tf.keras.optimizers.Optimizer, - metrics=None): - """Does forward and backward. - - With distribution strategies, this method runs on devices. - - Args: - inputs: a dictionary of input tensors. - model: the model, forward pass definition. - optimizer: the optimizer for this training step. - metrics: a nested structure of metrics objects. - - Returns: - A dictionary of logs. - """ - if isinstance(inputs, tuple) and len(inputs) == 2: - features, labels = inputs - else: - features, labels = inputs, inputs - with tf.GradientTape() as tape: - outputs = model(features, training=True) - # Computes per-replica loss. - if model.compiled_loss: - loss = model.compiled_loss( - labels, outputs, regularization_losses=model.losses) - loss += self.build_losses( - labels=labels, model_outputs=outputs, aux_losses=None) - else: - loss = self.build_losses( - labels=labels, model_outputs=outputs, aux_losses=model.losses) - # Scales loss as the default gradients allreduce performs sum inside the - # optimizer. - scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync - - # For mixed precision, when a LossScaleOptimizer is used, the loss is - # scaled to avoid numeric underflow. - if isinstance(optimizer, - tf.keras.mixed_precision.LossScaleOptimizer): - scaled_loss = optimizer.get_scaled_loss(scaled_loss) - - tvars = model.trainable_variables - grads = tape.gradient(scaled_loss, tvars) - - if isinstance(optimizer, - tf.keras.mixed_precision.LossScaleOptimizer): - grads = optimizer.get_unscaled_gradients(grads) - optimizer.apply_gradients(list(zip(grads, tvars))) - logs = {self.loss: loss} - if metrics: - self.process_metrics(metrics, labels, outputs) - if model.compiled_metrics: - self.process_compiled_metrics(model.compiled_metrics, labels, outputs) - logs.update({m.name: m.result() for m in metrics or []}) - logs.update({m.name: m.result() for m in model.metrics}) - return logs - - def validation_step(self, inputs, model: tf.keras.Model, metrics=None): - """Validation step. - - With distribution strategies, this method runs on devices. - - Args: - inputs: a dictionary of input tensors. - model: the keras.Model. - metrics: a nested structure of metrics objects. - - Returns: - A dictionary of logs. - """ - if isinstance(inputs, tuple) and len(inputs) == 2: - features, labels = inputs - else: - features, labels = inputs, inputs - outputs = self.inference_step(features, model) - loss = self.build_losses( - labels=labels, model_outputs=outputs, aux_losses=model.losses) - logs = {self.loss: loss} - if metrics: - self.process_metrics(metrics, labels, outputs) - if model.compiled_metrics: - self.process_compiled_metrics(model.compiled_metrics, labels, outputs) - logs.update({m.name: m.result() for m in metrics or []}) - logs.update({m.name: m.result() for m in model.metrics}) - return logs - - def inference_step(self, inputs, model: tf.keras.Model): - """Performs the forward step. - - With distribution strategies, this method runs on devices. - - Args: - inputs: a dictionary of input tensors. - model: the keras.Model. - - Returns: - Model outputs. 
- """ - return model(inputs, training=False) - - def aggregate_logs(self, state, step_logs): - """Optional aggregation over logs returned from a validation step.""" - pass - - def reduce_aggregated_logs(self, - aggregated_logs, - global_step: Optional[tf.Tensor] = None): - """Optional reduce of aggregated logs over validation steps.""" - return {} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_trainer.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_trainer.py deleted file mode 100644 index eb089f6099514f7a22cfab409f76609caa27fe1d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_trainer.py +++ /dev/null @@ -1,496 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Standard Trainer implementation. - -The base trainer implements the Orbit `StandardTrainable` and -`StandardEvaluable` interfaces. Trainers inside this project should be -interchangable and independent on model architectures and tasks. -""" -import functools -from typing import Union, Optional -from absl import logging -import gin -import orbit -import tensorflow as tf - -from official.core import base_task -from official.core import config_definitions -from official.modeling import optimization - -ExperimentConfig = config_definitions.ExperimentConfig -TrainerConfig = config_definitions.TrainerConfig - - -class Recovery: - """Built-in model blowup recovery module. - - Checks the loss value by the given threshold. If applicable, recover the - model by reading the checkpoint on disk. 
- """ - - def __init__(self, - loss_upper_bound: float, - checkpoint_manager: tf.train.CheckpointManager, - recovery_begin_steps: int = 0, - recovery_max_trials: int = 3): - self.recover_counter = 0 - self.recovery_begin_steps = recovery_begin_steps - self.recovery_max_trials = recovery_max_trials - self.loss_upper_bound = loss_upper_bound - self.checkpoint_manager = checkpoint_manager - - def should_recover(self, loss_value, global_step): - if tf.math.is_nan(loss_value): - return True - if (global_step >= self.recovery_begin_steps and - loss_value > self.loss_upper_bound): - return True - return False - - def maybe_recover(self, loss_value, global_step): - """Conditionally recovers the training by triggering checkpoint restoration. - - Args: - loss_value: the loss value as a float. - global_step: the number of global training steps. - - Raises: - RuntimeError: when recovery happens more than the max number of trials, - the job should crash. - """ - if not self.should_recover(loss_value, global_step): - return - self.recover_counter += 1 - if self.recover_counter > self.recovery_max_trials: - raise RuntimeError( - "The loss value is NaN after training loop and it happens %d times." % - self.recover_counter) - # Loads the previous good checkpoint. - checkpoint_path = self.checkpoint_manager.restore_or_initialize() - logging.warning( - "Recovering the model from checkpoint: %s. The loss value becomes " - "%f at step %d.", checkpoint_path, loss_value, global_step) - - -class _AsyncTrainer(orbit.StandardTrainer, orbit.StandardEvaluator): - """Trainer class for both sync and async Strategy.""" - - def init_async(self): - """Initializes the Async Trainer base class.""" - assert isinstance(self._strategy, tf.distribute.Strategy) - self._is_async = isinstance( - self._strategy, tf.distribute.experimental.ParameterServerStrategy) - self._coordinator = None - if self._is_async: - self._coordinator = ( - tf.distribute.experimental.coordinator.ClusterCoordinator( - self._strategy)) - - def join(self): - """Join all async steps. Only useful in aysnc training.""" - if getattr(self, "_is_async", False): - self._coordinator.join() - - def create_train_loop_fn(self): - """Creates a eval loop from the given step function and options.""" - train_loop_fn = super().create_train_loop_fn() - if getattr(self, "_is_async", False): - - def _async_loop_fn(iterator, num_steps): - self._coordinator.schedule(train_loop_fn, args=(iterator, num_steps)) - - return _async_loop_fn - else: - return train_loop_fn - - def create_eval_loop_fn(self, has_state: bool): - """Creates a training loop from the given step function and options.""" - eval_loop_fn = super().create_eval_loop_fn(has_state) - - if getattr(self, "_is_async", False): - if has_state: - raise ValueError( - "Stateful eval loop is not supported in async training.") - - def _async_loop_fn(iterator, num_steps, state=None, reduce_fn=None): - assert state is None - assert reduce_fn is None - self._coordinator.schedule(eval_loop_fn, args=(iterator, num_steps)) - - return _async_loop_fn - else: - return eval_loop_fn - - def distribute_dataset(self, dataset_or_fn, *args, **kwargs): - """A utility function to help create a `tf.distribute.DistributedDataset`. - - Args: - dataset_or_fn: A instance of `tf.data.Dataset`, or a "dataset function" - returning a `tf.data.Dataset`. If it is a function, it may optionally - have an argument named `input_context` which will be passed a - `tf.distribute.InputContext` instance. 
- *args: Any positional arguments to pass through to `dataset_or_fn`. - **kwargs: Any keyword arguments to pass through to `dataset_or_fn`. - - Returns: - A distributed Dataset. - """ - if getattr(self, "_is_async", False): - per_worker_dataset_fn = functools.partial( - orbit.utils.make_distributed_dataset, self._strategy, dataset_or_fn, - *args, **kwargs) - per_worker_dataset_fn = tf.function(per_worker_dataset_fn) - - return self._coordinator.create_per_worker_dataset(per_worker_dataset_fn) - else: - return orbit.utils.make_distributed_dataset(self._strategy, dataset_or_fn, - *args, **kwargs) - - -def get_runtime_options(config: ExperimentConfig): - """Gets tf.distribute.RunOptions from config.""" - xla_options = {} - if config.runtime.tpu_enable_xla_dynamic_padder is not None: - xla_options["enable_xla_dynamic_padder"] = ( - config.runtime.tpu_enable_xla_dynamic_padder) - return tf.distribute.RunOptions( - experimental_xla_options=tf.tpu.XLAOptions(**xla_options)) - - -@gin.configurable - class Trainer(_AsyncTrainer): - """Implements the common trainer shared for TensorFlow models.""" - - # pylint: disable=super-init-not-called - def __init__( - self, - config: ExperimentConfig, - task: base_task.Task, - model: tf.keras.Model, - optimizer: tf.optimizers.Optimizer, - train: bool = True, - evaluate: bool = True, - train_dataset: Optional[Union[tf.data.Dataset, - tf.distribute.DistributedDataset]] = None, - validation_dataset: Optional[Union[ - tf.data.Dataset, tf.distribute.DistributedDataset]] = None, - checkpoint_exporter=None): - """Initializes the common trainer for TensorFlow models. - - Args: - config: An `ExperimentConfig` instance specifying experiment config. - task: A base_task.Task instance. - model: The model instance, e.g. a tf.keras.Model instance. - optimizer: tf.optimizers.Optimizer instance. - train: bool, whether or not this trainer will be used for training. - Defaults to True. - evaluate: bool, whether or not this trainer will be used for evaluation. - Defaults to True. - train_dataset: a dataset object created for training. With tf.distribute, - it needs to be a `DistributedDataset`. - validation_dataset: a dataset object created for evaluation. With - tf.distribute, it needs to be a `DistributedDataset`. The evaluator will - create a dataset iterator for each eval round, so the dataset does not - need to repeat. - checkpoint_exporter: an object that has the `maybe_export_checkpoint` - interface. - """ - # Gets the current distribution strategy. If not inside any strategy scope, - # it gets a single-replica no-op strategy. - self._strategy = tf.distribute.get_strategy() - self._validate_params( - config, - check_train_data=train_dataset is None, - check_validation_data=validation_dataset is None) - self._config = config - self._task = task - self._model = model - self._optimizer = optimizer - self._checkpoint_exporter = checkpoint_exporter - self._recovery = None - # Runtime options are only applied to train_step. - # We use the default for eval_step. - self._runtime_options = get_runtime_options(config) - - # Creates a shadow copy of the weights to store the moving average of the - # weights. - if isinstance(self._optimizer, optimization.ExponentialMovingAverage - ) and not self._optimizer.has_shadow_copy: - self._optimizer.shadow_copy(self._model) - - # global_step increases by 1 after each training iteration. - # We should have global_step.numpy() == self.optimizer.iterations.numpy() - # when there is only 1 optimizer.
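- # (When a model is trained with more than one optimizer, each optimizer
- # keeps its own `iterations` counter, so this explicit global step is the
- # single source of truth for step accounting.)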
- self._global_step = orbit.utils.create_global_step() - if hasattr(self.model, "checkpoint_items"): - checkpoint_items = self.model.checkpoint_items - else: - checkpoint_items = {} - self._checkpoint = tf.train.Checkpoint( - global_step=self.global_step, - model=self.model, - optimizer=self.optimizer, - **checkpoint_items) - - self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32) - self._validation_loss = tf.keras.metrics.Mean( - "validation_loss", dtype=tf.float32) - self._train_metrics = self.task.build_metrics( - training=True) + self.model.metrics - self._validation_metrics = self.task.build_metrics( - training=False) + self.model.metrics - - self.init_async() - - if train: - train_dataset = train_dataset or self.distribute_dataset( - self.task.build_inputs, self.config.task.train_data) - orbit.StandardTrainer.__init__( - self, - train_dataset, - options=orbit.StandardTrainerOptions( - use_tf_while_loop=config.trainer.train_tf_while_loop, - use_tf_function=config.trainer.train_tf_function, - use_tpu_summary_optimization=config.trainer.allow_tpu_summary)) - - if evaluate: - validation_dataset = validation_dataset or self.distribute_dataset( - self.task.build_inputs, self.config.task.validation_data) - orbit.StandardEvaluator.__init__( - self, - validation_dataset, - options=orbit.StandardEvaluatorOptions( - use_tf_function=config.trainer.eval_tf_function, - use_tf_while_loop=config.trainer.eval_tf_while_loop)) - - def _validate_params(self, - config, - check_train_data=True, - check_validation_data=True): - r"""Validates the configuration object passed to the Trainer. - - The experiment configuration should be structured as: - \trainer - \task - \train_data - \validation_data - - Args: - config: a namedtuple, dataclass, ConfigDict, etc. - check_train_data: whether to check task.train_data field. - check_validation_data: whether to check task.validation_data field. - """ - if not hasattr(config, "trainer"): - raise AttributeError("The trainer requires that the configuration " - "contains an attribute `trainer`.") - - if not hasattr(config, "task"): - raise AttributeError("The trainer requires that the configuration " - "contains an attribute `task`.") - - if check_train_data and not hasattr(config.task, "train_data"): - raise AttributeError("The trainer requires that the configuration " - "contains an attribute `task.train_data`.") - - if check_validation_data and not hasattr(config.task, "validation_data"): - raise AttributeError("The trainer requires that the configuration " - "contains an attribute `task.validation_data`.") - - @property - def strategy(self): - return self._strategy - - @property - def config(self): - return self._config - - @property - def task(self): - return self._task - - @property - def model(self): - return self._model - - @property - def optimizer(self): - if hasattr(self, "_optimizer"): - return self._optimizer - else: - return None - - @property - def global_step(self): - return self._global_step - - @property - def train_loss(self): - """Accesses the training loss metric object.""" - return self._train_loss - - @property - def validation_loss(self): - """Accesses the validation loss metric object.""" - return self._validation_loss - - @property - def train_metrics(self): - """Accesses all training metric objects.""" - return self._train_metrics - - @property - def validation_metrics(self): - """Accesses all validation metric objects.""" - return self._validation_metrics - - def initialize(self): - """A callback function.
- - This function will be called when no checkpoint is found for the model. - If there is a checkpoint, the checkpoint will be loaded and this function - will not be called. Tasks may use this callback function to load a - pretrained checkpoint, saved under a directory other than the model_dir. - """ - self.task.initialize(self.model) - - @property - def checkpoint(self): - """Accesses the training checkpoint.""" - return self._checkpoint - - def add_recovery(self, params: TrainerConfig, - checkpoint_manager: tf.train.CheckpointManager): - if params.recovery_max_trials >= 0: - self._recovery = Recovery( - loss_upper_bound=params.loss_upper_bound, - recovery_begin_steps=params.recovery_begin_steps, - recovery_max_trials=params.recovery_max_trials, - checkpoint_manager=checkpoint_manager) - - def train_loop_end(self): - """See base class.""" - self.join() - # Checks if the model's numeric status is stable and conducts checkpoint - # recovery accordingly. - if self._recovery: - self._recovery.maybe_recover(self.train_loss.result().numpy(), - self.global_step.numpy()) - logs = {} - for metric in self.train_metrics + [self.train_loss]: - logs[metric.name] = metric.result() - metric.reset_states() - if callable(self.optimizer.learning_rate): - # A self-implemented optimizer may not have `optimizer.iterations`, so - # fall back to the global step just to be safe. - if hasattr(self.optimizer, "iterations"): - logs["learning_rate"] = self.optimizer.learning_rate( - self.optimizer.iterations) - else: - logs["learning_rate"] = self.optimizer.learning_rate(self.global_step) - else: - logs["learning_rate"] = self.optimizer.learning_rate - return logs - - def train_step(self, iterator): - """See base class.""" - - def step_fn(inputs): - if self.config.runtime.enable_xla and (self.config.runtime.num_gpus > 0): - task_train_step = tf.function(self.task.train_step, jit_compile=True) - else: - task_train_step = self.task.train_step - logs = task_train_step( - inputs, - model=self.model, - optimizer=self.optimizer, - metrics=self.train_metrics) - self._train_loss.update_state(logs[self.task.loss]) - self.global_step.assign_add(1) - - self.strategy.run( - step_fn, args=(next(iterator),), options=self._runtime_options) - - def eval_begin(self): - """Sets up metrics.""" - for metric in self.validation_metrics + [self.validation_loss]: - metric.reset_states() - # Swaps weights so evaluation runs on the moving average of the weights. - if self.optimizer and isinstance(self.optimizer, - optimization.ExponentialMovingAverage): - self.optimizer.swap_weights() - - def eval_step(self, iterator): - """See base class.""" - - def step_fn(inputs): - logs = self.task.validation_step( - inputs, model=self.model, metrics=self.validation_metrics) - if self.task.loss in logs: - self._validation_loss.update_state(logs[self.task.loss]) - return logs - - distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),)) - return tf.nest.map_structure(self.strategy.experimental_local_results, - distributed_outputs) - - def eval_end(self, aggregated_logs=None): - """Processes evaluation results.""" - self.join() - logs = {} - for metric in self.validation_metrics: - logs[metric.name] = metric.result() - if self.validation_loss.count.numpy() != 0: - logs[self.validation_loss.name] = self.validation_loss.result() - else: - # `self.validation_loss` metric was not updated, because the validation - # loss was not returned from the task's `validation_step` method.
- logging.info("The task did not report validation loss.") - if aggregated_logs: - metrics = self.task.reduce_aggregated_logs( - aggregated_logs, global_step=self.global_step) - logs.update(metrics) - - if self._checkpoint_exporter: - self._checkpoint_exporter.maybe_export_checkpoint( - self.checkpoint, logs, self.global_step.numpy()) - metric_name = self.config.trainer.best_checkpoint_eval_metric - logs["best_" + - metric_name] = self._checkpoint_exporter.best_ckpt_logs[metric_name] - - # Swaps back weights after testing when EMA is used. - # This happens after best checkpoint export so that average weights used for - # eval are exported instead of regular weights. - if self.optimizer and isinstance(self.optimizer, - optimization.ExponentialMovingAverage): - self.optimizer.swap_weights() - return logs - - def eval_reduce(self, state=None, step_outputs=None): - return self.task.aggregate_logs(state, step_outputs) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_trainer_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_trainer_test.py deleted file mode 100644 index cb938879299ec7457fec51e50051827d05840bc8..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/base_trainer_test.py +++ /dev/null @@ -1,422 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for tensorflow_models.core.trainers.trainer.""" -# pylint: disable=g-direct-tensorflow-import -import multiprocessing -import os -import sys - -from absl.testing import parameterized -import numpy as np -import orbit -import portpicker -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.core import base_trainer as trainer_lib -from official.core import config_definitions as cfg -from official.core import train_lib -from official.utils.testing import mock_task - -TPU_TEST = 'test_tpu' in sys.argv[0] -GPU_TEST = 'test_gpu' in sys.argv[0] - - -def all_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ],) - - -def create_in_process_cluster(num_workers, num_ps): - """Creates and starts local servers and returns the cluster_resolver.""" - worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] - ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] - - cluster_dict = {} - cluster_dict['worker'] = ['localhost:%s' % port for port in worker_ports] - if num_ps > 0: - cluster_dict['ps'] = ['localhost:%s' % port for port in ps_ports] - - cluster_spec = tf.train.ClusterSpec(cluster_dict) - - # Workers need some inter_ops threads to work properly. - worker_config = tf.compat.v1.ConfigProto() - if multiprocessing.cpu_count() < num_workers + 1: - worker_config.inter_op_parallelism_threads = num_workers + 1 - - for i in range(num_workers): - tf.distribute.Server( - cluster_spec, - job_name='worker', - task_index=i, - config=worker_config, - protocol='grpc') - - for i in range(num_ps): - tf.distribute.Server( - cluster_spec, job_name='ps', task_index=i, protocol='grpc') - - cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( - cluster_spec, rpc_layer='grpc') - return cluster_resolver - - -def dataset_fn(input_context=None): - del input_context - - def dummy_data(_): - return tf.zeros((1, 1), dtype=tf.float32) - - dataset = tf.data.Dataset.range(1) - dataset = dataset.repeat() - dataset = dataset.map( - dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) - return dataset - - -class MockAsyncTrainer(trainer_lib._AsyncTrainer): - """Mock AsyncTrainer to test the _AsyncTrainer class.""" - - def __init__(self): - self._strategy = tf.distribute.get_strategy() - self.init_async() - - self.global_step = tf.Variable( - 0, - dtype=tf.int64, - name='global_step', - trainable=False, - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - self.eval_global_step = tf.Variable( - 0, - dtype=tf.int64, - name='eval_global_step', - trainable=False, - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - - train_dataset = self.distribute_dataset(dataset_fn) - orbit.StandardTrainer.__init__( - self, train_dataset, options=orbit.StandardTrainerOptions()) - - validation_dataset = self.distribute_dataset(dataset_fn) - orbit.StandardEvaluator.__init__( - self, - validation_dataset, - options=orbit.StandardEvaluatorOptions(use_tf_while_loop=True)) - - def train_loop_begin(self): - self.global_step.assign(0) - - def train_step(self, iterator): - - def replica_step(_): - self.global_step.assign_add(1) - - self._strategy.run(replica_step, args=(next(iterator),)) - - def train_loop_end(self): - self.join() - return self.global_step.numpy() - - def eval_begin(self): - 
self.eval_global_step.assign(0)
-
-  def eval_step(self, iterator):
-
-    def replica_step(_):
-      self.eval_global_step.assign_add(1)
-
-    self._strategy.run(replica_step, args=(next(iterator),))
-
-  def eval_end(self):
-    self.join()
-    return self.eval_global_step.numpy()
-
-
-class TrainerTest(tf.test.TestCase, parameterized.TestCase):
-
-  def setUp(self):
-    super().setUp()
-    self._config = cfg.ExperimentConfig(
-        trainer=cfg.TrainerConfig(
-            optimizer_config=cfg.OptimizationConfig({
-                'optimizer': {
-                    'type': 'sgd'
-                },
-                'learning_rate': {
-                    'type': 'constant'
-                }
-            })))
-
-  def create_test_trainer(self, config, model_dir=None, task=None):
-    task = task or mock_task.MockTask(config.task, logging_dir=model_dir)
-    ckpt_exporter = train_lib.maybe_create_best_ckpt_exporter(config, model_dir)
-    trainer = trainer_lib.Trainer(
-        config,
-        task,
-        model=task.build_model(),
-        optimizer=task.create_optimizer(config.trainer.optimizer_config,
-                                        config.runtime),
-        checkpoint_exporter=ckpt_exporter)
-    return trainer
-
-  @combinations.generate(all_strategy_combinations())
-  def test_trainer_train(self, distribution):
-    with distribution.scope():
-      trainer = self.create_test_trainer(self._config)
-      logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
-      self.assertIn('training_loss', logs)
-      self.assertIn('learning_rate', logs)
-
-  @combinations.generate(all_strategy_combinations())
-  def test_trainer_passing_datasets(self, distribution):
-    with distribution.scope():
-      task = mock_task.MockTask(self._config)
-      train_dataset = orbit.utils.make_distributed_dataset(
-          distribution, task.build_inputs, self._config.task.train_data)
-      validation_dataset = orbit.utils.make_distributed_dataset(
-          distribution, task.build_inputs, self._config.task.validation_data)
-      self._config.task.train_data = None
-      self._config.task.validation_data = None
-      trainer = trainer_lib.Trainer(
-          self._config,
-          task,
-          model=task.build_model(),
-          optimizer=task.create_optimizer(self._config.trainer.optimizer_config,
-                                          self._config.runtime),
-          train_dataset=train_dataset,
-          validation_dataset=validation_dataset)
-    logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
-    self.assertIn('training_loss', logs)
-    self.assertIn('learning_rate', logs)
-    logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
-    self.assertIn('validation_loss', logs)
-
-  def test_base_async_trainer(self):
-    if TPU_TEST or GPU_TEST:
-      self.skipTest('Async training is not available on GPU/TPU.')
-    num_workers = 3
-    num_ps = 2
-    cluster_resolver = create_in_process_cluster(num_workers, num_ps)
-    distribution = tf.distribute.experimental.ParameterServerStrategy(
-        cluster_resolver)
-    with distribution.scope():
-      trainer = MockAsyncTrainer()
-      trainer.init_async()
-      self.assertIsInstance(
-          trainer._coordinator,
-          tf.distribute.experimental.coordinator.ClusterCoordinator)
-      self.assertEqual(trainer.train(tf.constant(10)), 10)
-      self.assertEqual(trainer.evaluate(tf.constant(11)), 11)
-
-  def test_async_trainer_train(self):
-    if TPU_TEST or GPU_TEST:
-      self.skipTest('Async training is not available on GPU/TPU.')
-    num_workers = 3
-    num_ps = 2
-    cluster_resolver = create_in_process_cluster(num_workers, num_ps)
-    distribution = tf.distribute.experimental.ParameterServerStrategy(
-        cluster_resolver)
-    with distribution.scope():
-      config = cfg.ExperimentConfig(**self._config.as_dict())
-      config.trainer.eval_tf_while_loop = True
-      trainer = self.create_test_trainer(config)
-      logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
-      self.assertIn('training_loss', logs)
-      self.assertIn('learning_rate', logs)
-
-  def test_async_trainer_validate(self):
-    if TPU_TEST or GPU_TEST:
-      self.skipTest('Async training is not available on GPU/TPU.')
-    num_workers = 3
-    num_ps = 2
-    cluster_resolver = create_in_process_cluster(num_workers, num_ps)
-    distribution = tf.distribute.experimental.ParameterServerStrategy(
-        cluster_resolver)
-    with distribution.scope():
-      config = cfg.ExperimentConfig(**self._config.as_dict())
-      config.trainer.eval_tf_while_loop = True
-      trainer = self.create_test_trainer(config)
-      logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
-      self.assertIn('acc', logs)
-      self.assertIn('validation_loss', logs)
-
-  @combinations.generate(all_strategy_combinations())
-  def test_trainer_validate(self, distribution):
-    with distribution.scope():
-      trainer = self.create_test_trainer(self._config)
-      logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
-      self.assertEqual(logs['counter'], 5. * distribution.num_replicas_in_sync)
-      self.assertIn('validation_loss', logs)
-
-  @combinations.generate(all_strategy_combinations())
-  def test_trainer_validate_without_loss(self, distribution):
-
-    class MockTaskWithoutValidationLoss(mock_task.MockTask):
-
-      def validation_step(self, inputs, model, metrics=None):
-        # Disable validation loss.
-        logs = super().validation_step(inputs, model)
-        del logs[self.loss]
-        return logs
-
-    with distribution.scope():
-      task = MockTaskWithoutValidationLoss()
-      trainer = self.create_test_trainer(self._config, task=task)
-      logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
-      self.assertEqual(logs['counter'], 5. * distribution.num_replicas_in_sync)
-      self.assertNotIn('validation_loss', logs)
-
-  @combinations.generate(
-      combinations.combine(
-          mixed_precision_dtype=['float32', 'bfloat16', 'float16'],
-          loss_scale=[None, 'dynamic', 128, 256],
-      ))
-  def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
-    config = cfg.ExperimentConfig(
-        runtime=cfg.RuntimeConfig(
-            mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
-        trainer=cfg.TrainerConfig(
-            optimizer_config=cfg.OptimizationConfig({
-                'optimizer': {
-                    'type': 'sgd'
-                },
-                'learning_rate': {
-                    'type': 'constant'
-                },
-            })))
-    trainer = self.create_test_trainer(config)
-    if mixed_precision_dtype != 'float16':
-      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
-    elif mixed_precision_dtype == 'float16' and loss_scale is None:
-      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
-    else:
-      self.assertIsInstance(trainer.optimizer,
-                            tf.keras.mixed_precision.LossScaleOptimizer)
-
-    metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
-    self.assertIn('training_loss', metrics)
-
-  def test_export_best_ckpt(self):
-    config = cfg.ExperimentConfig(
-        trainer=cfg.TrainerConfig(
-            best_checkpoint_export_subdir='best_ckpt',
-            best_checkpoint_eval_metric='acc',
-            optimizer_config=cfg.OptimizationConfig({
-                'optimizer': {
-                    'type': 'sgd'
-                },
-                'learning_rate': {
-                    'type': 'constant'
-                }
-            })))
-    model_dir = self.get_temp_dir()
-    trainer = self.create_test_trainer(config, model_dir=model_dir)
-    trainer.train(tf.convert_to_tensor(1, dtype=tf.int32))
-    trainer.evaluate(tf.convert_to_tensor(1, dtype=tf.int32))
-    self.assertTrue(
-        tf.io.gfile.exists(os.path.join(model_dir, 'best_ckpt', 'info.json')))
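The `test_recovery` case below exercises the blowup-recovery path end to end: save a checkpoint, train past the loss bound, and verify the weights were restored. For reference, a hedged sketch of the same wiring outside the test harness, assuming a `trainer` built as in `create_test_trainer` above and a writable `model_dir`:

```python
config = cfg.ExperimentConfig(
    trainer=cfg.TrainerConfig(
        loss_upper_bound=0.5,    # losses above this bound count as a blowup
        recovery_begin_steps=0,  # enforce the bound from the first step
        recovery_max_trials=2))  # restore from checkpoint at most twice
checkpoint_manager = tf.train.CheckpointManager(
    trainer.checkpoint, model_dir, max_to_keep=2)
checkpoint_manager.save()  # recovery restores from the latest saved checkpoint
trainer.add_recovery(config.trainer, checkpoint_manager=checkpoint_manager)
```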
-
-  def test_recovery(self):
-    config = cfg.ExperimentConfig(
-        trainer=cfg.TrainerConfig(
-            loss_upper_bound=0.5,
-            recovery_max_trials=2,
-            optimizer_config=cfg.OptimizationConfig({
-                'optimizer': {
-                    'type': 'sgd'
-                },
-                'learning_rate': {
-                    'type': 'constant'
-                }
-            })))
-    model_dir = self.get_temp_dir()
-    trainer = self.create_test_trainer(config, model_dir=model_dir)
-    checkpoint_manager = tf.train.CheckpointManager(
-        trainer.checkpoint, self.get_temp_dir(), max_to_keep=2)
-    checkpoint_manager.save()
-    trainer.add_recovery(config.trainer, checkpoint_manager=checkpoint_manager)
-    before_weights = trainer.model.get_weights()
-    _ = trainer.train(tf.convert_to_tensor(1, dtype=tf.int32))
-    # The training loss is 1.0 and the upper bound is 0.5, so recovery happens.
-    after_weights = trainer.model.get_weights()
-    for left, right in zip(before_weights, after_weights):
-      self.assertAllEqual(left, right)
-
-    # Let the loss be NaN and max_trials = 0 to trigger a RuntimeError.
-    config = cfg.ExperimentConfig(
-        trainer=cfg.TrainerConfig(
-            recovery_max_trials=0,
-            optimizer_config=cfg.OptimizationConfig({
-                'optimizer': {
-                    'type': 'sgd'
-                },
-                'learning_rate': {
-                    'type': 'constant'
-                }
-            })))
-    task = mock_task.MockTask(config.task, logging_dir=model_dir)
-
-    def build_losses(labels, model_outputs, aux_losses=None):
-      del labels, model_outputs
-      return tf.constant([np.nan], tf.float32) + aux_losses
-
-    task.build_losses = build_losses
-    trainer = trainer_lib.Trainer(
-        config,
-        task,
-        model=task.build_model(),
-        optimizer=task.create_optimizer(config.trainer.optimizer_config,
-                                        config.runtime))
-    trainer.add_recovery(config.trainer, checkpoint_manager=checkpoint_manager)
-    with self.assertRaises(RuntimeError):
-      _ = trainer.train(tf.convert_to_tensor(2, dtype=tf.int32))
-
-  def test_model_with_compiled_loss(self):
-    task = mock_task.MockTask()
-    model = task.build_model()
-    model.compile(loss=tf.keras.losses.CategoricalCrossentropy())
-    trainer = trainer_lib.Trainer(
-        self._config,
-        task,
-        model=model,
-        optimizer=task.create_optimizer(self._config.trainer.optimizer_config))
-    logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
-    self.assertIn('training_loss', logs)
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/config_definitions.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/config_definitions.py
deleted file mode 100644
index 498ecea4b9b4148ee8d6465ec26ed155929b5686..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/config_definitions.py
+++ /dev/null
@@ -1,268 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Common configuration settings.""" - -from typing import Optional, Sequence, Union - -import dataclasses - -from official.modeling.hyperparams import base_config -from official.modeling.optimization.configs import optimization_config - -OptimizationConfig = optimization_config.OptimizationConfig - - -@dataclasses.dataclass -class DataConfig(base_config.Config): - """The base configuration for building datasets. - - Attributes: - input_path: The path to the input. It can be either (1) a str indicating - a file path/pattern, or (2) a str indicating multiple file paths/patterns - separated by comma (e.g "a, b, c" or no spaces "a,b,c"), or - (3) a list of str, each of which is a file path/pattern or multiple file - paths/patterns separated by comma. - It should not be specified when the following `tfds_name` is specified. - tfds_name: The name of the tensorflow dataset (TFDS). It should not be - specified when the above `input_path` is specified. - tfds_split: A str indicating which split of the data to load from TFDS. It - is required when above `tfds_name` is specified. - global_batch_size: The global batch size across all replicas. - is_training: Whether this data is used for training or not. - drop_remainder: Whether the last batch should be dropped in the case it has - fewer than `global_batch_size` elements. - shuffle_buffer_size: The buffer size used for shuffling training data. - cache: Whether to cache dataset examples. If `True`, we will cache the - dataset after applying the decode_fn and parse_fn. It can be used to avoid - re-reading from disk, re-decoding and re-parsing the example on the - second epoch, but it requires significant memory overhead. - cycle_length: The number of files that will be processed concurrently when - interleaving files. - block_length: The number of consecutive elements to produce from each input - element before cycling to another input element when interleaving files. - deterministic: A boolean controlling whether determinism should be enforced. - sharding: Whether sharding is used in the input pipeline. - enable_tf_data_service: A boolean indicating whether to enable tf.data - service for the input pipeline. - tf_data_service_address: The URI of a tf.data service to offload - preprocessing onto during training. The URI should be in the format - "protocol://address", e.g. "grpc://tf-data-service:5050". It can be - overridden by `FLAGS.tf_data_service` flag in the binary. - tf_data_service_job_name: The name of the tf.data service job. This - argument makes it possible for multiple datasets to share the same job. - The default behavior is that the dataset creates anonymous, exclusively - owned jobs. - tfds_data_dir: A str specifying the directory to read/write TFDS data. - tfds_as_supervised: A bool. When loading dataset from TFDS, if True, the - returned tf.data.Dataset will have a 2-tuple structure (input, label) - according to builder.info.supervised_keys; if False, the default, the - returned tf.data.Dataset will have a dictionary with all the features. 
-    tfds_skip_decoding_feature: A str to indicate which features are skipped for
-      decoding when loading dataset from TFDS. Use comma to separate multiple
-      features. The main use case is to skip the image/video decoding for better
-      performance.
-    seed: An optional seed to use for deterministic shuffling/preprocessing.
-  """
-  input_path: Union[Sequence[str], str] = ""
-  tfds_name: str = ""
-  tfds_split: str = ""
-  global_batch_size: int = 0
-  is_training: bool = None
-  drop_remainder: bool = True
-  shuffle_buffer_size: int = 100
-  cache: bool = False
-  cycle_length: Optional[int] = None
-  block_length: int = 1
-  deterministic: Optional[bool] = None
-  sharding: bool = True
-  enable_tf_data_service: bool = False
-  tf_data_service_address: Optional[str] = None
-  tf_data_service_job_name: Optional[str] = None
-  tfds_data_dir: str = ""
-  tfds_as_supervised: bool = False
-  tfds_skip_decoding_feature: str = ""
-  seed: Optional[int] = None
-
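A hedged sketch of a typical file-based `DataConfig` built from the attributes documented above; the TFRecord pattern is hypothetical, and for TFDS input one would instead set `tfds_name`/`tfds_split` and leave `input_path` empty:

```python
train_data = DataConfig(
    input_path='/data/train*.tfrecord',  # hypothetical file pattern
    global_batch_size=32,
    is_training=True,
    shuffle_buffer_size=10000,
    drop_remainder=True)
```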
-
-@dataclasses.dataclass
-class RuntimeConfig(base_config.Config):
-  """High-level configurations for Runtime.
-
-  These include parameters that are not directly related to the experiment,
-  e.g. directories, accelerator type, etc.
-
-  Attributes:
-    distribution_strategy: e.g. 'mirrored', 'tpu', etc.
-    enable_xla: Whether or not to enable XLA.
-    per_gpu_thread_count: thread count per GPU.
-    gpu_thread_mode: Whether and how the GPU device uses its own threadpool.
-    dataset_num_private_threads: Number of threads for a private threadpool
-      created for all datasets computation.
-    tpu: The address of the TPU to use, if any.
-    num_gpus: The number of GPUs to use, if any.
-    worker_hosts: comma-separated list of worker ip:port pairs for running
-      multi-worker models with DistributionStrategy.
-    task_index: If multi-worker training, the task index of this worker.
-    all_reduce_alg: Defines the algorithm for performing all-reduce.
-    num_packs: Sets `num_packs` in the cross device ops used in
-      MirroredStrategy. For details, see tf.distribute.NcclAllReduce.
-    mixed_precision_dtype: dtype of mixed precision policy. It can be 'float32',
-      'float16', or 'bfloat16'.
-    loss_scale: The type of loss scale, or 'float' value. This is used when
-      setting the mixed precision policy.
-    run_eagerly: Whether or not to run the experiment eagerly.
-    batchnorm_spatial_persistent: Whether or not to enable the spatial
-      persistent mode for CuDNN batch norm kernel for improved GPU performance.
-  """
-  distribution_strategy: str = "mirrored"
-  enable_xla: bool = False
-  gpu_thread_mode: Optional[str] = None
-  dataset_num_private_threads: Optional[int] = None
-  per_gpu_thread_count: int = 0
-  tpu: Optional[str] = None
-  num_gpus: int = 0
-  worker_hosts: Optional[str] = None
-  task_index: int = -1
-  all_reduce_alg: Optional[str] = None
-  num_packs: int = 1
-  mixed_precision_dtype: Optional[str] = None
-  loss_scale: Optional[Union[str, float]] = None
-  run_eagerly: bool = False
-  batchnorm_spatial_persistent: bool = False
-
-  # XLA runtime params.
-  # XLA params are only applied to the train_step.
-  # These arguments can improve training speed. They can also improve eval, but
-  # may reduce usability and users would need to make changes to code.
-
-  # Whether to enable XLA dynamic padder
-  # infrastructure to handle dynamic shapes inputs inside XLA. True by
-  # default. Disabling this may cause correctness issues with dynamic shapes
-  # inputs, as XLA will just assume the inputs are with padded shapes. However
-  # users can optionally set it to False to improve device time if masking is
-  # already handled in the user side.
-  # If None, will respect XLA default.
-  tpu_enable_xla_dynamic_padder: Optional[bool] = None
-
-  # Global model parallelism configurations.
-  num_cores_per_replica: int = 1
-  default_shard_dim: int = -1
-
-  def model_parallelism(self):
-    return dict(
-        num_cores_per_replica=self.num_cores_per_replica,
-        default_shard_dim=self.default_shard_dim)
-
-
-@dataclasses.dataclass
-class TrainerConfig(base_config.Config):
-  """Configuration for trainer.
-
-  Attributes:
-    optimizer_config: optimizer config, it includes optimizer, learning rate,
-      and warmup schedule configs.
-    train_tf_while_loop: whether or not to use tf while loop.
-    train_tf_function: whether or not to use tf_function for training loop.
-    eval_tf_function: whether or not to use tf_function for eval.
-    allow_tpu_summary: Whether to allow summaries to happen inside the XLA
-      program that runs on TPU, through automatic outside compilation.
-    steps_per_loop: number of steps per loop.
-    summary_interval: number of steps between each summary.
-    checkpoint_interval: number of steps between checkpoints.
-    max_to_keep: max checkpoints to keep.
-    continuous_eval_timeout: maximum number of seconds to wait between
-      checkpoints; if set to None, continuous eval will wait indefinitely. This
-      is only used in continuous_train_and_eval and continuous_eval modes. The
-      default value is 1 hour.
-    train_steps: number of train steps.
-    validation_steps: number of eval steps. If `None`, the entire eval dataset
-      is used.
-    validation_interval: number of training steps to run between evaluations.
-    best_checkpoint_export_subdir: if set, the trainer will keep track of the
-      best evaluation metric, and export the corresponding best checkpoint under
-      `model_dir/best_checkpoint_export_subdir`. Note that this only works if
-      mode contains eval (such as `train_and_eval`, `continuous_eval`, and
-      `continuous_train_and_eval`).
-    best_checkpoint_eval_metric: for exporting the best checkpoint, which
-      evaluation metric the trainer should monitor. This can be any evaluation
-      metric that appears on TensorBoard.
-    best_checkpoint_metric_comp: for exporting the best checkpoint, how the
-      trainer should compare the evaluation metrics. This can be either `higher`
-      (higher is better) or `lower` (lower is better).
-    validation_summary_subdir: A `str`, subdirectory for saving eval summaries.
-  """
-  optimizer_config: OptimizationConfig = OptimizationConfig()
-  # Orbit settings.
-  train_tf_while_loop: bool = True
-  train_tf_function: bool = True
-  eval_tf_function: bool = True
-  eval_tf_while_loop: bool = False
-  allow_tpu_summary: bool = False
-  # Trainer intervals.
-  steps_per_loop: int = 1000
-  summary_interval: int = 1000
-  checkpoint_interval: int = 1000
-  # Checkpoint manager.
-  max_to_keep: int = 5
-  continuous_eval_timeout: int = 60 * 60
-  # Train/Eval routines.
-  train_steps: int = 0
-  # Sets validation steps to -1 to evaluate the entire dataset.
-  validation_steps: int = -1
-  validation_interval: int = 1000
-  # Best checkpoint export.
-  best_checkpoint_export_subdir: str = ""
-  best_checkpoint_eval_metric: str = ""
-  best_checkpoint_metric_comp: str = "higher"
-  # Blowup recovery.
-  loss_upper_bound: float = 1e6
-  recovery_begin_steps: int = 0  # Enforcing the loss bound after these steps.
-  # When max trials < 0, there is no recovery module; when max trials = 0, we
-  # check the condition and fail the job if it triggers; when max trials > 0,
-  # we restore the model states.
-  recovery_max_trials: int = 0
-  validation_summary_subdir: str = "validation"
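The best-checkpoint fields above cooperate with the evaluator: a hedged sketch of a trainer config that keeps the checkpoint with the highest accuracy (the metric name `acc` matches this repo's mock task; substitute whatever appears in your eval logs):

```python
trainer_config = TrainerConfig(
    best_checkpoint_export_subdir='best_ckpt',  # exported under model_dir
    best_checkpoint_eval_metric='acc',          # any metric in the eval logs
    best_checkpoint_metric_comp='higher')       # keep the highest value seen
```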
-
-
-@dataclasses.dataclass
-class TaskConfig(base_config.Config):
-  init_checkpoint: str = ""
-  model: base_config.Config = None
-  train_data: DataConfig = DataConfig()
-  validation_data: DataConfig = DataConfig()
-
-
-@dataclasses.dataclass
-class ExperimentConfig(base_config.Config):
-  """Top-level configuration."""
-  task: TaskConfig = TaskConfig()
-  trainer: TrainerConfig = TrainerConfig()
-  runtime: RuntimeConfig = RuntimeConfig()
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/exp_factory.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/exp_factory.py
deleted file mode 100644
index e9dbe0972d4ecf4e1e5b3e94142d4b633070a38a..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/exp_factory.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Experiment factory methods."""
-
-from official.core import config_definitions as cfg
-from official.core import registry
-
-
-_REGISTERED_CONFIGS = {}
-
-
-def register_config_factory(name):
-  """Register ExperimentConfig factory method."""
-  return registry.register(_REGISTERED_CONFIGS, name)
-
-
-def get_exp_config_creater(exp_name: str):
-  """Looks up ExperimentConfig factory methods."""
-  exp_creater = registry.lookup(_REGISTERED_CONFIGS, exp_name)
-  return exp_creater
-
-
-def get_exp_config(exp_name: str) -> cfg.ExperimentConfig:
-  return get_exp_config_creater(exp_name)()
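To make the factory flow above concrete, a hedged sketch of registering and then retrieving an experiment configuration; the experiment name `my_bert_squad` is hypothetical:

```python
from official.core import config_definitions as cfg
from official.core import exp_factory

@exp_factory.register_config_factory('my_bert_squad')
def my_bert_squad_config() -> cfg.ExperimentConfig:
  # Return a fully populated top-level config for this experiment.
  return cfg.ExperimentConfig(
      task=cfg.TaskConfig(
          train_data=cfg.DataConfig(
              input_path='/data/train*.tfrecord',  # hypothetical path
              global_batch_size=32,
              is_training=True)))

config = exp_factory.get_exp_config('my_bert_squad')  # invokes the factory
```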
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/export_base.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/export_base.py
deleted file mode 100644
index b8529a2b73991b15cc700a0fc30486a82a48665a..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/export_base.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Base class for model export."""
-
-import abc
-import functools
-from typing import Any, Callable, Dict, Mapping, List, Optional, Text, Union
-
-import tensorflow as tf
-from tensorflow.python.saved_model.model_utils import export_utils
-
-
-class ExportModule(tf.Module, metaclass=abc.ABCMeta):
-  """Base Export Module."""
-
-  def __init__(self,
-               params,
-               model: Union[tf.Module, tf.keras.Model],
-               inference_step: Optional[Callable[..., Any]] = None):
-    """Instantiates an ExportModule.
-
-    Args:
-      params: A dataclass for parameters to the module.
-      model: A model instance which contains weights and forward computation.
-      inference_step: An optional callable to define how the model is called.
-    """
-    super().__init__(name=None)
-    self.model = model
-    self.params = params
-
-    if inference_step is not None:
-      self.inference_step = functools.partial(inference_step, model=self.model)
-    else:
-      self.inference_step = functools.partial(
-          self.model.__call__, training=False)
-
-  @abc.abstractmethod
-  def serve(self) -> Mapping[Text, tf.Tensor]:
-    """The bare inference function which should run on all devices.
-
-    Tensors are expected to be passed in through keyword arguments. Returns a
-    dictionary of tensors, where the keys will be used inside the SignatureDef.
-    """
-
-  @abc.abstractmethod
-  def get_inference_signatures(
-      self, function_keys: Dict[Text, Text]) -> Mapping[Text, Any]:
-    """Gets defined function signatures."""
-
-
-def export(export_module: ExportModule,
-           function_keys: Union[List[Text], Dict[Text, Text]],
-           export_savedmodel_dir: Text,
-           checkpoint_path: Optional[Text] = None,
-           timestamped: bool = True,
-           save_options: Optional[tf.saved_model.SaveOptions] = None) -> Text:
-  """Exports to SavedModel format.
-
-  Args:
-    export_module: an ExportModule with the keras Model and serving tf.functions.
-    function_keys: a list of string keys to retrieve pre-defined serving
-      signatures. The signature keys will be set with defaults. If a dictionary
-      is provided, the values will be used as signature keys.
-    export_savedmodel_dir: Output saved model directory.
-    checkpoint_path: Object-based checkpoint path or directory.
-    timestamped: Whether to export the savedmodel to a timestamped directory.
-    save_options: `SaveOptions` for `tf.saved_model.save`.
- - Returns: - The savedmodel directory path. - """ - ckpt_dir_or_file = checkpoint_path - if tf.io.gfile.isdir(ckpt_dir_or_file): - ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) - if ckpt_dir_or_file: - checkpoint = tf.train.Checkpoint(model=export_module.model) - checkpoint.read( - ckpt_dir_or_file).assert_existing_objects_matched().expect_partial() - if isinstance(function_keys, list): - if len(function_keys) == 1: - function_keys = { - function_keys[0]: tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY - } - else: - raise ValueError( - "If the function_keys is a list, it must contain a single element. %s" - % function_keys) - - signatures = export_module.get_inference_signatures(function_keys) - if timestamped: - export_dir = export_utils.get_timestamped_export_dir( - export_savedmodel_dir).decode("utf-8") - else: - export_dir = export_savedmodel_dir - tf.saved_model.save( - export_module, export_dir, signatures=signatures, options=save_options) - return export_dir diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/export_base_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/export_base_test.py deleted file mode 100644 index c6838697b9e2c0a2b9ff5a9cd587446df807321e..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/export_base_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for official.core.export_base.""" -import os -from typing import Any, Dict, Mapping, Text - -import tensorflow as tf - -from official.core import export_base - - -class TestModule(export_base.ExportModule): - - @tf.function - def serve(self, inputs: tf.Tensor) -> Mapping[Text, tf.Tensor]: - return {'outputs': self.inference_step(inputs)} - - def get_inference_signatures( - self, function_keys: Dict[Text, Text]) -> Mapping[Text, Any]: - input_signature = tf.TensorSpec(shape=[None, None], dtype=tf.float32) - return {'foo': self.serve.get_concrete_function(input_signature)} - - -class ExportBaseTest(tf.test.TestCase): - - def test_export_module(self): - tmp_dir = self.get_temp_dir() - model = tf.keras.layers.Dense(2) - inputs = tf.ones([2, 4], tf.float32) - expected_output = model(inputs, training=False) - module = TestModule(params=None, model=model) - ckpt_path = tf.train.Checkpoint(model=model).save( - os.path.join(tmp_dir, 'ckpt')) - export_dir = export_base.export( - module, ['foo'], - export_savedmodel_dir=tmp_dir, - checkpoint_path=ckpt_path, - timestamped=True) - self.assertTrue(os.path.exists(os.path.join(export_dir, 'saved_model.pb'))) - self.assertTrue( - os.path.exists( - os.path.join(export_dir, 'variables', 'variables.index'))) - self.assertTrue( - os.path.exists( - os.path.join(export_dir, 'variables', - 'variables.data-00000-of-00001'))) - - imported = tf.saved_model.load(export_dir) - output = imported.signatures['foo'](inputs) - self.assertAllClose(output['outputs'].numpy(), expected_output.numpy()) - - def test_custom_inference_step(self): - tmp_dir = self.get_temp_dir() - model = tf.keras.layers.Dense(2) - inputs = tf.ones([2, 4], tf.float32) - - def _inference_step(inputs, model): - return tf.nn.softmax(model(inputs, training=False)) - - module = TestModule( - params=None, model=model, inference_step=_inference_step) - expected_output = _inference_step(inputs, model) - ckpt_path = tf.train.Checkpoint(model=model).save( - os.path.join(tmp_dir, 'ckpt')) - export_dir = export_base.export( - module, ['foo'], - export_savedmodel_dir=tmp_dir, - checkpoint_path=ckpt_path, - timestamped=False) - imported = tf.saved_model.load(export_dir) - output = imported.signatures['foo'](inputs) - self.assertAllClose(output['outputs'].numpy(), expected_output.numpy()) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/input_reader.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/input_reader.py deleted file mode 100644 index f6ce85e5347bc893cf668e1e3cf5844ccd800adb..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/input_reader.py +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""A common dataset reader."""
-import random
-from typing import Any, Callable, List, Optional
-
-from absl import logging
-import tensorflow as tf
-import tensorflow_datasets as tfds
-
-from official.core import config_definitions as cfg
-
-
-def _get_random_integer():
-  return random.randint(0, (1 << 31) - 1)
-
-
-def _maybe_map_fn(dataset: tf.data.Dataset,
-                  fn: Optional[Callable[..., Any]] = None) -> tf.data.Dataset:
-  """Calls dataset.map if a valid function is passed in."""
-  return dataset if fn is None else dataset.map(
-      fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
-
-
-class InputReader:
-  """Input reader that returns a tf.data.Dataset instance."""
-
-  # A static random number which is the same across different InputReader
-  # instances.
-  static_randnum = _get_random_integer()
-
-  def __init__(self,
-               params: cfg.DataConfig,
-               dataset_fn=tf.data.TFRecordDataset,
-               decoder_fn: Optional[Callable[..., Any]] = None,
-               sample_fn: Optional[Callable[..., Any]] = None,
-               parser_fn: Optional[Callable[..., Any]] = None,
-               transform_and_batch_fn: Optional[Callable[
-                   [tf.data.Dataset, Optional[tf.distribute.InputContext]],
-                   tf.data.Dataset]] = None,
-               postprocess_fn: Optional[Callable[..., Any]] = None):
-    """Initializes an InputReader instance.
-
-    Args:
-      params: A config_definitions.DataConfig object.
-      dataset_fn: A `tf.data.Dataset` that consumes the input files. For
-        example, it can be `tf.data.TFRecordDataset`.
-      decoder_fn: An optional `callable` that takes the serialized data string
-        and decodes it into the raw tensor dictionary.
-      sample_fn: An optional `callable` that takes a `tf.data.Dataset` object as
-        input and outputs the transformed dataset. It performs sampling on the
-        decoded raw tensors dict before the parser_fn.
-      parser_fn: An optional `callable` that takes the decoded raw tensors dict
-        and parses them into a dictionary of tensors that can be consumed by the
-        model. It will be executed after decoder_fn.
-      transform_and_batch_fn: An optional `callable` that takes a
-        `tf.data.Dataset` object and an optional `tf.distribute.InputContext` as
-        input, and returns a `tf.data.Dataset` object. It will be executed after
-        `parser_fn` to transform and batch the dataset; if None, after
-        `parser_fn` is executed, the dataset will be batched into the
-        per-replica batch size.
-      postprocess_fn: An optional `callable` that processes batched tensors. It
-        will be executed after batching.
-    """
-    if params.input_path and params.tfds_name:
-      raise ValueError('At most one of `input_path` and `tfds_name` can be '
-                       'specified, but got %s and %s.' %
-                       (params.input_path, params.tfds_name))
-    self._tfds_builder = None
-    self._matched_files = []
-    if params.input_path:
-      self._matched_files = self._match_files(params.input_path)
-    else:
-      # Read dataset from TFDS.
- if not params.tfds_split: - raise ValueError( - '`tfds_name` is %s, but `tfds_split` is not specified.' % - params.tfds_name) - self._tfds_builder = tfds.builder( - params.tfds_name, data_dir=params.tfds_data_dir) - - self._global_batch_size = params.global_batch_size - self._is_training = params.is_training - self._drop_remainder = params.drop_remainder - self._shuffle_buffer_size = params.shuffle_buffer_size - self._cache = params.cache - self._cycle_length = params.cycle_length - self._block_length = params.block_length - self._deterministic = params.deterministic - self._sharding = params.sharding - self._tfds_split = params.tfds_split - self._tfds_as_supervised = params.tfds_as_supervised - self._tfds_skip_decoding_feature = params.tfds_skip_decoding_feature - - self._dataset_fn = dataset_fn - self._decoder_fn = decoder_fn - self._sample_fn = sample_fn - self._parser_fn = parser_fn - self._transform_and_batch_fn = transform_and_batch_fn - self._postprocess_fn = postprocess_fn - # When tf.data service is enabled, each data service worker should get - # different random seeds. Thus, we set `seed` to None. - if params.seed is not None: - self._seed = params.seed - elif params.enable_tf_data_service: - self._seed = _get_random_integer() - else: - self._seed = None - - self._enable_tf_data_service = ( - params.enable_tf_data_service and params.tf_data_service_address) - self._tf_data_service_address = params.tf_data_service_address - if self._enable_tf_data_service: - # Add a random seed as the tf.data service job name suffix, so tf.data - # service doesn't reuse the previous state if TPU worker gets preempted. - self._tf_data_service_job_name = ( - params.tf_data_service_job_name + str(self.static_randnum)) - self._enable_round_robin_tf_data_service = params.get( - 'enable_round_robin_tf_data_service', False) - - def _match_files(self, input_path: str) -> List[str]: - """Matches files from an input_path.""" - matched_files = [] - # Read dataset from files. - usage = ('`input_path` should be either (1) a str indicating a file ' - 'path/pattern, or (2) a str indicating multiple file ' - 'paths/patterns separated by comma (e.g "a, b, c" or no spaces ' - '"a,b,c", or (3) a list of str, each of which is a file ' - 'path/pattern or multiple file paths/patterns separated by ' - 'comma, but got: %s') - if isinstance(input_path, str): - input_path_list = [input_path] - elif isinstance(input_path, (list, tuple)): - if any(not isinstance(x, str) for x in input_path): - raise ValueError(usage % input_path) - input_path_list = input_path - else: - raise ValueError(usage % input_path) - - for input_path in input_path_list: - input_patterns = input_path.strip().split(',') - for input_pattern in input_patterns: - input_pattern = input_pattern.strip() - if not input_pattern: - continue - if '*' in input_pattern or '?' in input_pattern: - tmp_matched_files = tf.io.gfile.glob(input_pattern) - if not tmp_matched_files: - raise ValueError('%s does not match any files.' % input_pattern) - matched_files.extend(tmp_matched_files) - else: - matched_files.append(input_pattern) - - if not matched_files: - raise ValueError('%s does not match any files.' 
% input_path)
-
-    return matched_files
-
-  def _shard_files_then_read(
-      self,
-      matched_files: List[str],
-      dataset_fn,
-      input_context: Optional[tf.distribute.InputContext] = None
-  ) -> tf.data.Dataset:
-    """Shards the data files and then sends a split to every worker to read."""
-    dataset = tf.data.Dataset.from_tensor_slices(matched_files)
-
-    # Shuffle and repeat at file level.
-    # If cache is enabled, `reshuffle_each_iteration` is set to False,
-    # because we will read the same cached data in every iteration anyway.
-    if self._is_training:
-      dataset = dataset.shuffle(
-          len(matched_files),
-          seed=self._seed,
-          reshuffle_each_iteration=True if not self._cache else False)
-
-    # Do not enable sharding if tf.data service is enabled, as sharding will be
-    # handled inside tf.data service.
-    if self._sharding and input_context and (
-        input_context.num_input_pipelines > 1 and
-        not self._enable_tf_data_service):
-      dataset = dataset.shard(input_context.num_input_pipelines,
-                              input_context.input_pipeline_id)
-
-    # If cache is enabled, we will call `repeat()` later after `cache()`.
-    if self._is_training and not self._cache:
-      dataset = dataset.repeat()
-
-    dataset = dataset.interleave(
-        map_func=dataset_fn,
-        cycle_length=self._cycle_length,
-        block_length=self._block_length,
-        num_parallel_calls=(self._cycle_length if self._cycle_length else
-                            tf.data.experimental.AUTOTUNE),
-        deterministic=self._deterministic)
-    return dataset
-
-  def _read_files_then_shard(
-      self,
-      matched_files: List[str],
-      dataset_fn,
-      input_context: Optional[tf.distribute.InputContext] = None
-  ) -> tf.data.Dataset:
-    """Sends all data files to every worker and then shards by data."""
-    dataset = dataset_fn(matched_files)
-
-    # When `input_file` is a path to a single file or the number of files is
-    # less than the number of input pipelines, disable auto sharding
-    # so that the same input file is sent to all workers.
-    options = tf.data.Options()
-    options.experimental_distribute.auto_shard_policy = (
-        tf.data.experimental.AutoShardPolicy.OFF)
-    dataset = dataset.with_options(options)
-    # Do not enable sharding if tf.data service is enabled, as sharding will be
-    # handled inside tf.data service.
-    if self._sharding and input_context and (
-        input_context.num_input_pipelines > 1 and
-        not self._enable_tf_data_service):
-      dataset = dataset.shard(input_context.num_input_pipelines,
-                              input_context.input_pipeline_id)
-
-    # If cache is enabled, we will call `repeat()` later after `cache()`.
-    if self._is_training and not self._cache:
-      dataset = dataset.repeat()
-    return dataset
-
-  def _read_tfds(
-      self,
-      input_context: Optional[tf.distribute.InputContext] = None
-  ) -> tf.data.Dataset:
-    """Reads a dataset from tfds."""
-    # No-op if the data already exists.
-    self._tfds_builder.download_and_prepare()
-
-    read_config = tfds.ReadConfig(
-        interleave_cycle_length=self._cycle_length,
-        interleave_block_length=self._block_length,
-        input_context=input_context,
-        shuffle_seed=self._seed)
-    decoders = {}
-    if self._tfds_skip_decoding_feature:
-      for skip_feature in self._tfds_skip_decoding_feature.split(','):
-        decoders[skip_feature.strip()] = tfds.decode.SkipDecoding()
-    dataset = self._tfds_builder.as_dataset(
-        split=self._tfds_split,
-        shuffle_files=self._is_training,
-        as_supervised=self._tfds_as_supervised,
-        decoders=decoders,
-        read_config=read_config)
-
-    # If cache is enabled, we will call `repeat()` later after `cache()`.
- if self._is_training and not self._cache: - dataset = dataset.repeat() - return dataset - - @property - def tfds_info(self) -> tfds.core.DatasetInfo: - """Returns TFDS dataset info, if available.""" - if self._tfds_builder: - return self._tfds_builder.info - else: - raise ValueError('tfds_info is not available, because the dataset ' - 'is not loaded from tfds.') - - def _read_decode_and_parse_dataset( - self, - matched_files: List[str], - dataset_fn, - batch_size: int, - input_context: Optional[tf.distribute.InputContext] = None, - tfds_builder: bool = False) -> tf.data.Dataset: - """Returns a tf.data.Dataset object after reading, decoding, and parsing.""" - if tfds_builder: - dataset = self._read_tfds(input_context) - elif len(matched_files) > 1: - if input_context and (len(matched_files) < - input_context.num_input_pipelines): - logging.warn( - 'The number of files %d is less than the number of input pipelines ' - '%d. We will send all input files to every worker. ' - 'Please consider sharding your data into more files.', - len(matched_files), input_context.num_input_pipelines) - dataset = self._read_files_then_shard(matched_files, - dataset_fn, - input_context) - else: - dataset = self._shard_files_then_read(matched_files, - dataset_fn, - input_context) - elif len(matched_files) == 1: - dataset = self._read_files_then_shard(matched_files, - dataset_fn, - input_context) - else: - raise ValueError('It is unexpected that `tfds_builder` is None and ' - 'there is also no `matched_files`.') - - # If cache is enabled, we will call `shuffle()` later after `cache()`. - if self._is_training and not self._cache: - dataset = dataset.shuffle(self._shuffle_buffer_size, seed=self._seed) - - dataset = _maybe_map_fn(dataset, self._decoder_fn) - if self._sample_fn is not None: - dataset = dataset.apply(self._sample_fn) - dataset = _maybe_map_fn(dataset, self._parser_fn) - - if self._cache: - dataset = dataset.cache() - if self._is_training: - dataset = dataset.repeat() - dataset = dataset.shuffle(self._shuffle_buffer_size, seed=self._seed) - - if self._transform_and_batch_fn is not None: - dataset = self._transform_and_batch_fn(dataset, input_context) - else: - per_replica_batch_size = input_context.get_per_replica_batch_size( - batch_size) if input_context else batch_size - dataset = dataset.batch( - per_replica_batch_size, drop_remainder=self._drop_remainder - ) - - return dataset - - def _maybe_apply_data_service( - self, - dataset: tf.data.Dataset, - input_context: Optional[tf.distribute.InputContext] = None - ) -> tf.data.Dataset: - """Potentially distributes a dataset.""" - if self._enable_tf_data_service and input_context: - if self._enable_round_robin_tf_data_service: - replicas_per_input_pipeline = input_context.num_replicas_in_sync // ( - input_context.num_input_pipelines) - base_consumer_index = input_context.input_pipeline_id * ( - replicas_per_input_pipeline) - num_consumers = input_context.num_input_pipelines * ( - replicas_per_input_pipeline) - range_dataset = tf.data.Dataset.range(replicas_per_input_pipeline) - dataset = range_dataset.map(lambda i: dataset.apply( # pylint: disable=g-long-lambda - tf.data.experimental.service.distribute( - processing_mode='parallel_epochs', - service=self._tf_data_service_address, - job_name=self._tf_data_service_job_name, - consumer_index=base_consumer_index + i, - num_consumers=num_consumers))) - # Use parallel interleave to read multiple batches from a tf.data - # service worker in parallel. 
- dataset = dataset.interleave( - lambda x: x, - cycle_length=replicas_per_input_pipeline, - num_parallel_calls=replicas_per_input_pipeline, - deterministic=True) - else: - dataset = dataset.apply( - tf.data.experimental.service.distribute( - processing_mode='parallel_epochs', - service=self._tf_data_service_address, - job_name=self._tf_data_service_job_name)) - return dataset - - def read( - self, - input_context: Optional[tf.distribute.InputContext] = None - ) -> tf.data.Dataset: - """Generates a tf.data.Dataset object.""" - dataset = self._read_decode_and_parse_dataset(self._matched_files, - self._dataset_fn, - self._global_batch_size, - input_context, - self._tfds_builder) - dataset = _maybe_map_fn(dataset, self._postprocess_fn) - dataset = self._maybe_apply_data_service(dataset, input_context) - - if self._deterministic is not None: - options = tf.data.Options() - options.experimental_deterministic = self._deterministic - dataset = dataset.with_options(options) - return dataset.prefetch(tf.data.experimental.AUTOTUNE) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/registry.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/registry.py deleted file mode 100644 index 193552eae451b518f82ceeac6616d49ec76a2bbf..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/registry.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Registry utility.""" - - -def register(registered_collection, reg_key): - """Register decorated function or class to collection. - - Register decorated function or class into registered_collection, in a - hierarchical order. For example, when reg_key="my_model/my_exp/my_config_0" - the decorated function or class is stored under - registered_collection["my_model"]["my_exp"]["my_config_0"]. - This decorator is supposed to be used together with the lookup() function in - this file. - - Args: - registered_collection: a dictionary. The decorated function or class will be - put into this collection. - reg_key: The key for retrieving the registered function or class. 
If reg_key - is a string, it can be hierarchical like my_model/my_exp/my_config_0 - Returns: - A decorator function - Raises: - KeyError: when function or class to register already exists. - """ - def decorator(fn_or_cls): - """Put fn_or_cls in the dictionary.""" - if isinstance(reg_key, str): - hierarchy = reg_key.split("/") - collection = registered_collection - for h_idx, entry_name in enumerate(hierarchy[:-1]): - if entry_name not in collection: - collection[entry_name] = {} - collection = collection[entry_name] - if not isinstance(collection, dict): - raise KeyError( - "Collection path {} at position {} already registered as " - "a function or class.".format(entry_name, h_idx)) - leaf_reg_key = hierarchy[-1] - else: - collection = registered_collection - leaf_reg_key = reg_key - - if leaf_reg_key in collection: - raise KeyError("Function or class {} registered multiple times.".format( - leaf_reg_key)) - - collection[leaf_reg_key] = fn_or_cls - return fn_or_cls - return decorator - - -def lookup(registered_collection, reg_key): - """Lookup and return decorated function or class in the collection. - - Lookup decorated function or class in registered_collection, in a - hierarchical order. For example, when - reg_key="my_model/my_exp/my_config_0", - this function will return - registered_collection["my_model"]["my_exp"]["my_config_0"]. - - Args: - registered_collection: a dictionary. The decorated function or class will be - retrieved from this collection. - reg_key: The key for retrieving the registered function or class. If reg_key - is a string, it can be hierarchical like my_model/my_exp/my_config_0 - Returns: - The registered function or class. - Raises: - LookupError: when reg_key cannot be found. - """ - if isinstance(reg_key, str): - hierarchy = reg_key.split("/") - collection = registered_collection - for h_idx, entry_name in enumerate(hierarchy): - if entry_name not in collection: - raise LookupError( - "collection path {} at position {} never registered.".format( - entry_name, h_idx)) - collection = collection[entry_name] - return collection - else: - if reg_key not in registered_collection: - raise LookupError("registration key {} never registered.".format(reg_key)) - return registered_collection[reg_key] diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/registry_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/registry_test.py deleted file mode 100644 index 9d8ca0533d92dd8f83138e69d7dfc701e2f917b1..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/registry_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for registry.""" - -import tensorflow as tf -from official.core import registry - - -class RegistryTest(tf.test.TestCase): - - def test_register(self): - collection = {} - - @registry.register(collection, 'functions/func_0') - def func_test(): - pass - - self.assertEqual(registry.lookup(collection, 'functions/func_0'), func_test) - - @registry.register(collection, 'classes/cls_0') - class ClassRegistryKey: - pass - - self.assertEqual( - registry.lookup(collection, 'classes/cls_0'), ClassRegistryKey) - - @registry.register(collection, ClassRegistryKey) - class ClassRegistryValue: - pass - - self.assertEqual( - registry.lookup(collection, ClassRegistryKey), ClassRegistryValue) - - def test_register_hierarchy(self): - collection = {} - - @registry.register(collection, 'functions/func_0') - def func_test0(): - pass - - @registry.register(collection, 'func_1') - def func_test1(): - pass - - @registry.register(collection, func_test1) - def func_test2(): - pass - - expected_collection = { - 'functions': { - 'func_0': func_test0, - }, - 'func_1': func_test1, - func_test1: func_test2, - } - self.assertEqual(collection, expected_collection) - - def test_register_error(self): - collection = {} - - @registry.register(collection, 'functions/func_0') - def func_test0(): # pylint: disable=unused-variable - pass - - with self.assertRaises(KeyError): - - @registry.register(collection, 'functions/func_0/sub_func') - def func_test1(): # pylint: disable=unused-variable - pass - - with self.assertRaises(LookupError): - registry.lookup(collection, 'non-exist') - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/task_factory.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/task_factory.py deleted file mode 100644 index 56cd92948937db563f6398f98196362ebc008617..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/task_factory.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""A global factory to register and access all registered tasks."""
-
-from official.core import registry
-
-_REGISTERED_TASK_CLS = {}
-
-
-# TODO(b/158741360): Add type annotations once pytype checks across modules.
-def register_task_cls(task_config_cls):
-  """Decorates a factory of Tasks for lookup by a subclass of TaskConfig.
-
-  This decorator supports registration of tasks as follows:
-
-  ```
-  @dataclasses.dataclass
-  class MyTaskConfig(TaskConfig):
-    # Add fields here.
-    pass
-
-  @register_task_cls(MyTaskConfig)
-  class MyTask(Task):
-    # Inherits def __init__(self, task_config).
-    pass
-
-  my_task_config = MyTaskConfig()
-  my_task = get_task(my_task_config)  # Returns MyTask(my_task_config).
-  ```
-
-  Besides a class itself, other callables that create a Task from a TaskConfig
-  can be decorated by the result of this function, as long as there is at most
-  one registration for each config class.
-
-  Args:
-    task_config_cls: a subclass of TaskConfig (*not* an instance of TaskConfig).
-      Each task_config_cls can only be used for a single registration.
-
-  Returns:
-    A callable for use as a class decorator that registers the decorated class
-    for creation from an instance of task_config_cls.
-  """
-  return registry.register(_REGISTERED_TASK_CLS, task_config_cls)
-
-
-def get_task(task_config, **kwargs):
-  """Creates a Task (of suitable subclass type) from task_config."""
-  return get_task_cls(task_config.__class__)(task_config, **kwargs)
-
-
-# The user-visible get_task() is defined after classes have been registered.
-# TODO(b/158741360): Add type annotations once pytype checks across modules.
-def get_task_cls(task_config_cls):
-  task_cls = registry.lookup(_REGISTERED_TASK_CLS, task_config_cls)
-  return task_cls
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_lib.py
deleted file mode 100644
index 1a03ecf1cfdb6d5e6bdb06b872f8ddbe5a823799..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_lib.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
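To complement the docstring example in task_factory.py above, a hedged, non-authoritative sketch of the registration flow (MyTaskConfig and MyTask are hypothetical names; TaskConfig and Task are the official.core classes the docstring assumes; a real Task subclass would also override its build methods):

```python
import dataclasses
from official.core import base_task, task_factory
from official.core.config_definitions import TaskConfig

@dataclasses.dataclass
class MyTaskConfig(TaskConfig):
  pass  # add experiment-specific fields here

@task_factory.register_task_cls(MyTaskConfig)
class MyTask(base_task.Task):
  pass  # a real task would override build_model / build_inputs, etc.

# Lookup resolves by the *config class*, not by a string key:
assert task_factory.get_task_cls(MyTaskConfig) is MyTask
# get_task(MyTaskConfig()) would then instantiate MyTask with that config.
```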
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""TFM common training driver library."""
-# pytype: disable=attribute-error
-import os
-from typing import Any, Mapping, Tuple, Optional
-
-# Import libraries
-from absl import logging
-import orbit
-import tensorflow as tf
-
-from official.core import base_task
-from official.core import base_trainer
-from official.core import config_definitions
-from official.core import train_utils
-
-maybe_create_best_ckpt_exporter = train_utils.maybe_create_best_ckpt_exporter
-
-
-def run_experiment(
-    distribution_strategy: tf.distribute.Strategy,
-    task: base_task.Task,
-    mode: str,
-    params: config_definitions.ExperimentConfig,
-    model_dir: str,
-    run_post_eval: bool = False,
-    save_summary: bool = True,
-    trainer: Optional[base_trainer.Trainer] = None
-) -> Tuple[tf.keras.Model, Mapping[str, Any]]:
-  """Runs train/eval configured by the experiment params.
-
-  Args:
-    distribution_strategy: A distribution strategy.
-    task: A Task instance.
-    mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval'
-      or 'continuous_eval'.
-    params: ExperimentConfig instance.
-    model_dir: A 'str', a path to store model checkpoints and summaries.
-    run_post_eval: Whether to run one evaluation pass after training; when
-      True, the eval metric logs are returned.
-    save_summary: Whether to save train and validation summaries.
-    trainer: An optional base_trainer.Trainer instance. It should be created
-      within the strategy.scope().
-
-  Returns:
-    A 2-tuple of (model, eval_logs).
-      model: `tf.keras.Model` instance.
-      eval_logs: returns eval metrics logs when run_post_eval is set to True,
-        otherwise, returns {}.
-  """
-
-  with distribution_strategy.scope():
-    if not trainer:
-      trainer = train_utils.create_trainer(
-          params,
-          task,
-          train='train' in mode,
-          evaluate=('eval' in mode) or run_post_eval,
-          checkpoint_exporter=maybe_create_best_ckpt_exporter(
-              params, model_dir))
-
-  if trainer.checkpoint:
-    checkpoint_manager = tf.train.CheckpointManager(
-        trainer.checkpoint,
-        directory=model_dir,
-        max_to_keep=params.trainer.max_to_keep,
-        step_counter=trainer.global_step,
-        checkpoint_interval=params.trainer.checkpoint_interval,
-        init_fn=trainer.initialize)
-    # Adds recovery handling.
-    trainer.add_recovery(params.trainer, checkpoint_manager=checkpoint_manager)
-  else:
-    checkpoint_manager = None
-
-  controller = orbit.Controller(
-      strategy=distribution_strategy,
-      trainer=trainer if 'train' in mode else None,
-      evaluator=trainer,
-      global_step=trainer.global_step,
-      steps_per_loop=params.trainer.steps_per_loop,
-      checkpoint_manager=checkpoint_manager,
-      summary_dir=os.path.join(model_dir, 'train') if (save_summary) else None,
-      eval_summary_dir=os.path.join(model_dir,
-                                    params.trainer.validation_summary_subdir) if
-      (save_summary) else None,
-      summary_interval=params.trainer.summary_interval if
-      (save_summary) else None)
-
-  logging.info('Starting to execute mode: %s', mode)
-  with distribution_strategy.scope():
-    if mode == 'train':
-      controller.train(steps=params.trainer.train_steps)
-    elif mode == 'train_and_eval':
-      controller.train_and_evaluate(
-          train_steps=params.trainer.train_steps,
-          eval_steps=params.trainer.validation_steps,
-          eval_interval=params.trainer.validation_interval)
-    elif mode == 'eval':
-      controller.evaluate(steps=params.trainer.validation_steps)
-    elif mode == 'continuous_eval':
-
-      def timeout_fn():
-        if trainer.global_step.numpy() >= params.trainer.train_steps:
-          return True
-        return False
-
-      controller.evaluate_continuously(
-          steps=params.trainer.validation_steps,
-          timeout=params.trainer.continuous_eval_timeout,
-          timeout_fn=timeout_fn)
-    else:
-      raise NotImplementedError('The mode is not implemented: %s' % mode)
-
-  num_params = train_utils.try_count_params(trainer.model)
-  if num_params is not None:
-    logging.info('Number of trainable params in model: %f million.',
                 num_params / 10.**6)
-
-  if run_post_eval:
-    with distribution_strategy.scope():
-      return trainer.model, trainer.evaluate(
-          tf.convert_to_tensor(params.trainer.validation_steps))
-  else:
-    return trainer.model, {}
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_lib_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_lib_test.py
deleted file mode 100644
index 71f5ac5c41c41a1d2953fbaf4461ec4a33b26dc9..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_lib_test.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
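For orientation, a sketch of how run_experiment above is typically driven; it mirrors the flow of train_lib_test.py below. The experiment name and the model directory are placeholders, and the registry_imports module is assumed to register the experiments, as in the test:

```python
import tensorflow as tf
from official.common import registry_imports  # noqa: F401 -- registers experiments
from official.core import exp_factory, task_factory, train_lib

strategy = tf.distribute.get_strategy()      # default strategy, for illustration
params = exp_factory.get_exp_config('mock')  # placeholder experiment name
model_dir = '/tmp/model_dir'                 # placeholder path

with strategy.scope():
  task = task_factory.get_task(params.task, logging_dir=model_dir)

model, eval_logs = train_lib.run_experiment(
    distribution_strategy=strategy,
    task=task,
    mode='train_and_eval',
    params=params,
    model_dir=model_dir,
    run_post_eval=True)  # eval metric logs are returned only when True
```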
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for train_ctl_lib.""" -import json -import os - -from absl import flags -from absl.testing import flagsaver -from absl.testing import parameterized -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.common import flags as tfm_flags -# pylint: disable=unused-import -from official.common import registry_imports -# pylint: enable=unused-import -from official.core import task_factory -from official.core import train_lib -from official.core import train_utils - -FLAGS = flags.FLAGS - -tfm_flags.define_flags() - - -class TrainTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super(TrainTest, self).setUp() - self._test_config = { - 'trainer': { - 'checkpoint_interval': 10, - 'steps_per_loop': 10, - 'summary_interval': 10, - 'train_steps': 10, - 'validation_steps': 5, - 'validation_interval': 10, - 'continuous_eval_timeout': 1, - 'validation_summary_subdir': 'validation', - 'optimizer_config': { - 'optimizer': { - 'type': 'sgd', - }, - 'learning_rate': { - 'type': 'constant' - } - } - }, - } - - @combinations.generate( - combinations.combine( - distribution_strategy=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - flag_mode=['train', 'eval', 'train_and_eval'], - run_post_eval=[True, False])) - def test_end_to_end(self, distribution_strategy, flag_mode, run_post_eval): - model_dir = self.get_temp_dir() - flags_dict = dict( - experiment='mock', - mode=flag_mode, - model_dir=model_dir, - params_override=json.dumps(self._test_config)) - with flagsaver.flagsaver(**flags_dict): - params = train_utils.parse_configuration(flags.FLAGS) - train_utils.serialize_config(params, model_dir) - with distribution_strategy.scope(): - task = task_factory.get_task(params.task, logging_dir=model_dir) - - _, logs = train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode=flag_mode, - params=params, - model_dir=model_dir, - run_post_eval=run_post_eval) - - if 'eval' in flag_mode: - self.assertTrue( - tf.io.gfile.exists( - os.path.join(model_dir, - params.trainer.validation_summary_subdir))) - if run_post_eval: - self.assertNotEmpty(logs) - else: - self.assertEmpty(logs) - self.assertNotEmpty( - tf.io.gfile.glob(os.path.join(model_dir, 'params.yaml'))) - if flag_mode == 'eval': - return - self.assertNotEmpty( - tf.io.gfile.glob(os.path.join(model_dir, 'checkpoint'))) - # Tests continuous evaluation. 
- _, logs = train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode='continuous_eval', - params=params, - model_dir=model_dir, - run_post_eval=run_post_eval) - print(logs) - - def test_parse_configuration(self): - model_dir = self.get_temp_dir() - flags_dict = dict( - experiment='mock', - mode='train', - model_dir=model_dir, - params_override=json.dumps(self._test_config)) - with flagsaver.flagsaver(**flags_dict): - params = train_utils.parse_configuration(flags.FLAGS, lock_return=True) - with self.assertRaises(ValueError): - params.override({'task': {'init_checkpoint': 'Foo'}}) - - params = train_utils.parse_configuration(flags.FLAGS, lock_return=False) - params.override({'task': {'init_checkpoint': 'Bar'}}) - self.assertEqual(params.task.init_checkpoint, 'Bar') - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_utils.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_utils.py deleted file mode 100644 index 4e9ff0274fb5aaaa644880aacf7ee1d877918b24..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_utils.py +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Training utils.""" -import copy -import json -import os -import pprint -from typing import Any, Callable, Dict, List, Optional - -from absl import logging -import dataclasses -import gin -import orbit -import tensorflow as tf - -from official.core import base_task -from official.core import base_trainer -from official.core import config_definitions -from official.core import exp_factory -from official.modeling import hyperparams - - -def get_leaf_nested_dict(d: Dict[str, Any], keys: List[str]) -> Dict[str, Any]: - """Get leaf from a dictionary with arbitrary depth with a list of keys. - - Args: - d: The dictionary to extract value from. - keys: The list of keys to extract values recursively. - - Returns: - The value of the leaf. - - Raises: - KeyError: If the value of keys extracted is a dictionary. 
- """ - leaf = d - for k in keys: - if not isinstance(leaf, dict) or k not in leaf: - raise KeyError( - 'Path not exist while traversing the dictionary: d with keys' - ': %s.' % keys) - leaf = leaf[k] - - if isinstance(leaf, dict): - raise KeyError('The value extracted with keys: %s is not a leaf of the ' - 'dictionary: %s.' % (keys, d)) - return leaf - - -def cast_leaf_nested_dict(d: Dict[str, Any], - cast_fn: Callable[[Any], Any]) -> Dict[str, Any]: - """Cast the leaves of a dictionary with arbitrary depth in place. - - Args: - d: The dictionary to extract value from. - cast_fn: The casting function. - - Returns: - A dictionray with the same structure as d. - """ - for key, value in d.items(): - if isinstance(value, dict): - d[key] = cast_leaf_nested_dict(value, cast_fn) - else: - d[key] = cast_fn(value) - return d - - -def maybe_create_best_ckpt_exporter(params: config_definitions.ExperimentConfig, - data_dir: str) -> Any: - """Maybe create a BestCheckpointExporter object, according to the config.""" - export_subdir = params.trainer.best_checkpoint_export_subdir - metric_name = params.trainer.best_checkpoint_eval_metric - metric_comp = params.trainer.best_checkpoint_metric_comp - if data_dir and export_subdir and metric_name: - best_ckpt_dir = os.path.join(data_dir, export_subdir) - best_ckpt_exporter = BestCheckpointExporter(best_ckpt_dir, metric_name, - metric_comp) - logging.info( - 'Created the best checkpoint exporter. ' - 'data_dir: %s, export_subdir: %s, metric_name: %s', data_dir, - export_subdir, metric_name) - else: - best_ckpt_exporter = None - - return best_ckpt_exporter - - -# TODO(b/180147589): Add tests for this module. -class BestCheckpointExporter: - """Keeps track of the best result, and saves its checkpoint. - - Orbit will support an API for checkpoint exporter. This class will be used - together with orbit once this functionality is ready. - """ - - def __init__(self, export_dir: str, metric_name: str, metric_comp: str): - """Initialization. - - Args: - export_dir: The directory that will contain exported checkpoints. - metric_name: Indicates which metric to look at, when determining which - result is better. If eval_logs being passed to maybe_export_checkpoint - is a nested dictionary, use `|` as a seperator for different layers. - metric_comp: Indicates how to compare results. Either `lower` or `higher`. - """ - self._export_dir = export_dir - self._metric_name = metric_name.split('|') - self._metric_comp = metric_comp - if self._metric_comp not in ('lower', 'higher'): - raise ValueError('best checkpoint metric comp must be one of ' - 'higher, lower. 
Got: {}'.format(self._metric_comp)) - tf.io.gfile.makedirs(os.path.dirname(self.best_ckpt_logs_path)) - self._best_ckpt_logs = self._maybe_load_best_eval_metric() - self._checkpoint_manager = None - - def _get_checkpoint_manager(self, checkpoint): - """Gets an existing checkpoint manager or creates a new one.""" - if self._checkpoint_manager is None or (self._checkpoint_manager.checkpoint - != checkpoint): - logging.info('Creates a new checkpoint manager.') - self._checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - directory=self._export_dir, - max_to_keep=1, - checkpoint_name='best_ckpt') - - return self._checkpoint_manager - - def maybe_export_checkpoint(self, checkpoint, eval_logs, global_step): - logging.info('[BestCheckpointExporter] received eval_logs: %s, at step: %d', - eval_logs, global_step) - if self._best_ckpt_logs is None or self._new_metric_is_better( - self._best_ckpt_logs, eval_logs): - self._best_ckpt_logs = eval_logs - self._export_best_eval_metric(checkpoint, self._best_ckpt_logs, - global_step) - - def _maybe_load_best_eval_metric(self): - if not tf.io.gfile.exists(self.best_ckpt_logs_path): - return None - with tf.io.gfile.GFile(self.best_ckpt_logs_path, 'r') as reader: - return json.loads(reader.read()) - - def _new_metric_is_better(self, old_logs, new_logs): - """Check if the metric in new_logs is better than the metric in old_logs.""" - old_value = float( - orbit.utils.get_value( - get_leaf_nested_dict(old_logs, self._metric_name))) - new_value = float( - orbit.utils.get_value( - get_leaf_nested_dict(new_logs, self._metric_name))) - - logging.info('[BestCheckpointExporter] comparing results. old: %f, new: %f', - old_value, new_value) - if self._metric_comp == 'higher': - if new_value > old_value: - logging.info('[BestCheckpointExporter] ' - 'the new number is better since it is higher.') - return True - else: # self._metric_comp == 'lower': - if new_value < old_value: - logging.info('[BestCheckpointExporter] ' - 'the new number is better since it is lower.') - return True - return False - - def _export_best_eval_metric(self, checkpoint, eval_logs, global_step): - """Export evaluation results of the best checkpoint into a json file.""" - eval_logs_ext = copy.copy(eval_logs) - eval_logs_ext['best_ckpt_global_step'] = global_step - eval_logs_ext = cast_leaf_nested_dict( - eval_logs_ext, lambda x: float(orbit.utils.get_value(x))) - # Saving json file is very fast. 
- with tf.io.gfile.GFile(self.best_ckpt_logs_path, 'w') as writer: - writer.write(json.dumps(eval_logs_ext, indent=4) + '\n') - - self._get_checkpoint_manager(checkpoint).save() - - @property - def best_ckpt_logs(self): - return self._best_ckpt_logs - - @property - def best_ckpt_logs_path(self): - return os.path.join(self._export_dir, 'info.json') - - @property - def best_ckpt_path(self): - """Returns the best ckpt path or None if there is no ckpt yet.""" - return tf.train.latest_checkpoint(self._export_dir) - - -@gin.configurable -def create_trainer(params: config_definitions.ExperimentConfig, - task: base_task.Task, - train: bool, - evaluate: bool, - checkpoint_exporter: Optional[BestCheckpointExporter] = None, - trainer_cls=base_trainer.Trainer) -> base_trainer.Trainer: - """Create trainer.""" - logging.info('Running default trainer.') - model = task.build_model() - optimizer = task.create_optimizer(params.trainer.optimizer_config, - params.runtime) - return trainer_cls( - params, - task, - model=model, - optimizer=optimizer, - train=train, - evaluate=evaluate, - checkpoint_exporter=checkpoint_exporter) - - -@dataclasses.dataclass -class ParseConfigOptions: - """Use this dataclass instead of FLAGS to customize parse_configuration().""" - experiment: str - config_file: List[str] - tpu: str = '' - tf_data_service: str = '' - params_override: str = '' - - def __contains__(self, name): - return name in dataclasses.asdict(self) - - -def parse_configuration(flags_obj, lock_return=True, print_return=True): - """Parses ExperimentConfig from flags.""" - - # 1. Get the default config from the registered experiment. - params = exp_factory.get_exp_config(flags_obj.experiment) - - # 2. Get the first level of override from `--config_file`. - # `--config_file` is typically used as a template that specifies the common - # override for a particular experiment. - for config_file in flags_obj.config_file or []: - params = hyperparams.override_params_dict( - params, config_file, is_strict=True) - - # 3. Override the TPU address and tf.data service address. - params.override({ - 'runtime': { - 'tpu': flags_obj.tpu, - }, - }) - if ('tf_data_service' in flags_obj and flags_obj.tf_data_service and - isinstance(params.task, config_definitions.TaskConfig)): - params.override({ - 'task': { - 'train_data': { - 'tf_data_service_address': flags_obj.tf_data_service, - }, - 'validation_data': { - 'tf_data_service_address': flags_obj.tf_data_service, - } - } - }) - - # 4. Get the second level of override from `--params_override`. - # `--params_override` is typically used as a further override over the - # template. For example, one may define a particular template for training - # ResNet50 on ImageNet in a config file and pass it via `--config_file`, - # then define different learning rates and pass it via `--params_override`. 
-  if flags_obj.params_override:
-    params = hyperparams.override_params_dict(
-        params, flags_obj.params_override, is_strict=True)
-
-  params.validate()
-  if lock_return:
-    params.lock()
-
-  if print_return:
-    pp = pprint.PrettyPrinter()
-    logging.info('Final experiment parameters: %s',
                 pp.pformat(params.as_dict()))
-
-  return params
-
-
-def serialize_config(params: config_definitions.ExperimentConfig,
-                     model_dir: str):
-  """Serializes and saves the experiment config."""
-  params_save_path = os.path.join(model_dir, 'params.yaml')
-  logging.info('Saving experiment configuration to %s', params_save_path)
-  tf.io.gfile.makedirs(model_dir)
-  hyperparams.save_params_dict_to_yaml(params, params_save_path)
-
-
-def save_gin_config(filename_surfix: str, model_dir: str):
-  """Serializes and saves the gin configuration."""
-  gin_save_path = os.path.join(
-      model_dir, 'operative_config.{}.gin'.format(filename_surfix))
-  logging.info('Saving gin configurations to %s', gin_save_path)
-  tf.io.gfile.makedirs(model_dir)
-  with tf.io.gfile.GFile(gin_save_path, 'w') as f:
-    f.write(gin.operative_config_str())
-
-
-def read_global_step_from_checkpoint(ckpt_file_path):
-  """Read global step from checkpoint, or get global step from its filename."""
-  global_step = tf.Variable(-1, dtype=tf.int64)
-  ckpt = tf.train.Checkpoint(global_step=global_step)
-  try:
-    ckpt.restore(ckpt_file_path).expect_partial()
-    global_step_maybe_restored = global_step.numpy()
-  except tf.errors.InvalidArgumentError:
-    global_step_maybe_restored = -1
-
-  if global_step_maybe_restored == -1:
-    raise ValueError('global_step not found in checkpoint {}. '
-                     'If you want to run finetune eval jobs, you need to '
-                     'make sure that your pretrain model writes '
-                     'global_step in its checkpoints.'.format(ckpt_file_path))
-  global_step_restored = global_step.numpy()
-  logging.info('get global_step %d from checkpoint %s', global_step_restored,
               ckpt_file_path)
-  return global_step_restored
-
-
-def write_json_summary(log_dir, global_step, eval_metrics):
-  """Dump evaluation metrics to json file."""
-  serializable_dict = {}
-  for name, value in eval_metrics.items():
-    if hasattr(value, 'numpy'):
-      serializable_dict[name] = str(value.numpy())
-    else:
-      serializable_dict[name] = str(value)
-  output_json = os.path.join(log_dir, 'metrics-{}.json'.format(global_step))
-  logging.info('Evaluation results at pretrain step %d: %s', global_step,
               serializable_dict)
-  with tf.io.gfile.GFile(output_json, 'w') as writer:
-    writer.write(json.dumps(serializable_dict, indent=4) + '\n')
-
-
-def write_summary(summary_writer, global_step, eval_metrics):
-  """Write evaluation metrics to TF summary."""
-  numeric_dict = {}
-  for name, value in eval_metrics.items():
-    numeric_dict[name] = float(orbit.utils.get_value(value))
-  with summary_writer.as_default():
-    for name, value in numeric_dict.items():
-      tf.summary.scalar(name, value, step=global_step)
-    summary_writer.flush()
-
-
-def remove_ckpts(model_dir):
-  """Remove model checkpoints, so we can restart."""
-  ckpts = os.path.join(model_dir, 'ckpt-*')
-  logging.info('removing checkpoint files %s', ckpts)
-  for file_to_remove in tf.io.gfile.glob(ckpts):
-    tf.io.gfile.rmtree(file_to_remove)
-
-  file_to_remove = os.path.join(model_dir, 'checkpoint')
-  if tf.io.gfile.exists(file_to_remove):
-    tf.io.gfile.remove(file_to_remove)
-
-
-def try_count_params(model: tf.keras.Model):
-  """Count the number of parameters in the model, if possible.
-
-  Args:
-    model: The model to count parameters for.
-
-  Returns:
-    The number of parameters or None.
-  """
-  if hasattr(model, 'count_params'):
-    try:
-      return model.count_params()
-    except ValueError:
-      logging.info('Number of trainable params unknown, because the build() '
-                   'methods in keras layers were not called. This is probably '
-                   'because the model was not fed any input, e.g., the max '
-                   'train step was already reached before this run.')
-      return None
-  return None
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_utils_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_utils_test.py
deleted file mode 100644
index b6b607848b99dfad4aa4100897cfbe8ebf8ea361..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/core/train_utils_test.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
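The ParseConfigOptions dataclass above stands in for absl FLAGS; a hedged sketch of driving parse_configuration programmatically (the experiment name and the override dict are placeholders; 'mock' must name a registered experiment, as in the tests):

```python
import json
from official.core import train_utils

options = train_utils.ParseConfigOptions(
    experiment='mock',  # placeholder; must name a registered experiment
    config_file=[],
    params_override=json.dumps({'trainer': {'train_steps': 10}}))

params = train_utils.parse_configuration(options, lock_return=False)
params.override({'trainer': {'train_steps': 20}})  # allowed since not locked
```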
-# - -"""Tests for official.core.train_utils.""" - -import tensorflow as tf - -from official.core import train_utils - - -class TrainUtilsTest(tf.test.TestCase): - - def test_get_leaf_nested_dict(self): - d = {'a': {'i': {'x': 5}}} - self.assertEqual(train_utils.get_leaf_nested_dict(d, ['a', 'i', 'x']), 5) - - def test_get_leaf_nested_dict_not_leaf(self): - with self.assertRaisesRegex(KeyError, 'The value extracted with keys.*'): - d = {'a': {'i': {'x': 5}}} - train_utils.get_leaf_nested_dict(d, ['a', 'i']) - - def test_get_leaf_nested_dict_path_not_exist_missing_key(self): - with self.assertRaisesRegex(KeyError, 'Path not exist while traversing .*'): - d = {'a': {'i': {'x': 5}}} - train_utils.get_leaf_nested_dict(d, ['a', 'i', 'y']) - - def test_get_leaf_nested_dict_path_not_exist_out_of_range(self): - with self.assertRaisesRegex(KeyError, 'Path not exist while traversing .*'): - d = {'a': {'i': {'x': 5}}} - train_utils.get_leaf_nested_dict(d, ['a', 'i', 'z']) - - def test_get_leaf_nested_dict_path_not_exist_meets_leaf(self): - with self.assertRaisesRegex(KeyError, 'Path not exist while traversing .*'): - d = {'a': {'i': 5}} - train_utils.get_leaf_nested_dict(d, ['a', 'i', 'z']) - - def test_cast_leaf_nested_dict(self): - d = {'a': {'i': {'x': '123'}}, 'b': 456.5} - d = train_utils.cast_leaf_nested_dict(d, int) - self.assertEqual(d['a']['i']['x'], 123) - self.assertEqual(d['b'], 456) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/__init__.py deleted file mode 100644 index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/__init__.py deleted file mode 100644 index 7f956f13392054a4fc63d3a37bd7343077b15e29..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Activations package definition.""" -from official.modeling.activations.gelu import gelu -from official.modeling.activations.relu import relu6 -from official.modeling.activations.sigmoid import hard_sigmoid -from official.modeling.activations.swish import hard_swish -from official.modeling.activations.swish import identity -from official.modeling.activations.swish import simple_swish diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu.py deleted file mode 100644 index d32acd1d65db29422f1e1a84c8eb523176bf56a1..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
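Each activation in the package above is registered with @tf.keras.utils.register_keras_serializable(package='Text'), so Keras can serialize models that use them. A brief usage sketch; note that in this NPU port, importing the package also pulls in npu_device via gelu.py below, so this assumes the NPU stack (or the stock TF Model Garden package, where the import is dependency-free):

```python
import tensorflow as tf
from official.modeling import activations  # NB: imports gelu.py, see note above

# Registered activations can be passed to Keras layers like any callable.
layer = tf.keras.layers.Dense(64, activation=activations.relu6)
out = layer(tf.random.normal([2, 16]))
print(out.shape)  # (2, 64)

# Because of the serializable registration under the 'Text' package, a model
# using these activations survives a tf.keras save/load round trip.
```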
-# ==============================================================================
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""Gaussian error linear unit."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import npu_device
-import math
-import tensorflow as tf
-import common_flags
-from tensorflow.python.framework import ops
-from npu_device.npu_device import gen_npu_ops as npu_aicore_ops
-from absl import flags
-
-FLAGS = flags.FLAGS
-
-
-#@ops.RegisterGradient("FastGelu")
-def _fast_gelu_grad(op, grad):
-  """The gradient for FastGelu.
-
-  Args:
-    op: The FastGelu operation that we are differentiating, which we can use
-      to find the inputs and outputs of the original op.
-    grad: Gradient with respect to the output of the fast_gelu op.
-
-  Returns:
-    Gradient with respect to the input of fast_gelu.
-  """
-  return [npu_aicore_ops.fast_gelu_grad(grad, op.inputs[0])]
-
-grad_registry_list = ops.gradient_registry.list()
-if not hasattr(npu_device.ops, 'gelu') and "FastGelu" not in grad_registry_list:
-  ops.RegisterGradient("FastGelu")(_fast_gelu_grad)
-
-@tf.keras.utils.register_keras_serializable(package='Text')
-def gelu(x):
-  """Gaussian Error Linear Unit.
-
-  Original paper: https://arxiv.org/abs/1606.08415
-  The approximate version is faster.
-
-  Args:
-    x: float Tensor to perform activation.
-
-  Returns:
-    `x` with the GELU activation applied.
-  """
-  if FLAGS.use_fastgelu:
-    if not hasattr(npu_device.ops, 'gelu'):
-      return npu_device.gen_npu_ops.fast_gelu(x)
-    else:
-      fast_gelu = getattr(npu_device.ops, 'gelu')
-      return fast_gelu(x)
-    #return npu_aicore_ops.fast_gelu(x)
-  else:
-    cdf = 0.5 * (1.0 + tf.tanh(
-        (math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3)))))
-    return x * cdf
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu_origin.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu_origin.py
deleted file mode 100644
index 39348061e57edc22857c2984f297d9fc23d365e8..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu_origin.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
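The fallback branch of gelu() above implements the tanh approximation by hand; a standalone reference that mirrors it, assuming only stock TensorFlow (no NPU dependencies):

```python
import math
import tensorflow as tf

def gelu_tanh(x):
  """Tanh approximation of GELU, matching the fallback branch above."""
  x = tf.convert_to_tensor(x, dtype=tf.float32)
  cdf = 0.5 * (1.0 + tf.tanh(
      math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3))))
  return x * cdf

# Should agree with tf.keras.activations.gelu(x, approximate=True)
# (used by gelu_origin.py below) to float32 precision.
print(gelu_tanh([[0.25, 0.0, -0.25]]).numpy())  # ~[[0.1497, 0., -0.1003]]
```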
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Gaussian error linear unit.""" - -import tensorflow as tf - - -@tf.keras.utils.register_keras_serializable(package='Text') -def gelu(x): - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU. - Original paper: https://arxiv.org/abs/1606.08415 - Args: - x: float Tensor to perform activation. - - Returns: - `x` with the GELU activation applied. - """ - return tf.keras.activations.gelu(x, approximate=True) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu_test.py deleted file mode 100644 index f833b655066d8bc19334ca86905955876218d01f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/gelu_test.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for the Gaussian error linear unit.""" - -import tensorflow as tf - -from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.modeling import activations - - -@keras_parameterized.run_all_keras_modes -class GeluTest(keras_parameterized.TestCase): - - def test_gelu(self): - expected_data = [[0.14967535, 0., -0.10032465], - [-0.15880796, -0.04540223, 2.9963627]] - gelu_data = activations.gelu([[.25, 0, -.25], [-1, -2, 3]]) - self.assertAllClose(expected_data, gelu_data) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/relu.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/relu.py deleted file mode 100644 index 72b2ffb22baca47d5529940fc3c8c3d6a3ec5b9f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/relu.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Customized Relu activation.""" - -import tensorflow as tf - - -@tf.keras.utils.register_keras_serializable(package='Text') -def relu6(features): - """Computes the Relu6 activation function. - - Args: - features: A `Tensor` representing preactivation values. - - Returns: - The activation value. - """ - features = tf.convert_to_tensor(features) - return tf.nn.relu6(features) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/relu_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/relu_test.py deleted file mode 100644 index 5352de548da25abba4361ee16d9130415dc821df..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/relu_test.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for the customized Relu activation.""" - -import tensorflow as tf - -from tensorflow.python.keras import \ - keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.modeling import activations - - -@keras_parameterized.run_all_keras_modes -class CustomizedReluTest(keras_parameterized.TestCase): - - def test_relu6(self): - features = [[.25, 0, -.25], [-1, -2, 3]] - customized_relu6_data = activations.relu6(features) - relu6_data = tf.nn.relu6(features) - self.assertAllClose(customized_relu6_data, relu6_data) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/sigmoid.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/sigmoid.py deleted file mode 100644 index a331d30b747ad9943c363e185658e2af37b2d423..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/sigmoid.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Customized Sigmoid activation.""" - -import tensorflow as tf - - -@tf.keras.utils.register_keras_serializable(package='Text') -def hard_sigmoid(features): - """Computes the hard sigmoid activation function. - - Args: - features: A `Tensor` representing preactivation values. - - Returns: - The activation value. - """ - features = tf.convert_to_tensor(features) - return tf.nn.relu6(features + tf.constant(3.)) * 0.16667 diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/sigmoid_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/sigmoid_test.py deleted file mode 100644 index feeb49b17d263584ba3f078143750012be83d25c..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/sigmoid_test.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for the customized Sigmoid activation.""" - -import numpy as np -import tensorflow as tf - -from keras import \ - keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.modeling import activations - - -@keras_parameterized.run_all_keras_modes -class CustomizedSigmoidTest(keras_parameterized.TestCase): - - def _hard_sigmoid_nn(self, x): - x = np.float32(x) - return tf.nn.relu6(x + 3.) * 0.16667 - - def test_hard_sigmoid(self): - features = [[.25, 0, -.25], [-1, -2, 3]] - customized_hard_sigmoid_data = activations.hard_sigmoid(features) - sigmoid_data = self._hard_sigmoid_nn(features) - self.assertAllClose(customized_hard_sigmoid_data, sigmoid_data) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/swish.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/swish.py deleted file mode 100644 index 7a6420e52509cb1936ea147ef09f865029408f8c..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/swish.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
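A quick numeric check of the hard_sigmoid definition above, computed directly from its formula with plain TensorFlow (no repo imports needed); note that the 0.16667 constant approximates 1/6:

```python
import tensorflow as tf

x = tf.constant([-4.0, -3.0, 0.0, 3.0, 4.0])
# hard_sigmoid(x) = relu6(x + 3) * 0.16667: saturates at 0 below -3 and at
# ~1 above +3, and is linear in between.
print((tf.nn.relu6(x + 3.0) * 0.16667).numpy())
# -> [0. 0. 0.50001 1.00002 1.00002]
```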
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Customized Swish activation."""
-
-import tensorflow as tf
-
-
-@tf.keras.utils.register_keras_serializable(package='Text')
-def simple_swish(features):
-  """Computes the Swish activation function.
-
-  The tf.nn.swish operation uses a custom gradient to reduce memory usage.
-  Since saving custom gradients in SavedModel is currently not supported, and
-  one would not be able to use an exported TF-Hub module for fine-tuning, we
-  provide this wrapper that allows selecting between the native TensorFlow
-  swish operation and a customized operation that uses the default TensorFlow
-  gradient computation.
-
-  Args:
-    features: A `Tensor` representing preactivation values.
-
-  Returns:
-    The activation value.
-  """
-  features = tf.convert_to_tensor(features)
-  return features * tf.nn.sigmoid(features)
-
-
-@tf.keras.utils.register_keras_serializable(package='Text')
-def hard_swish(features):
-  """Computes a hard version of the swish function.
-
-  This operation can be used to reduce computational cost and improve
-  quantization for edge devices.
-
-  Args:
-    features: A `Tensor` representing preactivation values.
-
-  Returns:
-    The activation value.
-  """
-  features = tf.convert_to_tensor(features)
-  return features * tf.nn.relu6(features + tf.constant(3.)) * (1. / 6.)
-
-
-@tf.keras.utils.register_keras_serializable(package='Text')
-def identity(features):
-  """Computes the identity function.
-
-  Useful for helping in quantization.
-
-  Args:
-    features: A `Tensor` representing preactivation values.
-
-  Returns:
-    The activation value.
-  """
-  features = tf.convert_to_tensor(features)
-  return tf.identity(features)
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/swish_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/swish_test.py
deleted file mode 100644
index c4e4b5f0c1a7734ca4e6a885ba04fee63ff438b0..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/activations/swish_test.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for the customized Swish activation.""" -import numpy as np -import tensorflow as tf - -from keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.modeling import activations - - -@keras_parameterized.run_all_keras_modes -class CustomizedSwishTest(keras_parameterized.TestCase): - - def _hard_swish_np(self, x): - x = np.float32(x) - return x * np.clip(x + 3, 0, 6) / 6 - - def test_simple_swish(self): - features = [[.25, 0, -.25], [-1, -2, 3]] - customized_swish_data = activations.simple_swish(features) - swish_data = tf.nn.swish(features) - self.assertAllClose(customized_swish_data, swish_data) - - def test_hard_swish(self): - features = [[.25, 0, -.25], [-1, -2, 3]] - customized_swish_data = activations.hard_swish(features) - swish_data = self._hard_swish_np(features) - self.assertAllClose(customized_swish_data, swish_data) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/__init__.py deleted file mode 100644 index da270edaeea8a43e1e9c4e0a5fe0fd63e4de61d0..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Hyperparams package definition."""
-# pylint: disable=g-multiple-import
-from official.modeling.hyperparams.base_config import *
-from official.modeling.hyperparams.oneof import *
-from official.modeling.hyperparams.params_dict import *
-
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config.py
deleted file mode 100644
index 03f8edb44bf4d64321dee418165bf1daaa8390b1..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Base configurations to standardize experiments."""
-
-import copy
-import functools
-from typing import Any, List, Mapping, Optional, Type
-from absl import logging
-
-import dataclasses
-import tensorflow as tf
-import yaml
-
-from official.modeling.hyperparams import params_dict
-
-
-@dataclasses.dataclass
-class Config(params_dict.ParamsDict):
-  """The base configuration class that supports YAML/JSON based overrides.
-
-  Because of YAML/JSON serialization limitations, some semantics of dataclass
-  are not supported:
-  * It recursively enforces an allowlist of basic types and container types,
-    so it avoids surprises with copy and reuse caused by unanticipated types.
-  * Warning: it converts Dict to `Config` even within sequences,
-    e.g. for config = Config({'key': [([{'a': 42}],)]}),
-    type(config.key[0][0][0]) is Config rather than dict.
-    If you define/annotate some field as Dict, the field will convert to a
-    `Config` instance and lose the dictionary type.
-  """
-
-  # It's safe to add bytes and other immutable types here.
-  IMMUTABLE_TYPES = (str, int, float, bool, type(None))
-  # It's safe to add set, frozenset and other collections here.
-  SEQUENCE_TYPES = (list, tuple)
-
-  default_params: dataclasses.InitVar[Optional[Mapping[str, Any]]] = None
-  restrictions: dataclasses.InitVar[Optional[List[str]]] = None
-
-  @classmethod
-  def _isvalidsequence(cls, v):
-    """Check if the input values are valid sequences.
-
-    Args:
-      v: Input sequence.
-
-    Returns:
-      True if the sequence is valid: its type is one of cls.SEQUENCE_TYPES
-      and its elements are all in cls.IMMUTABLE_TYPES, all dicts, or all
-      ParamsDicts.
-    """
-    if not isinstance(v, cls.SEQUENCE_TYPES):
-      return False
-    return (all(isinstance(e, cls.IMMUTABLE_TYPES) for e in v) or
-            all(isinstance(e, dict) for e in v) or
-            all(isinstance(e, params_dict.ParamsDict) for e in v))
-
-  @classmethod
-  def _import_config(cls, v, subconfig_type):
-    """Returns v with dicts converted to Configs, recursively."""
-    if not issubclass(subconfig_type, params_dict.ParamsDict):
-      raise TypeError(
-          'subconfig_type should be a subclass of ParamsDict, '
-          'found {!r}'.format(subconfig_type))
-    if isinstance(v, cls.IMMUTABLE_TYPES):
-      return v
-    elif isinstance(v, cls.SEQUENCE_TYPES):
-      # Only support one layer of sequence.
-      if not cls._isvalidsequence(v):
-        raise TypeError(
-            'Invalid sequence: only supports single level {!r} of {!r} or '
-            'dict or ParamsDict found: {!r}'.format(cls.SEQUENCE_TYPES,
-                                                    cls.IMMUTABLE_TYPES, v))
-      import_fn = functools.partial(
-          cls._import_config, subconfig_type=subconfig_type)
-      return type(v)(map(import_fn, v))
-    elif isinstance(v, params_dict.ParamsDict):
-      # Deepcopy here is a temporary solution for preserving type in nested
-      # Config object.
-      return copy.deepcopy(v)
-    elif isinstance(v, dict):
-      return subconfig_type(v)
-    else:
-      raise TypeError('Unknown type: {!r}'.format(type(v)))
-
-  @classmethod
-  def _export_config(cls, v):
-    """Returns v with Configs converted to dicts, recursively."""
-    if isinstance(v, cls.IMMUTABLE_TYPES):
-      return v
-    elif isinstance(v, cls.SEQUENCE_TYPES):
-      return type(v)(map(cls._export_config, v))
-    elif isinstance(v, params_dict.ParamsDict):
-      return v.as_dict()
-    elif isinstance(v, dict):
-      raise TypeError('dict value not supported in converting.')
-    else:
-      raise TypeError('Unknown type: {!r}'.format(type(v)))
-
-  @classmethod
-  def _get_subconfig_type(cls, k) -> Type[params_dict.ParamsDict]:
-    """Get element type by the field name.
-
-    Args:
-      k: the key/name of the field.
-
-    Returns:
-      Config as default. If a type annotation is found for `k`,
-      1) returns the type of the annotation if it is a subtype of ParamsDict;
-      2) returns the element type if the annotation of `k` is List[SubType]
-         or Tuple[SubType].
-    """
-    subconfig_type = Config
-    if k in cls.__annotations__:
-      # Directly Config subtype.
-      type_annotation = cls.__annotations__[k]  # pytype: disable=invalid-annotation
-      if (isinstance(type_annotation, type) and
-          issubclass(type_annotation, Config)):
-        subconfig_type = cls.__annotations__[k]  # pytype: disable=invalid-annotation
-      else:
-        # Check if the field is a sequence of subtypes.
-        field_type = getattr(type_annotation, '__origin__', type(None))
-        if (isinstance(field_type, type) and
-            issubclass(field_type, cls.SEQUENCE_TYPES)):
-          element_type = getattr(type_annotation, '__args__', [type(None)])[0]
-          subconfig_type = (
-              element_type if issubclass(element_type, params_dict.ParamsDict)
-              else subconfig_type)
-    return subconfig_type
-
-  def __post_init__(self, default_params, restrictions, *args, **kwargs):
-    super().__init__(
-        default_params=default_params,
-        restrictions=restrictions,
-        *args,
-        **kwargs)
-
-  def _set(self, k, v):
-    """Overrides same method in ParamsDict.
-
-    Also called by ParamsDict methods.
-
-    Args:
-      k: key to set.
-      v: value.
-
-    Raises:
-      RuntimeError
-    """
-    subconfig_type = self._get_subconfig_type(k)
-
-    def is_null(k):
-      if k not in self.__dict__ or not self.__dict__[k]:
-        return True
-      return False
-
-    if isinstance(v, dict):
-      if is_null(k):
-        # If the key does not exist or the value is None, a new Config-family
-        # object should be created for the key.
-        self.__dict__[k] = subconfig_type(v)
-      else:
-        self.__dict__[k].override(v)
-    elif not is_null(k) and isinstance(v, self.SEQUENCE_TYPES) and all(
-        [not isinstance(e, self.IMMUTABLE_TYPES) for e in v]):
-      if len(self.__dict__[k]) == len(v):
-        for i in range(len(v)):
-          self.__dict__[k][i].override(v[i])
-      elif not all([isinstance(e, self.IMMUTABLE_TYPES) for e in v]):
-        logging.warning(
-            'The list/tuple does not match the value dictionaries provided. '
-            'Thus, the list/tuple is determined by the type annotation and '
-            'the values provided. This is error-prone.')
-        self.__dict__[k] = self._import_config(v, subconfig_type)
-      else:
-        self.__dict__[k] = self._import_config(v, subconfig_type)
-    else:
-      self.__dict__[k] = self._import_config(v, subconfig_type)
-
-  def __setattr__(self, k, v):
-    if k not in self.RESERVED_ATTR:
-      if getattr(self, '_locked', False):
-        raise ValueError('The Config has been locked. No change is allowed.')
-    self._set(k, v)
-
-  def _override(self, override_dict, is_strict=True):
-    """Overrides same method in ParamsDict.
-
-    Also called by ParamsDict methods.
-
-    Args:
-      override_dict: dictionary of parameters to override.
-      is_strict: if True, adding new keys is not allowed.
-
-    Raises:
-      KeyError: if overriding reserved keys, or if a key does not exist while
-        `is_strict` is True.
-    """
-    for k, v in sorted(override_dict.items()):
-      if k in self.RESERVED_ATTR:
-        raise KeyError('The key {!r} is internally reserved. '
-                       'Can not be overridden.'.format(k))
-      if k not in self.__dict__:
-        if is_strict:
-          raise KeyError('The key {!r} does not exist in {!r}. '
-                         'To extend the existing keys, use '
-                         '`override` with `is_strict` = False.'.format(
-                             k, type(self)))
-        else:
-          self._set(k, v)
-      else:
-        if isinstance(v, dict) and self.__dict__[k]:
-          self.__dict__[k]._override(v, is_strict)  # pylint: disable=protected-access
-        elif isinstance(v, params_dict.ParamsDict) and self.__dict__[k]:
-          self.__dict__[k]._override(v.as_dict(), is_strict)  # pylint: disable=protected-access
-        else:
-          self._set(k, v)
-
-  def as_dict(self):
-    """Returns a dict representation of params_dict.ParamsDict.
-
-    For the nested params_dict.ParamsDict, a nested dict will be returned.
- """ - return { - k: self._export_config(v) - for k, v in self.__dict__.items() - if k not in self.RESERVED_ATTR - } - - def replace(self, **kwargs): - """Overrides/returns a unlocked copy with the current config unchanged.""" - # pylint: disable=protected-access - params = copy.deepcopy(self) - params._locked = False - params._override(kwargs, is_strict=True) - # pylint: enable=protected-access - return params - - @classmethod - def from_yaml(cls, file_path: str): - # Note: This only works if the Config has all default values. - with tf.io.gfile.GFile(file_path, 'r') as f: - loaded = yaml.load(f, Loader=yaml.FullLoader) - config = cls() - config.override(loaded) - return config - - @classmethod - def from_json(cls, file_path: str): - """Wrapper for `from_yaml`.""" - return cls.from_yaml(file_path) - - @classmethod - def from_args(cls, *args, **kwargs): - """Builds a config from the given list of arguments.""" - attributes = list(cls.__annotations__.keys()) - default_params = {a: p for a, p in zip(attributes, args)} - default_params.update(kwargs) - return cls(default_params) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config_test.py deleted file mode 100644 index 3ae5b3081eaf882ab7c03c8d5ed0482bac4dc2c7..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config_test.py +++ /dev/null @@ -1,376 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pprint -from typing import List, Tuple - -from absl.testing import parameterized -import dataclasses -import tensorflow as tf -from official.modeling.hyperparams import base_config - - -@dataclasses.dataclass -class DumpConfig1(base_config.Config): - a: int = 1 - b: str = 'text' - - -@dataclasses.dataclass -class DumpConfig2(base_config.Config): - c: int = 2 - d: str = 'text' - e: DumpConfig1 = DumpConfig1() - - -@dataclasses.dataclass -class DumpConfig3(DumpConfig2): - f: int = 2 - g: str = 'text' - h: List[DumpConfig1] = dataclasses.field( - default_factory=lambda: [DumpConfig1(), DumpConfig1()]) - g: Tuple[DumpConfig1, ...] 
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config_test.py
deleted file mode 100644
index 3ae5b3081eaf882ab7c03c8d5ed0482bac4dc2c7..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/base_config_test.py
+++ /dev/null
@@ -1,376 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import pprint
-from typing import List, Tuple
-
-from absl.testing import parameterized
-import dataclasses
-import tensorflow as tf
-from official.modeling.hyperparams import base_config
-
-
-@dataclasses.dataclass
-class DumpConfig1(base_config.Config):
-  a: int = 1
-  b: str = 'text'
-
-
-@dataclasses.dataclass
-class DumpConfig2(base_config.Config):
-  c: int = 2
-  d: str = 'text'
-  e: DumpConfig1 = DumpConfig1()
-
-
-@dataclasses.dataclass
-class DumpConfig3(DumpConfig2):
-  f: int = 2
-  h: List[DumpConfig1] = dataclasses.field(
-      default_factory=lambda: [DumpConfig1(), DumpConfig1()])
-  g: Tuple[DumpConfig1, ...] = (DumpConfig1(),)
-
-
-@dataclasses.dataclass
-class DumpConfig4(DumpConfig2):
-  x: int = 3
-
-
-@dataclasses.dataclass
-class DummyConfig5(base_config.Config):
-  y: Tuple[DumpConfig2, ...] = (DumpConfig2(), DumpConfig4())
-  z: Tuple[str] = ('a',)
-
-
-class BaseConfigTest(parameterized.TestCase, tf.test.TestCase):
-
-  def assertHasSameTypes(self, c, d, msg=''):
-    """Checks if a Config has the same structure as a given dict.
-
-    Args:
-      c: the Config object to be checked.
-      d: the reference dict object.
-      msg: The error message to show when a type mismatch is found.
-    """
-    # Make sure d is not a Config. Assume d is either
-    # dictionary or primitive type and c is the Config or primitive types.
-    self.assertNotIsInstance(d, base_config.Config)
-    if isinstance(d, base_config.Config.IMMUTABLE_TYPES):
-      self.assertEqual(pprint.pformat(c), pprint.pformat(d), msg=msg)
-    elif isinstance(d, base_config.Config.SEQUENCE_TYPES):
-      self.assertEqual(type(c), type(d), msg=msg)
-      for i, v in enumerate(d):
-        self.assertHasSameTypes(c[i], v, msg='{}[{!r}]'.format(msg, i))
-    elif isinstance(d, dict):
-      self.assertIsInstance(c, base_config.Config, msg=msg)
-      for k, v in sorted(d.items()):
-        self.assertHasSameTypes(getattr(c, k), v, msg='{}[{!r}]'.format(msg, k))
-    else:
-      raise TypeError('Unknown type: %r' % type(d))
-
-  def assertImportExport(self, v):
-    config = base_config.Config({'key': v})
-    back = config.as_dict()['key']
-    self.assertEqual(pprint.pformat(back), pprint.pformat(v))
-    self.assertHasSameTypes(config.key, v, msg='=%s v' % pprint.pformat(v))
-
-  def test_invalid_keys(self):
-    params = base_config.Config()
-    with self.assertRaises(AttributeError):
-      _ = params.a
-
-  def test_nested_config_types(self):
-    config = DumpConfig3()
-    self.assertIsInstance(config.e, DumpConfig1)
-    self.assertIsInstance(config.h[0], DumpConfig1)
-    self.assertIsInstance(config.h[1], DumpConfig1)
-    self.assertIsInstance(config.g[0], DumpConfig1)
-
-    config.override({'e': {'a': 2, 'b': 'new text'}})
-    self.assertIsInstance(config.e, DumpConfig1)
-    self.assertEqual(config.e.a, 2)
-    self.assertEqual(config.e.b, 'new text')
-
-    config.override({'h': [{'a': 3, 'b': 'new text 2'}]})
-    self.assertIsInstance(config.h[0], DumpConfig1)
-    self.assertLen(config.h, 1)
-    self.assertEqual(config.h[0].a, 3)
-    self.assertEqual(config.h[0].b, 'new text 2')
-
-    config.override({'g': [{'a': 4, 'b': 'new text 3'}]})
-    self.assertIsInstance(config.g[0], DumpConfig1)
-    self.assertLen(config.g, 1)
-    self.assertEqual(config.g[0].a, 4)
-    self.assertEqual(config.g[0].b, 'new text 3')
-
-  def test_replace(self):
-    config = DumpConfig2()
-    new_config = config.replace(e={'a': 2})
-    self.assertEqual(new_config.e.a, 2)
-    self.assertIsInstance(new_config.e, DumpConfig1)
-
-    config = DumpConfig2(e=DumpConfig2())
-    new_config = config.replace(e={'c': 4})
-    self.assertEqual(new_config.e.c, 4)
-    self.assertIsInstance(new_config.e, DumpConfig2)
-
-    config = DumpConfig3()
-    new_config = config.replace(g=[{'a': 4, 'b': 'new text 3'}])
-    self.assertIsInstance(new_config.g[0], DumpConfig1)
-    self.assertEqual(new_config.g[0].a, 4)
-
-  @parameterized.parameters(
-      ('_locked', "The key '_locked' is internally reserved."),
-      ('_restrictions', "The key '_restrictions' is internally reserved."),
-      ('aa', "The key 'aa' does not exist."),
-  )
-  def test_key_error(self, key, msg):
-    params = base_config.Config()
-    with self.assertRaisesRegex(KeyError, msg):
-      params.override({key: True})
-
-  @parameterized.parameters(
-      ('str data',),
-      (123,),
-      (1.23,),
-      (None,),
-      (['str',
1, 2.3, None],), - (('str', 1, 2.3, None),), - ) - def test_import_export_immutable_types(self, v): - self.assertImportExport(v) - out = base_config.Config({'key': v}) - self.assertEqual(pprint.pformat(v), pprint.pformat(out.key)) - - def test_override_is_strict_true(self): - params = base_config.Config({ - 'a': 'aa', - 'b': 2, - 'c': { - 'c1': 'cc', - 'c2': 20 - } - }) - params.override({'a': 2, 'c': {'c1': 'ccc'}}, is_strict=True) - self.assertEqual(params.a, 2) - self.assertEqual(params.c.c1, 'ccc') - with self.assertRaises(KeyError): - params.override({'d': 'ddd'}, is_strict=True) - with self.assertRaises(KeyError): - params.override({'c': {'c3': 30}}, is_strict=True) - - config = base_config.Config({'key': [{'a': 42}]}) - with self.assertRaisesRegex(KeyError, "The key 'b' does not exist"): - config.override({'key': [{'b': 43}]}) - - @parameterized.parameters( - (lambda x: x, 'Unknown type'), - (object(), 'Unknown type'), - (set(), 'Unknown type'), - (frozenset(), 'Unknown type'), - ) - def test_import_unsupport_types(self, v, msg): - with self.assertRaisesRegex(TypeError, msg): - _ = base_config.Config({'key': v}) - - @parameterized.parameters( - ({ - 'a': [{ - 'b': 2, - }, { - 'c': 3, - }] - },), - ({ - 'c': [{ - 'f': 1.1, - }, { - 'h': [1, 2], - }] - },), - (({ - 'a': 'aa', - 'b': 2, - 'c': { - 'c1': 10, - 'c2': 20, - } - },),), - ) - def test_import_export_nested_structure(self, d): - self.assertImportExport(d) - - @parameterized.parameters( - ([{ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - }],), - (({ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - },),), - ) - def test_import_export_nested_sequences(self, v): - self.assertImportExport(v) - - @parameterized.parameters( - ([([{}],)],), - ([['str', 1, 2.3, None]],), - ((('str', 1, 2.3, None),),), - ([ - ('str', 1, 2.3, None), - ],), - ([ - ('str', 1, 2.3, None), - ],), - ([[{ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - }]],), - ([[[{ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - }]]],), - ((({ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - },),),), - (((({ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - },),),),), - ([({ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - },)],), - (([{ - 'a': 42, - 'b': 'hello', - 'c': 1.2 - }],),), - ) - def test_import_export_unsupport_sequence(self, v): - with self.assertRaisesRegex(TypeError, - 'Invalid sequence: only supports single level'): - _ = base_config.Config({'key': v}) - - def test_construct_subtype(self): - pass - - def test_import_config(self): - params = base_config.Config({'a': [{'b': 2}, {'c': {'d': 3}}]}) - self.assertLen(params.a, 2) - self.assertEqual(params.a[0].b, 2) - self.assertEqual(type(params.a[0]), base_config.Config) - self.assertEqual(pprint.pformat(params.a[0].b), '2') - self.assertEqual(type(params.a[1]), base_config.Config) - self.assertEqual(type(params.a[1].c), base_config.Config) - self.assertEqual(pprint.pformat(params.a[1].c.d), '3') - - def test_override(self): - params = base_config.Config({'a': [{'b': 2}, {'c': {'d': 3}}]}) - params.override({'a': [{'b': 4}, {'c': {'d': 5}}]}, is_strict=False) - self.assertEqual(type(params.a), list) - self.assertEqual(type(params.a[0]), base_config.Config) - self.assertEqual(pprint.pformat(params.a[0].b), '4') - self.assertEqual(type(params.a[1]), base_config.Config) - self.assertEqual(type(params.a[1].c), base_config.Config) - self.assertEqual(pprint.pformat(params.a[1].c.d), '5') - - @parameterized.parameters( - ([{}],), - (({},),), - ) - def test_config_vs_params_dict(self, v): - d = {'key': v} - self.assertEqual(type(base_config.Config(d).key[0]), 
base_config.Config)
-    self.assertEqual(type(base_config.params_dict.ParamsDict(d).key[0]), dict)
-
-  def test_ppformat(self):
-    self.assertEqual(
-        pprint.pformat([
-            's', 1, 1.0, True, None, {}, [], (), {
-                (2,): (3, [4], {
-                    6: 7,
-                }),
-                8: 9,
-            }
-        ]),
-        "['s', 1, 1.0, True, None, {}, [], (), {8: 9, (2,): (3, [4], {6: 7})}]")
-
-  def test_with_restrictions(self):
-    restrictions = ['e.a<c']
-    config = DumpConfig2(restrictions=restrictions)
-    config.validate()
-
-
-if __name__ == '__main__':
-  tf.test.main()
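Before the `params_dict` module below, a small sketch of how its restriction mechanism is meant to be used; the key names here are illustrative, not taken from the deleted files:

```python
from official.modeling.hyperparams import params_dict

# Restrictions are binary relations between dotted keys; validate()
# re-checks them against the current parameter values.
params = params_dict.ParamsDict(
    {'train': {'batch_size': 32}, 'eval': {'batch_size': 32}},
    restrictions=['train.batch_size == eval.batch_size'])
params.override({'train': {'batch_size': 64}, 'eval': {'batch_size': 64}})
params.validate()  # passes: the restriction still holds
params.lock()      # further overrides or assignments now raise ValueError
```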
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/params_dict.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/params_dict.py
deleted file mode 100644
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/params_dict.py
+++ /dev/null
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""A parameter dictionary class which supports the nested structure."""
-
-import collections
-import copy
-import re
-
-import six
-import tensorflow as tf
-import yaml
-
-# Regex pattern that matches on key-value pairs in a comma-separated string.
-_PARAM_RE = re.compile(r"""
-  (?P<name>[a-zA-Z][\w\.]*)    # variable name: "var" or "x"
-  \s*=\s*
-  ((?P<val>\'(.*?)\'           # single quote
-  |
-  \"(.*?)\"                    # double quote
-  |
-  [^,\[]*                      # single value
-  |
-  \[[^\]]*\]))                 # list of values
-  ($|,\s*)""", re.VERBOSE)
-
-_CONST_VALUE_RE = re.compile(r'(\d.*|-\d.*|None)')
-
-# Yaml loader with an implicit resolver to parse float decimal and
-# exponential formats. The regular expression parses the following cases:
-# 1- Decimal number with an optional exponential term.
-# 2- Integer number with an exponential term.
-# 3- Decimal number with a leading dot and an optional exponential term.
-# 4- Sexagesimal number with a decimal part.
-
-LOADER = yaml.SafeLoader
-LOADER.add_implicit_resolver(
-    'tag:yaml.org,2002:float',
-    re.compile(r'''
-    ^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
-    |
-    [-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
-    |
-    \\.[0-9_]+(?:[eE][-+][0-9]+)?
-    |
-    [-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*)$''', re.X),
-    list('-+0123456789.'))
-
-
-class ParamsDict(object):
-  """A hyperparameter container class."""
-
-  RESERVED_ATTR = ['_locked', '_restrictions']
-
-  def __init__(self, default_params=None, restrictions=None):
-    """Instantiate a ParamsDict.
-
-    Instantiate a ParamsDict given a set of default parameters and a list of
-    restrictions. The restrictions are checked against the current parameters
-    whenever `validate()` is called, and an error is raised if an
-    inconsistency is found.
-
-    Args:
-      default_params: a Python dict or another ParamsDict object including the
-        default parameters to initialize.
-      restrictions: a list of strings, which define a list of restrictions to
-        ensure the consistency of different parameters internally. Each
-        restriction string is defined as a binary relation with a set of
-        operators, including {'==', '!=', '<', '<=', '>', '>='}.
-    """
-    self._locked = False
-    self._restrictions = []
-    if restrictions:
-      self._restrictions = restrictions
-    if default_params is None:
-      default_params = {}
-    self.override(default_params, is_strict=False)
-
-  def _set(self, k, v):
-    if isinstance(v, dict):
-      self.__dict__[k] = ParamsDict(v)
-    else:
-      self.__dict__[k] = copy.deepcopy(v)
-
-  def __setattr__(self, k, v):
-    """Sets the value of the existing key.
-
-    Note that this does not allow directly defining a new key. Use the
-    `override` method with `is_strict=False` instead.
-
-    Args:
-      k: the key string.
-      v: the value to be used to set the key `k`.
-
-    Raises:
-      KeyError: if k is not defined in the ParamsDict.
-    """
-    if k not in ParamsDict.RESERVED_ATTR:
-      if k not in self.__dict__.keys():
-        raise KeyError('The key `{}` does not exist. '
-                       'To extend the existing keys, use '
-                       '`override` with `is_strict` = False.'.format(k))
-      if self._locked:
-        raise ValueError('The ParamsDict has been locked. '
-                         'No change is allowed.')
-    self._set(k, v)
-
-  def __getattr__(self, k):
-    """Gets the value of the existing key.
-
-    Args:
-      k: the key string.
-
-    Returns:
-      the value of the key.
-
-    Raises:
-      AttributeError: if k is not defined in the ParamsDict.
-    """
-    if k not in self.__dict__.keys():
-      raise AttributeError('The key `{}` does not exist. '.format(k))
-    return self.__dict__[k]
-
-  def __contains__(self, key):
-    """Implements the membership test operator."""
-    return key in self.__dict__
-
-  def get(self, key, value=None):
-    """Accesses through built-in dictionary get method."""
-    return self.__dict__.get(key, value)
-
-  def __delattr__(self, k):
-    """Deletes the key and removes its values.
-
-    Args:
-      k: the key string.
-
-    Raises:
-      AttributeError: if k is reserved or not defined in the ParamsDict.
-      ValueError: if the ParamsDict instance has been locked.
-    """
-    if k in ParamsDict.RESERVED_ATTR:
-      raise AttributeError(
-          'The key `{}` is reserved. No change is allowed. '.format(k))
-    if k not in self.__dict__.keys():
-      raise AttributeError('The key `{}` does not exist. '.format(k))
-    if self._locked:
-      raise ValueError('The ParamsDict has been locked. No change is allowed.')
-    del self.__dict__[k]
-
-  def override(self, override_params, is_strict=True):
-    """Override the ParamsDict with a set of given params.
-
-    Args:
-      override_params: a dict or a ParamsDict specifying the parameters to be
-        overridden.
-      is_strict: a boolean specifying whether override is strict or not. If
-        True, keys in `override_params` must be present in the ParamsDict. If
-        False, keys in `override_params` can be different from what is
-        currently defined in the ParamsDict. In this case, the ParamsDict will
-        be extended to include the new keys.
-    """
-    if self._locked:
-      raise ValueError('The ParamsDict has been locked. No change is allowed.')
-    if isinstance(override_params, ParamsDict):
-      override_params = override_params.as_dict()
-    self._override(override_params, is_strict)  # pylint: disable=protected-access
-
-  def _override(self, override_dict, is_strict=True):
-    """The implementation of `override`."""
-    for k, v in six.iteritems(override_dict):
-      if k in ParamsDict.RESERVED_ATTR:
-        raise KeyError('The key `{}` is internally reserved. '
-                       'Can not be overridden.'.format(k))
-      if k not in self.__dict__.keys():
-        if is_strict:
-          raise KeyError('The key `{}` does not exist. '
-                         'To extend the existing keys, use '
-                         '`override` with `is_strict` = False.'.format(k))
-        else:
-          self._set(k, v)
-      else:
-        if isinstance(v, dict):
-          self.__dict__[k]._override(v, is_strict)  # pylint: disable=protected-access
-        elif isinstance(v, ParamsDict):
-          self.__dict__[k]._override(v.as_dict(), is_strict)  # pylint: disable=protected-access
-        else:
-          self.__dict__[k] = copy.deepcopy(v)
-
-  def lock(self):
-    """Makes the ParamsDict immutable."""
-    self._locked = True
-
-  def as_dict(self):
-    """Returns a dict representation of ParamsDict.
-
-    For the nested ParamsDict, a nested dict will be returned.
-    """
-    params_dict = {}
-    for k, v in six.iteritems(self.__dict__):
-      if k not in ParamsDict.RESERVED_ATTR:
-        if isinstance(v, ParamsDict):
-          params_dict[k] = v.as_dict()
-        else:
-          params_dict[k] = copy.deepcopy(v)
-    return params_dict
-
-  def validate(self):
-    """Validates the parameter consistency based on the restrictions.
-
-    This method validates the internal consistency using the pre-defined list
-    of restrictions. A restriction is defined as a string which specifies a
-    binary operation. The supported binary operations are {'==', '!=', '<',
-    '<=', '>', '>='}. Note that the meanings of these operators are consistent
-    with the underlying Python implementation. Users should make sure the
-    defined restrictions on their types make sense.
-
-    For example, for a ParamsDict like the following
-    ```
-    a:
-      a1: 1
-      a2: 2
-    b:
-      bb:
-        bb1: 10
-        bb2: 20
-      ccc:
-        a1: 1
-        a3: 3
-    ```
-    one can define two restrictions like this
-    ['a.a1 == b.ccc.a1', 'a.a2 <= b.bb.bb2']
-
-    What it enforces are:
-    - a.a1 = 1 == b.ccc.a1 = 1
-    - a.a2 = 2 <= b.bb.bb2 = 20
-
-    Raises:
-      KeyError: if any of the following happens
-        (1) any of parameters in any of restrictions is not defined in
-            ParamsDict,
-        (2) any inconsistency violating the restriction is found.
-      ValueError: if the restriction defined in the string is not supported.
-    """
-
-    def _get_kv(dotted_string, params_dict):
-      """Get keys and values indicated by dotted_string."""
-      if _CONST_VALUE_RE.match(dotted_string) is not None:
-        const_str = dotted_string
-        if const_str == 'None':
-          constant = None
-        else:
-          constant = float(const_str)
-        return None, constant
-      else:
-        tokenized_params = dotted_string.split('.')
-        v = params_dict
-        for t in tokenized_params:
-          v = v[t]
-        return tokenized_params[-1], v
-
-    def _get_kvs(tokens, params_dict):
-      if len(tokens) != 2:
-        raise ValueError('Only binary relations are supported in '
-                         'restrictions.')
-      stripped_tokens = [t.strip() for t in tokens]
-      left_k, left_v = _get_kv(stripped_tokens[0], params_dict)
-      right_k, right_v = _get_kv(stripped_tokens[1], params_dict)
-      return left_k, left_v, right_k, right_v
-
-    params_dict = self.as_dict()
-    for restriction in self._restrictions:
-      if '==' in restriction:
-        tokens = restriction.split('==')
-        _, left_v, _, right_v = _get_kvs(tokens, params_dict)
-        if left_v != right_v:
-          raise KeyError(
-              'Found inconsistency between key `{}` and key `{}`.'.format(
-                  tokens[0], tokens[1]))
-      elif '!=' in restriction:
-        tokens = restriction.split('!=')
-        _, left_v, _, right_v = _get_kvs(tokens, params_dict)
-        if left_v == right_v:
-          raise KeyError(
-              'Found inconsistency between key `{}` and key `{}`.'.format(
-                  tokens[0], tokens[1]))
-      # Check '<=' and '>=' before '<' and '>' so that a restriction such as
-      # 'a <= b' is not mistakenly split on the '<' operator.
-      elif '<=' in restriction:
-        tokens = restriction.split('<=')
-        _, left_v, _, right_v = _get_kvs(tokens, params_dict)
-        if left_v > right_v:
-          raise KeyError(
-              'Found inconsistency between key `{}` and key `{}`.'.format(
-                  tokens[0], tokens[1]))
-      elif '<' in restriction:
-        tokens = restriction.split('<')
-        _, left_v, _, right_v = _get_kvs(tokens, params_dict)
-        if left_v >= right_v:
-          raise KeyError(
-              'Found inconsistency between key `{}` and key `{}`.'.format(
-                  tokens[0], tokens[1]))
-      elif '>=' in restriction:
-        tokens = restriction.split('>=')
-        _, left_v, _, right_v = _get_kvs(tokens, params_dict)
-        if left_v < right_v:
-          raise KeyError(
-              'Found inconsistency between key `{}` and key `{}`.'.format(
-                  tokens[0], tokens[1]))
-      elif '>' in restriction:
-        tokens = restriction.split('>')
-        _, left_v, _, right_v = _get_kvs(tokens, params_dict)
-        if left_v <= right_v:
-          raise KeyError(
-              'Found inconsistency between key `{}` and key `{}`.'.format(
-                  tokens[0], tokens[1]))
-      else:
-        raise ValueError('Unsupported relation in restriction.')
-
-
-def read_yaml_to_params_dict(file_path: str):
-  """Reads a YAML file to a ParamsDict."""
-  with tf.io.gfile.GFile(file_path, 'r') as f:
-    params_dict = yaml.load(f, Loader=LOADER)
-    return ParamsDict(params_dict)
-
-
-def save_params_dict_to_yaml(params, file_path):
-  """Saves the input ParamsDict to a YAML file."""
-  with tf.io.gfile.GFile(file_path, 'w') as f:
-
-    def _my_list_rep(dumper, data):
-      # u'tag:yaml.org,2002:seq' is the YAML internal tag for sequence.
-      return dumper.represent_sequence(
-          u'tag:yaml.org,2002:seq', data, flow_style=True)
-
-    yaml.add_representer(list, _my_list_rep)
-    yaml.dump(params.as_dict(), f, default_flow_style=False)
-
-
-def nested_csv_str_to_json_str(csv_str):
-  """Converts a nested (using '.') comma-separated k=v string to a JSON string.
-
-  Converts a comma-separated string of key/value pairs that supports
-  nesting of keys to a JSON string. Nesting is implemented using
-  '.' between levels for a given key.
-
-  Spacing between commas and = is supported (e.g. there is no difference
-  between "a=1,b=2", "a = 1, b = 2", or "a=1, b=2") but there should be no
-  spaces before keys or after values (e.g. " a=1,b=2" and "a=1,b=2 " are not
-  supported).
-
-  Note that this will only support values supported by CSV, meaning
-  values such as nested lists (e.g. "a=[[1,2,3],[4,5,6]]") are not
-  supported. Strings are supported as well, e.g. "a='hello'".
-
-  An example conversion would be:
-
-  "a=1, b=2, c.a=2, c.b=3, d.a.a=5"
-
-  to
-
-  "{ a: 1, b : 2, c: {a : 2, b : 3}, d: {a: {a : 5}}}"
-
-  Args:
-    csv_str: the comma separated string.
-
-  Returns:
-    the converted JSON string.
-
-  Raises:
-    ValueError: if csv_str is not a comma-separated string of key/value
-      pairs, or if the string is formatted incorrectly.
-  """
-  if not csv_str:
-    return ''
-
-  formatted_entries = []
-  nested_map = collections.defaultdict(list)
-  pos = 0
-  while pos < len(csv_str):
-    m = _PARAM_RE.match(csv_str, pos)
-    if not m:
-      raise ValueError('Malformed hyperparameter value while parsing '
-                       'CSV string: %s' % csv_str[pos:])
-    pos = m.end()
-    # Parse the values.
-    m_dict = m.groupdict()
-    name = m_dict['name']
-    v = m_dict['val']
-
-    # If a GCS path (e.g. gs://...) is provided, wrap it in quotes, as
-    # yaml.load would otherwise throw an exception.
-    if re.match(r'gs://', v):
-      v = '\'{}\''.format(v)
-
-    name_nested = name.split('.')
-    if len(name_nested) > 1:
-      grouping = name_nested[0]
-      value = '.'.join(name_nested[1:]) + '=' + v
-      nested_map[grouping].append(value)
-    else:
-      formatted_entries.append('%s : %s' % (name, v))
-
-  for grouping, value in nested_map.items():
-    value = ','.join(value)
-    value = nested_csv_str_to_json_str(value)
-    formatted_entries.append('%s : %s' % (grouping, value))
-  return '{' + ', '.join(formatted_entries) + '}'
-
-
-def override_params_dict(params, dict_or_string_or_yaml_file, is_strict):
-  """Override a given ParamsDict using a dict, JSON/YAML/CSV string or YAML file.
-
-  The logic of the function is outlined below:
-  1. Tests that the input is a dict. If not, proceeds to 2.
-  2. Tests that the input is a string. If not, raises a ValueError for the
-     unknown input type.
-  2.1. Tests if the string is in a CSV format. If so, parses it.
-       If not, proceeds to 2.2.
-  2.2. Tries loading the string as YAML/JSON. If successful, parses it to
-       a dict and uses it to override. If not, proceeds to 2.3.
-  2.3. Tries using the string as a file path and loads the YAML file.
-
-  Args:
-    params: a ParamsDict object to be overridden.
-    dict_or_string_or_yaml_file: a Python dict, JSON/YAML/CSV string or path
-      to a YAML file specifying the parameters to be overridden.
-    is_strict: a boolean specifying whether override is strict or not.
-
-  Returns:
-    params: the overridden ParamsDict object.
-
-  Raises:
-    ValueError: if the parameters cannot be overridden.
- """ - if not dict_or_string_or_yaml_file: - return params - if isinstance(dict_or_string_or_yaml_file, dict): - params.override(dict_or_string_or_yaml_file, is_strict) - elif isinstance(dict_or_string_or_yaml_file, six.string_types): - try: - dict_or_string_or_yaml_file = ( - nested_csv_str_to_json_str(dict_or_string_or_yaml_file)) - except ValueError: - pass - params_dict = yaml.load(dict_or_string_or_yaml_file, Loader=LOADER) - if isinstance(params_dict, dict): - params.override(params_dict, is_strict) - else: - with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f: - params.override(yaml.load(f, Loader=yaml.FullLoader), is_strict) - else: - raise ValueError('Unknown input type to parse.') - return params diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/params_dict_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/params_dict_test.py deleted file mode 100644 index 528f8df039c4f552e0c68ccf1527cc5f9abfaf6d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/hyperparams/params_dict_test.py +++ /dev/null @@ -1,445 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for params_dict.py.""" - -import os - -import tensorflow as tf -import yaml - -from official.modeling.hyperparams import params_dict - - -class ParamsDictTest(tf.test.TestCase): - - def test_init_from_an_empty_dict(self): - params = params_dict.ParamsDict() - with self.assertRaises(AttributeError): - _ = params.a - - with self.assertRaises(KeyError): - params.a = 'aa' - - def test_init_from_a_dict(self): - params = params_dict.ParamsDict({'a': 'aa', 'b': 2}) - self.assertEqual(params.a, 'aa') - self.assertEqual(params.b, 2) - - def test_init_from_a_param_dict(self): - params_init = params_dict.ParamsDict({'a': 'aa', 'b': 2}) - params = params_dict.ParamsDict(params_init) - self.assertEqual(params.a, 'aa') - self.assertEqual(params.b, 2) - - def test_lock(self): - params = params_dict.ParamsDict({'a': 1, 'b': 2, 'c': 3}) - params.lock() - with self.assertRaises(ValueError): - params.a = 10 - with self.assertRaises(ValueError): - params.override({'b': 20}) - with self.assertRaises(ValueError): - del params.c - - def test_setattr(self): - params = params_dict.ParamsDict() - params.override({'a': 'aa', 'b': 2, 'c': None}, is_strict=False) - params.c = 'ccc' - self.assertEqual(params.a, 'aa') - self.assertEqual(params.b, 2) - self.assertEqual(params.c, 'ccc') - - def test_getattr(self): - params = params_dict.ParamsDict() - params.override({'a': 'aa', 'b': 2, 'c': None}, is_strict=False) - self.assertEqual(params.a, 'aa') - self.assertEqual(params.b, 2) - self.assertEqual(params.c, None) - - def test_delattr(self): - params = params_dict.ParamsDict() - params.override({ - 'a': 'aa', - 'b': 2, - 'c': None, - 'd': { - 'd1': 1, - 'd2': 10 - } - }, - is_strict=False) - del params.c - self.assertEqual(params.a, 'aa') - self.assertEqual(params.b, 2) - with self.assertRaises(AttributeError): - _ = params.c - del params.d - with self.assertRaises(AttributeError): - _ = params.d.d1 - - def test_contains(self): - params = params_dict.ParamsDict() - params.override({'a': 'aa'}, is_strict=False) - self.assertIn('a', params) - self.assertNotIn('b', params) - - def test_get(self): - params = params_dict.ParamsDict() - params.override({'a': 'aa'}, is_strict=False) - self.assertEqual(params.get('a'), 'aa') - self.assertEqual(params.get('b', 2), 2) - self.assertEqual(params.get('b'), None) - - def test_override_is_strict_true(self): - params = params_dict.ParamsDict({ - 'a': 'aa', - 'b': 2, - 'c': { - 'c1': 'cc', - 'c2': 20 - } - }) - params.override({'a': 2, 'c': {'c1': 'ccc'}}, is_strict=True) - self.assertEqual(params.a, 2) - self.assertEqual(params.c.c1, 'ccc') - with self.assertRaises(KeyError): - params.override({'d': 'ddd'}, is_strict=True) - with self.assertRaises(KeyError): - params.override({'c': {'c3': 30}}, is_strict=True) - - def test_override_is_strict_false(self): - params = params_dict.ParamsDict({ - 'a': 'aa', - 'b': 2, - 'c': { - 'c1': 10, - 'c2': 20 - } - }) - params.override({'a': 2, 'c': {'c3': 3000}}, is_strict=False) - self.assertEqual(params.a, 2) - self.assertEqual(params.c.c3, 3000) - params.override({'d': 'ddd'}, is_strict=False) - self.assertEqual(params.d, 'ddd') - params.override({'c': {'c4': 4444}}, is_strict=False) - self.assertEqual(params.c.c4, 4444) - - def test_as_dict(self): - params = params_dict.ParamsDict({ - 'a': 'aa', - 'b': 2, - 'c': { - 'c1': 10, - 'c2': 20 - } - }) - params_d = params.as_dict() - self.assertEqual(params_d['a'], 'aa') - self.assertEqual(params_d['b'], 2) - self.assertEqual(params_d['c']['c1'], 10) - self.assertEqual(params_d['c']['c2'], 
20)
-
-  def test_validate(self):
-    # Raise error due to the unknown parameter.
-    with self.assertRaises(KeyError):
-      params = params_dict.ParamsDict({'a': 1, 'b': {'a': 11}}, ['a == c'])
-      params.validate()
-
-    # OK to check equality of two nested dicts.
-    params = params_dict.ParamsDict({
-        'a': 1,
-        'b': {
-            'a': 10
-        },
-        'c': {
-            'a': 10
-        }
-    }, ['b == c'])
-
-    # Raise error due to inconsistency.
-    with self.assertRaises(KeyError):
-      params = params_dict.ParamsDict({'a': 1, 'c': {'a': 10}}, ['a == c.a'])
-      params.validate()
-
-    # Valid rule.
-    params = params_dict.ParamsDict({'a': 1, 'c': {'a': 1}}, ['a == c.a'])
-
-    # Overriding violates the existing rule, raise error upon validate.
-    params.override({'a': 11})
-    with self.assertRaises(KeyError):
-      params.validate()
-
-    # Valid restrictions with constant.
-    params = params_dict.ParamsDict({
-        'a': None,
-        'c': {
-            'a': 1
-        }
-    }, ['a == None', 'c.a == 1'])
-    params.validate()
-    with self.assertRaises(KeyError):
-      params = params_dict.ParamsDict({
-          'a': 4,
-          'c': {
-              'a': 1
-          }
-      }, ['a == None', 'c.a == 1'])
-      params.validate()
-
-
-class ParamsDictIOTest(tf.test.TestCase):
-
-  def write_temp_file(self, filename, text):
-    temp_file = os.path.join(self.get_temp_dir(), filename)
-    with tf.io.gfile.GFile(temp_file, 'w') as writer:
-      writer.write(text)
-    return temp_file
-
-  def test_save_params_dict_to_yaml(self):
-    params = params_dict.ParamsDict({
-        'a': 'aa',
-        'b': 2,
-        'c': {
-            'c1': 10,
-            'c2': 20
-        }
-    })
-    output_yaml_file = os.path.join(self.get_temp_dir(), 'params.yaml')
-    params_dict.save_params_dict_to_yaml(params, output_yaml_file)
-
-    with tf.io.gfile.GFile(output_yaml_file, 'r') as f:
-      params_d = yaml.load(f, Loader=yaml.FullLoader)
-      self.assertEqual(params.a, params_d['a'])
-      self.assertEqual(params.b, params_d['b'])
-      self.assertEqual(params.c.c1, params_d['c']['c1'])
-      self.assertEqual(params.c.c2, params_d['c']['c2'])
-
-  def test_read_yaml_to_params_dict(self):
-    input_yaml_file = self.write_temp_file(
-        'params.yaml', r"""
-      a: 'aa'
-      b: 2
-      c:
-        c1: 10
-        c2: 20
-    """)
-    params = params_dict.read_yaml_to_params_dict(input_yaml_file)
-
-    self.assertEqual(params.a, 'aa')
-    self.assertEqual(params.b, 2)
-    self.assertEqual(params.c.c1, 10)
-    self.assertEqual(params.c.c2, 20)
-
-  def test_override_params_dict_using_dict(self):
-    params = params_dict.ParamsDict({
-        'a': 1,
-        'b': 2.5,
-        'c': [3, 4],
-        'd': 'hello',
-        'e': False
-    })
-    override_dict = {'b': 5.2, 'c': [30, 40]}
-    params = params_dict.override_params_dict(
-        params, override_dict, is_strict=True)
-    self.assertEqual(1, params.a)
-    self.assertEqual(5.2, params.b)
-    self.assertEqual([30, 40], params.c)
-    self.assertEqual('hello', params.d)
-    self.assertEqual(False, params.e)
-
-  def test_override_params_dict_using_yaml_string(self):
-    params = params_dict.ParamsDict({
-        'a': 1,
-        'b': 2.5,
-        'c': [3, 4],
-        'd': 'hello',
-        'e': False
-    })
-    override_yaml_string = "'b': 5.2\n'c': [30, 40]"
-    params = params_dict.override_params_dict(
-        params, override_yaml_string, is_strict=True)
-    self.assertEqual(1, params.a)
-    self.assertEqual(5.2, params.b)
-    self.assertEqual([30, 40], params.c)
-    self.assertEqual('hello', params.d)
-    self.assertEqual(False, params.e)
-
-  def test_override_params_dict_using_json_string(self):
-    params = params_dict.ParamsDict({
-        'a': 1,
-        'b': {
-            'b1': 2,
-            'b2': [2, 3],
-        },
-        'd': {
-            'd1': {
-                'd2': 'hello'
-            }
-        },
-        'e': False
-    })
-    override_json_string = "{ b: { b2: [3, 4] }, d: { d1: { d2: 'hi' } } }"
-    params = params_dict.override_params_dict(
-        params, override_json_string, is_strict=True)
-    self.assertEqual(1, params.a)
-    self.assertEqual(2, params.b.b1)
-    self.assertEqual([3, 4], params.b.b2)
-    self.assertEqual('hi', params.d.d1.d2)
-    self.assertEqual(False, params.e)
-
-  def test_override_params_dict_using_csv_string(self):
-    params = params_dict.ParamsDict({
-        'a': 1,
-        'b': {
-            'b1': 2,
-            'b2': [2, 3],
-        },
-        'd': {
-            'd1': {
-                'd2': 'hello'
-            }
-        },
-        'e': False
-    })
-    override_csv_string = "b.b2=[3,4], d.d1.d2='hi, world', e=gs://test"
-    params = params_dict.override_params_dict(
-        params, override_csv_string, is_strict=True)
-    self.assertEqual(1, params.a)
-    self.assertEqual(2, params.b.b1)
-    self.assertEqual([3, 4], params.b.b2)
-    self.assertEqual('hi, world', params.d.d1.d2)
-    self.assertEqual('gs://test', params.e)
-    # Test different float formats.
-    override_csv_string = 'b.b2=-1.e-3, d.d1.d2=+0.001, e=1e+3, a=-1.5E-3'
-    params = params_dict.override_params_dict(
-        params, override_csv_string, is_strict=True)
-    self.assertEqual(-1e-3, params.b.b2)
-    self.assertEqual(0.001, params.d.d1.d2)
-    self.assertEqual(1e3, params.e)
-    self.assertEqual(-1.5e-3, params.a)
-
-  def test_override_params_dict_using_yaml_file(self):
-    params = params_dict.ParamsDict({
-        'a': 1,
-        'b': 2.5,
-        'c': [3, 4],
-        'd': 'hello',
-        'e': False
-    })
-    override_yaml_file = self.write_temp_file(
-        'params.yaml', r"""
-      b: 5.2
-      c: [30, 40]
-    """)
-    params = params_dict.override_params_dict(
-        params, override_yaml_file, is_strict=True)
-    self.assertEqual(1, params.a)
-    self.assertEqual(5.2, params.b)
-    self.assertEqual([30, 40], params.c)
-    self.assertEqual('hello', params.d)
-    self.assertEqual(False, params.e)
-
-
-class IOTest(tf.test.TestCase):
-
-  def test_basic_csv_str_to_json_str(self):
-    csv_str = 'a=1,b=2,c=3'
-    json_str = '{a : 1, b : 2, c : 3}'
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    self.assertEqual(converted_csv_str, json_str)
-
-  def test_basic_csv_str_load(self):
-    csv_str = 'a=1,b=2,c=3'
-    expected_output = {'a': 1, 'b': 2, 'c': 3}
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    converted_dict = yaml.load(converted_csv_str, Loader=yaml.FullLoader)
-    self.assertDictEqual(converted_dict, expected_output)
-
-  def test_basic_nested_csv_str_to_json_str(self):
-    csv_str = 'a=1,b.b1=2'
-    json_str = '{a : 1, b : {b1 : 2}}'
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    self.assertEqual(converted_csv_str, json_str)
-
-  def test_basic_nested_csv_str_load(self):
-    csv_str = 'a=1,b.b1=2,c.c1=3'
-    expected_output = {'a': 1, 'b': {'b1': 2}, 'c': {'c1': 3}}
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    converted_dict = yaml.load(converted_csv_str, Loader=yaml.FullLoader)
-    self.assertDictEqual(converted_dict, expected_output)
-
-  def test_complex_nested_csv_str_to_json_str(self):
-    csv_str = 'a.aa.aaa.aaaaa.a=1'
-    json_str = '{a : {aa : {aaa : {aaaaa : {a : 1}}}}}'
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    self.assertEqual(converted_csv_str, json_str)
-
-  def test_complex_nested_csv_str_load(self):
-    csv_str = 'a.aa.aaa.aaaaa.a=1,a.a=2'
-    expected_output = {'a': {'aa': {'aaa': {'aaaaa': {'a': 1}}}, 'a': 2}}
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    converted_dict = yaml.load(converted_csv_str, Loader=yaml.FullLoader)
-    self.assertDictEqual(converted_dict, expected_output)
-
-  def test_csv_str_load_supported_datatypes(self):
-    csv_str = 'a=1,b=2.,c=[1,2,3],d=\'hello, there\',e=\"Hi.\"'
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    converted_dict = yaml.load(converted_csv_str, Loader=yaml.FullLoader)
-    self.assertEqual(converted_dict['a'], 1)
-    self.assertEqual(converted_dict['b'], 2.)
-    self.assertEqual(converted_dict['c'], [1, 2, 3])
-    self.assertEqual(converted_dict['d'], 'hello, there')
-    self.assertEqual(converted_dict['e'], 'Hi.')
-
-  def test_csv_str_load_unsupported_datatypes(self):
-    csv_str = 'a=[[1,2,3],[4,5,6]]'
-    self.assertRaises(ValueError, params_dict.nested_csv_str_to_json_str,
-                      csv_str)
-
-  def test_csv_str_to_json_str_spacing(self):
-    csv_str1 = 'a=1,b=2,c=3'
-    csv_str2 = 'a = 1, b = 2, c = 3'
-    json_str = '{a : 1, b : 2, c : 3}'
-    converted_csv_str1 = params_dict.nested_csv_str_to_json_str(csv_str1)
-    converted_csv_str2 = params_dict.nested_csv_str_to_json_str(csv_str2)
-    self.assertEqual(converted_csv_str1, converted_csv_str2)
-    self.assertEqual(converted_csv_str1, json_str)
-    self.assertEqual(converted_csv_str2, json_str)
-
-  def test_gcs_added_quotes(self):
-    csv_str = 'a=gs://abc, b=gs://def'
-    expected_output = '{a : \'gs://abc\', b : \'gs://def\'}'
-    converted_csv_str = params_dict.nested_csv_str_to_json_str(csv_str)
-    self.assertEqual(converted_csv_str, expected_output)
-
-
-if __name__ == '__main__':
-  tf.test.main()
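As a recap of the override flow exercised by the tests above, here is a small sketch; the parameter names and values are illustrative:

```python
from official.modeling.hyperparams import params_dict

params = params_dict.ParamsDict({'a': 1, 'b': {'b1': 2}})
# Nested CSV strings are first rewritten into a JSON/YAML-style string ...
json_str = params_dict.nested_csv_str_to_json_str('a=2, b.b1=3')
print(json_str)  # {a : 2, b : {b1 : 3}}
# ... which override_params_dict then parses and applies.
params = params_dict.override_params_dict(params, 'a=2, b.b1=3',
                                          is_strict=True)
assert params.a == 2 and params.b.b1 == 3
```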
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/__init__.py
deleted file mode 100644
index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/__init__.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_model.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_model.py
deleted file mode 100644
index f81140e85ed1bef681378218440acb1d1a941a35..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_model.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-"""Abstraction of multi-task model."""
-from typing import Text, Dict
-
-import tensorflow as tf
-
-
-class MultiTaskBaseModel(tf.Module):
-  """Base class that holds multi-task model computation."""
-
-  def __init__(self, **kwargs):
-    super().__init__(**kwargs)
-    self._sub_tasks = self._instantiate_sub_tasks()
-
-  def _instantiate_sub_tasks(self) -> Dict[Text, tf.keras.Model]:
-    """Abstract function that sets up the computation for each sub-task.
-
-    Returns:
-      A map from task name (as string) to a tf.keras.Model object that
-      represents the sub-task in the multi-task pool.
-    """
-    raise NotImplementedError(
-        '_instantiate_sub_tasks() is not implemented.')
-
-  @property
-  def sub_tasks(self):
-    """Fetch a map of task name (string) to task model (tf.keras.Model)."""
-    return self._sub_tasks
-
-  def initialize(self):
-    """Optional function that loads a pre-trained checkpoint."""
-    return
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_trainer.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_trainer.py
deleted file mode 100644
index 4bcc17fa15909bce286f7ab6761f0c67ab93d22b..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_trainer.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Multitask base trainer implementation. - -The trainer derives from the Orbit `StandardTrainer` class. 
-""" -from typing import Union -import gin -import orbit -import tensorflow as tf - -from official.modeling.multitask import base_model -from official.modeling.multitask import multitask - - -@gin.configurable -class MultiTaskBaseTrainer(orbit.StandardTrainer): - """Multitask base trainer.""" - - def __init__(self, - multi_task: multitask.MultiTask, - multi_task_model: Union[tf.keras.Model, - base_model.MultiTaskBaseModel], - optimizer: tf.optimizers.Optimizer, - trainer_options=None): - self._strategy = tf.distribute.get_strategy() - self._multi_task = multi_task - self._multi_task_model = multi_task_model - self._optimizer = optimizer - - self._training_losses = None - self._training_metrics = None - self._global_step = orbit.utils.create_global_step() - - if hasattr(self.multi_task_model, "checkpoint_items"): - checkpoint_items = self.multi_task_model.checkpoint_items - else: - checkpoint_items = {} - - self._checkpoint = tf.train.Checkpoint( - model=self.multi_task_model, - optimizer=self.optimizer, - global_step=self.global_step, - **checkpoint_items) - - train_datasets = {} - for name, task in self.multi_task.tasks.items(): - train_datasets[name] = orbit.utils.make_distributed_dataset( - self.strategy, task.build_inputs, task.task_config.train_data) - - super().__init__( - train_dataset=train_datasets, - options=trainer_options or orbit.StandardTrainerOptions()) - - def train_loop_begin(self): - """Clean up states that hold losses and metrics.""" - for _, train_loss_metric in self.training_losses.items(): - train_loss_metric.reset_states() - - for _, metrics in self.training_metrics.items(): - for metric in metrics: - metric.reset_states() - - def train_loop_end(self): - """Record loss and metric values per task.""" - result = {} - for task_name, loss in self.training_losses.items(): - result[task_name] = {loss.name: loss.result()} - for task_name, task_metrics in self.training_metrics.items(): - result[task_name].update( - {metric.name: metric.result() for metric in task_metrics}) - # Note that, the learning rate schedule is managed by the keras optimizer - # internally, which respects the number of backward pass as `iterations`. - # The learning rate schedule does not follow the trainer logical global - # step of multiple tasks. - if callable(self.optimizer.learning_rate): - result["learning_rate"] = self.optimizer.learning_rate( - self.optimizer.iterations) - else: - result["learning_rate"] = self.optimizer.learning_rate - return result - - @property - def checkpoint(self): - """Accesses the training checkpoint.""" - return self._checkpoint - - @property - def training_losses(self): - """Access training loss metric objects for all tasks.""" - if self._training_losses is None: - # Builds the per-task metrics and losses. - # This the total summed training loss of tasks in the joint training. - self._training_losses = dict( - total_loss=tf.keras.metrics.Mean("training_loss", dtype=tf.float32)) - for name in self.multi_task.tasks: - self._training_losses[name] = tf.keras.metrics.Mean( - "training_loss", dtype=tf.float32) - return self._training_losses - - @property - def training_metrics(self): - """Access training metric metric objects for all tasks.""" - if self._training_metrics is None: - # Builds the per-task metrics and losses. 
- self._training_metrics = {} - for name, task in self.multi_task.tasks.items(): - self._training_metrics[name] = task.build_metrics(training=True) - return self._training_metrics - - @property - def strategy(self): - return self._strategy - - @property - def multi_task(self): - return self._multi_task - - @property - def multi_task_model(self): - return self._multi_task_model - - @property - def optimizer(self): - return self._optimizer - - @property - def global_step(self): - return self._global_step - - def train_step(self, iterator_map): - """The default train step calling the multi-task train step. - - Args: - iterator_map: a dictionary of task names and per-task dataset iterators. - """ - - def step_fn(inputs): - losses = self.multi_task.joint_train_step( - inputs, - multi_task_model=self.multi_task_model, - optimizer=self.optimizer, - task_metrics=self.training_metrics) - for key, loss in losses.items(): - self.training_losses[key].update_state(loss) - - self.strategy.run( - step_fn, args=(tf.nest.map_structure(next, iterator_map),)) - self.global_step.assign_add(1) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_trainer_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_trainer_test.py deleted file mode 100644 index 986845c025a8ee1a7334277ae7371fc9815dfcda..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/base_trainer_test.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
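Condensing the pattern the test file below exercises, a hedged sketch of wiring the joint trainer together; the mock tasks and model come from the `test_utils` module later in this diff:

```python
import tensorflow as tf

from official.modeling.multitask import base_trainer
from official.modeling.multitask import multitask
from official.modeling.multitask import test_utils

tasks = [
    test_utils.MockFooTask(params=test_utils.FooConfig(), name="foo"),
    test_utils.MockBarTask(params=test_utils.BarConfig(), name="bar"),
]
trainer = base_trainer.MultiTaskBaseTrainer(
    multi_task=multitask.MultiTask(tasks=tasks),
    multi_task_model=test_utils.MockMultiTaskModel(),
    optimizer=tf.keras.optimizers.SGD(0.1))
# Five joint steps; the result maps each task name to its loss/metrics.
logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
```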
-# - -"""Tests for multitask.base_trainer.""" -from absl.testing import parameterized -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.modeling.multitask import base_trainer -from official.modeling.multitask import configs -from official.modeling.multitask import multitask -from official.modeling.multitask import test_utils - - -def all_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - mode="eager", - ) - - -class BaseTrainerTest(tf.test.TestCase, parameterized.TestCase): - - @combinations.generate(all_strategy_combinations()) - def test_multitask_joint_trainer(self, distribution): - with distribution.scope(): - tasks = [ - test_utils.MockFooTask(params=test_utils.FooConfig(), name="foo"), - test_utils.MockBarTask(params=test_utils.BarConfig(), name="bar") - ] - task_weights = {"foo": 1.0, "bar": 1.0} - test_multitask = multitask.MultiTask( - tasks=tasks, task_weights=task_weights) - test_optimizer = tf.keras.optimizers.SGD(0.1) - model = test_utils.MockMultiTaskModel() - test_trainer = base_trainer.MultiTaskBaseTrainer( - multi_task=test_multitask, - multi_task_model=model, - optimizer=test_optimizer) - results = test_trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertContainsSubset(["training_loss", "bar_acc"], - results["bar"].keys()) - self.assertContainsSubset(["training_loss", "foo_acc"], - results["foo"].keys()) - - def test_trainer_with_configs(self): - config = configs.MultiTaskConfig( - task_routines=(configs.TaskRoutine( - task_name="foo", - task_config=test_utils.FooConfig(), - task_weight=0.5), - configs.TaskRoutine( - task_name="bar", - task_config=test_utils.BarConfig(), - task_weight=0.5))) - test_multitask = multitask.MultiTask.from_config(config) - test_optimizer = tf.keras.optimizers.SGD(0.1) - model = test_utils.MockMultiTaskModel() - test_trainer = base_trainer.MultiTaskBaseTrainer( - multi_task=test_multitask, - multi_task_model=model, - optimizer=test_optimizer) - results = test_trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertContainsSubset(["training_loss", "bar_acc"], - results["bar"].keys()) - self.assertContainsSubset(["training_loss", "foo_acc"], - results["foo"].keys()) - self.assertEqual(test_multitask.task_weight("foo"), 0.5) - self.assertEqual(test_trainer.global_step.numpy(), 5) - self.assertIn("learning_rate", results) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/configs.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/configs.py deleted file mode 100644 index 1ce4674179501962c86fa06973a530e59cc8ed03..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/configs.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Configuration definitions for multi-task training.""" -from typing import Optional, Tuple - -import dataclasses - -from official.core import config_definitions as cfg -from official.modeling import hyperparams - - -@dataclasses.dataclass -class TaskRoutine(hyperparams.Config): - task_name: str = "" - task_config: cfg.TaskConfig = None - eval_steps: Optional[int] = None - task_weight: Optional[float] = 1.0 - - -@dataclasses.dataclass -class MultiTaskConfig(hyperparams.Config): - init_checkpoint: str = "" - model: hyperparams.Config = None - task_routines: Tuple[TaskRoutine, ...] = () - - -@dataclasses.dataclass -class ProportionalSampleConfig(hyperparams.Config): - alpha: float = 1.0 - - -@dataclasses.dataclass -class AnnealingSampleConfig(hyperparams.Config): - steps_per_epoch: int = 5 - total_steps: int = 20 - - -@dataclasses.dataclass -class TaskSamplingConfig(hyperparams.OneOfConfig): - type: str = "" - uniform: hyperparams.Config = hyperparams.Config() - proportional: ProportionalSampleConfig = ProportionalSampleConfig() - annealing: AnnealingSampleConfig = AnnealingSampleConfig() - - -@dataclasses.dataclass -class MultiTaskTrainerConfig(cfg.TrainerConfig): - trainer_type: str = "interleaving" - task_sampler: TaskSamplingConfig = TaskSamplingConfig(type="proportional") - - -@dataclasses.dataclass -class MultiTaskExperimentConfig(hyperparams.Config): - """An experiment config for multi-task training and multi-task evaluation.""" - task: MultiTaskConfig = MultiTaskConfig() - trainer: MultiTaskTrainerConfig = MultiTaskTrainerConfig() - runtime: cfg.RuntimeConfig = cfg.RuntimeConfig() - - -@dataclasses.dataclass -class MultiEvalExperimentConfig(cfg.ExperimentConfig): - """An experiment config for single-task training and multi-task evaluation. - - Attributes: - eval_tasks: individual evaluation tasks. - """ - eval_tasks: MultiTaskConfig = MultiTaskConfig() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/evaluator.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/evaluator.py deleted file mode 100644 index ec8551d88c9fb22c2c7d908b6a74a80bdea6fa5d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/evaluator.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
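The dataclasses above compose into a full experiment description. A hedged sketch of that composition (`FooConfig`/`BarConfig` are the placeholder task configs registered in `test_utils` later in this diff; exact constructor behavior follows the `hyperparams.Config` conventions assumed here):

```python
from official.modeling.multitask import configs
from official.modeling.multitask import test_utils

experiment = configs.MultiTaskExperimentConfig(
    task=configs.MultiTaskConfig(
        task_routines=(
            configs.TaskRoutine(
                task_name="foo",
                task_config=test_utils.FooConfig(),
                task_weight=2.0),
            configs.TaskRoutine(
                task_name="bar",
                task_config=test_utils.BarConfig(),
                task_weight=1.0),
        )),
    trainer=configs.MultiTaskTrainerConfig(
        trainer_type="interleaving",
        task_sampler=configs.TaskSamplingConfig(type="proportional")),
)
```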
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Multitask Evaluator implementation.
-
-The evaluator implements the Orbit `AbstractEvaluator` interface.
-"""
-from typing import Optional, Union
-import gin
-import orbit
-import tensorflow as tf
-
-from official.core import train_utils
-from official.modeling.multitask import base_model
-from official.modeling.multitask import multitask
-
-
-@gin.configurable
-class MultiTaskEvaluator(orbit.AbstractEvaluator):
-  """Implements a multi-task evaluator for TensorFlow models."""
-
-  def __init__(
-      self,
-      task: multitask.MultiTask,
-      model: Union[tf.keras.Model, base_model.MultiTaskBaseModel],
-      global_step: Optional[tf.Variable] = None,
-      checkpoint_exporter: Optional[train_utils.BestCheckpointExporter] = None):
-    """Initializes the multi-task evaluator for TensorFlow models.
-
-    Args:
-      task: A multitask.MultiTask instance.
-      model: tf.keras.Model instance.
-      global_step: the global step variable.
-      checkpoint_exporter: an object that has the `maybe_export_checkpoint`
-        interface.
-    """
-    # Gets the current distribution strategy. If not inside any strategy scope,
-    # it gets a single-replica no-op strategy.
-    self._strategy = tf.distribute.get_strategy()
-    self._task = task
-    self._model = model
-    self._global_step = global_step or orbit.utils.create_global_step()
-    self._checkpoint_exporter = checkpoint_exporter
-    self._checkpoint = tf.train.Checkpoint(
-        global_step=self.global_step,
-        model=self.model)
-
-    self._validation_losses = None
-    self._validation_metrics = None
-
-    # Builds per-task datasets.
-    self.eval_datasets = {}
-    for name, task in self.task.tasks.items():
-      self.eval_datasets[name] = orbit.utils.make_distributed_dataset(
-          self.strategy, task.build_inputs, task.task_config.validation_data)
-
-    # Builds per-task validation loops.
-    def get_function(task_name, task):
-
-      task_metrics = self.validation_metrics[task_name]
-      task_loss = self.validation_losses[task_name]
-      if isinstance(self.model, base_model.MultiTaskBaseModel):
-        model = self.model.sub_tasks[task_name]
-      else:
-        model = self.model
-
-      def step_fn(inputs):
-        logs = task.validation_step(inputs, model=model, metrics=task_metrics)
-        task_loss.update_state(logs[task.loss])
-        return logs
-
-      @tf.function
-      def eval_step_fn(iterator):
-        distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),))
-        return tf.nest.map_structure(self.strategy.experimental_local_results,
-                                     distributed_outputs)
-
-      return orbit.utils.create_loop_fn(eval_step_fn)
-
-    self.task_fns = {
-        name: get_function(name, task)
-        for name, task in self.task.tasks.items()
-    }
-
-  @property
-  def strategy(self):
-    return self._strategy
-
-  @property
-  def task(self):
-    return self._task
-
-  @property
-  def model(self):
-    return self._model
-
-  @property
-  def global_step(self):
-    return self._global_step
-
-  @property
-  def validation_losses(self):
-    """Accesses the validation loss metric object."""
-    if self._validation_losses is None:
-      # Builds the per-task metrics and losses.
-      self._validation_losses = {}
-      for name in self.task.tasks:
-        self._validation_losses[name] = tf.keras.metrics.Mean(
-            "validation_loss", dtype=tf.float32)
-    return self._validation_losses
-
-  @property
-  def validation_metrics(self):
-    """Accesses all validation metric objects."""
-    if self._validation_metrics is None:
-      # Builds the per-task metrics and losses.
-      self._validation_metrics = {}
-      for name, task in self.task.tasks.items():
-        self._validation_metrics[name] = task.build_metrics(training=False)
-    return self._validation_metrics
-
-  @property
-  def checkpoint(self):
-    """Accesses the training checkpoint."""
-    return self._checkpoint
-
-  def evaluate(self, num_steps: tf.Tensor):
-    """Performs evaluation for each evaluation task."""
-    for metric in self.validation_losses.values():
-      metric.reset_states()
-    for metrics in self.validation_metrics.values():
-      for metric in metrics:
-        metric.reset_states()
-    results = {}
-    eval_iters = tf.nest.map_structure(iter, self.eval_datasets)
-
-    for name, task_eval_loop in self.task_fns.items():
-      outputs = None
-      eval_iter = eval_iters[name]
-      task = self.task.tasks[name]
-      task_eval_steps = self.task.task_eval_steps(name) or num_steps
-      outputs = task_eval_loop(
-          eval_iter,
-          task_eval_steps,
-          state=outputs,
-          reduce_fn=task.aggregate_logs)
-      task_metrics = self.validation_metrics[name]
-      task_loss = self.validation_losses[name]
-      logs = {}
-      for metric in task_metrics + [task_loss]:
-        logs[metric.name] = metric.result()
-      if outputs:
-        metrics = task.reduce_aggregated_logs(
-            outputs, global_step=self.global_step)
-        logs.update(metrics)
-      results[name] = logs
-
-    if self._checkpoint_exporter:
-      self._checkpoint_exporter.maybe_export_checkpoint(
-          self.checkpoint, results, self.global_step.numpy())
-    return results
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/evaluator_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/evaluator_test.py
deleted file mode 100644
index 7d3650b23d5d0554d8c6db5ef5cbdf6ccea78476..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/evaluator_test.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright 2021 The TensorFlow Authors.
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for multitask.evaluator.""" -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.core import base_task -from official.core import config_definitions as cfg -from official.modeling.multitask import evaluator -from official.modeling.multitask import multitask - - -def all_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - mode="eager", - ) - - -class MockModel(tf.keras.Model): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.dense = tf.keras.layers.Dense(1) - - def call(self, inputs): - print(inputs, type(inputs)) - if "y" in inputs: - self.add_loss(tf.zeros((1,), dtype=tf.float32)) - else: - self.add_loss(tf.ones((1,), dtype=tf.float32)) - return self.dense(inputs["x"]) - - -class MockTask(base_task.Task): - """Mock task object for testing.""" - - def build_metrics(self, training: bool = True): - del training - return [tf.keras.metrics.Accuracy(name="acc")] - - def build_inputs(self, params): - - def generate_data(_): - x = tf.zeros(shape=(2,), dtype=tf.float32) - label = tf.zeros([1], dtype=tf.int32) - if self.name == "bar": - return dict(x=x, y=x), label - else: - return dict(x=x), label - - dataset = tf.data.Dataset.range(1) - dataset = dataset.repeat() - dataset = dataset.map( - generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) - return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True) - - def validation_step(self, inputs, model: tf.keras.Model, metrics=None): - logs = super().validation_step(inputs, model, metrics) - logs["counter"] = tf.ones((1,), dtype=tf.float32) - return logs - - def aggregate_logs(self, state, step_outputs): - if state is None: - state = {} - for key, value in step_outputs.items(): - if key not in state: - state[key] = [] - state[key].append( - np.concatenate([np.expand_dims(v.numpy(), axis=0) for v in value])) - return state - - def reduce_aggregated_logs(self, - aggregated_logs, - global_step=None): - for k, v in 
aggregated_logs.items(): - aggregated_logs[k] = np.sum(np.stack(v, axis=0)) - return aggregated_logs - - -class EvaluatorTest(tf.test.TestCase, parameterized.TestCase): - - @combinations.generate(all_strategy_combinations()) - def test_multitask_evaluator(self, distribution): - with distribution.scope(): - tasks = [ - MockTask(params=cfg.TaskConfig(), name="bar"), - MockTask(params=cfg.TaskConfig(), name="foo") - ] - test_multitask = multitask.MultiTask(tasks=tasks) - model = MockModel() - test_evaluator = evaluator.MultiTaskEvaluator( - task=test_multitask, model=model) - results = test_evaluator.evaluate(tf.convert_to_tensor(1, dtype=tf.int32)) - self.assertContainsSubset(["validation_loss", "acc"], results["bar"].keys()) - self.assertContainsSubset(["validation_loss", "acc"], results["foo"].keys()) - self.assertEqual(results["bar"]["validation_loss"], 0.0) - self.assertEqual(results["foo"]["validation_loss"], 1.0) - - @combinations.generate(all_strategy_combinations()) - def test_multitask_evaluator_numpy_metrics(self, distribution): - with distribution.scope(): - tasks = [ - MockTask(params=cfg.TaskConfig(), name="bar"), - MockTask(params=cfg.TaskConfig(), name="foo") - ] - test_multitask = multitask.MultiTask(tasks=tasks) - model = MockModel() - test_evaluator = evaluator.MultiTaskEvaluator( - task=test_multitask, model=model) - results = test_evaluator.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertEqual(results["bar"]["counter"], - 5. * distribution.num_replicas_in_sync) - self.assertEqual(results["foo"]["counter"], - 5. * distribution.num_replicas_in_sync) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/interleaving_trainer.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/interleaving_trainer.py deleted file mode 100644 index adfdfa409f0aa82884be02d1627a7cb9a86e740e..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/interleaving_trainer.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
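Condensed from the evaluator tests above, a hedged sketch of driving `MultiTaskEvaluator` directly; `MockTask` and `MockModel` are the test doubles defined in the test file above, not library classes:

```python
import tensorflow as tf

from official.core import config_definitions as cfg
from official.modeling.multitask import evaluator as evaluator_lib
from official.modeling.multitask import multitask

# MockTask/MockModel stand in for real tasks, as in the tests above.
test_multitask = multitask.MultiTask(tasks=[
    MockTask(params=cfg.TaskConfig(), name="bar"),
    MockTask(params=cfg.TaskConfig(), name="foo"),
])
test_evaluator = evaluator_lib.MultiTaskEvaluator(
    task=test_multitask, model=MockModel())
# One step per task unless task_eval_steps overrides it; results["foo"]
# and results["bar"] carry "validation_loss" and the task metrics.
results = test_evaluator.evaluate(tf.convert_to_tensor(1, dtype=tf.int32))
```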
-# - -"""Multitask trainer that interleaves each task's train step.""" -from typing import Union -import gin -import orbit -import tensorflow as tf -from official.modeling.multitask import base_model -from official.modeling.multitask import base_trainer -from official.modeling.multitask import multitask -from official.modeling.multitask import task_sampler as sampler - - -@gin.configurable -class MultiTaskInterleavingTrainer(base_trainer.MultiTaskBaseTrainer): - """MultiTask trainer that interleaves task update.""" - - def __init__(self, - multi_task: multitask.MultiTask, - multi_task_model: Union[tf.keras.Model, - base_model.MultiTaskBaseModel], - optimizer: tf.optimizers.Optimizer, - task_sampler: sampler.TaskSampler, - trainer_options=None): - super(MultiTaskInterleavingTrainer, self).__init__( - multi_task=multi_task, - multi_task_model=multi_task_model, - optimizer=optimizer, - trainer_options=trainer_options) - self._task_sampler = task_sampler - - # Build per task train step. - def _get_task_step(task_name, task): - - def step_fn(inputs): - if isinstance(self.multi_task_model, base_model.MultiTaskBaseModel): - task_model = self.multi_task_model.sub_tasks[task_name] - else: - task_model = self.multi_task_model - task_logs = task.train_step( - inputs, - model=task_model, - optimizer=self.optimizer, - metrics=self.training_metrics[task_name]) - self.training_losses[task_name].update_state(task_logs[task.loss]) - - return step_fn - - self._task_train_step_map = { - name: _get_task_step(name, task) - for name, task in self.multi_task.tasks.items() - } - - # TODO(haozhangthu): Add taskwise step counter to train_loop_end for logging - # on TensorBoard. - self._task_step_counters = { - name: orbit.utils.create_global_step() for name in self.multi_task.tasks - } - - def task_step_counter(self, name): - return self._task_step_counters[name] - - def train_step(self, iterator_map): - # Sample one task to train according to a multinomial distribution - rn = tf.random.stateless_uniform(shape=[], seed=(0, self.global_step)) - cumulative_sample_distribution = self._task_sampler.task_cumulative_distribution( - self.global_step) - # Prepend a [0.0] for indexing convenience. - cumulative_sample_distribution = tf.concat( - [tf.constant([0.0], dtype=tf.float32), cumulative_sample_distribution], - axis=0) - - for idx, (name, _) in enumerate(self.multi_task.tasks.items()): - begin = cumulative_sample_distribution[idx] - end = cumulative_sample_distribution[idx + 1] - if rn >= begin and rn < end: - self._strategy.run( - self._task_train_step_map[name], args=(next(iterator_map[name]),)) - self.global_step.assign_add(1) - self.task_step_counter(name).assign_add(1) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/interleaving_trainer_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/interleaving_trainer_test.py deleted file mode 100644 index b3318743d1cecd696c3403a66998fea395856889..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/interleaving_trainer_test.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
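The `train_step` of the interleaving trainer above maps one uniform draw onto a task by locating it between consecutive entries of the cumulative distribution. The same interval test in plain Python, with made-up numbers:

```python
import bisect

# Cumulative distribution for three tasks A, B, C with probabilities
# 0.25, 0.50, 0.25 (illustrative values only).
cumulative = [0.25, 0.75, 1.0]
names = ["A", "B", "C"]

rn = 0.6  # stand-in for the tf.random.stateless_uniform draw
picked = names[bisect.bisect_right(cumulative, rn)]
assert picked == "B"  # because 0.25 <= 0.6 < 0.75
```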
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for multitask.interleaving_trainer.""" -from absl.testing import parameterized -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.modeling.multitask import configs -from official.modeling.multitask import interleaving_trainer -from official.modeling.multitask import multitask -from official.modeling.multitask import task_sampler -from official.modeling.multitask import test_utils - - -def all_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - mode="eager", - ) - - -class InterleavingTrainerTest(tf.test.TestCase, parameterized.TestCase): - - @combinations.generate(all_strategy_combinations()) - def test_multitask_interleaving_trainer(self, distribution): - with distribution.scope(): - tasks = [ - test_utils.MockFooTask(params=test_utils.FooConfig(), name="foo"), - test_utils.MockBarTask(params=test_utils.BarConfig(), name="bar") - ] - test_multitask = multitask.MultiTask(tasks=tasks) - test_optimizer = tf.keras.optimizers.SGD(0.1) - model = test_utils.MockMultiTaskModel() - sampler = task_sampler.UniformTaskSampler( - task_weights=test_multitask.task_weights) - test_trainer = interleaving_trainer.MultiTaskInterleavingTrainer( - multi_task=test_multitask, - multi_task_model=model, - optimizer=test_optimizer, - task_sampler=sampler) - results = test_trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertContainsSubset(["training_loss", "bar_acc"], - results["bar"].keys()) - self.assertContainsSubset(["training_loss", "foo_acc"], - results["foo"].keys()) - - @combinations.generate(all_strategy_combinations()) - def test_trainer_with_configs(self, distribution): - config = configs.MultiTaskConfig( - task_routines=(configs.TaskRoutine( - task_name="foo", - task_config=test_utils.FooConfig(), - task_weight=3.0), - configs.TaskRoutine( - task_name="bar", - task_config=test_utils.BarConfig(), - task_weight=1.0))) - with distribution.scope(): - test_multitask = multitask.MultiTask.from_config(config) - test_optimizer = tf.keras.optimizers.SGD(0.1) - model = test_utils.MockMultiTaskModel() - num_step = 1000 - sampler = task_sampler.AnnealingTaskSampler( - task_weights=test_multitask.task_weights, - steps_per_epoch=num_step/5, 
- total_steps=num_step) - test_trainer = interleaving_trainer.MultiTaskInterleavingTrainer( - multi_task=test_multitask, - multi_task_model=model, - optimizer=test_optimizer, - task_sampler=sampler) - results = test_trainer.train(tf.convert_to_tensor(num_step, dtype=tf.int32)) - self.assertContainsSubset(["training_loss", "bar_acc"], - results["bar"].keys()) - self.assertContainsSubset(["training_loss", "foo_acc"], - results["foo"].keys()) - self.assertEqual(test_trainer.global_step.numpy(), num_step) - bar_sampled_step = test_trainer.task_step_counter("bar").numpy() - foo_sampled_step = test_trainer.task_step_counter("foo").numpy() - self.assertEqual(bar_sampled_step + foo_sampled_step, num_step) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/multitask.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/multitask.py deleted file mode 100644 index ef8e720c6c60f752ae774b47a63b344a70a49b6a..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/multitask.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Experimental MultiTask base class for multi-task training/evaluation.""" -import abc -from typing import Dict, List, Optional, Text, Union - -import tensorflow as tf -from official.core import base_task -from official.core import config_definitions -from official.core import task_factory -from official.modeling import optimization -from official.modeling.multitask import base_model -from official.modeling.multitask import configs - -OptimizationConfig = optimization.OptimizationConfig -RuntimeConfig = config_definitions.RuntimeConfig - - -class MultiTask(tf.Module, metaclass=abc.ABCMeta): - """A multi-task class to manage multiple tasks.""" - - def __init__(self, - tasks: Union[Dict[Text, base_task.Task], List[base_task.Task]], - task_weights: Optional[Dict[str, Union[float, int]]] = None, - task_eval_steps: Optional[Dict[str, int]] = None, - name: Optional[str] = None): - """MultiTask initialization. - - Args: - tasks: a list or a flat dict of Task. 
-      task_weights: a dict of (task, task weight); a task weight can be applied
-        directly during loss summation in a joint backward step, or used to
-        sample a task among interleaved backward steps.
-      task_eval_steps: a dict of (task, eval steps).
-      name: the instance name of a MultiTask object.
-    """
-    super().__init__(name=name)
-    if isinstance(tasks, list):
-      self._tasks = {}
-      for task in tasks:
-        if task.name in self._tasks:
-          raise ValueError("Duplicated tasks found, task.name is %s" %
-                           task.name)
-        self._tasks[task.name] = task
-    elif isinstance(tasks, dict):
-      self._tasks = tasks
-    else:
-      raise ValueError("The tasks argument has an invalid type: %s" %
-                       type(tasks))
-    self._task_eval_steps = task_eval_steps or {}
-    self._task_eval_steps = dict([
-        (name, self._task_eval_steps.get(name, None)) for name in self.tasks
-    ])
-    self._task_weights = task_weights or {}
-    self._task_weights = dict([
-        (name, self._task_weights.get(name, 1.0)) for name in self.tasks
-    ])
-
-  @classmethod
-  def from_config(cls, config: configs.MultiTaskConfig, logging_dir=None):
-    tasks = {}
-    task_eval_steps = {}
-    task_weights = {}
-    for task_routine in config.task_routines:
-      task_name = task_routine.task_name
-      tasks[task_name] = task_factory.get_task(
-          task_routine.task_config, logging_dir=logging_dir)
-      task_eval_steps[task_name] = task_routine.eval_steps
-      task_weights[task_name] = task_routine.task_weight
-    return cls(
-        tasks, task_eval_steps=task_eval_steps, task_weights=task_weights)
-
-  @property
-  def tasks(self):
-    return self._tasks
-
-  def task_eval_steps(self, task_name):
-    return self._task_eval_steps[task_name]
-
-  def task_weight(self, task_name):
-    return self._task_weights[task_name]
-
-  @property
-  def task_weights(self):
-    return self._task_weights
-
-  @classmethod
-  def create_optimizer(cls,
-                       optimizer_config: OptimizationConfig,
-                       runtime_config: Optional[RuntimeConfig] = None):
-    return base_task.Task.create_optimizer(
-        optimizer_config=optimizer_config, runtime_config=runtime_config)
-
-  def joint_train_step(self, task_inputs,
-                       multi_task_model: base_model.MultiTaskBaseModel,
-                       optimizer: tf.keras.optimizers.Optimizer, task_metrics):
-    """The joint train step.
-
-    Args:
-      task_inputs: a dictionary of task names and per-task features.
-      multi_task_model: a MultiTaskBaseModel instance.
-      optimizer: a tf.optimizers.Optimizer.
-      task_metrics: a dictionary of task names and per-task metrics.
-
-    Returns:
-      A dictionary of losses, including per-task losses and their weighted sum.
-    """
-    losses = {}
-    with tf.GradientTape() as tape:
-      total_loss = 0.0
-      for name, model in multi_task_model.sub_tasks.items():
-        inputs = task_inputs[name]
-        if isinstance(inputs, tuple) and len(inputs) == 2:
-          features, labels = inputs
-        elif isinstance(inputs, dict):
-          features, labels = inputs, inputs
-        else:
-          raise ValueError("The iterator output is neither a tuple nor a "
-                           "dictionary; such outputs are not supported.")
-        outputs = model(features, training=True)
-        task_loss = self.tasks[name].build_losses(labels, outputs)
-        task_weight = self.task_weight(name)
-        total_loss += task_weight * task_loss
-        losses[name] = task_loss
-        self.tasks[name].process_metrics(task_metrics[name], labels, outputs)
-
-      # Scales loss as the default gradients allreduce performs sum inside
-      # the optimizer.
-      scaled_loss = total_loss / tf.distribute.get_strategy(
-      ).num_replicas_in_sync
-    tvars = multi_task_model.trainable_variables
-    grads = tape.gradient(scaled_loss, tvars)
-    optimizer.apply_gradients(list(zip(grads, tvars)))
-    losses["total_loss"] = total_loss
-    return losses
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler.py
deleted file mode 100644
index e37b7a55481921f348657a661099a9973b17cf9e..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Utils to sample tasks for interleaved optimization."""
-import abc
-from typing import Union, Dict, Text
-import tensorflow as tf
-
-from official.modeling.multitask import configs
-
-
-class TaskSampler(tf.Module, metaclass=abc.ABCMeta):
-  """An abstract class defining task sampling API for interleaving trainer."""
-
-  def __init__(self, task_weights: Dict[Text, Union[float, int]]):
-    self._task_weights = task_weights
-
-  @abc.abstractmethod
-  def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor:
-    """Compute cumulative distribution to sample tasks.
-
-    It calculates the cumulative distribution of the multinomial task
-    distribution from which tasks are sampled.
-
-    Args:
-      global_step: A tensor indicating current progress of training.
-
-    Returns:
-      A float tensor with shape (num_tasks,) that represents the cumulative
-      sampling distribution.
- """ - pass - - -class UniformTaskSampler(TaskSampler): - """Sample all tasks uniformly.""" - - def __init__(self, task_weights: Dict[Text, Union[float, int]]): - super(UniformTaskSampler, self).__init__(task_weights=task_weights) - self._uniform_cumulative = tf.math.cumsum( - tf.constant( - [1.0 / len(self._task_weights)] * len(self._task_weights), - dtype=tf.float32)) - - def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor: - del global_step - return self._uniform_cumulative - - -class ProportionalTaskSampler(TaskSampler): - """Sample tasks proportional to task weights.""" - - def __init__(self, - task_weights: Dict[Text, Union[float, int]], - alpha: float = 1.0): - super(ProportionalTaskSampler, self).__init__(task_weights=task_weights) - self._alpha = tf.cast(alpha, dtype=tf.float32) - task_weight_dict_ordered_list = tf.constant( - [weight for _, weight in self._task_weights.items()], dtype=tf.float32) - task_sizes = tf.math.pow(task_weight_dict_ordered_list, self._alpha) - task_distribution = task_sizes / tf.reduce_sum(task_sizes) - self._porportional_cumulative = tf.math.cumsum(task_distribution) - - def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor: - del global_step - return self._porportional_cumulative - - -class AnnealingTaskSampler(TaskSampler): - """Sample tasks according to task weights as well as training progress.""" - - def __init__(self, - task_weights: Dict[Text, Union[float, int]], - steps_per_epoch: int, - total_steps: int): - super(AnnealingTaskSampler, self).__init__(task_weights=task_weights) - self._steps_per_epoch = tf.cast(steps_per_epoch, dtype=tf.float32) - self._total_epochs = tf.cast( - total_steps / self._steps_per_epoch, dtype=tf.float32) - - def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor: - cur_epoch = tf.math.floor( - tf.cast(global_step, dtype=tf.float32) / self._steps_per_epoch) - alpha = 1.0 - 0.8 * (cur_epoch - 1) / (self._total_epochs - 1 + 1e-10) - task_weight_dict_ordered_list = [ - weight for _, weight in self._task_weights.items() - ] - task_sizes = tf.math.pow( - tf.constant(task_weight_dict_ordered_list, dtype=tf.float32), - tf.cast(alpha, dtype=tf.float32)) - dynamic_task_distribution = task_sizes / tf.reduce_sum(task_sizes) - return tf.math.cumsum(dynamic_task_distribution) - - -def get_task_sampler(config: configs.TaskSamplingConfig, - task_weights: Dict[Text, float]) -> TaskSampler: - """Utils to create task sampler with configuration and task weights.""" - oneof_config = config.get() - if config.type == 'uniform': - return UniformTaskSampler(task_weights=task_weights) - elif config.type == 'proportional': - return ProportionalTaskSampler( - task_weights=task_weights, alpha=oneof_config.alpha) - elif config.type == 'annealing': - return AnnealingTaskSampler( - task_weights=task_weights, - steps_per_epoch=oneof_config.steps_per_epoch, - total_steps=oneof_config.total_steps) - else: - raise RuntimeError('Task sampler type not supported') diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler_test.py deleted file mode 100644 index afd17cbdee86f1470a19c3f345b5e03cf36cef40..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler_test.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. 
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler_test.py
deleted file mode 100644
index afd17cbdee86f1470a19c3f345b5e03cf36cef40..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/task_sampler_test.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Tests for multitask.task_sampler."""
-import tensorflow as tf
-
-from official.modeling.multitask import configs
-from official.modeling.multitask import task_sampler as sampler
-
-
-class TaskSamplerTest(tf.test.TestCase):
-
-  def setUp(self):
-    super(TaskSamplerTest, self).setUp()
-    self._task_weights = {'A': 1.0, 'B': 2.0, 'C': 3.0}
-
-  def test_uniform_sample_distribution(self):
-    uniform_sampler = sampler.get_task_sampler(
-        configs.TaskSamplingConfig(type='uniform'), self._task_weights)
-    for step in range(5):
-      cumulative_distribution = uniform_sampler.task_cumulative_distribution(
-          tf.constant(step, dtype=tf.int64))
-      self.assertAllClose([0.333333, 0.666666, 1.0],
-                          cumulative_distribution.numpy())
-
-  def test_proportional_sample_distribution(self):
-    prop_sampler = sampler.get_task_sampler(
-        configs.TaskSamplingConfig(
-            type='proportional',
-            proportional=configs.ProportionalSampleConfig(alpha=2.0)),
-        self._task_weights)
-    # CumulativeOf(Normalize([1.0^2, 2.0^2, 3.0^2]))
-    for step in range(5):
-      cumulative_distribution = prop_sampler.task_cumulative_distribution(
-          tf.constant(step, dtype=tf.int64))
-      self.assertAllClose([0.07142857, 0.35714286, 1.0],
-                          cumulative_distribution.numpy())
-
-  def test_annealing_sample_distribution(self):
-    num_epoch = 3
-    step_per_epoch = 6
-    anneal_sampler = sampler.get_task_sampler(
-        configs.TaskSamplingConfig(
-            type='annealing',
-            annealing=configs.AnnealingSampleConfig(
-                steps_per_epoch=step_per_epoch,
-                total_steps=step_per_epoch * num_epoch)), self._task_weights)
-
-    global_step = tf.Variable(
-        0, dtype=tf.int64, name='global_step', trainable=False)
-    expected_cumulative_epochs = [[0.12056106, 0.4387236, 1.0],
-                                  [0.16666667, 0.5, 1.0],
-                                  [0.22477472, 0.5654695, 1.0]]
-    for epoch in range(num_epoch):
-      for _ in range(step_per_epoch):
-        cumulative_distribution = anneal_sampler.task_cumulative_distribution(
-            tf.constant(global_step, dtype=tf.int64))
-        global_step.assign_add(1)
-        self.assertAllClose(expected_cumulative_epochs[epoch],
-                            cumulative_distribution.numpy())
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/test_utils.py
b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/test_utils.py deleted file mode 100644 index 5f4de8b8d2ed373e55170587b83414a61dde1daf..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/test_utils.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Testing utils for mock models and tasks.""" -from typing import Dict, Text -import tensorflow as tf -from official.core import base_task -from official.core import config_definitions as cfg -from official.core import task_factory -from official.modeling.multitask import base_model - - -class MockFooModel(tf.keras.Model): - """A mock model can consume 'foo' and 'bar' inputs.""" - - def __init__(self, shared_layer, *args, **kwargs): - super().__init__(*args, **kwargs) - self._share_layer = shared_layer - self._foo_specific_layer = tf.keras.layers.Dense(1) - - def call(self, inputs): - self.add_loss(tf.zeros((1,), dtype=tf.float32)) - if "foo" in inputs: - input_tensor = inputs["foo"] - else: - input_tensor = inputs["bar"] - return self._foo_specific_layer(self._share_layer(input_tensor)) - - -class MockBarModel(tf.keras.Model): - - def __init__(self, shared_layer, *args, **kwargs): - super().__init__(*args, **kwargs) - self._share_layer = shared_layer - self._bar_specific_layer = tf.keras.layers.Dense(1) - - def call(self, inputs): - self.add_loss(tf.zeros((2,), dtype=tf.float32)) - return self._bar_specific_layer(self._share_layer(inputs["bar"])) - - -class MockMultiTaskModel(base_model.MultiTaskBaseModel): - - def __init__(self, *args, **kwargs): - self._shared_dense = tf.keras.layers.Dense(1) - super().__init__(*args, **kwargs) - - def _instantiate_sub_tasks(self) -> Dict[Text, tf.keras.Model]: - return { - "foo": MockFooModel(self._shared_dense), - "bar": MockBarModel(self._shared_dense) - } - - -def mock_data(feature_name): - """Mock dataset function.""" - - def _generate_data(_): - x = tf.zeros(shape=(2,), dtype=tf.float32) - label = tf.zeros([1], dtype=tf.int32) - return {feature_name: x}, label - - dataset = tf.data.Dataset.range(1) - dataset = dataset.repeat() - dataset = dataset.map( - _generate_data, 
num_parallel_calls=tf.data.experimental.AUTOTUNE) - return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True) - - -class FooConfig(cfg.TaskConfig): - pass - - -class BarConfig(cfg.TaskConfig): - pass - - -@task_factory.register_task_cls(FooConfig) -class MockFooTask(base_task.Task): - """Mock foo task object for testing.""" - - def build_metrics(self, training: bool = True): - del training - return [tf.keras.metrics.Accuracy(name="foo_acc")] - - def build_inputs(self, params): - return mock_data("foo") - - def build_model(self) -> tf.keras.Model: - return MockFooModel(shared_layer=tf.keras.layers.Dense(1)) - - def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: - loss = tf.keras.losses.mean_squared_error(labels, model_outputs) - if aux_losses: - loss += tf.add_n(aux_losses) - return tf.reduce_mean(loss) - - -@task_factory.register_task_cls(BarConfig) -class MockBarTask(base_task.Task): - """Mock bar task object for testing.""" - - def build_metrics(self, training: bool = True): - del training - return [tf.keras.metrics.Accuracy(name="bar_acc")] - - def build_inputs(self, params): - return mock_data("bar") - - def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: - loss = tf.keras.losses.mean_squared_error(labels, model_outputs) - if aux_losses: - loss += tf.add_n(aux_losses) - return tf.reduce_mean(loss) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/train_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/train_lib.py deleted file mode 100644 index 6c3273fb3145ff9786e1eb65aafeb50e45c7ed24..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/train_lib.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
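The registration pattern in `test_utils` above binds a config class to a task class so that `task_factory.get_task(task_config)` can construct the right task from a `TaskRoutine`. A minimal hypothetical registration following the same shape (`MyTaskConfig`/`MyTask` are illustrative names, not from the source):

```python
import tensorflow as tf

from official.core import base_task
from official.core import config_definitions as cfg
from official.core import task_factory


class MyTaskConfig(cfg.TaskConfig):  # hypothetical config class
  pass


@task_factory.register_task_cls(MyTaskConfig)
class MyTask(base_task.Task):
  """Constructed via task_factory.get_task(MyTaskConfig(), ...)."""

  def build_model(self) -> tf.keras.Model:
    return tf.keras.Sequential([tf.keras.layers.Dense(1)])
```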
-# - -"""Multitask training driver library.""" -# pytype: disable=attribute-error -import os -from typing import Optional -from absl import logging -import orbit -import tensorflow as tf -from official.core import base_task -from official.core import base_trainer as core_lib -from official.core import train_utils -from official.modeling.multitask import base_model -from official.modeling.multitask import base_trainer -from official.modeling.multitask import configs -from official.modeling.multitask import evaluator as evaluator_lib -from official.modeling.multitask import interleaving_trainer -from official.modeling.multitask import multitask -from official.modeling.multitask import task_sampler - -TRAINERS = { - 'interleaving': interleaving_trainer.MultiTaskInterleavingTrainer, - 'joint': base_trainer.MultiTaskBaseTrainer -} - - -def run_experiment(*, distribution_strategy: tf.distribute.Strategy, - task: multitask.MultiTask, - model: base_model.MultiTaskBaseModel, mode: str, - params: configs.MultiTaskExperimentConfig, - model_dir: str) -> base_model.MultiTaskBaseModel: - """Runs train/eval configured by the experiment params. - - Args: - distribution_strategy: A distribution distribution_strategy. - task: A MultiTaskTask instance. - model: A MultiTaskBaseModel instance. - mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval' - or 'continuous_eval'. - params: ExperimentConfig instance. - model_dir: A 'str', a path to store model checkpoints and summaries. - - Returns: - model: `base_model.MultiTaskBaseModel` instance. - """ - - is_training = 'train' in mode - is_eval = 'eval' in mode - with distribution_strategy.scope(): - optimizer = task.create_optimizer(params.trainer.optimizer_config, - params.runtime) - kwargs = dict(multi_task=task, multi_task_model=model, optimizer=optimizer) - if params.trainer.trainer_type == 'interleaving': - sampler = task_sampler.get_task_sampler(params.trainer.task_sampler, - task.task_weights) - kwargs.update(dict(task_sampler=sampler)) - trainer = TRAINERS[params.trainer.trainer_type]( - **kwargs) if is_training else None - if is_eval: - evaluator = evaluator_lib.MultiTaskEvaluator( - task=task, - model=model, - global_step=trainer.global_step if is_training else None) - else: - evaluator = None - - if trainer: - checkpoint = trainer.checkpoint - global_step = trainer.global_step - else: - checkpoint = evaluator.checkpoint - global_step = evaluator.global_step - - # TODO(hongkuny,haozhangthu): Revisit initialization method. 
- checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - directory=model_dir, - max_to_keep=params.trainer.max_to_keep, - step_counter=global_step, - checkpoint_interval=params.trainer.checkpoint_interval, - init_fn=model.initialize) - - controller = orbit.Controller( - strategy=distribution_strategy, - trainer=trainer, - evaluator=evaluator, - global_step=global_step, - steps_per_loop=params.trainer.steps_per_loop, - checkpoint_manager=checkpoint_manager, - summary_dir=os.path.join(model_dir, 'train'), - eval_summary_dir=os.path.join(model_dir, 'validation'), - summary_interval=params.trainer.summary_interval) - - logging.info('Starts to execute mode: %s', mode) - with distribution_strategy.scope(): - if mode == 'train': - controller.train(steps=params.trainer.train_steps) - elif mode == 'train_and_eval': - controller.train_and_evaluate( - train_steps=params.trainer.train_steps, - eval_steps=params.trainer.validation_steps, - eval_interval=params.trainer.validation_interval) - elif mode == 'eval': - controller.evaluate(steps=params.trainer.validation_steps) - elif mode == 'continuous_eval': - - def timeout_fn(): - if evaluator.global_step.numpy() >= params.trainer.train_steps: - return True - return False - - controller.evaluate_continuously( - steps=params.trainer.validation_steps, - timeout=params.trainer.continuous_eval_timeout, - timeout_fn=timeout_fn) - else: - raise NotImplementedError('The mode is not implemented: %s' % mode) - - return model - - -def run_experiment_with_multitask_eval( - *, - distribution_strategy: tf.distribute.Strategy, - train_task: base_task.Task, - eval_tasks: multitask.MultiTask, - mode: str, - params: configs.MultiEvalExperimentConfig, - model_dir: str, - run_post_eval: bool = False, - save_summary: bool = True, - trainer: Optional[core_lib.Trainer] = None) -> tf.keras.Model: - """Runs train/eval configured by the experiment params. - - Args: - distribution_strategy: A distribution strategy. - train_task: A base_task.Task instance. - eval_tasks: A multitask.MultiTask with evaluation tasks. - mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval' - or 'continuous_eval'. - params: A MultiEvalExperimentConfig instance. - model_dir: A 'str', a path to store model checkpoints and summaries. - run_post_eval: Whether to run one evaluation after training; if True, the - metrics logs are returned alongside the model. - save_summary: Whether to save train and validation summaries. - trainer: The core_lib.Trainer instance. It should be created within the - strategy.scope(). If not provided, an instance will be created by default - if `mode` contains 'train'. - - Returns: - A tuple of the `tf.keras.Model` instance and a dict of post-training - evaluation metrics (empty unless `run_post_eval` is True).
- """ - - is_training = 'train' in mode - is_eval = 'eval' in mode - with distribution_strategy.scope(): - if is_training: - trainer = trainer or core_lib.Trainer( - config=params, - task=train_task, - model=train_task.build_model(), - optimizer=train_task.create_optimizer( - params.trainer.optimizer_config, params.runtime), - train=True, - evaluate=False) - else: - trainer = None - model = trainer.model if trainer else train_task.build_model() - - if is_eval: - evaluator = evaluator_lib.MultiTaskEvaluator( - task=eval_tasks, - model=model, - global_step=trainer.global_step if is_training else None, - checkpoint_exporter=train_utils.maybe_create_best_ckpt_exporter( - params, model_dir)) - else: - evaluator = None - - if trainer: - checkpoint = trainer.checkpoint - global_step = trainer.global_step - else: - checkpoint = evaluator.checkpoint - global_step = evaluator.global_step - - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - directory=model_dir, - max_to_keep=params.trainer.max_to_keep, - step_counter=global_step, - checkpoint_interval=params.trainer.checkpoint_interval, - init_fn=trainer.initialize if trainer else None) - - controller = orbit.Controller( - strategy=distribution_strategy, - trainer=trainer, - evaluator=evaluator, - global_step=global_step, - steps_per_loop=params.trainer.steps_per_loop, - checkpoint_manager=checkpoint_manager, - summary_dir=os.path.join(model_dir, 'train') if save_summary else None, - eval_summary_dir=os.path.join(model_dir, 'validation') if - (save_summary) else None, - summary_interval=params.trainer.summary_interval if - (save_summary) else None) - - logging.info('Starts to execute mode: %s', mode) - with distribution_strategy.scope(): - if mode == 'train': - controller.train(steps=params.trainer.train_steps) - elif mode == 'train_and_eval': - controller.train_and_evaluate( - train_steps=params.trainer.train_steps, - eval_steps=params.trainer.validation_steps, - eval_interval=params.trainer.validation_interval) - elif mode == 'eval': - controller.evaluate(steps=params.trainer.validation_steps) - elif mode == 'continuous_eval': - - def timeout_fn(): - if evaluator.global_step.numpy() >= params.trainer.train_steps: - return True - return False - - controller.evaluate_continuously( - steps=params.trainer.validation_steps, - timeout=params.trainer.continuous_eval_timeout, - timeout_fn=timeout_fn) - else: - raise NotImplementedError('The mode is not implemented: %s' % mode) - - if run_post_eval: - return model, evaluator.evaluate( - tf.convert_to_tensor(params.trainer.validation_steps)) - else: - return model, {} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/train_lib_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/train_lib_test.py deleted file mode 100644 index e145e95b2494a6f77703f0c91f555746da265e20..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/multitask/train_lib_test.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for multitask.train_lib.""" -from absl.testing import parameterized -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.core import task_factory -from official.modeling.hyperparams import params_dict -from official.modeling.multitask import configs -from official.modeling.multitask import multitask -from official.modeling.multitask import test_utils -from official.modeling.multitask import train_lib - - -class TrainLibTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super().setUp() - self._test_config = { - 'trainer': { - 'checkpoint_interval': 10, - 'steps_per_loop': 10, - 'summary_interval': 10, - 'train_steps': 10, - 'validation_steps': 5, - 'validation_interval': 10, - 'continuous_eval_timeout': 1, - 'optimizer_config': { - 'optimizer': { - 'type': 'sgd', - }, - 'learning_rate': { - 'type': 'constant' - } - } - }, - } - - @combinations.generate( - combinations.combine( - distribution_strategy=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - mode='eager', - flag_mode=['train', 'eval', 'train_and_eval'])) - def test_end_to_end(self, distribution_strategy, flag_mode): - model_dir = self.get_temp_dir() - experiment_config = configs.MultiTaskExperimentConfig( - task=configs.MultiTaskConfig( - task_routines=( - configs.TaskRoutine( - task_name='foo', - task_config=test_utils.FooConfig()), - configs.TaskRoutine( - task_name='bar', task_config=test_utils.BarConfig())))) - experiment_config = params_dict.override_params_dict( - experiment_config, self._test_config, is_strict=False) - with distribution_strategy.scope(): - test_multitask = multitask.MultiTask.from_config(experiment_config.task) - model = test_utils.MockMultiTaskModel() - train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=test_multitask, - model=model, - mode=flag_mode, - params=experiment_config, - model_dir=model_dir) - - @combinations.generate( - combinations.combine( - distribution_strategy=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - mode='eager', - flag_mode=['train', 'eval', 'train_and_eval'])) - def test_end_to_end_multi_eval(self, distribution_strategy, flag_mode): - model_dir = 
self.get_temp_dir() - experiment_config = configs.MultiEvalExperimentConfig( - task=test_utils.FooConfig(), - eval_tasks=configs.MultiTaskConfig( - task_routines=( - configs.TaskRoutine( - task_name='foo', - task_config=test_utils.FooConfig()), - configs.TaskRoutine( - task_name='bar', task_config=test_utils.BarConfig())))) - experiment_config = params_dict.override_params_dict( - experiment_config, self._test_config, is_strict=False) - with distribution_strategy.scope(): - train_task = task_factory.get_task(experiment_config.task) - eval_tasks = multitask.MultiTask.from_config(experiment_config.eval_tasks) - train_lib.run_experiment_with_multitask_eval( - distribution_strategy=distribution_strategy, - train_task=train_task, - eval_tasks=eval_tasks, - mode=flag_mode, - params=experiment_config, - model_dir=model_dir) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/__init__.py deleted file mode 100644 index 8b71d02a775d52a95fbfc00b3f50a53e1a0e9dbe..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Optimization package definition.""" - -# pylint: disable=wildcard-import -from official.modeling.optimization.configs.learning_rate_config import * -from official.modeling.optimization.configs.optimization_config import * -from official.modeling.optimization.configs.optimizer_config import * -from official.modeling.optimization.ema_optimizer import ExponentialMovingAverage -from official.modeling.optimization.lr_schedule import * -from official.modeling.optimization.optimizer_factory import OptimizerFactory diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/__init__.py deleted file mode 100644 index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/learning_rate_config.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/learning_rate_config.py deleted file mode 100644 index 52e152e561ecfb4be2b8609be32b097ec51396e8..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/learning_rate_config.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
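Because of the wildcard re-exports in the package `__init__.py` above, downstream code can reach the whole optimization API from the package root. A hedged sketch of typical usage; `OptimizerFactory.build_learning_rate`/`build_optimizer` come from the factory module, which is not shown in this diff, so treat the exact calls as assumptions:

```python
from official.modeling import optimization

opt_config = optimization.OptimizationConfig({
    'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
    'learning_rate': {'type': 'constant', 'constant': {'learning_rate': 0.1}},
})
factory = optimization.OptimizerFactory(opt_config)
lr = factory.build_learning_rate()       # constant 0.1 in this config
optimizer = factory.build_optimizer(lr)  # Keras SGD with momentum 0.9
```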
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Dataclasses for learning rate schedule config.""" -from typing import List, Optional - -import dataclasses -from official.modeling.hyperparams import base_config - - -@dataclasses.dataclass -class ConstantLrConfig(base_config.Config): - """Configuration for constant learning rate. - - This class is a container for the constant learning rate decay configs. - - Attributes: - name: The name of the learning rate schedule. Defaults to Constant. - learning_rate: A float. The learning rate. Defaults to 0.1. - """ - name: str = 'Constant' - learning_rate: float = 0.1 - - -@dataclasses.dataclass -class StepwiseLrConfig(base_config.Config): - """Configuration for stepwise learning rate decay. - - This class is a container for the piecewise constant learning rate scheduling - configs. It will configure an instance of the PiecewiseConstantDecay keras - learning rate schedule. - - An example (from keras docs): use a learning rate that's 1.0 for the first - 100001 steps, 0.5 for the next 10000 steps, and 0.1 for any additional steps. - ```python - boundaries: [100000, 110000] - values: [1.0, 0.5, 0.1] - ``` - - Attributes: - name: The name of the learning rate schedule. Defaults to - PiecewiseConstantDecay. - boundaries: A list of ints with strictly increasing entries. Defaults to - None. - values: A list of floats that specifies the values for the intervals defined - by `boundaries`. It should have one more element than `boundaries`. - The learning rate is computed as follows: [0, boundaries[0]] -> - values[0]; [boundaries[0], boundaries[1]] -> values[1]; ...; - [boundaries[n-1], boundaries[n]] -> values[n]; [boundaries[n], - end] -> values[n+1]. Defaults to None. - """ - name: str = 'PiecewiseConstantDecay' - boundaries: Optional[List[int]] = None - values: Optional[List[float]] = None - - -@dataclasses.dataclass -class ExponentialLrConfig(base_config.Config): - """Configuration for exponential learning rate decay. - - This class is a container for the exponential learning rate decay configs. - - Attributes: - name: The name of the learning rate schedule. Defaults to ExponentialDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to None. - decay_steps: A positive integer that is used for decay computation. Defaults - to None. - decay_rate: A float. Defaults to None. - staircase: A boolean, if true, the learning rate is decreased at discrete - intervals. Defaults to False. - """ - name: str = 'ExponentialDecay' - initial_learning_rate: Optional[float] = None - decay_steps: Optional[int] = None - decay_rate: Optional[float] = None - staircase: Optional[bool] = None - -
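To make the stepwise schedule concrete, a small sketch translating a StepwiseLrConfig into the Keras schedule its docstring names (constructing the config with field keyword arguments, the same way Config dataclasses are constructed elsewhere in this diff):

```python
import tensorflow as tf

cfg = StepwiseLrConfig(boundaries=[100000, 110000], values=[1.0, 0.5, 0.1])
schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=cfg.boundaries, values=cfg.values)
print(schedule(50000).numpy())   # 1.0: first interval
print(schedule(105000).numpy())  # 0.5: second interval
print(schedule(200000).numpy())  # 0.1: after the last boundary
```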
-@dataclasses.dataclass -class PolynomialLrConfig(base_config.Config): - """Configuration for polynomial learning rate decay. - - This class is a container for the polynomial learning rate decay configs. - - Attributes: - name: The name of the learning rate schedule. Defaults to PolynomialDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to None. - decay_steps: A positive integer that is used for decay computation. Defaults - to None. - end_learning_rate: A float. The minimal end learning rate. - power: A float. The power of the polynomial. Defaults to linear, 1.0. - cycle: A boolean, whether or not it should cycle beyond decay_steps. - Defaults to False. - """ - name: str = 'PolynomialDecay' - initial_learning_rate: Optional[float] = None - decay_steps: Optional[int] = None - end_learning_rate: float = 0.0001 - power: float = 1.0 - cycle: bool = False - - -@dataclasses.dataclass -class CosineLrConfig(base_config.Config): - """Configuration for Cosine learning rate decay. - - This class is a container for the cosine learning rate decay configs, - tf.keras.experimental.CosineDecay. - - Attributes: - name: The name of the learning rate schedule. Defaults to CosineDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to None. - decay_steps: A positive integer that is used for decay computation. Defaults - to None. - alpha: A float. Minimum learning rate value as a fraction of - initial_learning_rate. - """ - name: str = 'CosineDecay' - initial_learning_rate: Optional[float] = None - decay_steps: Optional[int] = None - alpha: float = 0.0 - - -@dataclasses.dataclass -class DirectPowerLrConfig(base_config.Config): - """Configuration for DirectPower learning rate decay. - - This class configures a schedule that follows lr * (step)^power. - - Attributes: - name: The name of the learning rate schedule. Defaults to DirectPowerDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to None. - power: A float. Defaults to -0.5, for sqrt decay. - """ - name: str = 'DirectPowerDecay' - initial_learning_rate: Optional[float] = None - power: float = -0.5 - - -@dataclasses.dataclass -class PowerAndLinearDecayLrConfig(base_config.Config): - """Configuration for power-and-linear learning rate decay. - - The schedule has the following behavior. - Let offset_step = step - offset. - 1) If offset_step < 0, the actual learning rate equals initial_learning_rate. - 2) If offset_step <= total_decay_steps * (1 - linear_decay_fraction), the - actual learning rate equals lr * offset_step^power. - 3) If total_decay_steps * (1 - linear_decay_fraction) <= offset_step < - total_decay_steps, the actual learning rate equals lr * offset_step^power * - (total_decay_steps - offset_step) / (total_decay_steps * - linear_decay_fraction). - 4) If offset_step >= total_decay_steps, the actual learning rate equals zero. - - Attributes: - name: The name of the learning rate schedule. Defaults to - PowerAndLinearDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to None. - total_decay_steps: An int. The total number of steps for power + linear - decay. Defaults to None. - power: A float. The order of the polynomial. Defaults to -0.5, for sqrt - decay. - linear_decay_fraction: A float. In the last `linear_decay_fraction * - total_decay_steps` steps, the learning rate is additionally multiplied by - a linear decay. Defaults to 0.1. - offset: An int. The offset applied to steps. Defaults to 0. - """ - name: str = 'PowerAndLinearDecay' - initial_learning_rate: Optional[float] = None - total_decay_steps: Optional[int] = None - power: float = -0.5 - linear_decay_fraction: float = 0.1 - offset: int = 0 - -
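Restating the four cases above in plain Python makes the shape of the curve easier to check; this is a sketch for intuition only, not the library implementation (which builds a tf.keras LearningRateSchedule):

```python
def power_and_linear_lr(step, lr=1.0, total_decay_steps=10000,
                        power=-0.5, linear_decay_fraction=0.1, offset=0):
  offset_step = step - offset
  if offset_step < 0:                    # case 1: before the offset
    return lr
  if offset_step >= total_decay_steps:   # case 4: fully decayed
    return 0.0
  value = lr * offset_step**power        # cases 2 and 3 share this factor
  linear_start = total_decay_steps * (1 - linear_decay_fraction)
  if offset_step > linear_start:         # case 3: extra linear ramp to zero
    value *= (total_decay_steps - offset_step) / (
        total_decay_steps * linear_decay_fraction)
  return value

# e.g. sqrt decay over 10k steps (step 0 is degenerate for power < 0,
# which is why this schedule is normally paired with a warmup):
print(power_and_linear_lr(100))   # 0.1
print(power_and_linear_lr(9500))  # sqrt decay times a 0.5 linear factor
```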
- """ - name: str = 'PowerAndLinearDecay' - initial_learning_rate: Optional[float] = None - total_decay_steps: Optional[int] = None - power: float = -0.5 - linear_decay_fraction: float = 0.1 - offset: int = 0 - - -@dataclasses.dataclass -class PowerDecayWithOffsetLrConfig(base_config.Config): - """Configuration for power learning rate decay with step offset. - - Learning rate equals to `pre_offset_learning_rate` if `step` < `offset`. - Otherwise, learning rate equals to lr * (step - offset)^power. - - Attributes: - name: The name of the learning rate schedule. Defaults to - PowerDecayWithOffset. - initial_learning_rate: A float. The initial learning rate. Defaults to None. - power: A float. Defaults to -0.5, for sqrt decay. - offset: An integer. Power decay happens after `offset` steps. - pre_offset_learning_rate: A float. The constant learning rate before - `offset` steps. - """ - name: str = 'PowerDecayWithOffset' - initial_learning_rate: Optional[float] = None - power: float = -0.5 - offset: int = 0 - pre_offset_learning_rate: float = 1.0e6 - - -@dataclasses.dataclass -class LinearWarmupConfig(base_config.Config): - """Configuration for linear warmup schedule config. - - This class is a container for the linear warmup schedule configs. - Warmup_learning_rate is the initial learning rate, the final learning rate of - the warmup period is the learning_rate of the optimizer in use. The learning - rate at each step linearly increased according to the following formula: - warmup_learning_rate = warmup_learning_rate + - step / warmup_steps * (final_learning_rate - warmup_learning_rate). - Using warmup overrides the learning rate schedule by the number of warmup - steps. - - Attributes: - name: The name of warmup schedule. Defaults to linear. - warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0. - warmup_steps: Warmup steps. Defaults to None. - """ - name: str = 'linear' - warmup_learning_rate: float = 0 - warmup_steps: Optional[int] = None - - -@dataclasses.dataclass -class PolynomialWarmupConfig(base_config.Config): - """Configuration for linear warmup schedule config. - - This class is a container for the polynomial warmup schedule configs. - - Attributes: - name: The name of warmup schedule. Defaults to Polynomial. - power: Polynomial power. Defaults to 1. - warmup_steps: Warmup steps. Defaults to None. - """ - name: str = 'polynomial' - power: float = 1 - warmup_steps: Optional[int] = None diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config.py deleted file mode 100644 index 61ec8d7b4254bbe515f68ae48991c820d7e14816..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config.py deleted file mode 100644 index 61ec8d7b4254bbe515f68ae48991c820d7e14816..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Dataclasses for optimization configs. - -This file defines the dataclasses for optimization configs (OptimizationConfig). -It also has two helper functions, get_optimizer_config and get_lr_config, that -retrieve the selected configs from an OptimizationConfig instance. -""" -from typing import Optional - -import dataclasses - -from official.modeling.hyperparams import base_config -from official.modeling.hyperparams import oneof -from official.modeling.optimization.configs import learning_rate_config as lr_cfg -from official.modeling.optimization.configs import optimizer_config as opt_cfg - - -@dataclasses.dataclass -class OptimizerConfig(oneof.OneOfConfig): - """Configuration for optimizer. - - Attributes: - type: 'str', type of optimizer to be used, one of the fields below. - sgd: sgd optimizer config. - adam: adam optimizer config. - adamw: adam with weight decay. - lamb: lamb optimizer. - rmsprop: rmsprop optimizer. - lars: lars optimizer. - adagrad: adagrad optimizer. - """ - type: Optional[str] = None - sgd: opt_cfg.SGDConfig = opt_cfg.SGDConfig() - adam: opt_cfg.AdamConfig = opt_cfg.AdamConfig() - adamw: opt_cfg.AdamWeightDecayConfig = opt_cfg.AdamWeightDecayConfig() - lamb: opt_cfg.LAMBConfig = opt_cfg.LAMBConfig() - rmsprop: opt_cfg.RMSPropConfig = opt_cfg.RMSPropConfig() - lars: opt_cfg.LARSConfig = opt_cfg.LARSConfig() - adagrad: opt_cfg.AdagradConfig = opt_cfg.AdagradConfig() - - -@dataclasses.dataclass -class LrConfig(oneof.OneOfConfig): - """Configuration for lr schedule. - - Attributes: - type: 'str', type of lr schedule to be used, one of the fields below. - constant: constant learning rate config. - stepwise: stepwise learning rate config. - exponential: exponential learning rate config. - polynomial: polynomial learning rate config. - cosine: cosine learning rate config. - power: step^power learning rate config. - power_linear: learning rate config of step^power followed by - step^power*linear. - power_with_offset: power decay with a step offset. - """ - type: Optional[str] = None - constant: lr_cfg.ConstantLrConfig = lr_cfg.ConstantLrConfig() - stepwise: lr_cfg.StepwiseLrConfig = lr_cfg.StepwiseLrConfig() - exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig() - polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig() - cosine: lr_cfg.CosineLrConfig = lr_cfg.CosineLrConfig() - power: lr_cfg.DirectPowerLrConfig = lr_cfg.DirectPowerLrConfig() - power_linear: lr_cfg.PowerAndLinearDecayLrConfig = ( - lr_cfg.PowerAndLinearDecayLrConfig()) - power_with_offset: lr_cfg.PowerDecayWithOffsetLrConfig = ( - lr_cfg.PowerDecayWithOffsetLrConfig()) - - -@dataclasses.dataclass -class WarmupConfig(oneof.OneOfConfig): - """Configuration for warmup schedule.
- - Attributes: - type: 'str', type of warmup schedule to be used, one of the fields below. - linear: linear warmup config. - polynomial: polynomial warmup config. - """ - type: Optional[str] = None - linear: lr_cfg.LinearWarmupConfig = lr_cfg.LinearWarmupConfig() - polynomial: lr_cfg.PolynomialWarmupConfig = lr_cfg.PolynomialWarmupConfig() - - -@dataclasses.dataclass -class OptimizationConfig(base_config.Config): - """Configuration for optimizer and learning rate schedule. - - Attributes: - optimizer: optimizer oneof config. - ema: optional exponential moving average optimizer config, if specified, ema - optimizer will be used. - learning_rate: learning rate oneof config. - warmup: warmup oneof config. - """ - optimizer: OptimizerConfig = OptimizerConfig() - ema: Optional[opt_cfg.EMAConfig] = None - learning_rate: LrConfig = LrConfig() - warmup: WarmupConfig = WarmupConfig() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config_test.py deleted file mode 100644 index a4e31078d1a56eed617129b3e6cfbd6a43df105b..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimization_config_test.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for optimization_config.py.""" - -import tensorflow as tf - -from official.modeling.optimization.configs import learning_rate_config as lr_cfg -from official.modeling.optimization.configs import optimization_config -from official.modeling.optimization.configs import optimizer_config as opt_cfg - - -class OptimizerConfigTest(tf.test.TestCase): - - def test_no_optimizer(self): - optimizer = optimization_config.OptimizationConfig({}).optimizer.get() - self.assertIsNone(optimizer) - - def test_no_lr_schedule(self): - lr = optimization_config.OptimizationConfig({}).learning_rate.get() - self.assertIsNone(lr) - - def test_no_warmup_schedule(self): - warmup = optimization_config.OptimizationConfig({}).warmup.get() - self.assertIsNone(warmup) - - def test_config(self): - opt_config = optimization_config.OptimizationConfig({ - 'optimizer': { - 'type': 'sgd', - 'sgd': {} # default config - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': {} - }, - 'warmup': { - 'type': 'linear' - } - }) - self.assertEqual(opt_config.optimizer.get(), opt_cfg.SGDConfig()) - self.assertEqual(opt_config.learning_rate.get(), - lr_cfg.PolynomialLrConfig()) - self.assertEqual(opt_config.warmup.get(), lr_cfg.LinearWarmupConfig()) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimizer_config.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimizer_config.py deleted file mode 100644 index a14c1f2075b41a9623bec68d07323e0042c567c3..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/configs/optimizer_config.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Dataclasses for optimizer configs.""" -from typing import List, Optional - -import dataclasses -from official.modeling.hyperparams import base_config - - -@dataclasses.dataclass -class BaseOptimizerConfig(base_config.Config): - """Base optimizer config. - - Attributes: - clipnorm: float >= 0 or None. If not None, Gradients will be clipped when - their L2 norm exceeds this value. - clipvalue: float >= 0 or None. 
If not None, gradients will be clipped when - their absolute value exceeds this value. - global_clipnorm: float >= 0 or None. If not None, the gradients of all weights - are clipped so that their global norm is no higher than this value. - """ - clipnorm: Optional[float] = None - clipvalue: Optional[float] = None - global_clipnorm: Optional[float] = None - - -@dataclasses.dataclass -class SGDConfig(BaseOptimizerConfig): - """Configuration for SGD optimizer. - - The attributes for this class match the arguments of tf.keras.optimizers.SGD. - - Attributes: - name: name of the optimizer. - decay: decay rate for SGD optimizer. - nesterov: nesterov for SGD optimizer. - momentum: momentum for SGD optimizer. - """ - name: str = "SGD" - decay: float = 0.0 - nesterov: bool = False - momentum: float = 0.0 - - -@dataclasses.dataclass -class RMSPropConfig(BaseOptimizerConfig): - """Configuration for RMSProp optimizer. - - The attributes for this class match the arguments of - tf.keras.optimizers.RMSprop. - - Attributes: - name: name of the optimizer. - rho: discounting factor for RMSprop optimizer. - momentum: momentum for RMSprop optimizer. - epsilon: epsilon value for RMSprop optimizer, helps with numerical stability. - centered: Whether to normalize gradients or not. - """ - name: str = "RMSprop" - rho: float = 0.9 - momentum: float = 0.0 - epsilon: float = 1e-7 - centered: bool = False - - -@dataclasses.dataclass -class AdagradConfig(BaseOptimizerConfig): - """Configuration for Adagrad optimizer. - - The attributes of this class match the arguments of - tf.keras.optimizers.Adagrad. - - Attributes: - name: name of the optimizer. - initial_accumulator_value: A floating point value. Starting value for the - accumulators, must be non-negative. - epsilon: A small floating point value to avoid zero denominator. - """ - name: str = "Adagrad" - initial_accumulator_value: float = 0.1 - epsilon: float = 1e-07 - - -@dataclasses.dataclass -class AdamConfig(BaseOptimizerConfig): - """Configuration for Adam optimizer. - - The attributes for this class match the arguments of - tf.keras.optimizers.Adam. - - Attributes: - name: name of the optimizer. - beta_1: decay rate for 1st order moments. - beta_2: decay rate for 2nd order moments. - epsilon: epsilon value used for numerical stability in Adam optimizer. - amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm - from the paper "On the Convergence of Adam and Beyond". - """ - name: str = "Adam" - beta_1: float = 0.9 - beta_2: float = 0.999 - epsilon: float = 1e-07 - amsgrad: bool = False - - -@dataclasses.dataclass -class AdamWeightDecayConfig(BaseOptimizerConfig): - """Configuration for Adam optimizer with weight decay. - - Attributes: - name: name of the optimizer. - beta_1: decay rate for 1st order moments. - beta_2: decay rate for 2nd order moments. - epsilon: epsilon value used for numerical stability in the optimizer. - amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm - from the paper "On the Convergence of Adam and Beyond". - weight_decay_rate: float. Weight decay rate. Defaults to 0. - include_in_weight_decay: list[str], or None. List of weight names to include - in weight decay. - exclude_from_weight_decay: list[str], or None. List of weight names to not - include in weight decay. - gradient_clip_norm: A positive float. Clips the gradients to this maximum - L2-norm. Defaults to 1.0. - """ - name: str = "AdamWeightDecay" - beta_1: float = 0.9 - beta_2: float = 0.999 - epsilon: float = 1e-07 - amsgrad: bool = False - weight_decay_rate: float = 0.0 - include_in_weight_decay: Optional[List[str]] = None - exclude_from_weight_decay: Optional[List[str]] = None - gradient_clip_norm: float = 1.0 - -
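A by-hand translation of one of these dataclasses into its Keras optimizer, to show what the factory (not part of this diff) ultimately does; note the learning rate is supplied by the LR schedule, not by the optimizer config:

```python
import tensorflow as tf

cfg = AdamConfig(beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False)
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-4,  # comes from the schedule, see learning_rate_config.py
    beta_1=cfg.beta_1,
    beta_2=cfg.beta_2,
    epsilon=cfg.epsilon,
    amsgrad=cfg.amsgrad)
```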
- """ - name: str = "AdamWeightDecay" - beta_1: float = 0.9 - beta_2: float = 0.999 - epsilon: float = 1e-07 - amsgrad: bool = False - weight_decay_rate: float = 0.0 - include_in_weight_decay: Optional[List[str]] = None - exclude_from_weight_decay: Optional[List[str]] = None - gradient_clip_norm: float = 1.0 - - -@dataclasses.dataclass -class LAMBConfig(BaseOptimizerConfig): - """Configuration for LAMB optimizer. - - The attributes for this class matches the arguments of - tensorflow_addons.optimizers.LAMB. - - Attributes: - name: name of the optimizer. - beta_1: decay rate for 1st order moments. - beta_2: decay rate for 2st order moments. - epsilon: epsilon value used for numerical stability in LAMB optimizer. - weight_decay_rate: float. Weight decay rate. Default to 0. - exclude_from_weight_decay: List of regex patterns of variables excluded from - weight decay. Variables whose name contain a substring matching the - pattern will be excluded. - exclude_from_layer_adaptation: List of regex patterns of variables excluded - from layer adaptation. Variables whose name contain a substring matching - the pattern will be excluded. - """ - name: str = "LAMB" - beta_1: float = 0.9 - beta_2: float = 0.999 - epsilon: float = 1e-6 - weight_decay_rate: float = 0.0 - exclude_from_weight_decay: Optional[List[str]] = None - exclude_from_layer_adaptation: Optional[List[str]] = None - - -@dataclasses.dataclass -class EMAConfig(BaseOptimizerConfig): - """Exponential moving average optimizer config. - - Attributes: - name: 'str', name of the optimizer. - average_decay: 'float', average decay value. - start_step: 'int', start step to apply moving average. - dynamic_decay: 'bool', whether to apply dynamic decay or not. - """ - name: str = "ExponentialMovingAverage" - average_decay: float = 0.99 - start_step: int = 0 - dynamic_decay: bool = True - - -@dataclasses.dataclass -class LARSConfig(BaseOptimizerConfig): - """Layer-wise adaptive rate scaling config. - - Attributes: - name: 'str', name of the optimizer. - momentum: `float` hyperparameter >= 0 that accelerates gradient descent in - the relevant direction and dampens oscillations. Defaults to 0.9. - eeta: `float` LARS coefficient as used in the paper. Default set to LARS - coefficient from the paper. (eeta / weight_decay) determines the highest - scaling factor in LARS.. - weight_decay_rate: `float` for weight decay. - nesterov: 'boolean' for whether to use nesterov momentum. - classic_momentum: `boolean` for whether to use classic (or popular) - momentum. The learning rate is applied during momentum update in classic - momentum, but after momentum for popular momentum. - exclude_from_weight_decay: A list of `string` for variable screening, if any - of the string appears in a variable's name, the variable will be excluded - for computing weight decay. For example, one could specify the list like - ['batch_normalization', 'bias'] to exclude BN and bias from weight decay. - exclude_from_layer_adaptation: Similar to exclude_from_weight_decay, but for - layer adaptation. If it is None, it will be defaulted the same as - exclude_from_weight_decay. 
- """ - name: str = "LARS" - momentum: float = 0.9 - eeta: float = 0.001 - weight_decay_rate: float = 0.0 - nesterov: bool = False - classic_momentum: bool = True - exclude_from_weight_decay: Optional[List[str]] = None - exclude_from_layer_adaptation: Optional[List[str]] = None diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/ema_optimizer.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/ema_optimizer.py deleted file mode 100644 index d4eab40d4d1d5e81989a50e605df02b2d643f44c..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/ema_optimizer.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Exponential moving average optimizer.""" - -from typing import Text, List - -import tensorflow as tf - -# pylint: disable=protected-access - - -class ExponentialMovingAverage(tf.keras.optimizers.Optimizer): - """Optimizer that computes an exponential moving average of the variables. - - Empirically it has been found that using the moving average of the trained - parameters of a deep network is better than using its trained parameters - directly. This optimizer allows you to compute this moving average and swap - the variables at save time so that any code outside of the training loop - will use by default the average values instead of the original ones. - - Example of usage for training: - ```python - opt = tf.keras.optimizers.SGD(learning_rate) - opt = ExponentialMovingAverage(opt) - - opt.shadow_copy(model) - ``` - - At test time, swap the shadow variables to evaluate on the averaged weights: - ```python - opt.swap_weights() - # Test eval the model here - opt.swap_weights() - ``` - """ - - def __init__(self, - optimizer: tf.keras.optimizers.Optimizer, - average_decay: float = 0.99, - start_step: int = 0, - dynamic_decay: bool = True, - name: Text = 'ExponentialMovingAverage', - **kwargs): - """Construct a new ExponentialMovingAverage optimizer. - - Args: - optimizer: `tf.keras.optimizers.Optimizer` that will be - used to compute and apply gradients. - average_decay: float. Decay to use to maintain the moving averages - of trained variables. 
- start_step: int. What step to start the moving average. - dynamic_decay: bool. Whether to change the decay based on the number - of optimizer updates. Decay will start at 0.1 and gradually increase - up to `average_decay` after each optimizer update. This behavior is - similar to `tf.train.ExponentialMovingAverage` in TF 1.x. - name: Optional name for the operations created when applying - gradients. Defaults to "moving_average". - **kwargs: keyword arguments. Allowed to be {`clipnorm`, - `clipvalue`, `lr`, `decay`}. - """ - super().__init__(name, **kwargs) - self._average_decay = average_decay - self._start_step = tf.constant(start_step, tf.float32) - self._dynamic_decay = dynamic_decay - self._optimizer = optimizer - self._track_trackable(self._optimizer, 'base_optimizer') - self._average_weights = None - self._model_weights = None - - def shadow_copy(self, model: tf.keras.Model): - """Creates shadow variables for the given model weights.""" - for var in model.weights: - self.add_slot(var, 'average', initializer='zeros') - self._average_weights = [ - self.get_slot(var, 'average') for var in model.weights - ] - self._model_weights = model.weights - - @property - def has_shadow_copy(self): - """Whether this optimizer has created shadow variables.""" - return self._model_weights is not None and self._average_weights is not None - - def _create_slots(self, var_list): - self._optimizer._create_slots(var_list=var_list) # pylint: disable=protected-access - - def apply_gradients(self, grads_and_vars, name: Text = None): - result = self._optimizer.apply_gradients(grads_and_vars, name) - self.update_average(self.iterations) - return result - - @tf.function - def update_average(self, step: tf.Tensor): - step = tf.cast(step, tf.float32) - if step < self._start_step: - decay = tf.constant(0., tf.float32) - elif self._dynamic_decay: - decay = step - self._start_step - decay = tf.minimum(self._average_decay, (1. + decay) / (10. + decay)) - else: - decay = self._average_decay - - def _apply_moving(v_moving, v_normal): - diff = v_moving - v_normal - v_moving.assign_sub(tf.cast(1. - decay, v_moving.dtype) * diff) - return v_moving - - def _update(strategy, v_moving_and_v_normal): - for v_moving, v_normal in v_moving_and_v_normal: - strategy.extended.update(v_moving, _apply_moving, args=(v_normal,)) - - ctx = tf.distribute.get_replica_context() - return ctx.merge_call(_update, args=(zip(self._average_weights, - self._model_weights),)) - - def swap_weights(self): - """Swap the average and moving weights. - - This is a convenience method to allow one to evaluate the averaged weights - at test time. Loads the weights stored in `self._average` into the model, - keeping a copy of the original model weights. Swapping twice will return - the original weights. 
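The dynamic decay computed in `update_average` above ramps from 0.1 toward `average_decay` as training progresses; a plain-Python restatement for intuition (a sketch, not the library code):

```python
def ema_decay(step, start_step=0, average_decay=0.99, dynamic_decay=True):
  t = step - start_step
  if t < 0:
    return 0.0               # averaging has not started yet
  if not dynamic_decay:
    return average_decay
  return min(average_decay, (1.0 + t) / (10.0 + t))

print(ema_decay(0))      # 0.1: early steps track the model weights closely
print(ema_decay(90))     # 0.91: the average gets stickier over time
print(ema_decay(10000))  # 0.99: capped at average_decay
```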
- """ - if tf.distribute.in_cross_replica_context(): - strategy = tf.distribute.get_strategy() - strategy.run(self._swap_weights, args=()) - else: - raise ValueError('Swapping weights must occur under a ' - 'tf.distribute.Strategy') - - @tf.function - def _swap_weights(self): - def fn_0(a, b): - a.assign_add(b) - return a - def fn_1(b, a): - b.assign(a - b) - return b - def fn_2(a, b): - a.assign_sub(b) - return a - - def swap(strategy, a_and_b): - """Swap `a` and `b` and mirror to all devices.""" - for a, b in a_and_b: - strategy.extended.update(a, fn_0, args=(b,)) # a = a + b - strategy.extended.update(b, fn_1, args=(a,)) # b = a - b - strategy.extended.update(a, fn_2, args=(b,)) # a = a - b - - ctx = tf.distribute.get_replica_context() - return ctx.merge_call( - swap, args=(zip(self._average_weights, self._model_weights),)) - - def assign_average_vars(self, var_list: List[tf.Variable]): - """Assign variables in var_list with their respective averages. - - Args: - var_list: List of model variables to be assigned to their average. - Returns: - assign_op: The op corresponding to the assignment operation of - variables to their average. - """ - assign_op = tf.group([ - var.assign(self.get_slot(var, 'average')) for var in var_list - if var.trainable - ]) - return assign_op - - def _create_hypers(self): - self._optimizer._create_hypers() # pylint: disable=protected-access - - def _prepare(self, var_list): - return self._optimizer._prepare(var_list=var_list) # pylint: disable=protected-access - - @property - def iterations(self): - return self._optimizer.iterations - - @iterations.setter - def iterations(self, variable): - self._optimizer.iterations = variable - - @property - def weights(self): - # return self._weights + self._optimizer.weights - return self._optimizer.weights - - def variables(self): - return self._weights + [self.iterations] - - @property - def lr(self): - return self._optimizer._get_hyper('learning_rate') - - @lr.setter - def lr(self, lr): - self._optimizer._set_hyper('learning_rate', lr) - - @property - def learning_rate(self): - return self._optimizer._get_hyper('learning_rate') - - @learning_rate.setter - def learning_rate(self, learning_rate): # pylint: disable=redefined-outer-name - self._optimizer._set_hyper('learning_rate', learning_rate) - - def _resource_apply_dense(self, grad, var): - return self._optimizer._resource_apply_dense(grad, var) - - def _resource_apply_sparse(self, grad, var, indices): - return self._optimizer._resource_apply_sparse(grad, var, indices) - - def _resource_apply_sparse_duplicate_indices(self, grad, var, indices): - return self._optimizer._resource_apply_sparse_duplicate_indices( - grad, var, indices) - - def get_config(self): - config = { - 'optimizer': tf.keras.optimizers.serialize(self._optimizer), - 'average_decay': self._average_decay, - 'start_step': self._start_step, - 'dynamic_decay': self._dynamic_decay, - } - base_config = super(ExponentialMovingAverage, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - optimizer = tf.keras.optimizers.deserialize( - config.pop('optimizer'), - custom_objects=custom_objects, - ) - return cls(optimizer, **config) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lars_optimizer.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lars_optimizer.py deleted file mode 100644 index 
a2e88ba1900b2af43d3802a3586bec4219213e4e..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lars_optimizer.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Layer-wise adaptive rate scaling optimizer.""" -import re -from typing import Text, List, Optional - -import tensorflow as tf - - -# pylint: disable=protected-access - - -class LARS(tf.keras.optimizers.Optimizer): - """Layer-wise Adaptive Rate Scaling for large batch training. - - Introduced by "Large Batch Training of Convolutional Networks" by Y. You, - I. Gitman, and B. Ginsburg. (https://arxiv.org/abs/1708.03888) - """ - - def __init__(self, - learning_rate: float = 0.01, - momentum: float = 0.9, - weight_decay_rate: float = 0.0, - eeta: float = 0.001, - nesterov: bool = False, - classic_momentum: bool = True, - exclude_from_weight_decay: Optional[List[Text]] = None, - exclude_from_layer_adaptation: Optional[List[Text]] = None, - name: Text = "LARS", - **kwargs): - """Constructs a LARSOptimizer. - - Args: - learning_rate: `float` for learning rate. Defaults to 0.01. - momentum: `float` hyperparameter >= 0 that accelerates gradient descent - in the relevant direction and dampens oscillations. Defaults to 0.9. - weight_decay_rate: `float` for weight decay. - eeta: `float` LARS coefficient as used in the paper. Default set to LARS - coefficient from the paper. (eeta / weight_decay) determines the - highest scaling factor in LARS.. - nesterov: 'boolean' for whether to use nesterov momentum. - classic_momentum: `boolean` for whether to use classic (or popular) - momentum. The learning rate is applied during momentum update in - classic momentum, but after momentum for popular momentum. - exclude_from_weight_decay: A list of `string` for variable screening, if - any of the string appears in a variable's name, the variable will be - excluded for computing weight decay. For example, one could specify - the list like ['batch_normalization', 'bias'] to exclude BN and bias - from weight decay. - exclude_from_layer_adaptation: Similar to exclude_from_weight_decay, but - for layer adaptation. If it is None, it will be defaulted the same as - exclude_from_weight_decay. 
- name: `Text` as optional name for the operations created when applying - gradients. Defaults to "LARS". - **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`, - `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip - gradients by value, `decay` is included for backward compatibility to - allow time inverse decay of learning rate. `lr` is included for - backward compatibility, recommended to use `learning_rate` instead. - """ - super(LARS, self).__init__(name, **kwargs) - - self._set_hyper("learning_rate", learning_rate) - self._set_hyper("decay", self._initial_decay) - self.momentum = momentum - self.weight_decay_rate = weight_decay_rate - self.eeta = eeta - self.nesterov = nesterov - self.classic_momentum = classic_momentum - self.exclude_from_weight_decay = exclude_from_weight_decay - # exclude_from_layer_adaptation is set to exclude_from_weight_decay if the - # arg is None. - if exclude_from_layer_adaptation: - self.exclude_from_layer_adaptation = exclude_from_layer_adaptation - else: - self.exclude_from_layer_adaptation = exclude_from_weight_decay - - def _create_slots(self, var_list): - for v in var_list: - self.add_slot(v, "momentum") - - def _resource_apply_dense(self, grad, param, apply_state=None): - if grad is None or param is None: - return tf.no_op() - - var_device, var_dtype = param.device, param.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) or - self._fallback_apply_state(var_device, var_dtype)) - learning_rate = coefficients["lr_t"] - - param_name = param.name - - v = self.get_slot(param, "momentum") - - if self._use_weight_decay(param_name): - grad += self.weight_decay_rate * param - - if self.classic_momentum: - trust_ratio = 1.0 - if self._do_layer_adaptation(param_name): - w_norm = tf.norm(param, ord=2) - g_norm = tf.norm(grad, ord=2) - trust_ratio = tf.where( - tf.greater(w_norm, 0), - tf.where(tf.greater(g_norm, 0), (self.eeta * w_norm / g_norm), 1.0), - 1.0) - scaled_lr = learning_rate * trust_ratio - - next_v = tf.multiply(self.momentum, v) + scaled_lr * grad - if self.nesterov: - update = tf.multiply(self.momentum, next_v) + scaled_lr * grad - else: - update = next_v - next_param = param - update - else: - next_v = tf.multiply(self.momentum, v) + grad - if self.nesterov: - update = tf.multiply(self.momentum, next_v) + grad - else: - update = next_v - - trust_ratio = 1.0 - if self._do_layer_adaptation(param_name): - w_norm = tf.norm(param, ord=2) - v_norm = tf.norm(update, ord=2) - trust_ratio = tf.where( - tf.greater(w_norm, 0), - tf.where(tf.greater(v_norm, 0), (self.eeta * w_norm / v_norm), 1.0), - 1.0) - scaled_lr = trust_ratio * learning_rate - next_param = param - scaled_lr * update - - return tf.group(*[ - param.assign(next_param, use_locking=False), - v.assign(next_v, use_locking=False) - ]) - - def _resource_apply_sparse(self, grad, handle, indices, apply_state): - raise NotImplementedError("Applying sparse gradients is not implemented.") - - def _use_weight_decay(self, param_name): - """Whether to use L2 weight decay for `param_name`.""" - if not self.weight_decay_rate: - return False - if self.exclude_from_weight_decay: - for r in self.exclude_from_weight_decay: - if re.search(r, param_name) is not None: - return False - return True - - def _do_layer_adaptation(self, param_name): - """Whether to do layer-wise learning rate adaptation for `param_name`.""" - if self.exclude_from_layer_adaptation: - for r in self.exclude_from_layer_adaptation: - if re.search(r, param_name) is not 
None: - return False - return True - - def get_config(self): - config = super(LARS, self).get_config() - config.update({ - "learning_rate": self._serialize_hyperparameter("learning_rate"), - "decay": self._serialize_hyperparameter("decay"), - "momentum": self.momentum, - "classic_momentum": self.classic_momentum, - "weight_decay_rate": self.weight_decay_rate, - "eeta": self.eeta, - "nesterov": self.nesterov, - }) - return config - - @classmethod - def from_config(cls, config, custom_objects=None): - return cls(**config) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lr_schedule.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lr_schedule.py deleted file mode 100644 index ff1f6b189a9a2d7b3ce2df950451764eaacf397d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lr_schedule.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Learning rate schedule classes.""" - -from typing import Mapping, Any, Union, Optional - -import tensorflow as tf - - -class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): - """Linear warmup schedule.""" - - def __init__(self, - after_warmup_lr_sched: Union[ - tf.keras.optimizers.schedules.LearningRateSchedule, float], - warmup_steps: int, - warmup_learning_rate: float, - name: Optional[str] = None): - """Adds linear warmup to a learning rate schedule. - - warmup_lr is the initial learning rate; the final learning rate of the - warmup period is the initial learning rate of the schedule in use. - The learning rate at each step increases linearly according to the - following formula: - learning_rate = warmup_lr + step / warmup_steps - * (final_warmup_lr - warmup_lr). - Using warmup overrides the underlying learning rate schedule for the - first warmup_steps steps. - - Args: - after_warmup_lr_sched: tf.keras.optimizers.schedules.LearningRateSchedule - or a constant. - warmup_steps: Number of warmup steps. - warmup_learning_rate: Initial learning rate for the warmup. - name: Optional, name of warmup schedule.
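As a quick sanity check of the warmup formula above, a sketch in plain Python using the same values the optimizer-factory tests further below exercise (warmup rate 0.01, constant after-warmup rate 0.1, 500 warmup steps):

    # Linear warmup interpolation at step 250 (halfway through warmup).
    warmup_lr, final_warmup_lr, warmup_steps, step = 0.01, 0.1, 500.0, 250.0
    lr = warmup_lr + step / warmup_steps * (final_warmup_lr - warmup_lr)
    assert abs(lr - 0.055) < 1e-9  # matches the expected value at step 250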
- """ - super().__init__() - self._name = name - self._after_warmup_lr_sched = after_warmup_lr_sched - self._warmup_steps = warmup_steps - self._init_warmup_lr = warmup_learning_rate - if isinstance(after_warmup_lr_sched, - tf.keras.optimizers.schedules.LearningRateSchedule): - self._final_warmup_lr = after_warmup_lr_sched(warmup_steps) - else: - self._final_warmup_lr = tf.cast(after_warmup_lr_sched, dtype=tf.float32) - - def __call__(self, step: int): - - global_step = tf.cast(step, dtype=tf.float32) - - linear_warmup_lr = ( - self._init_warmup_lr + global_step / self._warmup_steps * - (self._final_warmup_lr - self._init_warmup_lr)) - - if isinstance(self._after_warmup_lr_sched, - tf.keras.optimizers.schedules.LearningRateSchedule): - after_warmup_lr = self._after_warmup_lr_sched(step) - else: - after_warmup_lr = tf.cast(self._after_warmup_lr_sched, dtype=tf.float32) - - lr = tf.cond(global_step < self._warmup_steps, - lambda: linear_warmup_lr, - lambda: after_warmup_lr) - return lr - - def get_config(self) -> Mapping[str, Any]: - if isinstance(self._after_warmup_lr_sched, - tf.keras.optimizers.schedules.LearningRateSchedule): - config = { - "after_warmup_lr_sched": self._after_warmup_lr_sched.get_config()} # pytype: disable=attribute-error - else: - config = {"after_warmup_lr_sched": self._after_warmup_lr_sched} # pytype: disable=attribute-error - - config.update({ - "warmup_steps": self._warmup_steps, - "warmup_learning_rate": self._init_warmup_lr, - "name": self._name - }) - return config - - -class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule): - """Applies polynomial warmup schedule on a given learning rate decay schedule.""" - - def __init__(self, - after_warmup_lr_sched: Union[ - tf.keras.optimizers.schedules.LearningRateSchedule, float], - warmup_steps: int, - power: float = 1.0, - name: str = "PolynomialWarmup"): - super().__init__() - if isinstance(after_warmup_lr_sched, - tf.keras.optimizers.schedules.LearningRateSchedule): - self._initial_learning_rate = after_warmup_lr_sched(warmup_steps) - else: - self._initial_learning_rate = tf.cast( - after_warmup_lr_sched, dtype=tf.float32) - - self._warmup_steps = warmup_steps - self._power = power - self._after_warmup_lr_sched = after_warmup_lr_sched - self._name = name - - def __call__(self, step): - with tf.name_scope(self._name or "PolynomialWarmUp") as name: - # Implements polynomial warmup. i.e., if global_step < warmup_steps, the - # learning rate will be `global_step/num_warmup_steps * init_lr`. - global_step_float = tf.cast(step, tf.float32) - warmup_steps_float = tf.cast(self._warmup_steps, tf.float32) - - if self._warmup_steps <= 0: - warmup_percent_done = 1.0 - else: - # A zero `step` may cause Inf. So make `step` positive. 
- step_non_zero = tf.math.maximum(global_step_float, 1.0) - warmup_percent_done = step_non_zero / warmup_steps_float - - warmup_learning_rate = ( - self._initial_learning_rate * - tf.math.pow(warmup_percent_done, self._power)) - - if isinstance(self._after_warmup_lr_sched, - tf.keras.optimizers.schedules.LearningRateSchedule): - after_warmup_lr = self._after_warmup_lr_sched(step) - else: - after_warmup_lr = tf.cast(self._after_warmup_lr_sched, dtype=tf.float32) - - return tf.cond( - global_step_float < warmup_steps_float, - lambda: warmup_learning_rate, - lambda: after_warmup_lr, - name=name) - - def get_config(self) -> Mapping[str, Any]: - if isinstance(self._after_warmup_lr_sched, - tf.keras.optimizers.schedules.LearningRateSchedule): - config = { - "after_warmup_lr_sched": self._after_warmup_lr_sched.get_config()} # pytype: disable=attribute-error - else: - config = {"after_warmup_lr_sched": self._after_warmup_lr_sched} # pytype: disable=attribute-error - - config.update({ - "warmup_steps": self._warmup_steps, - "power": self._power, - "name": self._name - }) - return config - - -class DirectPowerDecay(tf.keras.optimizers.schedules.LearningRateSchedule): - """Learning rate schedule follows lr * (step)^power.""" - - def __init__(self, - initial_learning_rate: float, - power: float = 1.0, - name: str = "DirectPowerDecay"): - """Initialize configuration of the learning rate schedule. - - Args: - initial_learning_rate: The initial learning rate. - power: The order of the polynomial. - name: Optional, name of learning rate schedule. - """ - super().__init__() - self._initial_learning_rate = initial_learning_rate - self._power = power - self._name = name - - def __call__(self, step): - with tf.name_scope(self._name or "DirectPowerDecay"): - step = tf.cast(step, tf.float32) - learning_rate = self._initial_learning_rate - # A zero `step` may cause Inf. So make `step` positive. - step_non_zero = tf.math.maximum(step, 1.0) - learning_rate *= tf.math.pow(step_non_zero, self._power) - return learning_rate - - def get_config(self): - """Get the configuration of the learning rate schedule.""" - return { - "initial_learning_rate": self._initial_learning_rate, - "power": self._power, - "name": self._name, - } - - -class PowerAndLinearDecay(tf.keras.optimizers.schedules.LearningRateSchedule): - """Learning rate schedule multiplied by a linear decay at the end. - - The schedule has the following behavior. - Let offset_step = step - offset. - 1) offset_step < 0, the actual learning rate equals initial_learning_rate. - 2) offset_step <= total_decay_steps * (1 - linear_decay_fraction), the - actual learning rate equals lr * offset_step^power. - 3) total_decay_steps * (1 - linear_decay_fraction) <= offset_step < - total_decay_steps, the actual learning rate equals lr * offset_step^power * - (total_decay_steps - offset_step) / (total_decay_steps * - linear_decay_fraction). - 4) offset_step >= total_decay_steps, the actual learning rate equals zero. - """ - - def __init__(self, - initial_learning_rate: float, - total_decay_steps: int, - power: float = 1.0, - linear_decay_fraction: float = 0.1, - offset: int = 0, - name: str = "PowerAndLinearDecay"): - """Initialize configuration of the learning rate schedule. - - Args: - initial_learning_rate: The initial learning rate. - total_decay_steps: The total number of steps for power + linear decay. - power: The order of the polynomial. - linear_decay_fraction: In the last fraction `linear_decay_fraction` of - the total decay steps, the learning rate will be multiplied by a - linear decay.
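A short usage sketch of DirectPowerDecay above, assuming the module is importable as official.modeling.optimization.lr_schedule; the values mirror the 'power' schedule case in the factory tests below:

    from official.modeling.optimization import lr_schedule

    sched = lr_schedule.DirectPowerDecay(initial_learning_rate=1.0, power=-1.0)
    # Step 0 is clamped to 1, so the schedule starts at the initial rate.
    assert abs(float(sched(0)) - 1.0) < 1e-6
    assert abs(float(sched(250)) - 1.0 / 250.0) < 1e-6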
- offset: The offset applied to steps. - name: Optional, name of learning rate schedule. - """ - super().__init__() - self._initial_learning_rate = initial_learning_rate - self._total_decay_steps = total_decay_steps - self._power = power - self._linear_decay_fraction = linear_decay_fraction - self._offset = offset - self._name = name - - def __call__(self, step): - with tf.name_scope(self._name or "PowerAndLinearDecay"): - step = tf.cast(step - self._offset, tf.float32) - learning_rate = self._initial_learning_rate - # A zero `step` may cause Inf. So make `step` positive. - step_non_zero = tf.math.maximum(step, 1.0) - learning_rate *= tf.math.pow(step_non_zero, self._power) - if self._total_decay_steps * self._linear_decay_fraction > 0: - learning_rate *= tf.minimum( - 1.0, (self._total_decay_steps - step) / - (self._total_decay_steps * self._linear_decay_fraction)) - learning_rate = tf.maximum(0.0, learning_rate) - return learning_rate - - def get_config(self): - """Get the configuration of the learning rate schedule.""" - return { - "initial_learning_rate": self._initial_learning_rate, - "total_decay_steps": self._total_decay_steps, - "power": self._power, - "linear_decay_fraction": self._linear_decay_fraction, - "offset": self._offset, - "name": self._name, - } - - -class PowerDecayWithOffset(tf.keras.optimizers.schedules.LearningRateSchedule): - """Power learning rate decay with offset. - - Learning rate equals to `pre_offset_learning_rate` if `step` < `offset`. - Otherwise, learning rate equals to lr * (step - offset)^power. - """ - - def __init__(self, - initial_learning_rate: float, - power: float = 1.0, - offset: int = 0, - pre_offset_learning_rate: float = 1.0e6, - name: str = "PowerDecayWithOffset"): - """Initialize configuration of the learning rate schedule. - - Args: - initial_learning_rate: The initial learning rate. - power: The order of the polynomial. - offset: The offset when computing the power decay. - pre_offset_learning_rate: The maximum learning rate we'll use. - name: Optional, name of learning rate schedule. - """ - super().__init__() - self._initial_learning_rate = initial_learning_rate - self._power = power - self._offset = offset - self._pre_offset_lr = pre_offset_learning_rate - self._name = name - - def __call__(self, step): - with tf.name_scope(self._name or "PowerDecayWithOffset"): - step = tf.cast(step, tf.float32) - lr_after_offset = tf.math.pow( - tf.math.maximum(step - self._offset, 1.0), self._power) * ( - self._initial_learning_rate) - - sign = tf.cast(step > self._offset, tf.float32) - lr_combined = (1.0 - sign) * self._pre_offset_lr + sign * lr_after_offset - # Power may give infinitely large LR. So cap it with pre_offset_lr. 
- return tf.math.minimum(lr_combined, self._pre_offset_lr) - - def get_config(self): - """Get the configuration of the learning rate schedule.""" - return { - "initial_learning_rate": self._initial_learning_rate, - "power": self._power, - "offset": self._offset, - "pre_offset_learning_rate": self._pre_offset_lr, - "name": self._name, - } diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lr_schedule_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lr_schedule_test.py deleted file mode 100644 index f475de874cb4bc6847afbb1498b888c8b5f3829b..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/lr_schedule_test.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for lr_schedule.""" -from absl.testing import parameterized -import tensorflow as tf - -from official.modeling.optimization import lr_schedule - - -class PowerAndLinearDecayTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - dict( - testcase_name='power_only', - init_lr=1.0, - power=-1.0, - linear_decay_fraction=0.0, - total_decay_steps=100, - offset=0, - expected=[[0, 1.0], [1, 1.0], [40, 1. / 40.], [60, 1. / 60], - [100, 1. / 100]]), - dict( - testcase_name='linear_only', - init_lr=1.0, - power=0.0, - linear_decay_fraction=1.0, - total_decay_steps=100, - offset=0, - expected=[[0, 1.0], [1, 0.99], [40, 0.6], [60, 0.4], [100, 0.0]]), - dict( - testcase_name='general', - init_lr=1.0, - power=-1.0, - linear_decay_fraction=0.5, - total_decay_steps=100, - offset=0, - expected=[[0, 1.0], [1, 1.0], [40, 1. / 40.], - [60, 1. / 60. * 0.8], [100, 0.0]]), - dict( - testcase_name='offset', - init_lr=1.0, - power=-1.0, - linear_decay_fraction=0.5, - total_decay_steps=100, - offset=90, - expected=[[0, 1.0], [90, 1.0], [91, 1.0], [130, 1. / 40.], - [150, 1. / 60. 
* 0.8], [190, 0.0], [200, 0.0]]), - ) - def test_power_linear_lr_schedule(self, init_lr, power, linear_decay_fraction, - total_decay_steps, offset, expected): - lr = lr_schedule.PowerAndLinearDecay( - initial_learning_rate=init_lr, - power=power, - linear_decay_fraction=linear_decay_fraction, - total_decay_steps=total_decay_steps, - offset=offset) - for step, value in expected: - self.assertAlmostEqual(lr(step).numpy(), value) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/optimizer_factory.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/optimizer_factory.py deleted file mode 100644 index 9cdd6747d713a6a56dc3598665d2e05dde6a2833..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/optimizer_factory.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Optimizer factory class.""" -from typing import Callable, Union - -import gin -import tensorflow as tf -import tensorflow_addons.optimizers as tfa_optimizers - -from official.modeling.optimization import ema_optimizer -from official.modeling.optimization import lars_optimizer -from official.modeling.optimization import lr_schedule -from official.modeling.optimization.configs import optimization_config as opt_cfg -from official.nlp import optimization as nlp_optimization - -OPTIMIZERS_CLS = { - 'sgd': tf.keras.optimizers.SGD, - 'adam': tf.keras.optimizers.Adam, - 'adamw': nlp_optimization.AdamWeightDecay, - 'lamb': tfa_optimizers.LAMB, - 'rmsprop': tf.keras.optimizers.RMSprop, - 'lars': lars_optimizer.LARS, - 'adagrad': tf.keras.optimizers.Adagrad, -} - -LR_CLS = { - 'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay, - 'polynomial': tf.keras.optimizers.schedules.PolynomialDecay, - 'exponential': tf.keras.optimizers.schedules.ExponentialDecay, - 'cosine': tf.keras.experimental.CosineDecay, - 'power': lr_schedule.DirectPowerDecay, - 'power_linear': lr_schedule.PowerAndLinearDecay, - 'power_with_offset': lr_schedule.PowerDecayWithOffset, -} - -WARMUP_CLS = { - 'linear': lr_schedule.LinearWarmup, - 'polynomial': lr_schedule.PolynomialWarmUp -} - - -class OptimizerFactory: - """Optimizer factory class. - - This class builds learning rate and optimizer based on an optimization config. - To use this class, you need to do the following: - (1) Define optimization config, this includes optimizer, and learning rate - schedule. - (2) Initialize the class using the optimization config. - (3) Build learning rate. - (4) Build optimizer. - - This is a typical example for using this class: - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': {'momentum': 0.9} - }, - 'learning_rate': { - 'type': 'stepwise', - 'stepwise': {'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001]} - }, - 'warmup': { - 'type': 'linear', - 'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01} - } - } - opt_config = OptimizationConfig(params) - opt_factory = OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - optimizer = opt_factory.build_optimizer(lr) - """ - - def __init__(self, config: opt_cfg.OptimizationConfig): - """Initializing OptimizerFactory. - - Args: - config: OptimizationConfig instance contain optimization config. - """ - self._config = config - self._optimizer_config = config.optimizer.get() - self._optimizer_type = config.optimizer.type - - self._use_ema = config.ema is not None - self._ema_config = config.ema - - if self._optimizer_config is None: - raise ValueError('Optimizer type must be specified') - - self._lr_config = config.learning_rate.get() - self._lr_type = config.learning_rate.type - - if self._lr_type is None: - raise ValueError('Learning rate type must be specified') - - self._warmup_config = config.warmup.get() - self._warmup_type = config.warmup.type - - def build_learning_rate(self): - """Build learning rate. - - Builds learning rate from config. Learning rate schedule is built according - to the learning rate config. If learning rate type is consant, - lr_config.learning_rate is returned. - - Returns: - tf.keras.optimizers.schedules.LearningRateSchedule instance. If - learning rate type is consant, lr_config.learning_rate is returned. 
- """ - if self._lr_type == 'constant': - lr = self._lr_config.learning_rate - else: - lr = LR_CLS[self._lr_type](**self._lr_config.as_dict()) - - if self._warmup_config: - lr = WARMUP_CLS[self._warmup_type](lr, **self._warmup_config.as_dict()) - - return lr - - @gin.configurable - def build_optimizer( - self, - lr: Union[tf.keras.optimizers.schedules.LearningRateSchedule, float], - postprocessor: Callable[[tf.keras.optimizers.Optimizer], - tf.keras.optimizers.Optimizer] = None): - """Build optimizer. - - Builds optimizer from config. It takes learning rate as input, and builds - the optimizer according to the optimizer config. Typically, the learning - rate built using self.build_lr() is passed as an argument to this method. - - Args: - lr: A floating point value, or a - tf.keras.optimizers.schedules.LearningRateSchedule instance. - postprocessor: An optional function for postprocessing the optimizer. It - takes an optimizer and returns an optimizer. - - Returns: - tf.keras.optimizers.Optimizer instance. - """ - - optimizer_dict = self._optimizer_config.as_dict() - ## Delete clipnorm and clipvalue if None - if optimizer_dict['clipnorm'] is None: - del optimizer_dict['clipnorm'] - if optimizer_dict['clipvalue'] is None: - del optimizer_dict['clipvalue'] - - optimizer_dict['learning_rate'] = lr - - optimizer = OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict) - - if self._use_ema: - optimizer = ema_optimizer.ExponentialMovingAverage( - optimizer, **self._ema_config.as_dict()) - if postprocessor: - optimizer = postprocessor(optimizer) - assert isinstance(optimizer, tf.keras.optimizers.Optimizer), ( - 'OptimizerFactory.build_optimizer returning a non-optimizer object: ' - '{}'.format(optimizer)) - - return optimizer diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/optimizer_factory_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/optimizer_factory_test.py deleted file mode 100644 index 52471de8f9fb2905d34ef8e8b9db5306fe9002c0..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/optimization/optimizer_factory_test.py +++ /dev/null @@ -1,414 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for optimizer_factory.py.""" -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from official.modeling.optimization import optimizer_factory -from official.modeling.optimization.configs import optimization_config - - -class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters(('sgd'), ('rmsprop'), ('adam'), ('adamw'), ('lamb'), - ('lars'), ('adagrad')) - def test_optimizers(self, optimizer_type): - params = { - 'optimizer': { - 'type': optimizer_type - }, - 'learning_rate': { - 'type': 'constant', - 'constant': { - 'learning_rate': 0.1 - } - } - } - optimizer_cls = optimizer_factory.OPTIMIZERS_CLS[optimizer_type] - expected_optimizer_config = optimizer_cls().get_config() - expected_optimizer_config['learning_rate'] = 0.1 - - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - optimizer = opt_factory.build_optimizer(lr, postprocessor=lambda x: x) - - self.assertIsInstance(optimizer, optimizer_cls) - self.assertEqual(expected_optimizer_config, optimizer.get_config()) - - @parameterized.parameters((None, None), (1.0, None), (None, 1.0)) - def test_gradient_clipping(self, clipnorm, clipvalue): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'clipnorm': clipnorm, - 'clipvalue': clipvalue - } - }, - 'learning_rate': { - 'type': 'constant', - 'constant': { - 'learning_rate': 1.0 - } - } - } - - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - optimizer = opt_factory.build_optimizer(lr) - - var0 = tf.Variable([1.0, 2.0]) - var1 = tf.Variable([3.0, 4.0]) - - grads0 = tf.constant([0.1, 0.1]) - grads1 = tf.constant([2.0, 3.0]) - - grads_and_vars = list(zip([grads0, grads1], [var0, var1])) - optimizer.apply_gradients(grads_and_vars) - - self.assertAllClose(np.array([0.9, 1.9]), var0.numpy()) - if clipvalue is not None: - self.assertAllClose(np.array([2.0, 3.0]), var1.numpy()) - elif clipnorm is not None: - self.assertAllClose(np.array([2.4452999, 3.1679497]), var1.numpy()) - else: - self.assertAllClose(np.array([1.0, 1.0]), var1.numpy()) - - def test_missing_types(self): - params = {'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}}} - with self.assertRaises(ValueError): - optimizer_factory.OptimizerFactory( - optimization_config.OptimizationConfig(params)) - params = { - 'learning_rate': { - 'type': 'stepwise', - 'stepwise': { - 'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001] - } - } - } - with self.assertRaises(ValueError): - optimizer_factory.OptimizerFactory( - optimization_config.OptimizationConfig(params)) - - -# TODO(b/187559334) refactor lr_schedule tests into `lr_schedule_test.py`. 
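The schedule tests that follow all take the same round trip through the factory; a condensed sketch of it (module paths as imported above, values from the class docstring's own example):

    from official.modeling.optimization import optimizer_factory
    from official.modeling.optimization.configs import optimization_config

    params = {
        'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
        'learning_rate': {'type': 'constant',
                          'constant': {'learning_rate': 0.1}},
    }
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    # build_learning_rate() feeds build_optimizer(), as in the docstring above.
    optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate())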
- - def test_stepwise_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'stepwise', - 'stepwise': { - 'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001] - } - } - } - expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1], - [10001, 0.01], [20000, 0.01], [20001, 0.001]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_stepwise_lr_with_warmup_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'stepwise', - 'stepwise': { - 'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001] - } - }, - 'warmup': { - 'type': 'linear', - 'linear': { - 'warmup_steps': 500, - 'warmup_learning_rate': 0.01 - } - } - } - expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5500, 0.1], - [10000, 0.1], [10001, 0.01], [20000, 0.01], - [20001, 0.001]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_exponential_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'exponential', - 'exponential': { - 'initial_learning_rate': 0.1, - 'decay_steps': 1000, - 'decay_rate': 0.96, - 'staircase': True - } - } - } - expected_lr_step_values = [ - [0, 0.1], - [999, 0.1], - [1000, 0.096], - [1999, 0.096], - [2000, 0.09216], - ] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_polynomial_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': { - 'initial_learning_rate': 0.1, - 'decay_steps': 1000, - 'end_learning_rate': 0.001 - } - } - } - - expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_cosine_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'cosine', - 'cosine': { - 'initial_learning_rate': 0.1, - 'decay_steps': 1000 - } - } - } - expected_lr_step_values = [[0, 0.1], [250, 0.08535534], [500, 0.04999999], - [750, 0.01464466], [1000, 0]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_constant_lr_with_warmup_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'constant', - 'constant': { - 
'learning_rate': 0.1 - } - }, - 'warmup': { - 'type': 'linear', - 'linear': { - 'warmup_steps': 500, - 'warmup_learning_rate': 0.01 - } - } - } - - expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5000, 0.1], - [10000, 0.1], [20000, 0.1]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_stepwise_lr_with_polynomial_warmup_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'stepwise', - 'stepwise': { - 'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001] - } - }, - 'warmup': { - 'type': 'polynomial', - 'polynomial': { - 'warmup_steps': 500, - 'power': 2. - } - } - } - expected_lr_step_values = [[0, 0.0], [250, 0.025], [500, 0.1], [5500, 0.1], - [10000, 0.1], [10001, 0.01], [20000, 0.01], - [20001, 0.001]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value, places=6) - - def test_power_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'power', - 'power': { - 'initial_learning_rate': 1.0, - 'power': -1.0 - } - } - } - expected_lr_step_values = [[0, 1.0], [1, 1.0], [250, 1. / 250.]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_power_linear_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'power_linear', - 'power_linear': { - 'initial_learning_rate': 1.0, - 'power': -1.0, - 'linear_decay_fraction': 0.5, - 'total_decay_steps': 100, - 'offset': 0, - } - } - } - expected_lr_step_values = [[0, 1.0], [1, 1.0], [40, 1. / 40.], - [60, 1. / 60. * 0.8]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - - def test_power_with_offset_lr_schedule(self): - params = { - 'optimizer': { - 'type': 'sgd', - 'sgd': { - 'momentum': 0.9 - } - }, - 'learning_rate': { - 'type': 'power_with_offset', - 'power_with_offset': { - 'initial_learning_rate': 1.0, - 'power': -1.0, - 'offset': 10, - 'pre_offset_learning_rate': 3.0, - } - } - } - expected_lr_step_values = [[1, 3.0], [10, 3.0], [20, 1. 
/ 10.]] - opt_config = optimization_config.OptimizationConfig(params) - opt_factory = optimizer_factory.OptimizerFactory(opt_config) - lr = opt_factory.build_learning_rate() - - for step, value in expected_lr_step_values: - self.assertAlmostEqual(lr(step).numpy(), value) - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/performance.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/performance.py deleted file mode 100644 index f3fe1cc1603bd0a93cff74458c4962fe21448725..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/performance.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Functions and classes related to training performance.""" - -from absl import logging -import tensorflow as tf -import npu_device as npu -''' -def configure_optimizer(optimizer, - use_float16=False, - use_graph_rewrite=False, - loss_scale="dynamic"): - """Configures optimizer object with performance options.""" - - if isinstance(loss_scale,(int,float)): - print("Use static npu loss scale with init loss scale {}".format(loss_scale),flush=True) - optimizer=(npu.train.optimizer.NpuLossScaleOptimizer(optimizer,dynamic=False,initial_scale=loss_scale)) - elif loss_scale=="dynamic": - print("Use dynamic npu loss scale ",flush=True) - optimizer=(npu.train.optimizer.NpuLossScaleOptimizer(optimizer)) - else: - raise RuntimeError("Unsupported npu loss scale value {}".format(loss_scale)) - - if use_graph_rewrite: - # Note: the model dtype must be 'float32', which will ensure - # tf.keras.mixed_precision and - # tf.train.experimental.enable_mixed_precision_graph_rewrite do not double - # up. - optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite( - optimizer) - return optimizer - -''' -def configure_optimizer(optimizer, - use_float16=False, - use_graph_rewrite=False, - loss_scale='dynamic', - use_experimental_api=False): - """Configures optimizer object with performance options.""" - if use_experimental_api: - logging.warning('Passing use_experimental_api=True is deprecated.
The ' - 'argument will be removed in the future.') - if use_float16: - # TODO(b/171936854): Move all methods to non-experimental api. - if use_experimental_api: - # Wraps optimizer with a LossScaleOptimizer. This is done automatically - # in compile() with the "mixed_float16" policy, but since we do not call - # compile(), we must wrap the optimizer manually. - optimizer = ( - tf.keras.mixed_precision.experimental.LossScaleOptimizer( - optimizer, loss_scale=loss_scale)) - elif loss_scale == 'dynamic': - optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer) - else: - # loss_scale is a number. We interpret that as a fixed loss scale. - optimizer = tf.keras.mixed_precision.LossScaleOptimizer( - optimizer, dynamic=False, initial_scale=loss_scale) - if use_graph_rewrite: - # Note: the model dtype must be 'float32', which will ensure - # tf.keras.mixed_precision and enable_mixed_precision_graph_rewrite do not - # double up. - optimizer = ( - tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( - optimizer)) - return optimizer - - -def set_mixed_precision_policy(dtype, loss_scale=None, - use_experimental_api=False): - """Sets mixed precision policy.""" - if use_experimental_api: - logging.warning('Passing use_experimental_api=True is deprecated. The ' - 'argument will be removed in the future.') - assert use_experimental_api or loss_scale is None, ( - 'loss_scale cannot be specified if use_experimental_api is False. If the ' - 'non-experimental API is used, specify the loss scaling configuration ' - 'when creating the LossScaleOptimizer instead.' - ) - if dtype == tf.float16: - # TODO(b/171936854): Move all methods to non-experimental api. - if use_experimental_api: - policy = tf.keras.mixed_precision.experimental.Policy( - 'mixed_float16', loss_scale=loss_scale) - tf.keras.mixed_precision.experimental.set_policy(policy) - else: - tf.keras.mixed_precision.set_global_policy('mixed_float16') - elif dtype == tf.bfloat16: - if use_experimental_api: - tf.keras.mixed_precision.experimental.set_policy('mixed_bfloat16') - else: - tf.keras.mixed_precision.set_global_policy('mixed_bfloat16') - elif dtype == tf.float32: - if use_experimental_api: - tf.keras.mixed_precision.experimental.set_policy('float32') - else: - tf.keras.mixed_precision.set_global_policy('float32') - else: - raise ValueError('Unexpected dtype: %s' % dtype) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/policies.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/policies.py deleted file mode 100644 index 879a3a61f9b3bc54d192c508a3f880a7aff58f21..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/policies.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
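A minimal sketch of how the two helpers above compose for a float16 GPU setup (the commented-out NPU variant takes the same shape); this is illustrative usage, not a prescribed configuration:

    import tensorflow as tf
    from official.modeling import performance

    # Set the global policy first; the model must then be built under it.
    performance.set_mixed_precision_policy(tf.float16)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    # Wrap the optimizer with dynamic loss scaling for float16 training.
    optimizer = performance.configure_optimizer(
        optimizer, use_float16=True, loss_scale='dynamic')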
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Base ProgressivePolicy definition for progressive training. - -To write a progressive model, subclass ProgressivePolicy and implement its -abstract methods to handle each training stage. -""" - -import abc -from typing import Any, Mapping -from absl import logging -import dataclasses -import six -import tensorflow as tf -from official.modeling.hyperparams import base_config -from official.modeling.progressive import utils - - -@dataclasses.dataclass -class ProgressiveConfig(base_config.Config): - pass - - -@six.add_metaclass(abc.ABCMeta) -class ProgressivePolicy: - """The APIs for handling progressive training stages. - - Attributes: - cur_model: The model for the current progressive training stage. - cur_train_dataset: The train dataset function for the current stage. - cur_eval_dataset: The eval dataset function for the current stage. - cur_optimizer: The optimizer for the current stage. - cur_checkpoint_items: Items to be saved in and restored from checkpoints, - for the progressive trainer. - is_last_stage: Whether it is currently in the last stage. - - Interfaces: - is_stage_advancing: Returns if progressive training is advancing to the - next stage. - update_pt_stage: Update progressive training stage. - """ - - def __init__(self): - """Initialize stage policy.""" - self._cur_train_dataset = None - self._cur_eval_dataset = None - self._volatiles = utils.VolatileTrackable(optimizer=None, model=None) - - stage_id = 0 - self._stage_id = tf.Variable( - stage_id, - trainable=False, - dtype=tf.int64, - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, - shape=[]) - self._volatiles.reassign_trackable( - optimizer=self.get_optimizer(stage_id), - model=self.get_model(stage_id, old_model=None)) - - def compute_stage_id(self, global_step: int) -> int: - for stage_id in range(self.num_stages()): - global_step -= self.num_steps(stage_id) - if global_step < 0: - return stage_id - logging.error('Global step %d found no matching progressive stages. ' - 'Default to the last stage.', global_step) - return self.num_stages() - 1 - - @abc.abstractmethod - def num_stages(self) -> int: - """Return the total number of progressive stages.""" - pass - - @abc.abstractmethod - def num_steps(self, stage_id: int) -> int: - """Return the total number of steps in this stage.""" - pass - - @abc.abstractmethod - def get_model(self, - stage_id: int, - old_model: tf.keras.Model = None) -> tf.keras.Model: - """Return model for this stage. 
For initialization, `old_model` = None.""" - pass - - @abc.abstractmethod - def get_optimizer(self, stage_id: int) -> tf.keras.optimizers.Optimizer: - """Return optimizer for this stage.""" - pass - - @abc.abstractmethod - def get_train_dataset(self, stage_id: int) -> tf.data.Dataset: - """Return training Dataset for this stage.""" - pass - - @abc.abstractmethod - def get_eval_dataset(self, stage_id: int) -> tf.data.Dataset: - """Return evaluation Dataset for this stage.""" - pass - - @property - def cur_model(self) -> tf.keras.Model: - return self._volatiles.model - - @property - def cur_train_dataset(self) -> tf.data.Dataset: - if self._cur_train_dataset is None: - self._cur_train_dataset = self.get_train_dataset(self._stage_id.numpy()) - return self._cur_train_dataset - - @property - def cur_eval_dataset(self) -> tf.data.Dataset: - if self._cur_eval_dataset is None: - self._cur_eval_dataset = self.get_eval_dataset(self._stage_id.numpy()) - return self._cur_eval_dataset - - @property - def cur_optimizer(self) -> tf.keras.optimizers.Optimizer: - return self._volatiles.optimizer - - @property - def is_last_stage(self) -> bool: - stage_id = self._stage_id.numpy() - return stage_id >= self.num_stages() - 1 - - @property - def cur_checkpoint_items(self) -> Mapping[str, Any]: - return dict(stage_id=self._stage_id, volatiles=self._volatiles) - - def is_stage_advancing(self, global_step: int) -> bool: - old_stage_id = self._stage_id.numpy() - new_stage_id = self.compute_stage_id(global_step) - return old_stage_id != new_stage_id - - def update_pt_stage(self, global_step: int, pass_old_model=True) -> None: - """Update progressive training internal status. - - Call this after a training loop ends. - - Args: - global_step: an integer scalar of the current global step. - pass_old_model: whether to pass the old_model to the get_model() - function. This is set to False if the old_model is irrelevant (e.g., - just a default model from stage 0). - """ - old_stage_id = self._stage_id.numpy() - new_stage_id = self.compute_stage_id(global_step) - logging.info('Switching stage from %d to %d', old_stage_id, new_stage_id) - - # Update stage id. - self._stage_id.assign(new_stage_id) - # Update dataset function. - self._cur_train_dataset = None - self._cur_eval_dataset = None - - # Update optimizer and model. - new_optimizer = self.get_optimizer(new_stage_id) - self._volatiles.reassign_trackable(optimizer=new_optimizer) - new_model = self.get_model( - new_stage_id, old_model=self.cur_model if pass_old_model else None) - self._volatiles.reassign_trackable(model=new_model) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train.py deleted file mode 100644 index 0419792db81c2ebd4a0ad127bb56113ab3897a4d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
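compute_stage_id above walks the per-stage step budgets until the global step is exhausted. A pure-Python mirror of that bookkeeping, using the hypothetical stage lengths (2 and 4 steps) that the ProgMockTask test task further below returns:

    def compute_stage_id(global_step, steps_per_stage):
        # Subtract each stage's budget; the stage where the step runs out wins.
        for stage_id, num_steps in enumerate(steps_per_stage):
            global_step -= num_steps
            if global_step < 0:
                return stage_id
        return len(steps_per_stage) - 1  # past the end: default to last stage

    assert [compute_stage_id(s, [2, 4]) for s in range(7)] == [0, 0, 1, 1, 1, 1, 1]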
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""TFM binary for the progressive trainer.""" - -from absl import app -from absl import flags -import gin - -from official.common import distribute_utils -# pylint: disable=unused-import -from official.common import registry_imports -# pylint: enable=unused-import -from official.common import flags as tfm_flags -from official.core import task_factory -from official.core import train_utils -from official.modeling import performance -from official.modeling.progressive import train_lib - -FLAGS = flags.FLAGS - - -def main(_): - gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) - params = train_utils.parse_configuration(FLAGS) - model_dir = FLAGS.model_dir - if 'train' in FLAGS.mode: - # Pure eval modes do not output yaml files. Otherwise continuous eval job - # may race against the train job for writing the same file. - train_utils.serialize_config(params, model_dir) - - # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' - # can have significant impact on model speeds by utilizing float16 in case of - # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when - # dtype is float16 - if params.runtime.mixed_precision_dtype: - performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) - distribution_strategy = distribute_utils.get_distribution_strategy( - distribution_strategy=params.runtime.distribution_strategy, - all_reduce_alg=params.runtime.all_reduce_alg, - num_gpus=params.runtime.num_gpus, - tpu_address=params.runtime.tpu, - **params.runtime.model_parallelism()) - with distribution_strategy.scope(): - task = task_factory.get_task(params.task, logging_dir=model_dir) - - train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode=FLAGS.mode, - params=params, - model_dir=model_dir) - - train_utils.save_gin_config(FLAGS.mode, model_dir) - -if __name__ == '__main__': - tfm_flags.define_flags() - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train_lib.py deleted file mode 100644 index 7334aa6be22b9aece7b6c1bd5e93657e5e39219d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train_lib.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""TFM progressive training driver library. - -Compared to the common training driver, the only difference is that we use -prog_trainer_lib.ProgressiveTrainer instead of the base trainer. -""" - -# pytype: disable=attribute-error -import os -from typing import Any, Mapping, Tuple - -# Import libraries -from absl import logging -import orbit -import tensorflow as tf -from official.core import base_task -from official.core import config_definitions -from official.core import train_lib as base_train_lib -from official.modeling.progressive import trainer as prog_trainer_lib - - -def run_experiment(distribution_strategy: tf.distribute.Strategy, - task: base_task.Task, - mode: str, - params: config_definitions.ExperimentConfig, - model_dir: str, - run_post_eval: bool = False, - save_summary: bool = True) \ --> Tuple[tf.keras.Model, Mapping[str, Any]]: - """Runs train/eval configured by the experiment params. 
-
-  Args:
-    distribution_strategy: A distribution strategy.
-    task: A Task instance.
-    mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval'
-      or 'continuous_eval'.
-    params: ExperimentConfig instance.
-    model_dir: A 'str', a path to store model checkpoints and summaries.
-    run_post_eval: Whether to run evaluation once after training; if True, the
-      metrics logs are returned.
-    save_summary: Whether to save train and validation summaries.
-
-  Returns:
-    A 2-tuple of (model, eval_logs).
-      model: `tf.keras.Model` instance.
-      eval_logs: the eval metrics logs when run_post_eval is set to True;
-        otherwise, an empty dict {}.
-  """
-
-  with distribution_strategy.scope():
-    logging.info('Running progressive trainer.')
-    trainer = prog_trainer_lib.ProgressiveTrainer(
-        params, task, ckpt_dir=model_dir,
-        train='train' in mode,
-        evaluate=('eval' in mode) or run_post_eval,
-        checkpoint_exporter=base_train_lib.maybe_create_best_ckpt_exporter(
-            params, model_dir))
-
-  if trainer.checkpoint:
-    checkpoint_manager = tf.train.CheckpointManager(
-        trainer.checkpoint,
-        directory=model_dir,
-        max_to_keep=params.trainer.max_to_keep,
-        step_counter=trainer.global_step,
-        checkpoint_interval=params.trainer.checkpoint_interval,
-        init_fn=trainer.initialize)
-  else:
-    checkpoint_manager = None
-
-  controller = orbit.Controller(
-      strategy=distribution_strategy,
-      trainer=trainer if 'train' in mode else None,
-      evaluator=trainer,
-      global_step=trainer.global_step,
-      steps_per_loop=params.trainer.steps_per_loop,
-      checkpoint_manager=checkpoint_manager,
-      summary_dir=os.path.join(model_dir, 'train') if save_summary else None,
-      eval_summary_dir=os.path.join(model_dir, 'validation') if
-      save_summary else None,
-      summary_interval=params.trainer.summary_interval if
-      save_summary else None)
-
-  logging.info('Starts to execute mode: %s', mode)
-  with distribution_strategy.scope():
-    if mode == 'train':
-      controller.train(steps=params.trainer.train_steps)
-    elif mode == 'train_and_eval':
-      controller.train_and_evaluate(
-          train_steps=params.trainer.train_steps,
-          eval_steps=params.trainer.validation_steps,
-          eval_interval=params.trainer.validation_interval)
-    elif mode == 'eval':
-      controller.evaluate(steps=params.trainer.validation_steps)
-    elif mode == 'continuous_eval':
-
-      def timeout_fn():
-        # Stop continuous evaluation once training has reached train_steps.
-        return trainer.global_step.numpy() >= params.trainer.train_steps
-
-      controller.evaluate_continuously(
-          steps=params.trainer.validation_steps,
-          timeout=params.trainer.continuous_eval_timeout,
-          timeout_fn=timeout_fn)
-    else:
-      raise NotImplementedError('The mode is not implemented: %s' % mode)
-
-  if run_post_eval:
-    with distribution_strategy.scope():
-      return trainer.model, trainer.evaluate(
-          tf.convert_to_tensor(params.trainer.validation_steps))
-  else:
-    return trainer.model, {}
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train_lib_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train_lib_test.py
deleted file mode 100644
index f69a862d028ce31e6536583d26d1ef0bd4effdcc..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/train_lib_test.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for the progressive train_lib.""" -import os - -from absl import flags -from absl.testing import parameterized -import dataclasses -import orbit -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.common import flags as tfm_flags -# pylint: disable=unused-import -from official.common import registry_imports -# pylint: enable=unused-import -from official.core import config_definitions as cfg -from official.core import task_factory -from official.modeling import optimization -from official.modeling.hyperparams import params_dict -from official.modeling.progressive import policies -from official.modeling.progressive import train_lib -from official.modeling.progressive import trainer as prog_trainer_lib -from official.utils.testing import mock_task - -FLAGS = flags.FLAGS - -tfm_flags.define_flags() - - -@dataclasses.dataclass -class ProgTaskConfig(cfg.TaskConfig): - pass - - -@task_factory.register_task_cls(ProgTaskConfig) -class ProgMockTask(policies.ProgressivePolicy, mock_task.MockTask): - """Progressive task for testing.""" - - def __init__(self, params: cfg.TaskConfig, logging_dir: str = None): - mock_task.MockTask.__init__( - self, params=params, logging_dir=logging_dir) - policies.ProgressivePolicy.__init__(self) - - def num_stages(self): - return 2 - - def num_steps(self, stage_id): - return 2 if stage_id == 0 else 4 - - def get_model(self, stage_id, old_model=None): - del stage_id, old_model - return self.build_model() - - def get_optimizer(self, stage_id): - """Build optimizer for each stage.""" - params = optimization.OptimizationConfig({ - 'optimizer': { - 'type': 'adamw', - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': { - 'initial_learning_rate': 0.01, - 'end_learning_rate': 0.0, - 'power': 1.0, - 'decay_steps': 10, - }, - }, - 'warmup': { - 'polynomial': { - 'power': 1, - 'warmup_steps': 2, - }, - 'type': 'polynomial', - } - }) - opt_factory = optimization.OptimizerFactory(params) - optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) - - return optimizer - - def get_train_dataset(self, stage_id): - del stage_id - strategy = tf.distribute.get_strategy() - return orbit.utils.make_distributed_dataset( - strategy, self.build_inputs, None) - - def get_eval_dataset(self, stage_id): - del stage_id - strategy = tf.distribute.get_strategy() - 
return orbit.utils.make_distributed_dataset( - strategy, self.build_inputs, None) - - -class TrainTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super(TrainTest, self).setUp() - self._test_config = { - 'trainer': { - 'checkpoint_interval': 10, - 'steps_per_loop': 10, - 'summary_interval': 10, - 'train_steps': 10, - 'validation_steps': 5, - 'validation_interval': 10, - 'continuous_eval_timeout': 1, - 'optimizer_config': { - 'optimizer': { - 'type': 'sgd', - }, - 'learning_rate': { - 'type': 'constant' - } - } - }, - } - - @combinations.generate( - combinations.combine( - distribution_strategy=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ], - flag_mode=['train', 'eval', 'train_and_eval'], - run_post_eval=[True, False])) - def test_end_to_end(self, distribution_strategy, flag_mode, run_post_eval): - model_dir = self.get_temp_dir() - experiment_config = cfg.ExperimentConfig( - trainer=prog_trainer_lib.ProgressiveTrainerConfig(), - task=ProgTaskConfig()) - experiment_config = params_dict.override_params_dict( - experiment_config, self._test_config, is_strict=False) - - with distribution_strategy.scope(): - task = task_factory.get_task(experiment_config.task, - logging_dir=model_dir) - - _, logs = train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode=flag_mode, - params=experiment_config, - model_dir=model_dir, - run_post_eval=run_post_eval) - - if run_post_eval: - self.assertNotEmpty(logs) - else: - self.assertEmpty(logs) - - if flag_mode == 'eval': - return - self.assertNotEmpty( - tf.io.gfile.glob(os.path.join(model_dir, 'checkpoint'))) - # Tests continuous evaluation. - _, logs = train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode='continuous_eval', - params=experiment_config, - model_dir=model_dir, - run_post_eval=run_post_eval) - print(logs) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/trainer.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/trainer.py deleted file mode 100644 index 13bece1fdc2dc5070107b5d9a9e7c59074ed5c79..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/trainer.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Progressive Trainer implementation.
-
-The trainer implements the Orbit `StandardTrainable` and
-`StandardEvaluable` interfaces. Trainers inside this project should be
-interchangeable and independent of model architectures and tasks.
-"""
-import os
-from typing import Any, Optional
-
-# Import libraries
-from absl import logging
-
-import dataclasses
-import gin
-import orbit
-import tensorflow as tf
-from official.core import base_task
-from official.core import base_trainer as trainer_lib
-from official.core import config_definitions
-from official.modeling.progressive import policies
-from official.modeling.progressive import utils
-
-ExperimentConfig = config_definitions.ExperimentConfig
-
-
-@dataclasses.dataclass
-class ProgressiveTrainerConfig(config_definitions.TrainerConfig):
-  """Configuration for progressive trainer.
-
-  Attributes:
-    progressive: A task-specific config. Users can subclass ProgressiveConfig
-      and define any task-specific settings in their subclass.
-    export_checkpoint: A bool. Whether to export checkpoints in a
-      non-progressive manner (without the volatiles wrapper) such that your
-      down-stream tasks can load checkpoints from a progressive trainer as if
-      they were regular checkpoints.
-    export_checkpoint_interval: An int. The number of steps between exporting
-      checkpoints. If None (by default), will use the same value as
-      TrainerConfig.checkpoint_interval.
-    export_max_to_keep: An int. The maximum number of exported checkpoints to
-      keep. If None (by default), will use the same value as
-      TrainerConfig.max_to_keep.
-    export_only_final_stage_ckpt: A bool. Whether to export checkpoints only
-      during the final progressive training stage, i.e., whether to skip
-      exporting small, partial models. In many cases, it is not meaningful to
-      fine-tune a small, partial model in down-stream tasks.
-  """
-  progressive: Optional[policies.ProgressiveConfig] = None
-  export_checkpoint: bool = True
-  export_checkpoint_interval: Optional[int] = None
-  export_max_to_keep: Optional[int] = None
-  export_only_final_stage_ckpt: bool = True
-
-
-@gin.configurable
-class ProgressiveTrainer(trainer_lib.Trainer):
-  """Implements the progressive trainer shared for TensorFlow models."""
-
-  def __init__(
-      self,
-      config: ExperimentConfig,
-      prog_task: base_task.Task,  # also implements ProgressivePolicy.
-      ckpt_dir: str = '',
-      train: bool = True,
-      evaluate: bool = True,
-      checkpoint_exporter: Any = None):
-    """Initializes the common trainer for TensorFlow models.
-
-    Args:
-      config: An `ExperimentConfig` instance specifying experiment config.
-      prog_task: An instance that implements both policies.ProgressivePolicy
-        and base_task.Task.
-      ckpt_dir: Checkpoint directory.
-      train: bool, whether or not this trainer will be used for training.
-        Defaults to True.
-      evaluate: bool, whether or not this trainer will be used for evaluation.
-        Defaults to True.
-      checkpoint_exporter: an object that has the `maybe_export_checkpoint`
-        interface.
-    """
-    # Gets the current distribution strategy. If not inside any strategy scope,
-    # it gets a single-replica no-op strategy.
-    self._strategy = tf.distribute.get_strategy()
-    self._config = config
-    self._runtime_options = trainer_lib.get_runtime_options(config)
-    self._task = prog_task
-
-    # Directory for non-progressive checkpoints.
-    self._export_ckpt_dir = os.path.join(ckpt_dir, 'exported_ckpts')
-    tf.io.gfile.makedirs(self._export_ckpt_dir)
-    self._export_ckpt_manager = None
-
-    # Receives other checkpoint exporters, e.g., the best checkpoint exporter.
-    # TODO(lehou): unify the checkpoint exporting logic, although the default
-    # setting does not use checkpoint_exporter.
-    self._checkpoint_exporter = checkpoint_exporter
-
-    self._global_step = orbit.utils.create_global_step()
-
-    self._checkpoint = utils.CheckpointWithHooks(
-        before_load_hook=self._update_pt_stage_from_ckpt,
-        global_step=self.global_step,
-        **self._task.cur_checkpoint_items)
-
-    self._train_loss = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)
-    self._validation_loss = tf.keras.metrics.Mean(
-        'validation_loss', dtype=tf.float32)
-    self._train_metrics = self.task.build_metrics(
-        training=True) + self.model.metrics
-    self._validation_metrics = self.task.build_metrics(
-        training=False) + self.model.metrics
-
-    if train:
-      orbit.StandardTrainer.__init__(
-          self,
-          None,  # Manage train_dataset by ourselves, not by StandardTrainer.
-          options=orbit.StandardTrainerOptions(
-              use_tf_while_loop=config.trainer.train_tf_while_loop,
-              use_tf_function=config.trainer.train_tf_function))
-
-    if evaluate:
-      orbit.StandardEvaluator.__init__(
-          self,
-          None,  # Manage eval_dataset by ourselves, not by StandardEvaluator.
-          options=orbit.StandardEvaluatorOptions(
-              use_tf_function=config.trainer.eval_tf_function))
-
-  @property
-  def model(self):
-    return self._task.cur_model
-
-  @property
-  def optimizer(self):
-    return self._task.cur_optimizer
-
-  # override
-  @property
-  def train_dataset(self):
-    """Overriding StandardTrainer.train_dataset."""
-    return self._task.cur_train_dataset
-
-  # override
-  @train_dataset.setter
-  def train_dataset(self, _):
-    raise SyntaxError('Please do not set train_dataset. Progressive training '
-                      'relies on the progressive policy to manage the train '
-                      'dataset.')
-
-  # override
-  @property
-  def eval_dataset(self):
-    """Overriding StandardEvaluator.eval_dataset."""
-    return self._task.cur_eval_dataset
-
-  # override
-  @eval_dataset.setter
-  def eval_dataset(self, _):
-    raise SyntaxError('Please do not set eval_dataset. Progressive training '
-                      'relies on the progressive policy to manage the eval '
-                      'dataset.')
-
-  def train_loop_end(self):
-    """See base class."""
-    logs = {}
-    for metric in self.train_metrics + [self.train_loss]:
-      logs[metric.name] = metric.result()
-      metric.reset_states()
-    if callable(self.optimizer.learning_rate):
-      logs['learning_rate'] = self.optimizer.learning_rate(
-          self.optimizer.iterations)
-    else:
-      logs['learning_rate'] = self.optimizer.learning_rate
-
-    self._maybe_export_non_progressive_checkpoint(self._export_ckpt_dir)
-    if self._task.is_stage_advancing(self.global_step.numpy()):
-      old_train_dataset = self.train_dataset
-
-      # Update progressive properties.
-      self._task.update_pt_stage(self.global_step.numpy())
-
-      # Setting `self._train_loop_fn` and `self._eval_loop_fn` to None will
-      # rebuild the train and eval functions with the updated model.
-      self._train_loop_fn = None
-      self._eval_loop_fn = None
-
-      if self.train_dataset != old_train_dataset:
-        # Setting `self._train_iter` to None will rebuild the dataset iterator.
-        self._train_iter = None
-
-      # Setting `self._export_ckpt_manager` to None will rebuild the checkpoint
-      # for exporting.
-      self._export_ckpt_manager = None
-
-    return logs
-
-  def _update_pt_stage_from_ckpt(self, ckpt_file):
-    """Updates stage properties based on the global_step in a ckpt file.
-
-    Before loading variables from a checkpoint file, we need to go to the
-    correct stage and build the corresponding model and optimizer, to make
-    sure that we restore the variables of the right model and optimizer.
-
-    Args:
-      ckpt_file: Checkpoint file that will be restored/read from.
-    """
-    if not ckpt_file:
-      return
-    ckpt = tf.train.Checkpoint(global_step=self.global_step)
-    ckpt.read(ckpt_file).expect_partial().assert_existing_objects_matched()
-
-    if self._task.is_stage_advancing(self.global_step.numpy()):
-      old_train_dataset = self.train_dataset
-
-      # Update progressive properties.
-      self._task.update_pt_stage(self.global_step.numpy(), pass_old_model=False)
-
-      # Setting `self._train_loop_fn` and `self._eval_loop_fn` to None will
-      # rebuild the train and eval functions with the updated model.
-      self._train_loop_fn = None
-      self._eval_loop_fn = None
-
-      if self.train_dataset != old_train_dataset:
-        # Setting `self._train_iter` to None will rebuild the dataset iterator.
-        self._train_iter = None
-
-      # Setting `self._export_ckpt_manager` to None will rebuild the checkpoint
-      # for exporting.
-      self._export_ckpt_manager = None
-
-  def _maybe_export_non_progressive_checkpoint(self, export_ckpt_dir):
-    """Exports checkpoints in non-progressive format.
-
-    This basically removes the wrapping of self._task.cur_checkpoint_items
-    -- it just saves the model, optimizer, etc., directly. The purpose is to
-    let downstream tasks use these checkpoints.
-
-    Args:
-      export_ckpt_dir: A str, the folder of exported checkpoints.
-    """
-    if not self.config.trainer.export_checkpoint:
-      logging.info('Not exporting checkpoints.')
-      return
-    if not self._task.is_last_stage and (
-        self.config.trainer.export_only_final_stage_ckpt):
-      logging.info('Not exporting checkpoints until the last stage.')
-      return
-
-    if self._export_ckpt_manager is None:
-      # Create the checkpoint object only now, to make sure we use
-      # progressive_policy.cur_model and progressive_policy.cur_optimizer of
-      # the current stage.
- if hasattr(self.model, 'checkpoint_items'): - checkpoint_items = self.model.checkpoint_items - else: - checkpoint_items = {} - checkpoint = tf.train.Checkpoint( - global_step=self.global_step, - model=self.model, - optimizer=self.optimizer, - **checkpoint_items) - - max_to_keep = self.config.trainer.export_max_to_keep or ( - self.config.trainer.max_to_keep) - checkpoint_interval = self.config.trainer.export_checkpoint_interval or ( - self.config.trainer.checkpoint_interval) - self._export_ckpt_manager = tf.train.CheckpointManager( - checkpoint, - directory=export_ckpt_dir, - checkpoint_name='ckpt', - step_counter=self.global_step, - max_to_keep=max_to_keep, - checkpoint_interval=checkpoint_interval, - ) - - checkpoint_path = self._export_ckpt_manager.save( - checkpoint_number=self.global_step.numpy(), - check_interval=True) - if checkpoint_path: - logging.info('Checkpoints exported: %s.', checkpoint_path) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/trainer_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/trainer_test.py deleted file mode 100644 index 7969caa0d1fe371163d40c94d9cd7334a23cb49c..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/trainer_test.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for the progressive trainer.""" -# pylint: disable=g-direct-tensorflow-import -import os - -from absl.testing import parameterized -import orbit -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.core import config_definitions as cfg -from official.modeling import optimization -from official.modeling.progressive import policies -from official.modeling.progressive import trainer as trainer_lib -from official.nlp.configs import bert -from official.utils.testing import mock_task - - -def all_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - ],) - - -def get_exp_config(): - return cfg.ExperimentConfig( - task=cfg.TaskConfig( - model=bert.PretrainerConfig()), - trainer=trainer_lib.ProgressiveTrainerConfig( - export_checkpoint=True, - export_checkpoint_interval=1, - export_only_final_stage_ckpt=False)) - - -class TestPolicy(policies.ProgressivePolicy, mock_task.MockTask): - """Just for testing purposes.""" - - def __init__(self, strategy, task_config, change_train_dataset=True): - self._strategy = strategy - self._change_train_dataset = change_train_dataset - self._my_train_dataset = None - mock_task.MockTask.__init__(self, params=task_config, logging_dir=None) - policies.ProgressivePolicy.__init__(self) - - def num_stages(self) -> int: - return 2 - - def num_steps(self, stage_id: int) -> int: - return 2 if stage_id == 0 else 4 - - def get_model(self, - stage_id: int, - old_model: tf.keras.Model) -> tf.keras.Model: - del stage_id, old_model - return self.build_model() - - def get_optimizer(self, stage_id: int) -> tf.keras.optimizers.Optimizer: - optimizer_type = 'sgd' if stage_id == 0 else 'adamw' - optimizer_config = cfg.OptimizationConfig({ - 'optimizer': {'type': optimizer_type}, - 'learning_rate': {'type': 'constant'}}) - opt_factory = optimization.OptimizerFactory(optimizer_config) - return opt_factory.build_optimizer(opt_factory.build_learning_rate()) - - def get_train_dataset(self, stage_id: int) -> tf.data.Dataset: - if not self._change_train_dataset and self._my_train_dataset: - return self._my_train_dataset - if self._strategy: - self._my_train_dataset = orbit.utils.make_distributed_dataset( - self._strategy, - self._build_inputs, - stage_id) - else: - self._my_train_dataset = self._build_inputs(stage_id) - return self._my_train_dataset - - def get_eval_dataset(self, stage_id: int) -> tf.data.Dataset: - if self._strategy: - return orbit.utils.make_distributed_dataset( - self._strategy, - self._build_inputs, - stage_id) - return self._build_inputs(stage_id) - - def _build_inputs(self, stage_id): - def dummy_data(_): - batch_size = 2 if stage_id == 0 else 1 - x = tf.zeros(shape=(batch_size, 2), dtype=tf.float32) - label = tf.zeros(shape=(batch_size, 1), dtype=tf.float32) - return x, label - dataset = tf.data.Dataset.range(1) - dataset = dataset.repeat() - return dataset.map( - dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) - - -class TrainerTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super(TrainerTest, self).setUp() - self._config = get_exp_config() - - def create_test_trainer(self, distribution, model_dir, change_train_dataset): - trainer = trainer_lib.ProgressiveTrainer( - self._config, - prog_task=TestPolicy( - distribution, self._config.task, 
change_train_dataset), - ckpt_dir=model_dir) - return trainer - - @combinations.generate(all_strategy_combinations()) - def test_checkpointing(self, distribution): - model_dir = self.get_temp_dir() - ckpt_file = os.path.join(model_dir, 'ckpt') - with distribution.scope(): - trainer = self.create_test_trainer(distribution, model_dir, True) - self.assertFalse(trainer._task.is_last_stage) - trainer.train(tf.convert_to_tensor(4, dtype=tf.int32)) - self.assertTrue(trainer._task.is_last_stage) - trainer.checkpoint.save(ckpt_file) - - trainer = self.create_test_trainer(distribution, model_dir, True) - self.assertFalse(trainer._task.is_last_stage) - trainer.checkpoint.restore(ckpt_file + '-1') - self.assertTrue(trainer._task.is_last_stage) - - @combinations.generate(all_strategy_combinations()) - def test_train_dataset(self, distribution): - model_dir = self.get_temp_dir() - with distribution.scope(): - trainer = self.create_test_trainer(distribution, model_dir, True) - # Using dataset of stage == 0 - train_iter = tf.nest.map_structure(iter, trainer.train_dataset) - train_data = train_iter.next()[0] - if distribution.num_replicas_in_sync > 1: - train_data = train_data.values[0] - self.assertEqual(train_data.shape[0], 2) - - trainer.train(tf.convert_to_tensor(4, dtype=tf.int32)) - # Using dataset of stage == 1 - train_iter = tf.nest.map_structure(iter, trainer.train_dataset) - train_data = train_iter.next()[0] - if distribution.num_replicas_in_sync > 1: - train_data = train_data.values[0] - self.assertEqual(train_data.shape[0], 1) - - with self.assertRaises(SyntaxError): - trainer.train_dataset = None - - @combinations.generate(all_strategy_combinations()) - def test_train_dataset_no_switch(self, distribution): - model_dir = self.get_temp_dir() - with distribution.scope(): - trainer = self.create_test_trainer(distribution, model_dir, False) - trainer.train(tf.convert_to_tensor(2, dtype=tf.int32)) - # _train_iter is not reset since the dataset is not changed. - self.assertIsNotNone(trainer._train_iter) - with distribution.scope(): - trainer = self.create_test_trainer(distribution, model_dir, True) - trainer.train(tf.convert_to_tensor(2, dtype=tf.int32)) - # _train_iter is reset since the dataset changed. - self.assertIsNone(trainer._train_iter) - - -class TrainerWithMaskedLMTaskTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super(TrainerWithMaskedLMTaskTest, self).setUp() - self._config = get_exp_config() - - def create_test_trainer(self, distribution): - trainer = trainer_lib.ProgressiveTrainer( - self._config, - prog_task=TestPolicy(distribution, self._config.task), - ckpt_dir=self.get_temp_dir()) - return trainer - - @combinations.generate(all_strategy_combinations()) - def test_trainer_train(self, distribution): - with distribution.scope(): - trainer = self.create_test_trainer(distribution) - logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertIn('training_loss', logs) - self.assertIn('learning_rate', logs) - - @combinations.generate(all_strategy_combinations()) - def test_trainer_validate(self, distribution): - with distribution.scope(): - trainer = self.create_test_trainer(distribution) - logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertIn('validation_loss', logs) - self.assertEqual(logs['counter'], 5. 
* distribution.num_replicas_in_sync) - - @combinations.generate( - combinations.combine( - mixed_precision_dtype=['float32', 'bfloat16', 'float16'], - loss_scale=[None, 'dynamic', 128, 256], - )) - def test_configure_optimizer(self, mixed_precision_dtype, loss_scale): - config = cfg.ExperimentConfig( - task=cfg.TaskConfig( - model=bert.PretrainerConfig()), - runtime=cfg.RuntimeConfig( - mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale), - trainer=trainer_lib.ProgressiveTrainerConfig( - export_checkpoint=True, - export_checkpoint_interval=1, - export_only_final_stage_ckpt=False)) - task = TestPolicy(None, config.task) - trainer = trainer_lib.ProgressiveTrainer(config, task, self.get_temp_dir()) - if mixed_precision_dtype != 'float16': - self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD) - elif mixed_precision_dtype == 'float16' and loss_scale is None: - self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD) - - metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) - self.assertIn('training_loss', metrics) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/utils.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/utils.py deleted file mode 100644 index aa2c5523509011ee0fb7fa74cb870a97f22e88fb..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/progressive/utils.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Util classes and functions.""" - -from absl import logging -import tensorflow as tf - -# pylint: disable=g-direct-tensorflow-import -from tensorflow.python.training.tracking import tracking - - -class VolatileTrackable(tracking.AutoTrackable): - """A util class to keep Trackables that might change instances.""" - - def __init__(self, **kwargs): - for k, v in kwargs.items(): - setattr(self, k, v) - - def reassign_trackable(self, **kwargs): - for k, v in kwargs.items(): - delattr(self, k) # untrack this object - setattr(self, k, v) # track the new object - - -class CheckpointWithHooks(tf.train.Checkpoint): - """Same as tf.train.Checkpoint but supports hooks. - - In progressive training, use this class instead of tf.train.Checkpoint. - - Since the network architecture changes during progressive training, we need to - prepare something (like switch to the correct architecture) before loading the - checkpoint. This class supports a hook that will be executed before checkpoint - loading. - """ - - def __init__(self, before_load_hook, **kwargs): - self._before_load_hook = before_load_hook - super(CheckpointWithHooks, self).__init__(**kwargs) - - # override - def read(self, save_path, options=None): - self._before_load_hook(save_path) - logging.info('Ran before_load_hook.') - super(CheckpointWithHooks, self).read(save_path=save_path, options=options) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/tf_utils.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/tf_utils.py deleted file mode 100644 index c8ec65d1fda5b7f53581a6770fcb9687eff47252..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/modeling/tf_utils.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Common TF utilities.""" - -import six -import tensorflow as tf - -from tensorflow.python.util import deprecation -from official.modeling import activations - - -@deprecation.deprecated( - None, - "tf.keras.layers.Layer supports multiple positional args and kwargs as " - "input tensors. pack/unpack inputs to override __call__ is no longer " - "needed.") -def pack_inputs(inputs): - """Pack a list of `inputs` tensors to a tuple. 
- - Args: - inputs: a list of tensors. - - Returns: - a tuple of tensors. if any input is None, replace it with a special constant - tensor. - """ - inputs = tf.nest.flatten(inputs) - outputs = [] - for x in inputs: - if x is None: - outputs.append(tf.constant(0, shape=[], dtype=tf.int32)) - else: - outputs.append(x) - return tuple(outputs) - - -@deprecation.deprecated( - None, - "tf.keras.layers.Layer supports multiple positional args and kwargs as " - "input tensors. pack/unpack inputs to override __call__ is no longer " - "needed.") -def unpack_inputs(inputs): - """unpack a tuple of `inputs` tensors to a tuple. - - Args: - inputs: a list of tensors. - - Returns: - a tuple of tensors. if any input is a special constant tensor, replace it - with None. - """ - inputs = tf.nest.flatten(inputs) - outputs = [] - for x in inputs: - if is_special_none_tensor(x): - outputs.append(None) - else: - outputs.append(x) - x = tuple(outputs) - - # To trick the very pointless 'unbalanced-tuple-unpacking' pylint check - # from triggering. - if len(x) == 1: - return x[0] - return tuple(outputs) - - -def is_special_none_tensor(tensor): - """Checks if a tensor is a special None Tensor.""" - return tensor.shape.ndims == 0 and tensor.dtype == tf.int32 - - -def get_activation(identifier, use_keras_layer=False): - """Maps a identifier to a Python function, e.g., "relu" => `tf.nn.relu`. - - It checks string first and if it is one of customized activation not in TF, - the corresponding activation will be returned. For non-customized activation - names and callable identifiers, always fallback to tf.keras.activations.get. - - Prefers using keras layers when use_keras_layer=True. Now it only supports - 'relu', 'linear', 'identity', 'swish'. - - Args: - identifier: String name of the activation function or callable. - use_keras_layer: If True, use keras layer if identifier is allow-listed. - - Returns: - A Python function corresponding to the activation function or a keras - activation layer when use_keras_layer=True. - """ - if isinstance(identifier, six.string_types): - identifier = str(identifier).lower() - if use_keras_layer: - keras_layer_allowlist = { - "relu": "relu", - "linear": "linear", - "identity": "linear", - "swish": "swish", - "relu6": tf.nn.relu6, - } - if identifier in keras_layer_allowlist: - return tf.keras.layers.Activation(keras_layer_allowlist[identifier]) - name_to_fn = { - "gelu": activations.gelu, - "simple_swish": activations.simple_swish, - "hard_swish": activations.hard_swish, - "relu6": activations.relu6, - "hard_sigmoid": activations.hard_sigmoid, - "identity": activations.identity, - } - if identifier in name_to_fn: - return tf.keras.activations.get(name_to_fn[identifier]) - return tf.keras.activations.get(identifier) - - -def get_shape_list(tensor, expected_rank=None, name=None): - """Returns a list of the shape of tensor, preferring static dimensions. - - Args: - tensor: A tf.Tensor object to find the shape of. - expected_rank: (optional) int. The expected rank of `tensor`. If this is - specified and the `tensor` has a different rank, and exception will be - thrown. - name: Optional name of the tensor for the error message. - - Returns: - A list of dimensions of the shape of tensor. All static dimensions will - be returned as python integers, and dynamic dimensions will be returned - as tf.Tensor scalars. 
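-
-
-# Illustrative usage of the helpers in this module (a sketch; not part of the
-# module's public examples):
-#
-#   get_activation("gelu")                        # -> the customized gelu fn
-#   get_activation("relu", use_keras_layer=True)  # -> a Keras Activation layer
-#   get_shape_list(tf.ones([2, 3]), expected_rank=2)  # -> [2, 3] (see below)
-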
- """ - if expected_rank is not None: - assert_rank(tensor, expected_rank, name) - - shape = tensor.shape.as_list() - - non_static_indexes = [] - for (index, dim) in enumerate(shape): - if dim is None: - non_static_indexes.append(index) - - if not non_static_indexes: - return shape - - dyn_shape = tf.shape(tensor) - for index in non_static_indexes: - shape[index] = dyn_shape[index] - return shape - - -def assert_rank(tensor, expected_rank, name=None): - """Raises an exception if the tensor rank is not of the expected rank. - - Args: - tensor: A tf.Tensor to check the rank of. - expected_rank: Python integer or list of integers, expected rank. - name: Optional name of the tensor for the error message. - - Raises: - ValueError: If the expected shape doesn't match the actual shape. - """ - expected_rank_dict = {} - if isinstance(expected_rank, six.integer_types): - expected_rank_dict[expected_rank] = True - else: - for x in expected_rank: - expected_rank_dict[x] = True - - actual_rank = tensor.shape.ndims - if actual_rank not in expected_rank_dict: - raise ValueError( - "For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not " - "equal to the expected tensor rank `%s`" % - (name, actual_rank, str(tensor.shape), str(expected_rank))) - - -def safe_mean(losses): - """Computes a safe mean of the losses. - - Args: - losses: `Tensor` whose elements contain individual loss measurements. - - Returns: - A scalar representing the mean of `losses`. If `num_present` is zero, - then zero is returned. - """ - total = tf.reduce_sum(losses) - num_elements = tf.cast(tf.size(losses), dtype=losses.dtype) - return tf.math.divide_no_nan(total, num_elements) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/README.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/README.md deleted file mode 100644 index dfa047b4ed3f0bea46e4b4db48578bd543f3e984..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# TensorFlow NLP Modelling Toolkit - -This codebase provides a Natrual Language Processing modeling toolkit written in -[TF2](https://www.tensorflow.org/guide/effective_tf2). It allows researchers and -developers to reproduce state-of-the-art model results and train custom models -to experiment new research ideas. - -## Features - -* Reusable and modularized modeling building blocks -* State-of-the-art reproducible -* Easy to customize and extend -* End-to-end training -* Distributed trainable on both GPUs and TPUs - -## Major components - -### Libraries - -We provide modeling library to allow users to train custom models for new -research ideas. Detailed intructions can be found in READMEs in each folder. - -* [modeling/](modeling): modeling library that provides building blocks - (e.g.,Layers, Networks, and Models) that can be assembled into - transformer-based achitectures . -* [data/](data): binaries and utils for input preprocessing, tokenization, - etc. - -### State-of-the-Art models and examples - -We provide SoTA model implementations, pre-trained models, training and -evaluation examples, and command lines. Detail instructions can be found in the -READMEs for specific papers. - -1. [BERT](bert): [BERT: Pre-training of Deep Bidirectional Transformers for - Language Understanding](https://arxiv.org/abs/1810.04805) by Devlin et al., - 2018 -2. 
-
-### State-of-the-Art models and examples
-
-We provide SoTA model implementations, pre-trained models, training and
-evaluation examples, and command lines. Detailed instructions can be found in
-the READMEs for specific papers.
-
-1. [BERT](bert): [BERT: Pre-training of Deep Bidirectional Transformers for
-   Language Understanding](https://arxiv.org/abs/1810.04805) by Devlin et al.,
-   2018
-2. [ALBERT](albert):
-   [A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942)
-   by Lan et al., 2019
-3. [XLNet](xlnet):
-   [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237)
-   by Yang et al., 2019
-4. [Transformer for translation](transformer):
-   [Attention Is All You Need](https://arxiv.org/abs/1706.03762) by Vaswani et
-   al., 2017
-5. [NHNet](nhnet):
-   [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386)
-   by Gu et al., 2020
-
-### Common Training Driver
-
-We provide a single common driver [train.py](train.py) to train the above SoTA
-models on popular tasks; a sketch of a typical invocation follows. Please see
-[docs/train.md](docs/train.md) for more details.
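-
-The experiment name, config file, and paths in this sketch are illustrative
-assumptions; [docs/train.md](docs/train.md) has the authoritative flag list:
-
-```shell
-python3 train.py \
-  --experiment=bert/sentence_prediction \
-  --mode=train_and_eval \
-  --model_dir=/tmp/model_dir \
-  --config_file=path/to/experiment_config.yaml
-```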
-
-### Pre-trained models with checkpoints and TF-Hub
-
-We provide a large collection of baselines and checkpoints for NLP pre-trained
-models. Please see [docs/pretrained_models.md](docs/pretrained_models.md) for
-more details.
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/__init__.py
deleted file mode 100644
index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/__init__.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/README.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/README.md
deleted file mode 100644
index 037ff0b1ff8c6ea22bcf692bb8f786320b7d2d48..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/README.md
+++ /dev/null
@@ -1,395 +0,0 @@
-# BERT (Bidirectional Encoder Representations from Transformers)
-
-**WARNING**: We are in the process of deprecating most of the code in this
-directory. Please see
-[this link](https://github.com/tensorflow/models/blob/master/official/nlp/docs/train.md)
-for the new tutorial and use the new code in `nlp/modeling`. This README is
-still correct for this legacy implementation.
-
-The academic paper which describes BERT in detail and provides full results on
-a number of tasks can be found here: https://arxiv.org/abs/1810.04805.
-
-This repository contains a TensorFlow 2.x implementation of BERT.
-
-## Contents
-  * [Contents](#contents)
-  * [Pre-trained Models](#pre-trained-models)
-    * [Restoring from Checkpoints](#restoring-from-checkpoints)
-  * [Set Up](#set-up)
-  * [Process Datasets](#process-datasets)
-  * [Fine-tuning with BERT](#fine-tuning-with-bert)
-    * [Cloud GPUs and TPUs](#cloud-gpus-and-tpus)
-    * [Sentence and Sentence-pair Classification Tasks](#sentence-and-sentence-pair-classification-tasks)
-    * [SQuAD 1.1](#squad-1.1)
-
-
-## Pre-trained Models
-
-We released both checkpoints and tf.hub modules as the pretrained models for
-fine-tuning. They are TF 2.x compatible and are converted from the checkpoints
-released in the TF 1.x official BERT repository
-[google-research/bert](https://github.com/google-research/bert)
-in order to stay consistent with the BERT paper.
-
-
-### Access to Pretrained Checkpoints
-
-Pretrained checkpoints can be found in the following links:
-
-**Note: We have switched the BERT implementation
-to use Keras functional-style networks in [nlp/modeling](../modeling).
-The new checkpoints are:**
-
-* **[`BERT-Large, Uncased (Whole Word Masking)`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/wwm_uncased_L-24_H-1024_A-16.tar.gz)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Large, Cased (Whole Word Masking)`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/wwm_cased_L-24_H-1024_A-16.tar.gz)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Base, Uncased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12.tar.gz)**:
-  12-layer, 768-hidden, 12-heads, 110M parameters
-* **[`BERT-Large, Uncased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16.tar.gz)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Base, Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/cased_L-12_H-768_A-12.tar.gz)**:
-  12-layer, 768-hidden, 12-heads, 110M parameters
-* **[`BERT-Large, Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/cased_L-24_H-1024_A-16.tar.gz)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Base, Multilingual Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/multi_cased_L-12_H-768_A-12.tar.gz)**:
-  104 languages, 12-layer, 768-hidden, 12-heads, 110M parameters
-
-We recommend hosting checkpoints in Google Cloud Storage buckets when you use
-Cloud GPU/TPU.
-
-### Restoring from Checkpoints
-
-`tf.train.Checkpoint` is used to manage model checkpoints in TF 2. To restore
-weights from provided pre-trained checkpoints, you can use the following code:
-
-```python
-init_checkpoint = 'the pretrained model checkpoint path.'
-model = tf.keras.Model()  # BERT pre-trained model as a feature extractor.
-checkpoint = tf.train.Checkpoint(model=model)
-checkpoint.restore(init_checkpoint)
-```
-
-Checkpoints featuring native serialized Keras models
-(i.e., model.load()/load_weights()) will be available soon.
-
-### Access to Pretrained hub modules
-
-Pretrained tf.hub modules in TF 2.x SavedModel format can be found in the
-following links:
-
-* **[`BERT-Large, Uncased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_uncased_L-24_H-1024_A-16/)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Large, Cased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_cased_L-24_H-1024_A-16/)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Base, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/)**:
-  12-layer, 768-hidden, 12-heads, 110M parameters
-* **[`BERT-Large, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Base, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/)**:
-  12-layer, 768-hidden, 12-heads, 110M parameters
-* **[`BERT-Large, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-24_H-1024_A-16/)**:
-  24-layer, 1024-hidden, 16-heads, 340M parameters
-* **[`BERT-Base, Multilingual Cased`](https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/)**:
-  104 languages, 12-layer, 768-hidden, 12-heads, 110M parameters
-* **[`BERT-Base, Chinese`](https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/)**:
-  Chinese Simplified and Traditional, 12-layer, 768-hidden, 12-heads,
-  110M parameters
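-
-For illustration, a hub module can be used as a Keras layer roughly as follows
-(a sketch assuming `tensorflow_hub` is installed; the exact input/output
-signature depends on the module version you pick -- recent versions take a
-dict of int32 features):
-
-```python
-import tensorflow as tf
-import tensorflow_hub as hub
-
-encoder = hub.KerasLayer(
-    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
-    trainable=True)
-inputs = dict(
-    input_word_ids=tf.keras.layers.Input(shape=(128,), dtype=tf.int32),
-    input_mask=tf.keras.layers.Input(shape=(128,), dtype=tf.int32),
-    input_type_ids=tf.keras.layers.Input(shape=(128,), dtype=tf.int32))
-outputs = encoder(inputs)
-pooled = outputs["pooled_output"]  # Shape: [batch_size, 768].
-```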
-
-## Set Up
-
-```shell
-export PYTHONPATH="$PYTHONPATH:/path/to/models"
-```
-
-Install `tf-nightly` to get the latest updates:
-
-```shell
-pip install tf-nightly-gpu
-```
-
-With a TPU, GPU support is not necessary. First, you need to create a
-`tf-nightly` TPU with the
-[ctpu tool](https://github.com/tensorflow/tpu/tree/master/tools/ctpu):
-
-```shell
-ctpu up --name=<tpu-name> --tf-version="nightly"
-```
-
-Second, you need to install TF 2 `tf-nightly` on your VM:
-
-```shell
-pip install tf-nightly
-```
-
-## Process Datasets
-
-### Pre-training
-
-There is no change in how pre-training data is generated. Please use the script
-[`../data/create_pretraining_data.py`](../data/create_pretraining_data.py),
-which is essentially branched from the
-[BERT research repo](https://github.com/google-research/bert), to get processed
-pre-training data; it has been adapted to TF2 symbols and Python 3
-compatibility.
-
-Running the pre-training script requires an input and output directory, as well
-as a vocab file. Note that max_seq_length will need to match the sequence
-length parameter you specify when you run pre-training.
-
-Example shell script to call create_pretraining_data.py
-```
-export WORKING_DIR='local disk or cloud location'
-export BERT_DIR='local disk or cloud location'
-python models/official/nlp/data/create_pretraining_data.py \
-  --input_file=$WORKING_DIR/input/input.txt \
-  --output_file=$WORKING_DIR/output/tf_examples.tfrecord \
-  --vocab_file=$BERT_DIR/wwm_uncased_L-24_H-1024_A-16/vocab.txt \
-  --do_lower_case=True \
-  --max_seq_length=512 \
-  --max_predictions_per_seq=76 \
-  --masked_lm_prob=0.15 \
-  --random_seed=12345 \
-  --dupe_factor=5
-```
-
-### Fine-tuning
-
-To prepare the fine-tuning data for final model training, use the
-[`../data/create_finetuning_data.py`](../data/create_finetuning_data.py)
-script. The resulting datasets in `tf_record` format and the training metadata
-should later be passed to the training or evaluation scripts. The task-specific
-arguments are described in the following sections:
-
-* GLUE
-
-Users can download the
-[GLUE data](https://gluebenchmark.com/tasks) by running
-[this script](https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e)
-and unpacking it to some directory `$GLUE_DIR`.
-Alternatively, users can download a
-[Pretrained Checkpoint](#access-to-pretrained-checkpoints) and place it in some
-directory `$BERT_DIR` instead of using the checkpoints on Google Cloud Storage.
-
-```shell
-export GLUE_DIR=~/glue
-export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16
-
-export TASK_NAME=MNLI
-export OUTPUT_DIR=gs://some_bucket/datasets
-python ../data/create_finetuning_data.py \
- --input_data_dir=${GLUE_DIR}/${TASK_NAME}/ \
- --vocab_file=${BERT_DIR}/vocab.txt \
- --train_data_output_path=${OUTPUT_DIR}/${TASK_NAME}_train.tf_record \
- --eval_data_output_path=${OUTPUT_DIR}/${TASK_NAME}_eval.tf_record \
- --meta_data_file_path=${OUTPUT_DIR}/${TASK_NAME}_meta_data \
- --fine_tuning_task_type=classification --max_seq_length=128 \
- --classification_task_name=${TASK_NAME}
-```
-
-* SQUAD
-
-The [SQuAD website](https://rajpurkar.github.io/SQuAD-explorer/) contains
-detailed information about the SQuAD datasets and evaluation.
-
-The necessary files can be found here:
-
-* [train-v1.1.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json)
-* [dev-v1.1.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json)
-* [evaluate-v1.1.py](https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py)
-* [train-v2.0.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json)
-* [dev-v2.0.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json)
-* [evaluate-v2.0.py](https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/)
-
-```shell
-export SQUAD_DIR=~/squad
-export SQUAD_VERSION=v1.1
-export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16
-export OUTPUT_DIR=gs://some_bucket/datasets
-
-python ../data/create_finetuning_data.py \
- --squad_data_file=${SQUAD_DIR}/train-${SQUAD_VERSION}.json \
- --vocab_file=${BERT_DIR}/vocab.txt \
- --train_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_train.tf_record \
- --meta_data_file_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_meta_data \
- --fine_tuning_task_type=squad --max_seq_length=384
-```
-
-Note: To create fine-tuning data with SQuAD 2.0, you need to add the flag
-`--version_2_with_negative=True`.
-
-## Fine-tuning with BERT
-
-### Cloud GPUs and TPUs
-
-* Cloud Storage
-
-The unzipped pre-trained model files can also be found in the Google Cloud
-Storage folder `gs://cloud-tpu-checkpoints/bert/keras_bert`. For example:
-
-```shell
-export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16
-export MODEL_DIR=gs://some_bucket/my_output_dir
-```
-
-Currently, users have access to `tf-nightly` TPUs, and the following TPU
-script should run with `tf-nightly`.
-
-* GPU -> TPU
-
-Just add the following flags to `run_classifier.py` or `run_squad.py`:
-
-```shell
-  --distribution_strategy=tpu
-  --tpu=grpc://${TPU_IP_ADDRESS}:8470
-```
-
-### Sentence and Sentence-pair Classification Tasks
-
-This example code fine-tunes `BERT-Large` on the Microsoft Research Paraphrase
-Corpus (MRPC), which contains only 3,600 examples and can be fine-tuned in a
-few minutes on most GPUs.
-
-We use the `BERT-Large` (uncased_L-24_H-1024_A-16) as an example throughout the
-workflow.
-For GPU memory of 16GB or smaller, you may try to use `BERT-Base` -(uncased_L-12_H-768_A-12). - -```shell -export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16 -export MODEL_DIR=gs://some_bucket/my_output_dir -export GLUE_DIR=gs://some_bucket/datasets -export TASK=MRPC - -python run_classifier.py \ - --mode='train_and_eval' \ - --input_meta_data_path=${GLUE_DIR}/${TASK}_meta_data \ - --train_data_path=${GLUE_DIR}/${TASK}_train.tf_record \ - --eval_data_path=${GLUE_DIR}/${TASK}_eval.tf_record \ - --bert_config_file=${BERT_DIR}/bert_config.json \ - --init_checkpoint=${BERT_DIR}/bert_model.ckpt \ - --train_batch_size=4 \ - --eval_batch_size=4 \ - --steps_per_loop=1 \ - --learning_rate=2e-5 \ - --num_train_epochs=3 \ - --model_dir=${MODEL_DIR} \ - --distribution_strategy=mirrored -``` - -Alternatively, instead of specifying `init_checkpoint`, you can specify -`hub_module_url` to employ a pretraind BERT hub module, e.g., -` --hub_module_url=https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/1`. - -After training a model, to get predictions from the classifier, you can set the -`--mode=predict` and offer the test set tfrecords to `--eval_data_path`. -Output will be created in file called test_results.tsv in the output folder. -Each line will contain output for each sample, columns are the class -probabilities. - -```shell -python run_classifier.py \ - --mode='predict' \ - --input_meta_data_path=${GLUE_DIR}/${TASK}_meta_data \ - --eval_data_path=${GLUE_DIR}/${TASK}_eval.tf_record \ - --bert_config_file=${BERT_DIR}/bert_config.json \ - --eval_batch_size=4 \ - --model_dir=${MODEL_DIR} \ - --distribution_strategy=mirrored -``` - -To use TPU, you only need to switch distribution strategy type to `tpu` with TPU -information and use remote storage for model checkpoints. - -```shell -export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16 -export TPU_IP_ADDRESS='???' -export MODEL_DIR=gs://some_bucket/my_output_dir -export GLUE_DIR=gs://some_bucket/datasets -export TASK=MRPC - -python run_classifier.py \ - --mode='train_and_eval' \ - --input_meta_data_path=${GLUE_DIR}/${TASK}_meta_data \ - --train_data_path=${GLUE_DIR}/${TASK}_train.tf_record \ - --eval_data_path=${GLUE_DIR}/${TASK}_eval.tf_record \ - --bert_config_file=${BERT_DIR}/bert_config.json \ - --init_checkpoint=${BERT_DIR}/bert_model.ckpt \ - --train_batch_size=32 \ - --eval_batch_size=32 \ - --steps_per_loop=1000 \ - --learning_rate=2e-5 \ - --num_train_epochs=3 \ - --model_dir=${MODEL_DIR} \ - --distribution_strategy=tpu \ - --tpu=grpc://${TPU_IP_ADDRESS}:8470 -``` - -Note that, we specify `steps_per_loop=1000` for TPU, because running a loop of -training steps inside a `tf.function` can significantly increase TPU utilization -and callbacks will not be called inside the loop. - -### SQuAD 1.1 - -The Stanford Question Answering Dataset (SQuAD) is a popular question answering -benchmark dataset. See more in [SQuAD website](https://rajpurkar.github.io/SQuAD-explorer/). - -We use the `BERT-Large` (uncased_L-24_H-1024_A-16) as an example throughout the -workflow. -For GPU memory of 16GB or smaller, you may try to use `BERT-Base` -(uncased_L-12_H-768_A-12). 
-
-```shell
-export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16
-export SQUAD_DIR=gs://some_bucket/datasets
-export MODEL_DIR=gs://some_bucket/my_output_dir
-export SQUAD_VERSION=v1.1
-
-python run_squad.py \
- --input_meta_data_path=${SQUAD_DIR}/squad_${SQUAD_VERSION}_meta_data \
- --train_data_path=${SQUAD_DIR}/squad_${SQUAD_VERSION}_train.tf_record \
- --predict_file=${SQUAD_DIR}/dev-v1.1.json \
- --vocab_file=${BERT_DIR}/vocab.txt \
- --bert_config_file=${BERT_DIR}/bert_config.json \
- --init_checkpoint=${BERT_DIR}/bert_model.ckpt \
- --train_batch_size=4 \
- --predict_batch_size=4 \
- --learning_rate=8e-5 \
- --num_train_epochs=2 \
- --model_dir=${MODEL_DIR} \
- --distribution_strategy=mirrored
-```
-
-Similarly, you can replace the `init_checkpoint` flag with `hub_module_url` to
-specify a hub module path.
-
-`run_squad.py` writes the predictions for `--predict_file` by default. If you set
-`--mode=predict` and provide the SQuAD test data, the script will generate
-the prediction JSON file.
-
-To use a TPU, you need to switch the distribution strategy type to `tpu` and
-provide the TPU information.
-
-```shell
-export BERT_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16
-export TPU_IP_ADDRESS='???'
-export MODEL_DIR=gs://some_bucket/my_output_dir
-export SQUAD_DIR=gs://some_bucket/datasets
-export SQUAD_VERSION=v1.1
-
-python run_squad.py \
- --input_meta_data_path=${SQUAD_DIR}/squad_${SQUAD_VERSION}_meta_data \
- --train_data_path=${SQUAD_DIR}/squad_${SQUAD_VERSION}_train.tf_record \
- --predict_file=${SQUAD_DIR}/dev-v1.1.json \
- --vocab_file=${BERT_DIR}/vocab.txt \
- --bert_config_file=${BERT_DIR}/bert_config.json \
- --init_checkpoint=${BERT_DIR}/bert_model.ckpt \
- --train_batch_size=32 \
- --learning_rate=8e-5 \
- --num_train_epochs=2 \
- --model_dir=${MODEL_DIR} \
- --distribution_strategy=tpu \
- --tpu=grpc://${TPU_IP_ADDRESS}:8470
-```
-
-The dev set predictions will be saved to a file called predictions.json in the
-`model_dir`, and can be scored with the official evaluation script:
-
-```shell
-python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json ./squad/predictions.json
-```
-
-
 diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/__init__.py deleted file mode 100644 index 3ef7bb85ba5f722a4f34e90623470d5a45af3aa4..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/__init__.py +++ /dev/null @@ -1,31 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
 diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_cloud_tpu.md b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_cloud_tpu.md deleted file mode 100644 index baf6f9bdc0c155cb53b30cea5f404aa166c3a2c6..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_cloud_tpu.md +++ /dev/null @@ -1,110 +0,0 @@
-# BERT FineTuning with Cloud TPU: Sentence and Sentence-Pair Classification Tasks (TF 2.1)
-This tutorial shows you how to train the Bidirectional Encoder Representations from Transformers (BERT) model on a Cloud TPU.
-
-
-## Set up Cloud Storage and Compute Engine VM
-1. [Open a cloud shell window](https://console.cloud.google.com/?cloudshell=true&_ga=2.11844148.-1612541229.1552429951)
-2. Create a variable for the project's ID:
-```
-export PROJECT_ID=your-project_id
-```
-3. Configure the `gcloud` command-line tool to use the project where you want to create the Cloud TPU.
-```
-gcloud config set project ${PROJECT_ID}
-```
-4. Create a Cloud Storage bucket using the following command:
-```
-gsutil mb -p ${PROJECT_ID} -c standard -l europe-west4 -b on gs://your-bucket-name
-```
-This Cloud Storage bucket stores the data you use to train your model and the training results.
-5. Launch a Compute Engine VM and Cloud TPU using the `ctpu up` command.
-```
-ctpu up --tpu-size=v3-8 \
- --machine-type=n1-standard-8 \
- --zone=europe-west4-a \
- --tf-version=2.1 [optional flags: --project, --name]
-```
-6. The configuration you specified appears. Enter y to approve or n to cancel.
-7. When the `ctpu up` command has finished executing, verify that your shell prompt has changed from `username@project` to `username@tpuname`. This change shows that you are now logged into your Compute Engine VM.
-```
-gcloud compute ssh vm-name --zone=europe-west4-a
-(vm)$ export TPU_NAME=vm-name
-```
-As you continue these instructions, run each command that begins with `(vm)$` in your VM session window.
-
-## Prepare the Dataset
-1. From your Compute Engine virtual machine (VM), install the dependencies listed in `requirements.txt`.
-```
-(vm)$ cd /usr/share/models
-(vm)$ sudo pip3 install -r official/requirements.txt
-```
-2. Optional: download `download_glue_data.py`.
-
-This tutorial uses the General Language Understanding Evaluation (GLUE) benchmark to evaluate and analyze the performance of the model. The GLUE data is provided for this tutorial at `gs://cloud-tpu-checkpoints/bert/classification`.
-
-## Define parameter values
-Next, define several parameter values that are required when you train and evaluate your model:
-
-```
-(vm)$ export PYTHONPATH="$PYTHONPATH:/usr/share/tpu/models"
-(vm)$ export STORAGE_BUCKET=gs://your-bucket-name
-(vm)$ export BERT_BASE_DIR=gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16
-(vm)$ export MODEL_DIR=${STORAGE_BUCKET}/bert-output
-(vm)$ export GLUE_DIR=gs://cloud-tpu-checkpoints/bert/classification
-(vm)$ export TASK=mnli
-```
-
-## Train the model
-From your Compute Engine VM, run the training command below; an optional TPU connectivity check is sketched first.
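-
-Optionally, before launching training, you can verify that the VM can actually
-reach the TPU. The sketch below uses standard TF 2.x TPU initialization calls
-and assumes `TPU_NAME` was exported as above; treat it as an optional sanity
-check rather than part of the tutorial's required steps:
-
-```python
-# Optional sanity check (illustrative): confirm the TPU is reachable
-# before starting a long training run. Assumes TPU_NAME is exported.
-import os
-import tensorflow as tf
-
-resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
-    tpu=os.environ["TPU_NAME"])
-tf.config.experimental_connect_to_cluster(resolver)
-tf.tpu.experimental.initialize_tpu_system(resolver)
-print("TPU devices:", tf.config.list_logical_devices("TPU"))
-```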
-
-```
-(vm)$ python3 official/nlp/bert/run_classifier.py \
- --mode='train_and_eval' \
- --input_meta_data_path=${GLUE_DIR}/${TASK}_meta_data \
- --train_data_path=${GLUE_DIR}/${TASK}_train.tf_record \
- --eval_data_path=${GLUE_DIR}/${TASK}_eval.tf_record \
- --bert_config_file=$BERT_BASE_DIR/bert_config.json \
- --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
- --train_batch_size=32 \
- --eval_batch_size=32 \
- --learning_rate=2e-5 \
- --num_train_epochs=3 \
- --model_dir=${MODEL_DIR} \
- --distribution_strategy=tpu \
- --tpu=${TPU_NAME}
-```
-
-## Verify your results
-The training takes approximately 1 hour on a v3-8 TPU. When the script completes, you should see results similar to the following:
-```
-Training Summary:
-{'train_loss': 0.28142181038856506,
-'last_train_metrics': 0.9467429518699646,
-'eval_metrics': 0.8599063158035278,
-'total_training_steps': 36813}
-```
-
-## Clean up
-To avoid incurring charges to your GCP account for the resources used in this topic:
-1. Disconnect from the Compute Engine VM:
-```
-(vm)$ exit
-```
-2. In your Cloud Shell, run `ctpu delete` with the `--zone` flag you used when you set up the Cloud TPU to delete your Compute Engine VM and your Cloud TPU:
-```
-$ ctpu delete --zone=your-zone
-```
-3. Run `ctpu status`, specifying your zone, to make sure you have no instances allocated and to avoid unnecessary charges for TPU usage. The deletion might take several minutes. A response like the one below indicates there are no more allocated instances:
-```
-$ ctpu status --zone=your-zone
-```
-4. Run `gsutil` as shown, replacing `your-bucket` with the name of the Cloud Storage bucket you created for this tutorial:
-```
-$ gsutil rm -r gs://your-bucket
-```
-
-
 diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_models.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_models.py deleted file mode 100644 index 8bfc10ac9064ec42126454f02666aeb9c7a88da6..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_models.py +++ /dev/null @@ -1,382 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# - -"""BERT models that are compatible with TF 2.0.""" - -import gin -import tensorflow as tf -import tensorflow_hub as hub - -from official.modeling import tf_utils -from official.nlp.albert import configs as albert_configs -from official.nlp.bert import configs -from official.nlp.modeling import models -from official.nlp.modeling import networks - - -class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer): - """Returns layer that computes custom loss and metrics for pretraining.""" - - def __init__(self, vocab_size, **kwargs): - super(BertPretrainLossAndMetricLayer, self).__init__(**kwargs) - self._vocab_size = vocab_size - self.config = { - 'vocab_size': vocab_size, - } - - def _add_metrics(self, lm_output, lm_labels, lm_label_weights, - lm_example_loss, sentence_output, sentence_labels, - next_sentence_loss): - """Adds metrics.""" - masked_lm_accuracy = tf.keras.metrics.sparse_categorical_accuracy( - lm_labels, lm_output) - numerator = tf.reduce_sum(masked_lm_accuracy * lm_label_weights) - denominator = tf.reduce_sum(lm_label_weights) + 1e-5 - masked_lm_accuracy = numerator / denominator - self.add_metric( - masked_lm_accuracy, name='masked_lm_accuracy', aggregation='mean') - - self.add_metric(lm_example_loss, name='lm_example_loss', aggregation='mean') - - if sentence_labels is not None: - next_sentence_accuracy = tf.keras.metrics.sparse_categorical_accuracy( - sentence_labels, sentence_output) - self.add_metric( - next_sentence_accuracy, - name='next_sentence_accuracy', - aggregation='mean') - - if next_sentence_loss is not None: - self.add_metric( - next_sentence_loss, name='next_sentence_loss', aggregation='mean') - - def call(self, - lm_output_logits, - sentence_output_logits, - lm_label_ids, - lm_label_weights, - sentence_labels=None): - """Implements call() for the layer.""" - lm_label_weights = tf.cast(lm_label_weights, tf.float32) - lm_output_logits = tf.cast(lm_output_logits, tf.float32) - - lm_prediction_losses = tf.keras.losses.sparse_categorical_crossentropy( - lm_label_ids, lm_output_logits, from_logits=True) - lm_numerator_loss = tf.reduce_sum(lm_prediction_losses * lm_label_weights) - lm_denominator_loss = tf.reduce_sum(lm_label_weights) - mask_label_loss = tf.math.divide_no_nan(lm_numerator_loss, - lm_denominator_loss) - - if sentence_labels is not None: - sentence_output_logits = tf.cast(sentence_output_logits, tf.float32) - sentence_loss = tf.keras.losses.sparse_categorical_crossentropy( - sentence_labels, sentence_output_logits, from_logits=True) - sentence_loss = tf.reduce_mean(sentence_loss) - loss = mask_label_loss + sentence_loss - else: - sentence_loss = None - loss = mask_label_loss - - batch_shape = tf.slice(tf.shape(lm_label_ids), [0], [1]) - # TODO(hongkuny): Avoids the hack and switches add_loss. - final_loss = tf.fill(batch_shape, loss) - - self._add_metrics(lm_output_logits, lm_label_ids, lm_label_weights, - mask_label_loss, sentence_output_logits, sentence_labels, - sentence_loss) - return final_loss - - -@gin.configurable -def get_transformer_encoder(bert_config, - sequence_length=None, - transformer_encoder_cls=None, - output_range=None): - """Gets a 'TransformerEncoder' object. - - Args: - bert_config: A 'modeling.BertConfig' or 'modeling.AlbertConfig' object. - sequence_length: [Deprecated]. - transformer_encoder_cls: A EncoderScaffold class. If it is None, uses the - default BERT encoder implementation. - output_range: the sequence output range, [0, output_range). Default setting - is to return the entire sequence output. 
- - Returns: - A encoder object. - """ - del sequence_length - if transformer_encoder_cls is not None: - # TODO(hongkuny): evaluate if it is better to put cfg definition in gin. - embedding_cfg = dict( - vocab_size=bert_config.vocab_size, - type_vocab_size=bert_config.type_vocab_size, - hidden_size=bert_config.hidden_size, - max_seq_length=bert_config.max_position_embeddings, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range), - dropout_rate=bert_config.hidden_dropout_prob, - ) - hidden_cfg = dict( - num_attention_heads=bert_config.num_attention_heads, - intermediate_size=bert_config.intermediate_size, - intermediate_activation=tf_utils.get_activation(bert_config.hidden_act), - dropout_rate=bert_config.hidden_dropout_prob, - attention_dropout_rate=bert_config.attention_probs_dropout_prob, - kernel_initializer=tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range), - ) - kwargs = dict( - embedding_cfg=embedding_cfg, - hidden_cfg=hidden_cfg, - num_hidden_instances=bert_config.num_hidden_layers, - pooled_output_dim=bert_config.hidden_size, - pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range)) - - # Relies on gin configuration to define the Transformer encoder arguments. - return transformer_encoder_cls(**kwargs) - - kwargs = dict( - vocab_size=bert_config.vocab_size, - hidden_size=bert_config.hidden_size, - num_layers=bert_config.num_hidden_layers, - num_attention_heads=bert_config.num_attention_heads, - intermediate_size=bert_config.intermediate_size, - activation=tf_utils.get_activation(bert_config.hidden_act), - dropout_rate=bert_config.hidden_dropout_prob, - attention_dropout_rate=bert_config.attention_probs_dropout_prob, - max_sequence_length=bert_config.max_position_embeddings, - type_vocab_size=bert_config.type_vocab_size, - embedding_width=bert_config.embedding_size, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range)) - if isinstance(bert_config, albert_configs.AlbertConfig): - return networks.AlbertEncoder(**kwargs) - else: - assert isinstance(bert_config, configs.BertConfig) - kwargs['output_range'] = output_range - return networks.BertEncoder(**kwargs) - - -def pretrain_model(bert_config, - seq_length, - max_predictions_per_seq, - initializer=None, - use_next_sentence_label=True, - return_core_pretrainer_model=False): - """Returns model to be used for pre-training. - - Args: - bert_config: Configuration that defines the core BERT model. - seq_length: Maximum sequence length of the training data. - max_predictions_per_seq: Maximum number of tokens in sequence to mask out - and use for pretraining. - initializer: Initializer for weights in BertPretrainer. - use_next_sentence_label: Whether to use the next sentence label. - return_core_pretrainer_model: Whether to also return the `BertPretrainer` - object. - - Returns: - A Tuple of (1) Pretraining model, (2) core BERT submodel from which to - save weights after pretraining, and (3) optional core `BertPretrainer` - object if argument `return_core_pretrainer_model` is True. 
- """ - input_word_ids = tf.keras.layers.Input( - shape=(seq_length,), name='input_word_ids', dtype=tf.int32) - input_mask = tf.keras.layers.Input( - shape=(seq_length,), name='input_mask', dtype=tf.int32) - input_type_ids = tf.keras.layers.Input( - shape=(seq_length,), name='input_type_ids', dtype=tf.int32) - masked_lm_positions = tf.keras.layers.Input( - shape=(max_predictions_per_seq,), - name='masked_lm_positions', - dtype=tf.int32) - masked_lm_ids = tf.keras.layers.Input( - shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32) - masked_lm_weights = tf.keras.layers.Input( - shape=(max_predictions_per_seq,), - name='masked_lm_weights', - dtype=tf.int32) - - if use_next_sentence_label: - next_sentence_labels = tf.keras.layers.Input( - shape=(1,), name='next_sentence_labels', dtype=tf.int32) - else: - next_sentence_labels = None - - transformer_encoder = get_transformer_encoder(bert_config, seq_length) - if initializer is None: - initializer = tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range) - pretrainer_model = models.BertPretrainer( - network=transformer_encoder, - embedding_table=transformer_encoder.get_embedding_table(), - num_classes=2, # The next sentence prediction label has two classes. - activation=tf_utils.get_activation(bert_config.hidden_act), - num_token_predictions=max_predictions_per_seq, - initializer=initializer, - output='logits') - - outputs = pretrainer_model( - [input_word_ids, input_mask, input_type_ids, masked_lm_positions]) - lm_output = outputs['masked_lm'] - sentence_output = outputs['classification'] - pretrain_loss_layer = BertPretrainLossAndMetricLayer( - vocab_size=bert_config.vocab_size) - output_loss = pretrain_loss_layer(lm_output, sentence_output, masked_lm_ids, - masked_lm_weights, next_sentence_labels) - inputs = { - 'input_word_ids': input_word_ids, - 'input_mask': input_mask, - 'input_type_ids': input_type_ids, - 'masked_lm_positions': masked_lm_positions, - 'masked_lm_ids': masked_lm_ids, - 'masked_lm_weights': masked_lm_weights, - } - if use_next_sentence_label: - inputs['next_sentence_labels'] = next_sentence_labels - - keras_model = tf.keras.Model(inputs=inputs, outputs=output_loss) - if return_core_pretrainer_model: - return keras_model, transformer_encoder, pretrainer_model - else: - return keras_model, transformer_encoder - - -def squad_model(bert_config, - max_seq_length, - initializer=None, - hub_module_url=None, - hub_module_trainable=True): - """Returns BERT Squad model along with core BERT model to import weights. - - Args: - bert_config: BertConfig, the config defines the core Bert model. - max_seq_length: integer, the maximum input sequence length. - initializer: Initializer for the final dense layer in the span labeler. - Defaulted to TruncatedNormal initializer. - hub_module_url: TF-Hub path/url to Bert module. - hub_module_trainable: True to finetune layers in the hub module. - - Returns: - A tuple of (1) keras model that outputs start logits and end logits and - (2) the core BERT transformer encoder. 
- """ - if initializer is None: - initializer = tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range) - if not hub_module_url: - bert_encoder = get_transformer_encoder(bert_config, max_seq_length) - return models.BertSpanLabeler( - network=bert_encoder, initializer=initializer), bert_encoder - - input_word_ids = tf.keras.layers.Input( - shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids') - input_mask = tf.keras.layers.Input( - shape=(max_seq_length,), dtype=tf.int32, name='input_mask') - input_type_ids = tf.keras.layers.Input( - shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids') - core_model = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable) - pooled_output, sequence_output = core_model( - [input_word_ids, input_mask, input_type_ids]) - bert_encoder = tf.keras.Model( - inputs={ - 'input_word_ids': input_word_ids, - 'input_mask': input_mask, - 'input_type_ids': input_type_ids, - }, - outputs=[sequence_output, pooled_output], - name='core_model') - return models.BertSpanLabeler( - network=bert_encoder, initializer=initializer), bert_encoder - - -def classifier_model(bert_config, - num_labels, - max_seq_length=None, - final_layer_initializer=None, - hub_module_url=None, - hub_module_trainable=True): - """BERT classifier model in functional API style. - - Construct a Keras model for predicting `num_labels` outputs from an input with - maximum sequence length `max_seq_length`. - - Args: - bert_config: BertConfig or AlbertConfig, the config defines the core BERT or - ALBERT model. - num_labels: integer, the number of classes. - max_seq_length: integer, the maximum input sequence length. - final_layer_initializer: Initializer for final dense layer. Defaulted - TruncatedNormal initializer. - hub_module_url: TF-Hub path/url to Bert module. - hub_module_trainable: True to finetune layers in the hub module. 
- - Returns: - Combined prediction model (words, mask, type) -> (one-hot labels) - BERT sub-model (words, mask, type) -> (bert_outputs) - """ - if final_layer_initializer is not None: - initializer = final_layer_initializer - else: - initializer = tf.keras.initializers.TruncatedNormal( - stddev=bert_config.initializer_range) - - if not hub_module_url: - bert_encoder = get_transformer_encoder( - bert_config, max_seq_length, output_range=1) - return models.BertClassifier( - bert_encoder, - num_classes=num_labels, - dropout_rate=bert_config.hidden_dropout_prob, - initializer=initializer), bert_encoder - - input_word_ids = tf.keras.layers.Input( - shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids') - input_mask = tf.keras.layers.Input( - shape=(max_seq_length,), dtype=tf.int32, name='input_mask') - input_type_ids = tf.keras.layers.Input( - shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids') - bert_model = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable) - pooled_output, _ = bert_model([input_word_ids, input_mask, input_type_ids]) - output = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)( - pooled_output) - - output = tf.keras.layers.Dense( - num_labels, kernel_initializer=initializer, name='output')( - output) - return tf.keras.Model( - inputs={ - 'input_word_ids': input_word_ids, - 'input_mask': input_mask, - 'input_type_ids': input_type_ids - }, - outputs=output), bert_model diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_models_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_models_test.py deleted file mode 100644 index 03ee8abd238682da110fb7d3625e1754d4c85248..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/bert_models_test.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import tensorflow as tf - -from official.nlp.bert import bert_models -from official.nlp.bert import configs as bert_configs -from official.nlp.modeling import networks - - -class BertModelsTest(tf.test.TestCase): - - def setUp(self): - super(BertModelsTest, self).setUp() - self._bert_test_config = bert_configs.BertConfig( - attention_probs_dropout_prob=0.0, - hidden_act='gelu', - hidden_dropout_prob=0.0, - hidden_size=16, - initializer_range=0.02, - intermediate_size=32, - max_position_embeddings=128, - num_attention_heads=2, - num_hidden_layers=2, - type_vocab_size=2, - vocab_size=30522) - - def test_pretrain_model(self): - model, encoder = bert_models.pretrain_model( - self._bert_test_config, - seq_length=5, - max_predictions_per_seq=2, - initializer=None, - use_next_sentence_label=True) - self.assertIsInstance(model, tf.keras.Model) - self.assertIsInstance(encoder, networks.BertEncoder) - - # model has one scalar output: loss value. - self.assertEqual(model.output.shape.as_list(), [ - None, - ]) - - # Expect two output from encoder: sequence and classification output. - self.assertIsInstance(encoder.output, list) - self.assertLen(encoder.output, 2) - # shape should be [batch size, hidden_size] - self.assertEqual(encoder.output[1].shape.as_list(), [None, 16]) - - def test_squad_model(self): - model, core_model = bert_models.squad_model( - self._bert_test_config, - max_seq_length=5, - initializer=None, - hub_module_url=None, - hub_module_trainable=None) - self.assertIsInstance(model, tf.keras.Model) - self.assertIsInstance(core_model, tf.keras.Model) - - # Expect two output from model: start positions and end positions - self.assertIsInstance(model.output, list) - self.assertLen(model.output, 2) - - # Expect two output from core_model: sequence and classification output. - self.assertIsInstance(core_model.output, list) - self.assertLen(core_model.output, 2) - # shape should be [batch size, None, hidden_size] - self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16]) - # shape should be [batch size, hidden_size] - self.assertEqual(core_model.output[1].shape.as_list(), [None, 16]) - - def test_classifier_model(self): - model, core_model = bert_models.classifier_model( - self._bert_test_config, - num_labels=3, - max_seq_length=5, - final_layer_initializer=None, - hub_module_url=None, - hub_module_trainable=None) - self.assertIsInstance(model, tf.keras.Model) - self.assertIsInstance(core_model, tf.keras.Model) - - # model has one classification output with num_labels=3. - self.assertEqual(model.output.shape.as_list(), [None, 3]) - - # Expect two output from core_model: sequence and classification output. - self.assertIsInstance(core_model.output, list) - self.assertLen(core_model.output, 2) - # shape should be [batch size, None, hidden_size] - self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16]) - # shape should be [batch size, hidden_size] - self.assertEqual(core_model.output[1].shape.as_list(), [None, 16]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/common_flags.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/common_flags.py deleted file mode 100644 index c7242864721734f885384c52dbb0e4bb4bba8b97..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/common_flags.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Defining common flags used across all BERT models/applications.""" - -from absl import flags -import tensorflow as tf - -from official.utils import hyperparams_flags -from official.utils.flags import core as flags_core - - -def define_common_bert_flags(): - """Define common flags for BERT tasks.""" - flags_core.define_base( - data_dir=False, - model_dir=True, - clean=False, - train_epochs=False, - epochs_between_evals=False, - stop_threshold=False, - batch_size=False, - num_gpu=True, - export_dir=False, - distribution_strategy=True, - run_eagerly=True) - flags_core.define_distribution() - flags.DEFINE_string('bert_config_file', None, - 'Bert configuration file to define core bert layers.') - flags.DEFINE_string( - 'model_export_path', None, - 'Path to the directory, where trainined model will be ' - 'exported.') - flags.DEFINE_string('tpu', '', 'TPU address to connect to.') - flags.DEFINE_string( - 'init_checkpoint', None, - 'Initial checkpoint (usually from a pre-trained BERT model).') - flags.DEFINE_integer('num_train_epochs', 3, - 'Total number of training epochs to perform.') - flags.DEFINE_integer( - 'steps_per_loop', None, - 'Number of steps per graph-mode loop. Only training step ' - 'happens inside the loop. Callbacks will not be called ' - 'inside. If not set the value will be configured depending on the ' - 'devices available.') - flags.DEFINE_float('learning_rate', 5e-5, - 'The initial learning rate for Adam.') - flags.DEFINE_float('end_lr', 0.0, - 'The end learning rate for learning rate decay.') - flags.DEFINE_string('optimizer_type', 'adamw', - 'The type of optimizer to use for training (adamw|lamb)') - flags.DEFINE_boolean( - 'scale_loss', False, - 'Whether to divide the loss by number of replica inside the per-replica ' - 'loss function.') - flags.DEFINE_boolean( - 'use_keras_compile_fit', False, - 'If True, uses Keras compile/fit() API for training logic. Otherwise ' - 'use custom training loop.') - flags.DEFINE_string( - 'hub_module_url', None, 'TF-Hub path/url to Bert module. 
' - 'If specified, init_checkpoint flag should not be used.') - flags.DEFINE_bool('hub_module_trainable', True, - 'True to make keras layers in the hub module trainable.') - flags.DEFINE_string( - 'sub_model_export_name', None, - 'If set, `sub_model` checkpoints are exported into ' - 'FLAGS.model_dir/FLAGS.sub_model_export_name.') - flags.DEFINE_bool('explicit_allreduce', False, - 'True to use explicit allreduce instead of the implicit ' - 'allreduce in optimizer.apply_gradients(). If fp16 mixed ' - 'precision training is used, this also enables allreduce ' - 'gradients in fp16.') - flags.DEFINE_integer('allreduce_bytes_per_pack', 0, - 'Number of bytes of a gradient pack for allreduce. ' - 'Should be positive integer, if set to 0, all ' - 'gradients are in one pack. Breaking gradient into ' - 'packs could enable overlap between allreduce and ' - 'backprop computation. This flag only takes effect ' - 'when explicit_allreduce is set to True.') - - flags_core.define_log_steps() - - # Adds flags for mixed precision and multi-worker training. - flags_core.define_performance( - num_parallel_calls=False, - inter_op=False, - intra_op=False, - synthetic_data=False, - max_train_steps=False, - dtype=True, - loss_scale=True, - all_reduce_alg=True, - num_packs=False, - tf_gpu_thread_mode=True, - datasets_num_private_threads=True, - enable_xla=True, - fp16_implementation=True, - ) - - # Adds gin configuration flags. - hyperparams_flags.define_gin_flags() - - -def dtype(): - return flags_core.get_tf_dtype(flags.FLAGS) - - -def use_float16(): - return flags_core.get_tf_dtype(flags.FLAGS) == tf.float16 - - -def use_graph_rewrite(): - return flags.FLAGS.fp16_implementation == 'graph_rewrite' - - -def get_loss_scale(): - return flags_core.get_loss_scale(flags.FLAGS, default_for_fp16='dynamic') diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/configs.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/configs.py deleted file mode 100644 index 1cb128d15e1858f85819a7621308d33bf781fb07..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/configs.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""The main BERT model and related functions.""" - -import copy -import json - -import six -import tensorflow as tf - - -class BertConfig(object): - """Configuration for `BertModel`.""" - - def __init__(self, - vocab_size, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - initializer_range=0.02, - embedding_size=None, - backward_compatible=True): - """Constructs BertConfig. - - Args: - vocab_size: Vocabulary size of `inputs_ids` in `BertModel`. - hidden_size: Size of the encoder layers and the pooler layer. - num_hidden_layers: Number of hidden layers in the Transformer encoder. - num_attention_heads: Number of attention heads for each attention layer in - the Transformer encoder. - intermediate_size: The size of the "intermediate" (i.e., feed-forward) - layer in the Transformer encoder. - hidden_act: The non-linear activation function (function or string) in the - encoder and pooler. - hidden_dropout_prob: The dropout probability for all fully connected - layers in the embeddings, encoder, and pooler. - attention_probs_dropout_prob: The dropout ratio for the attention - probabilities. - max_position_embeddings: The maximum sequence length that this model might - ever be used with. Typically set this to something large just in case - (e.g., 512 or 1024 or 2048). - type_vocab_size: The vocabulary size of the `token_type_ids` passed into - `BertModel`. - initializer_range: The stdev of the truncated_normal_initializer for - initializing all weight matrices. - embedding_size: (Optional) width of the factorized word embeddings. - backward_compatible: Boolean, whether the variables shape are compatible - with checkpoints converted from TF 1.x BERT. 
- """ - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_act = hidden_act - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.initializer_range = initializer_range - self.embedding_size = embedding_size - self.backward_compatible = backward_compatible - - @classmethod - def from_dict(cls, json_object): - """Constructs a `BertConfig` from a Python dictionary of parameters.""" - config = BertConfig(vocab_size=None) - for (key, value) in six.iteritems(json_object): - config.__dict__[key] = value - return config - - @classmethod - def from_json_file(cls, json_file): - """Constructs a `BertConfig` from a json file of parameters.""" - with tf.io.gfile.GFile(json_file, "r") as reader: - text = reader.read() - return cls.from_dict(json.loads(text)) - - def to_dict(self): - """Serializes this instance to a Python dictionary.""" - output = copy.deepcopy(self.__dict__) - return output - - def to_json_string(self): - """Serializes this instance to a JSON string.""" - return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/export_tfhub.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/export_tfhub.py deleted file mode 100644 index 2d4fda1329e0b6a77713bbd94385f235bdbd47eb..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/export_tfhub.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""A script to export BERT as a TF-Hub SavedModel. - -This script is **DEPRECATED** for exporting BERT encoder models; -see the error message in by main() for details. 
-""" - -from typing import Text - -# Import libraries -from absl import app -from absl import flags -from absl import logging -import tensorflow as tf -from official.nlp.bert import bert_models -from official.nlp.bert import configs - -FLAGS = flags.FLAGS - -flags.DEFINE_string("bert_config_file", None, - "Bert configuration file to define core bert layers.") -flags.DEFINE_string("model_checkpoint_path", None, - "File path to TF model checkpoint.") -flags.DEFINE_string("export_path", None, "TF-Hub SavedModel destination path.") -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") -flags.DEFINE_bool( - "do_lower_case", None, "Whether to lowercase. If None, " - "do_lower_case will be enabled if 'uncased' appears in the " - "name of --vocab_file") -flags.DEFINE_enum("model_type", "encoder", ["encoder", "squad"], - "What kind of BERT model to export.") - - -def create_bert_model(bert_config: configs.BertConfig) -> tf.keras.Model: - """Creates a BERT keras core model from BERT configuration. - - Args: - bert_config: A `BertConfig` to create the core model. - - Returns: - A keras model. - """ - # Adds input layers just as placeholders. - input_word_ids = tf.keras.layers.Input( - shape=(None,), dtype=tf.int32, name="input_word_ids") - input_mask = tf.keras.layers.Input( - shape=(None,), dtype=tf.int32, name="input_mask") - input_type_ids = tf.keras.layers.Input( - shape=(None,), dtype=tf.int32, name="input_type_ids") - transformer_encoder = bert_models.get_transformer_encoder( - bert_config, sequence_length=None) - sequence_output, pooled_output = transformer_encoder( - [input_word_ids, input_mask, input_type_ids]) - # To keep consistent with legacy hub modules, the outputs are - # "pooled_output" and "sequence_output". - return tf.keras.Model( - inputs=[input_word_ids, input_mask, input_type_ids], - outputs=[pooled_output, sequence_output]), transformer_encoder - - -def export_bert_tfhub(bert_config: configs.BertConfig, - model_checkpoint_path: Text, - hub_destination: Text, - vocab_file: Text, - do_lower_case: bool = None): - """Restores a tf.keras.Model and saves for TF-Hub.""" - # If do_lower_case is not explicit, default to checking whether "uncased" is - # in the vocab file name - if do_lower_case is None: - do_lower_case = "uncased" in vocab_file - logging.info("Using do_lower_case=%s based on name of vocab_file=%s", - do_lower_case, vocab_file) - core_model, encoder = create_bert_model(bert_config) - checkpoint = tf.train.Checkpoint( - model=encoder, # Legacy checkpoints. 
- encoder=encoder) - checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() - core_model.vocab_file = tf.saved_model.Asset(vocab_file) - core_model.do_lower_case = tf.Variable(do_lower_case, trainable=False) - core_model.save(hub_destination, include_optimizer=False, save_format="tf") - - -def export_bert_squad_tfhub(bert_config: configs.BertConfig, - model_checkpoint_path: Text, - hub_destination: Text, - vocab_file: Text, - do_lower_case: bool = None): - """Restores a tf.keras.Model for BERT with SQuAD and saves for TF-Hub.""" - # If do_lower_case is not explicit, default to checking whether "uncased" is - # in the vocab file name - if do_lower_case is None: - do_lower_case = "uncased" in vocab_file - logging.info("Using do_lower_case=%s based on name of vocab_file=%s", - do_lower_case, vocab_file) - span_labeling, _ = bert_models.squad_model(bert_config, max_seq_length=None) - checkpoint = tf.train.Checkpoint(model=span_labeling) - checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() - span_labeling.vocab_file = tf.saved_model.Asset(vocab_file) - span_labeling.do_lower_case = tf.Variable(do_lower_case, trainable=False) - span_labeling.save(hub_destination, include_optimizer=False, save_format="tf") - - -def main(_): - bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - if FLAGS.model_type == "encoder": - deprecation_note = ( - "nlp/bert/export_tfhub is **DEPRECATED** for exporting BERT encoder " - "models. Please switch to nlp/tools/export_tfhub for exporting BERT " - "(and other) encoders with dict inputs/outputs conforming to " - "https://www.tensorflow.org/hub/common_saved_model_apis/text#transformer-encoders" - ) - logging.error(deprecation_note) - print("\n\nNOTICE:", deprecation_note, "\n") - export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path, - FLAGS.export_path, FLAGS.vocab_file, FLAGS.do_lower_case) - elif FLAGS.model_type == "squad": - export_bert_squad_tfhub(bert_config, FLAGS.model_checkpoint_path, - FLAGS.export_path, FLAGS.vocab_file, - FLAGS.do_lower_case) - else: - raise ValueError("Unsupported model_type %s." % FLAGS.model_type) - - -if __name__ == "__main__": - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/export_tfhub_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/export_tfhub_test.py deleted file mode 100644 index 9a5c1f9ad36d1f756488544a237b0a5e16ae6a07..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/export_tfhub_test.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests official.nlp.bert.export_tfhub.""" - -import os - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf -import tensorflow_hub as hub - -from official.nlp.bert import configs -from official.nlp.bert import export_tfhub - - -class ExportTfhubTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters("model", "encoder") - def test_export_tfhub(self, ckpt_key_name): - # Exports a savedmodel for TF-Hub - hidden_size = 16 - bert_config = configs.BertConfig( - vocab_size=100, - hidden_size=hidden_size, - intermediate_size=32, - max_position_embeddings=128, - num_attention_heads=2, - num_hidden_layers=1) - bert_model, encoder = export_tfhub.create_bert_model(bert_config) - model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") - checkpoint = tf.train.Checkpoint(**{ckpt_key_name: encoder}) - checkpoint.save(os.path.join(model_checkpoint_dir, "test")) - model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) - - vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt") - with tf.io.gfile.GFile(vocab_file, "w") as f: - f.write("dummy content") - - hub_destination = os.path.join(self.get_temp_dir(), "hub") - export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path, - hub_destination, vocab_file) - - # Restores a hub KerasLayer. - hub_layer = hub.KerasLayer(hub_destination, trainable=True) - - if hasattr(hub_layer, "resolved_object"): - # Checks meta attributes. - self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy()) - with tf.io.gfile.GFile( - hub_layer.resolved_object.vocab_file.asset_path.numpy()) as f: - self.assertEqual("dummy content", f.read()) - # Checks the hub KerasLayer. - for source_weight, hub_weight in zip(bert_model.trainable_weights, - hub_layer.trainable_weights): - self.assertAllClose(source_weight.numpy(), hub_weight.numpy()) - - seq_length = 10 - dummy_ids = np.zeros((2, seq_length), dtype=np.int32) - hub_outputs = hub_layer([dummy_ids, dummy_ids, dummy_ids]) - source_outputs = bert_model([dummy_ids, dummy_ids, dummy_ids]) - - # The outputs of hub module are "pooled_output" and "sequence_output", - # while the outputs of encoder is in reversed order, i.e., - # "sequence_output" and "pooled_output". - encoder_outputs = reversed(encoder([dummy_ids, dummy_ids, dummy_ids])) - self.assertEqual(hub_outputs[0].shape, (2, hidden_size)) - self.assertEqual(hub_outputs[1].shape, (2, seq_length, hidden_size)) - for source_output, hub_output, encoder_output in zip( - source_outputs, hub_outputs, encoder_outputs): - self.assertAllClose(source_output.numpy(), hub_output.numpy()) - self.assertAllClose(source_output.numpy(), encoder_output.numpy()) - - # Test that training=True makes a difference (activates dropout). 
- def _dropout_mean_stddev(training, num_runs=20): - input_ids = np.array([[14, 12, 42, 95, 99]], np.int32) - inputs = [input_ids, np.ones_like(input_ids), np.zeros_like(input_ids)] - outputs = np.concatenate( - [hub_layer(inputs, training=training)[0] for _ in range(num_runs)]) - return np.mean(np.std(outputs, axis=0)) - - self.assertLess(_dropout_mean_stddev(training=False), 1e-6) - self.assertGreater(_dropout_mean_stddev(training=True), 1e-3) - - # Test propagation of seq_length in shape inference. - input_word_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) - input_mask = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) - input_type_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) - pooled_output, sequence_output = hub_layer( - [input_word_ids, input_mask, input_type_ids]) - self.assertEqual(pooled_output.shape.as_list(), [None, hidden_size]) - self.assertEqual(sequence_output.shape.as_list(), - [None, seq_length, hidden_size]) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/input_pipeline.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/input_pipeline.py deleted file mode 100644 index 6e0d05afa42957fb8a2a35fa77cf77426d24fad8..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/input_pipeline.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""BERT model input pipelines.""" - -import tensorflow as tf - - -def decode_record(record, name_to_features): - """Decodes a record to a TensorFlow example.""" - example = tf.io.parse_single_example(record, name_to_features) - - # tf.Example only supports tf.int64, but the TPU only supports tf.int32. - # So cast all int64 to int32. - for name in list(example.keys()): - t = example[name] - if t.dtype == tf.int64: - t = tf.cast(t, tf.int32) - example[name] = t - - return example - - -def single_file_dataset(input_file, name_to_features, num_samples=None): - """Creates a single-file dataset to be passed for BERT custom training.""" - # For training, we want a lot of parallel reading and shuffling. 
- # For eval, we want no shuffling and parallel reading doesn't matter. - d = tf.data.TFRecordDataset(input_file) - if num_samples: - d = d.take(num_samples) - d = d.map( - lambda record: decode_record(record, name_to_features), - num_parallel_calls=tf.data.experimental.AUTOTUNE) - - # When `input_file` is a path to a single file or a list - # containing a single path, disable auto sharding so that - # same input file is sent to all workers. - if isinstance(input_file, str) or len(input_file) == 1: - options = tf.data.Options() - options.experimental_distribute.auto_shard_policy = ( - tf.data.experimental.AutoShardPolicy.OFF) - d = d.with_options(options) - return d - - -def create_pretrain_dataset(input_patterns, - seq_length, - max_predictions_per_seq, - batch_size, - is_training=True, - input_pipeline_context=None, - use_next_sentence_label=True, - use_position_id=False, - output_fake_labels=True): - """Creates input dataset from (tf)records files for pretraining.""" - name_to_features = { - 'input_ids': - tf.io.FixedLenFeature([seq_length], tf.int64), - 'input_mask': - tf.io.FixedLenFeature([seq_length], tf.int64), - 'segment_ids': - tf.io.FixedLenFeature([seq_length], tf.int64), - 'masked_lm_positions': - tf.io.FixedLenFeature([max_predictions_per_seq], tf.int64), - 'masked_lm_ids': - tf.io.FixedLenFeature([max_predictions_per_seq], tf.int64), - 'masked_lm_weights': - tf.io.FixedLenFeature([max_predictions_per_seq], tf.float32), - } - if use_next_sentence_label: - name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature([1], - tf.int64) - if use_position_id: - name_to_features['position_ids'] = tf.io.FixedLenFeature([seq_length], - tf.int64) - for input_pattern in input_patterns: - if not tf.io.gfile.glob(input_pattern): - raise ValueError('%s does not match any files.' % input_pattern) - - dataset = tf.data.Dataset.list_files(input_patterns, shuffle=is_training) - - if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1: - dataset = dataset.shard(input_pipeline_context.num_input_pipelines, - input_pipeline_context.input_pipeline_id) - if is_training: - dataset = dataset.repeat() - - # We set shuffle buffer to exactly match total number of - # training files to ensure that training data is well shuffled. - input_files = [] - for input_pattern in input_patterns: - input_files.extend(tf.io.gfile.glob(input_pattern)) - dataset = dataset.shuffle(len(input_files)) - - # In parallel, create tf record dataset for each train files. - # cycle_length = 8 means that up to 8 files will be read and deserialized in - # parallel. You may want to increase this number if you have a large number of - # CPU cores. 
- dataset = dataset.interleave( - tf.data.TFRecordDataset, - cycle_length=8, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - - if is_training: - dataset = dataset.shuffle(100) - - decode_fn = lambda record: decode_record(record, name_to_features) - dataset = dataset.map( - decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) - - def _select_data_from_record(record): - """Filter out features to use for pretraining.""" - x = { - 'input_word_ids': record['input_ids'], - 'input_mask': record['input_mask'], - 'input_type_ids': record['segment_ids'], - 'masked_lm_positions': record['masked_lm_positions'], - 'masked_lm_ids': record['masked_lm_ids'], - 'masked_lm_weights': record['masked_lm_weights'], - } - if use_next_sentence_label: - x['next_sentence_labels'] = record['next_sentence_labels'] - if use_position_id: - x['position_ids'] = record['position_ids'] - - # TODO(hongkuny): Remove the fake labels after migrating bert pretraining. - if output_fake_labels: - return (x, record['masked_lm_weights']) - else: - return x - - dataset = dataset.map( - _select_data_from_record, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - dataset = dataset.batch(batch_size, drop_remainder=is_training) - dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset - - -def create_classifier_dataset(file_path, - seq_length, - batch_size, - is_training=True, - input_pipeline_context=None, - label_type=tf.int64, - include_sample_weights=False, - num_samples=None): - """Creates input dataset from (tf)records files for train/eval.""" - name_to_features = { - 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - 'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64), - 'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - 'label_ids': tf.io.FixedLenFeature([], label_type), - } - if include_sample_weights: - name_to_features['weight'] = tf.io.FixedLenFeature([], tf.float32) - dataset = single_file_dataset(file_path, name_to_features, - num_samples=num_samples) - - # The dataset is always sharded by number of hosts. - # num_input_pipelines is the number of hosts rather than number of cores. 
- if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1: - dataset = dataset.shard(input_pipeline_context.num_input_pipelines, - input_pipeline_context.input_pipeline_id) - - def _select_data_from_record(record): - x = { - 'input_word_ids': record['input_ids'], - 'input_mask': record['input_mask'], - 'input_type_ids': record['segment_ids'] - } - y = record['label_ids'] - if include_sample_weights: - w = record['weight'] - return (x, y, w) - return (x, y) - - if is_training: - dataset = dataset.shuffle(100) - dataset = dataset.repeat() - - dataset = dataset.map( - _select_data_from_record, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - dataset = dataset.batch(batch_size, drop_remainder=is_training) - dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset - - -def create_squad_dataset(file_path, - seq_length, - batch_size, - is_training=True, - input_pipeline_context=None): - """Creates input dataset from (tf)records files for train/eval.""" - name_to_features = { - 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - 'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64), - 'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - } - if is_training: - name_to_features['start_positions'] = tf.io.FixedLenFeature([], tf.int64) - name_to_features['end_positions'] = tf.io.FixedLenFeature([], tf.int64) - else: - name_to_features['unique_ids'] = tf.io.FixedLenFeature([], tf.int64) - - dataset = single_file_dataset(file_path, name_to_features) - - # The dataset is always sharded by number of hosts. - # num_input_pipelines is the number of hosts rather than number of cores. - if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1: - dataset = dataset.shard(input_pipeline_context.num_input_pipelines, - input_pipeline_context.input_pipeline_id) - - def _select_data_from_record(record): - """Dispatches record to features and labels.""" - x, y = {}, {} - for name, tensor in record.items(): - if name in ('start_positions', 'end_positions'): - y[name] = tensor - elif name == 'input_ids': - x['input_word_ids'] = tensor - elif name == 'segment_ids': - x['input_type_ids'] = tensor - else: - x[name] = tensor - return (x, y) - - if is_training: - dataset = dataset.shuffle(100) - dataset = dataset.repeat() - - dataset = dataset.map( - _select_data_from_record, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - dataset = dataset.batch(batch_size, drop_remainder=True) - dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset - - -def create_retrieval_dataset(file_path, - seq_length, - batch_size, - input_pipeline_context=None): - """Creates input dataset from (tf)records files for scoring.""" - name_to_features = { - 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - 'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64), - 'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - 'example_id': tf.io.FixedLenFeature([1], tf.int64), - } - dataset = single_file_dataset(file_path, name_to_features) - - # The dataset is always sharded by number of hosts. - # num_input_pipelines is the number of hosts rather than number of cores. 
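decode_record, called by all of these dataset builders, is defined earlier in this file. A minimal stand-in, assuming it simply parses one serialized tf.train.Example against the name_to_features spec and casts int64 features down to int32 (which is what the int32 padding in _pad_to_batch below suggests):

    import tensorflow as tf

    def decode_record_sketch(record, name_to_features):
      """Parses a serialized Example and downcasts int64 features to int32."""
      example = tf.io.parse_single_example(record, name_to_features)
      for name in list(example.keys()):
        tensor = example[name]
        if tensor.dtype == tf.int64:
          tensor = tf.cast(tensor, tf.int32)
        example[name] = tensor
      return example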
- if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1: - dataset = dataset.shard(input_pipeline_context.num_input_pipelines, - input_pipeline_context.input_pipeline_id) - - def _select_data_from_record(record): - x = { - 'input_word_ids': record['input_ids'], - 'input_mask': record['input_mask'], - 'input_type_ids': record['segment_ids'] - } - y = record['example_id'] - return (x, y) - - dataset = dataset.map( - _select_data_from_record, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - dataset = dataset.batch(batch_size, drop_remainder=False) - - def _pad_to_batch(x, y): - cur_size = tf.shape(y)[0] - pad_size = batch_size - cur_size - - pad_ids = tf.zeros(shape=[pad_size, seq_length], dtype=tf.int32) - for key in ('input_word_ids', 'input_mask', 'input_type_ids'): - x[key] = tf.concat([x[key], pad_ids], axis=0) - - pad_labels = -tf.ones(shape=[pad_size, 1], dtype=tf.int32) - y = tf.concat([y, pad_labels], axis=0) - return x, y - - dataset = dataset.map( - _pad_to_batch, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - - dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_saving_utils.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_saving_utils.py deleted file mode 100644 index 141ebb2c4e347c89b5dacc4ed6fb105a3e3b3017..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_saving_utils.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Utilities to save models.""" - -import os - -from absl import logging -import tensorflow as tf -import typing - - -def export_bert_model(model_export_path: typing.Text, - model: tf.keras.Model, - checkpoint_dir: typing.Optional[typing.Text] = None, - restore_model_using_load_weights: bool = False) -> None: - """Export BERT model for serving which does not include the optimizer. - - Args: - model_export_path: Path to which exported model will be saved. - model: Keras model object to export. - checkpoint_dir: Path from which model weights will be loaded, if - specified. 
-    restore_model_using_load_weights: Whether to use checkpoint.restore() API
-      for custom checkpoint or to use model.load_weights() API. There are two
-      different ways to save checkpoints. One is using tf.train.Checkpoint and
-      another is using Keras model.save_weights(). The custom training loop
-      implementation uses the tf.train.Checkpoint API and the Keras
-      ModelCheckpoint callback internally uses the model.save_weights() API.
-      Since these two APIs cannot be used together, the model loading logic
-      must take into account how the model checkpoint was saved.
-
-  Raises:
-    ValueError when either model_export_path or model is not specified.
-  """
-  if not model_export_path:
-    raise ValueError('model_export_path must be specified.')
-  if not isinstance(model, tf.keras.Model):
-    raise ValueError('model must be a tf.keras.Model object.')
-
-  if checkpoint_dir:
-    if restore_model_using_load_weights:
-      model_weight_path = os.path.join(checkpoint_dir, 'checkpoint')
-      assert tf.io.gfile.exists(model_weight_path)
-      model.load_weights(model_weight_path)
-    else:
-      checkpoint = tf.train.Checkpoint(model=model)
-
-      # Restores the model from the latest checkpoint.
-      latest_checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
-      assert latest_checkpoint_file
-      logging.info('Checkpoint file %s found and restoring from '
-                   'checkpoint', latest_checkpoint_file)
-      checkpoint.restore(
-          latest_checkpoint_file).assert_existing_objects_matched()
-
-  model.save(model_export_path, include_optimizer=False, save_format='tf')
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_training_utils.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_training_utils.py
deleted file mode 100644
index 189021ecb5e59446bb69bca6c5e17cbf05c4e47c..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_training_utils.py
+++ /dev/null
@@ -1,607 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Lightweight utilities to train NLP models."""
-
-import json
-import os
-import tempfile
-from absl import logging
-import tensorflow as tf
-from tensorflow.python.util import deprecation
-from official.common import distribute_utils
-from official.staging.training import grad_utils
-
-_SUMMARY_TXT = 'training_summary.txt'
-_MIN_SUMMARY_STEPS = 10
-
-
-def _should_export_checkpoint(strategy):
-  return (not strategy) or strategy.extended.should_checkpoint
-
-
-def _should_export_summary(strategy):
-  return (not strategy) or strategy.extended.should_save_summary
-
-
-def _save_checkpoint(strategy, checkpoint, model_dir, checkpoint_prefix):
-  """Saves the model with the provided checkpoint prefix."""
-
-  if _should_export_checkpoint(strategy):
-    checkpoint_path = os.path.join(model_dir, checkpoint_prefix)
-    saved_path = checkpoint.save(checkpoint_path)
-    logging.info('Saving model as TF checkpoint: %s', saved_path)
-  else:
-    # In multi worker training we need every worker to save checkpoint, because
-    # variables can trigger synchronization on read and synchronization needs
-    # all workers to participate. To avoid workers overwriting each other we
-    # save to a temporary directory on non-chief workers.
-    tmp_dir = tempfile.mkdtemp()
-    checkpoint.save(os.path.join(tmp_dir, 'ckpt'))
-    tf.io.gfile.rmtree(tmp_dir)
-  return
-
-
-def _get_input_iterator(input_fn, strategy):
-  """Returns distributed dataset iterator."""
-  # When training with TPU pods, datasets need to be cloned across
-  # workers. Since a Dataset instance cannot be cloned in eager mode, we
-  # instead pass a callable that returns a dataset.
-  if not callable(input_fn):
-    raise ValueError('`input_fn` should be a closure that returns a dataset.')
-  iterator = iter(strategy.distribute_datasets_from_function(input_fn))
-  return iterator
-
-
-def _float_metric_value(metric):
-  """Gets the value of a float-valued Keras metric."""
-  return metric.result().numpy().astype(float)
-
-
-def clip_by_global_norm_callback(grads_and_vars):
-  """Performs gradient clipping."""
-  grads, variables = zip(*grads_and_vars)
-  (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
-  return zip(clipped_grads, variables)
-
-
-def steps_to_run(current_step, steps_per_epoch, steps_per_loop):
-  """Calculates steps to run on device."""
-  if steps_per_loop <= 0:
-    raise ValueError('steps_per_loop should be a positive integer.')
-  if steps_per_loop == 1:
-    return steps_per_loop
-  remainder_in_epoch = current_step % steps_per_epoch
-  if remainder_in_epoch != 0:
-    return min(steps_per_epoch - remainder_in_epoch, steps_per_loop)
-  else:
-    return steps_per_loop
-
-
-def write_txt_summary(training_summary, summary_dir):
-  """Writes a summary text file to record stats."""
-  if not tf.io.gfile.exists(summary_dir):
-    tf.io.gfile.mkdir(summary_dir)
-  summary_path = os.path.join(summary_dir, _SUMMARY_TXT)
-  with tf.io.gfile.GFile(summary_path, 'wb') as f:
-    logging.info('Training Summary: \n%s', str(training_summary))
-    f.write(json.dumps(training_summary, indent=4))
-
-
-@deprecation.deprecated(
-    None, 'This function is deprecated and we do not expect to add new '
-    'functionality. 
Please do not make your code depend '
-    'on this library.')
-def run_customized_training_loop(
-    # pylint: disable=invalid-name
-    _sentinel=None,
-    # pylint: enable=invalid-name
-    strategy=None,
-    model_fn=None,
-    loss_fn=None,
-    scale_loss=True,
-    model_dir=None,
-    train_input_fn=None,
-    steps_per_epoch=None,
-    num_eval_per_epoch=1,
-    steps_per_loop=None,
-    epochs=1,
-    eval_input_fn=None,
-    eval_steps=None,
-    metric_fn=None,
-    init_checkpoint=None,
-    custom_callbacks=None,
-    run_eagerly=False,
-    sub_model_export_name=None,
-    explicit_allreduce=False,
-    pre_allreduce_callbacks=None,
-    post_allreduce_callbacks=None,
-    train_summary_interval=0,
-    allreduce_bytes_per_pack=0):
-  """Run BERT pretrain model training using low-level API.
-
-  Args:
-    _sentinel: Used to prevent positional parameters. Internal, do not use.
-    strategy: Distribution strategy on which to run low level training loop.
-    model_fn: Function that returns a tuple (model, sub_model). Caller of this
-      function should add an optimizer to the `model` via calling the
-      `model.compile()` API or manually setting the `model.optimizer`
-      attribute. The second element of the returned tuple (sub_model) is an
-      optional sub model to be used for the initial checkpoint -- if provided.
-    loss_fn: Function with signature func(labels, logits) that returns a loss
-      tensor.
-    scale_loss: Whether to divide the raw loss by the number of replicas
-      before gradients calculation.
-    model_dir: Model directory used during training for restoring/saving model
-      weights.
-    train_input_fn: Function that returns a tf.data.Dataset used for training.
-    steps_per_epoch: Number of steps to run per epoch. At the end of each
-      epoch, a model checkpoint will be saved and evaluation will be conducted
-      if an evaluation dataset is provided.
-    num_eval_per_epoch: Number of evaluations per epoch.
-    steps_per_loop: Number of steps per graph-mode loop. In order to reduce
-      communication in eager context, training logs are printed every
-      steps_per_loop.
-    epochs: Number of epochs to train.
-    eval_input_fn: Function that returns the evaluation dataset. If None,
-      evaluation is skipped.
-    eval_steps: Number of steps to run evaluation. Required if `eval_input_fn`
-      is not None.
-    metric_fn: A metrics function that returns either a Keras Metric object or
-      a list of Keras Metric objects to record evaluation results using the
-      evaluation dataset or with the training dataset after every epoch.
-    init_checkpoint: Optional checkpoint to load to `sub_model` returned by
-      `model_fn`.
-    custom_callbacks: A list of Keras Callbacks objects to run during
-      training. More specifically, the `on_train_begin()`, `on_train_end()`,
-      `on_batch_begin()`, `on_batch_end()`, `on_epoch_begin()`, and
-      `on_epoch_end()` methods are invoked during training. Note that some
-      metrics may be missing from `logs`.
-    run_eagerly: Whether to run model training in pure eager execution. This
-      should be disabled for TPUStrategy.
-    sub_model_export_name: If not None, will export `sub_model` returned by
-      `model_fn` into checkpoint files. The name of an intermediate checkpoint
-      file is {sub_model_export_name}_step_{step}.ckpt and the last
-      checkpoint's name is {sub_model_export_name}.ckpt; if None, `sub_model`
-      will not be exported as a checkpoint.
-    explicit_allreduce: Whether to explicitly perform gradient allreduce,
-      instead of relying on implicit allreduce in optimizer.apply_gradients().
-      Default is False. For now, if training using FP16 mixed precision,
-      explicit allreduce will aggregate gradients in FP16 format. 
For TPU and
-      GPU training using FP32, explicit allreduce will aggregate gradients in
-      FP32 format.
-    pre_allreduce_callbacks: A list of callback functions that take gradient
-      and model variable pairs as input, manipulate them, and return new
-      gradient and model variable pairs. The callback functions will be
-      invoked in the list order and before gradients are allreduced. With
-      mixed precision training, the pre_allreduce_callbacks will be applied to
-      scaled_gradients. Default is no callbacks. Only used when
-      explicit_allreduce=True.
-    post_allreduce_callbacks: A list of callback functions that take gradient
-      and model variable pairs as input, manipulate them, and return new
-      gradient and model variable pairs. The callback functions will be
-      invoked in the list order and right before gradients are applied to
-      variables for updates. Default is no callbacks. Only used when
-      explicit_allreduce=True.
-    train_summary_interval: Step interval for training summaries. If the value
-      is a negative number, then training summaries are not enabled.
-    allreduce_bytes_per_pack: A non-negative integer. Breaks collective
-      operations into packs of a certain size. If it's zero, all gradients are
-      in one pack. Breaking gradients into packs could enable overlap between
-      allreduce and backprop computation. This flag only takes effect when
-      explicit_allreduce is set to True.
-
-  Returns:
-    Trained model.
-
-  Raises:
-    ValueError: (1) When the model returned by `model_fn` does not have an
-      optimizer attribute or when required parameters are set to None. (2)
-      eval args are not specified correctly. (3) metric_fn must be a callable
-      if specified. (4) sub_model_export_name is specified, but `sub_model`
-      returned by `model_fn` is None.
-  """
-
-  if _sentinel is not None:
-    raise ValueError('only call `run_customized_training_loop()` '
-                     'with named arguments.')
-
-  required_arguments = [
-      strategy, model_fn, loss_fn, model_dir, steps_per_epoch, train_input_fn
-  ]
-
-  steps_between_evals = int(steps_per_epoch / num_eval_per_epoch)
-  if [arg for arg in required_arguments if arg is None]:
-    raise ValueError('`strategy`, `model_fn`, `loss_fn`, `model_dir`, '
-                     '`steps_per_epoch` and `train_input_fn` are required '
-                     'parameters.')
-  if not steps_per_loop:
-    if tf.config.list_logical_devices('TPU'):
-      # One can't fully utilize a TPU with steps_per_loop=1, so in this case
-      # we default to a more useful value.
-      steps_per_loop = min(1000, steps_between_evals)
-    else:
-      steps_per_loop = 1
-    logging.info('steps_per_loop not specified. 
Using steps_per_loop=%d',
-                 steps_per_loop)
-  if steps_per_loop > steps_between_evals:
-    logging.warning(
-        'steps_per_loop: %d is specified to be greater than '
-        'steps_between_evals: %d, we will use steps_between_evals as '
-        'steps_per_loop.', steps_per_loop, steps_between_evals)
-    steps_per_loop = steps_between_evals
-  assert tf.executing_eagerly()
-
-  if run_eagerly:
-    if isinstance(
-        strategy,
-        (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)):
-      raise ValueError(
-          'TPUStrategy should not run eagerly as it heavily relies on graph'
-          ' optimization for the distributed system.')
-
-  if eval_input_fn and eval_steps is None:
-    raise ValueError(
-        '`eval_steps` is required when `eval_input_fn` is not None.')
-  if metric_fn and not callable(metric_fn):
-    raise ValueError(
-        'if `metric_fn` is specified, metric_fn must be a callable.')
-
-  total_training_steps = steps_per_epoch * epochs
-  train_iterator = _get_input_iterator(train_input_fn, strategy)
-  eval_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)
-
-  with distribute_utils.get_strategy_scope(strategy):
-    # To correctly place the model weights on accelerators,
-    # model and optimizer should be created in scope.
-    model, sub_model = model_fn()
-    if not hasattr(model, 'optimizer'):
-      raise ValueError('User should set the optimizer attribute to model '
-                       'inside `model_fn`.')
-    if sub_model_export_name and sub_model is None:
-      raise ValueError('sub_model_export_name is specified as %s, but '
-                       'sub_model is None.' % sub_model_export_name)
-
-    callback_list = tf.keras.callbacks.CallbackList(
-        callbacks=custom_callbacks, model=model)
-
-    optimizer = model.optimizer
-
-    if init_checkpoint:
-      logging.info(
-          'Checkpoint file %s found and restoring from '
-          'initial checkpoint for core model.', init_checkpoint)
-      checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model)
-      checkpoint.restore(init_checkpoint).expect_partial().assert_existing_objects_matched()
-      logging.info('Loading from checkpoint file completed')
-
-    train_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)
-    eval_metrics = metric_fn() if metric_fn else []
-    if not isinstance(eval_metrics, list):
-      eval_metrics = [eval_metrics]
-    # If evaluation is required, make a copy of each metric, as it will be
-    # used by both training and evaluation.
-    train_metrics = [
-        metric.__class__.from_config(metric.get_config())
-        for metric in eval_metrics
-    ]
-
-    # Create summary writers
-    if _should_export_summary(strategy):
-      summary_dir = os.path.join(model_dir, 'summaries')
-    else:
-      # In multi worker training we need every worker to write summary, because
-      # variables can trigger synchronization on read and synchronization needs
-      # all workers to participate.
-      summary_dir = tempfile.mkdtemp()
-    eval_summary_writer = tf.summary.create_file_writer(
-        os.path.join(summary_dir, 'eval'))
-    last_summary_step = 0
-    if steps_per_loop >= _MIN_SUMMARY_STEPS and train_summary_interval >= 0:
-      # Only write summaries when stats have been collected over sufficiently
-      # many steps.
-      train_summary_writer = tf.summary.create_file_writer(
-          os.path.join(summary_dir, 'train'))
-    else:
-      train_summary_writer = tf.summary.create_noop_writer()
-
-    # Collects training variables.
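Before the deleted _replicated_step below, a stripped-down, single-replica sketch of the same tape-based update it performs (the mixed-precision and explicit-allreduce branches are omitted); model, optimizer, and loss_fn stand for the objects created above:

    import tensorflow as tf

    def train_step_sketch(model, optimizer, loss_fn, inputs, labels,
                          num_replicas=1):
      with tf.GradientTape() as tape:
        outputs = model(inputs, training=True)
        # Scale so summed per-replica gradients match the global loss.
        loss = loss_fn(labels, outputs) / num_replicas
      grads = tape.gradient(loss, model.trainable_variables)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      return loss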
-    training_vars = model.trainable_variables
-
-    def _replicated_step(inputs):
-      """Replicated training step."""
-
-      inputs, labels = inputs
-      with tf.GradientTape() as tape:
-        model_outputs = model(inputs, training=True)
-        loss = loss_fn(labels, model_outputs)
-        # Raw loss is used for reporting in metrics/logs.
-        raw_loss = loss
-        if scale_loss:
-          # Scales down the loss so gradients are invariant to the number of
-          # replicas.
-          loss = loss / strategy.num_replicas_in_sync
-
-      if explicit_allreduce:
-        grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
-                                                     training_vars,
-                                                     pre_allreduce_callbacks,
-                                                     post_allreduce_callbacks,
-                                                     allreduce_bytes_per_pack)
-      else:
-        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
-          with tape:
-            scaled_loss = optimizer.get_scaled_loss(loss)
-          scaled_grads = tape.gradient(scaled_loss, training_vars)
-          grads = optimizer.get_unscaled_gradients(scaled_grads)
-        else:
-          grads = tape.gradient(loss, training_vars)
-        optimizer.apply_gradients(zip(grads, training_vars))
-      # For reporting, the metric takes the mean of losses.
-      train_loss_metric.update_state(raw_loss)
-      for metric in train_metrics:
-        metric.update_state(labels, model_outputs)
-
-    @tf.function
-    def train_steps(iterator, steps):
-      """Performs distributed training steps in a loop.
-
-      Args:
-        iterator: the distributed iterator of training datasets.
-        steps: a tf.int32 tensor specifying the number of steps to run
-          inside the host training loop.
-
-      Raises:
-        ValueError: Any of the arguments or tensor shapes are invalid.
-      """
-      if not isinstance(steps, tf.Tensor):
-        raise ValueError('steps should be a Tensor. A Python object may cause '
-                         'retracing.')
-
-      for _ in tf.range(steps):
-        strategy.run(_replicated_step, args=(next(iterator),))
-
-    def train_single_step(iterator):
-      """Performs a distributed training step.
-
-      Args:
-        iterator: the distributed iterator of training datasets.
-
-      Raises:
-        ValueError: Any of the arguments or tensor shapes are invalid.
-      """
-      strategy.run(_replicated_step, args=(next(iterator),))
-
-    def test_step(iterator):
-      """Calculates evaluation metrics on distributed devices."""
-
-      def _test_step_fn(inputs):
-        """Replicated accuracy calculation."""
-
-        inputs, labels = inputs
-        model_outputs = model(inputs, training=False)
-        for metric in eval_metrics:
-          metric.update_state(labels, model_outputs)
-        return model_outputs, labels
-
-      outputs, labels = strategy.run(_test_step_fn, args=(next(iterator),))
-      outputs = tf.nest.map_structure(strategy.experimental_local_results,
-                                      outputs)
-      labels = tf.nest.map_structure(strategy.experimental_local_results,
-                                     labels)
-      return outputs, labels
-
-    if not run_eagerly:
-      train_single_step = tf.function(train_single_step)
-      test_step = tf.function(test_step)
-
-    def _run_evaluation(current_training_step, test_iterator):
-      """Runs validation steps and aggregates metrics.
-
-      Args:
-        current_training_step: tf.int32 tensor containing the current step.
-        test_iterator: distributed iterator of test datasets.
-
-      Returns:
-        A dict of metric names and values.
-      """
-      # The last batch of the evaluation is often smaller than previous ones.
-      # Moreover, on some replicas it might even be empty. Therefore, different
-      # from the way training_loss is calculated, we need to gather all the
-      # logits and labels here and calculate the evaluation loss
-      # outside.
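Numerically, the aggregation described in the comment above is just a weighted mean of per-batch losses, weighted by the number of real examples each batch contained; a tiny sketch with assumed values:

    import tensorflow as tf

    eval_loss = tf.keras.metrics.Mean('eval_loss')
    # Assumed: two batches with 32 and 8 real examples respectively.
    eval_loss.update_state([0.5, 0.9], sample_weight=[32, 8])
    assert abs(float(eval_loss.result()) - 0.58) < 1e-6  # (0.5*32 + 0.9*8) / 40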
-      loss_list, loss_weights = list(), list()
-      for _ in range(eval_steps):
-        outputs, labels = test_step(test_iterator)
-        for cur_logits, cur_labels in zip(outputs, labels):
-          # This is to handle cases when cur_labels is not a single tensor,
-          # but a dict of tensors.
-          cur_weight = tf.shape(tf.nest.flatten(cur_labels)[0])[0]
-          if cur_weight != 0:
-            loss_list.append(loss_fn(cur_labels, cur_logits).numpy())
-            loss_weights.append(cur_weight)
-      # The sample_weights are the actual number of examples in each batch,
-      # a sum of the number of examples in each replica when using
-      # distributed training.
-      eval_loss_metric.update_state(loss_list, sample_weight=loss_weights)
-
-      logs = {}
-      with eval_summary_writer.as_default():
-        for metric in [eval_loss_metric] + eval_metrics + model.metrics:
-          metric_value = _float_metric_value(metric)
-          logs[metric.name] = metric_value
-          logging.info('Step: [%d] Validation %s = %f', current_training_step,
-                       metric.name, metric_value)
-          tf.summary.scalar(
-              metric.name, metric_value, step=current_training_step)
-        eval_summary_writer.flush()
-
-      return logs
-
-    # Training loop starts here.
-    checkpoint = tf.train.Checkpoint(
-        model=model, optimizer=optimizer, global_step=optimizer.iterations)
-    sub_model_checkpoint = tf.train.Checkpoint(
-        model=sub_model,
-        global_step=optimizer.iterations) if sub_model_export_name else None
-
-    latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
-    if latest_checkpoint_file:
-      logging.info('Checkpoint file %s found and restoring from '
-                   'checkpoint', latest_checkpoint_file)
-      checkpoint.restore(latest_checkpoint_file).expect_partial()
-      logging.info('Loading from checkpoint file completed')
-
-    current_step = optimizer.iterations.numpy()
-    checkpoint_name = 'ctl_step_{step}.ckpt'
-
-    logs = {}
-    callback_list.on_train_begin()
-    while current_step < total_training_steps and not model.stop_training:
-      if current_step % steps_per_epoch == 0:
-        callback_list.on_epoch_begin(int(current_step / steps_per_epoch) + 1)
-
-      # Training loss/metrics are averaged over the steps inside the micro
-      # training loop. We reset their values before each round.
-      train_loss_metric.reset_states()
-      for metric in train_metrics + model.metrics:
-        metric.reset_states()
-
-      callback_list.on_batch_begin(current_step)
-      # Runs several steps in the host while loop.
-      steps = steps_to_run(current_step, steps_between_evals, steps_per_loop)
-
-      if tf.config.list_physical_devices('GPU'):
-        # TODO(zongweiz): merge with train_steps once tf.while_loop
-        # GPU performance bugs are fixed.
-        for _ in range(steps):
-          train_single_step(train_iterator)
-      else:
-        # Converts steps to a Tensor to avoid tf.function retracing.
-        train_steps(train_iterator, tf.convert_to_tensor(steps, dtype=tf.int32))
-
-      train_loss = _float_metric_value(train_loss_metric)
-      current_step += steps
-
-      # Updates training logging.
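For orientation, the steps value used in the loop above comes from steps_to_run (defined near the top of this file), which never runs past an epoch or evaluation boundary; a re-derivation of its arithmetic under assumed settings:

    def steps_to_run_sketch(current_step, steps_per_epoch, steps_per_loop):
      remainder = current_step % steps_per_epoch
      if remainder:
        return min(steps_per_epoch - remainder, steps_per_loop)
      return steps_per_loop

    assert steps_to_run_sketch(0, 20, 10) == 10   # the full loop fits
    assert steps_to_run_sketch(15, 20, 10) == 5   # stop at the epoch boundary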
-      training_status = 'Train Step: %d/%d  / loss = %s' % (
-          current_step, total_training_steps, train_loss)
-
-      if current_step >= last_summary_step + train_summary_interval:
-        summary_writer = train_summary_writer
-        last_summary_step = current_step
-      else:
-        summary_writer = tf.summary.create_noop_writer()
-
-      with summary_writer.as_default():
-        if callable(optimizer.learning_rate):
-          tf.summary.scalar(
-              'learning_rate',
-              optimizer.learning_rate(current_step),
-              step=current_step)
-        tf.summary.scalar(train_loss_metric.name, train_loss, step=current_step)
-        for metric in train_metrics + model.metrics:
-          metric_value = _float_metric_value(metric)
-          training_status += ' %s = %f' % (metric.name, metric_value)
-          tf.summary.scalar(metric.name, metric_value, step=current_step)
-        summary_writer.flush()
-      training_status += '\n'
-      logging.info(training_status)
-
-      # If there is no need for evaluation, we only call on_batch_end with
-      # train_loss; this ensures we get granular global_step/sec on
-      # TensorBoard.
-      if current_step % steps_between_evals:
-        callback_list.on_batch_end(current_step - 1, {'loss': train_loss})
-      else:
-        # Save a submodel with the step in the file name after each epoch.
-        if sub_model_export_name:
-          _save_checkpoint(
-              strategy, sub_model_checkpoint, model_dir,
-              '%s_step_%d.ckpt' % (sub_model_export_name, current_step))
-
-        # Save model checkpoints and run validation steps after each epoch
-        # (with the exception of the final epoch, which is handled after the
-        # training loop).
-        if current_step < total_training_steps:
-          _save_checkpoint(strategy, checkpoint, model_dir,
-                           checkpoint_name.format(step=current_step))
-          if eval_input_fn:
-            # Re-initialize evaluation metric.
-            eval_loss_metric.reset_states()
-            for metric in eval_metrics + model.metrics:
-              metric.reset_states()
-
-            logging.info('Running evaluation after step: %s.', current_step)
-            logs = _run_evaluation(current_step,
-                                   _get_input_iterator(eval_input_fn, strategy))
-        # We add train_loss here rather than call on_batch_end twice to make
-        # sure that no duplicated values are generated.
-        logs['loss'] = train_loss
-        callback_list.on_batch_end(current_step - 1, logs)
-
-      # Calls on_epoch_end after each real epoch ends to prevent miscalculation
-      # of training steps.
-      if current_step % steps_per_epoch == 0:
-        callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs)
-
-    if sub_model_export_name:
-      _save_checkpoint(strategy, sub_model_checkpoint, model_dir,
-                       '%s.ckpt' % sub_model_export_name)
-
-    _save_checkpoint(strategy, checkpoint, model_dir,
-                     checkpoint_name.format(step=current_step))
-    if eval_input_fn:
-      # Re-initialize evaluation metric.
- eval_loss_metric.reset_states() - for metric in eval_metrics + model.metrics: - metric.reset_states() - - logging.info('Running final evaluation after training is complete.') - logs = _run_evaluation(current_step, - _get_input_iterator(eval_input_fn, strategy)) - callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs) - training_summary = { - 'total_training_steps': total_training_steps, - 'train_loss': _float_metric_value(train_loss_metric), - } - for metric in model.metrics: - training_summary[metric.name] = _float_metric_value(metric) - if eval_metrics: - training_summary['last_train_metrics'] = _float_metric_value( - train_metrics[0]) - training_summary['eval_metrics'] = _float_metric_value(eval_metrics[0]) - - write_txt_summary(training_summary, summary_dir) - - if not _should_export_summary(strategy): - tf.io.gfile.rmtree(summary_dir) - - callback_list.on_train_end() - - return model diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_training_utils_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_training_utils_test.py deleted file mode 100644 index 52011f0c2e21f2c3a182df4a90dc5f71f55779ef..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/model_training_utils_test.py +++ /dev/null @@ -1,322 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for official.modeling.training.model_training_utils.""" - -import os - -from absl import logging -from absl.testing import flagsaver -from absl.testing import parameterized -from absl.testing.absltest import mock -import numpy as np -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.nlp.bert import common_flags -from official.nlp.bert import model_training_utils - - -common_flags.define_common_bert_flags() - - -def eager_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.cloud_tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.mirrored_strategy_with_two_gpus, - ],) - - -def eager_gpu_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.default_strategy, - strategy_combinations.one_device_strategy_gpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.mirrored_strategy_with_two_gpus, - ],) - - -def create_fake_data_input_fn(batch_size, features_shape, num_classes): - """Creates a dummy input function with the given feature and label shapes. - - Args: - batch_size: integer. - features_shape: list[int]. Feature shape for an individual example. - num_classes: integer. Number of labels. - - Returns: - An input function that is usable in the executor. - """ - - def _dataset_fn(input_context=None): - """An input function for generating fake data.""" - local_batch_size = input_context.get_per_replica_batch_size(batch_size) - features = np.random.rand(64, *features_shape) - labels = np.random.randint(2, size=[64, num_classes]) - # Convert the inputs to a Dataset. - dataset = tf.data.Dataset.from_tensor_slices((features, labels)) - dataset = dataset.shard(input_context.num_input_pipelines, - input_context.input_pipeline_id) - - def _assign_dtype(features, labels): - features = tf.cast(features, tf.float32) - labels = tf.cast(labels, tf.float32) - return features, labels - - # Shuffle, repeat, and batch the examples. 
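A note on local_batch_size above: input_context.get_per_replica_batch_size divides the global batch size evenly across replicas (and raises a ValueError when it does not divide evenly). A sketch of the arithmetic with assumed example values:

    global_batch_size, num_replicas_in_sync = 32, 4  # assumed example values
    per_replica_batch_size = global_batch_size // num_replicas_in_sync
    assert per_replica_batch_size == 8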
- dataset = dataset.map(_assign_dtype) - dataset = dataset.shuffle(64).repeat() - dataset = dataset.batch(local_batch_size, drop_remainder=True) - dataset = dataset.prefetch(buffer_size=64) - return dataset - - return _dataset_fn - - -def create_model_fn(input_shape, num_classes, use_float16=False): - - def _model_fn(): - """A one-layer softmax model suitable for testing.""" - input_layer = tf.keras.layers.Input(shape=input_shape) - x = tf.keras.layers.Dense(num_classes, activation='relu')(input_layer) - output_layer = tf.keras.layers.Dense(num_classes, activation='softmax')(x) - sub_model = tf.keras.models.Model(input_layer, x, name='sub_model') - model = tf.keras.models.Model(input_layer, output_layer, name='model') - model.add_metric( - tf.reduce_mean(input_layer), name='mean_input', aggregation='mean') - model.optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9) - if use_float16: - model.optimizer = tf.keras.mixed_precision.LossScaleOptimizer( - model.optimizer) - return model, sub_model - - return _model_fn - - -def metric_fn(): - """Gets a tf.keras metric object.""" - return tf.keras.metrics.CategoricalAccuracy(name='accuracy', dtype=tf.float32) - - -def summaries_with_matching_keyword(keyword, summary_dir): - """Yields summary protos matching given keyword from event file.""" - event_paths = tf.io.gfile.glob(os.path.join(summary_dir, 'events*')) - for event in tf.compat.v1.train.summary_iterator(event_paths[-1]): - if event.summary is not None: - for value in event.summary.value: - if keyword in value.tag: - logging.error(event) - yield event.summary - - -def check_eventfile_for_keyword(keyword, summary_dir): - """Checks event files for the keyword.""" - return any(summaries_with_matching_keyword(keyword, summary_dir)) - - -class RecordingCallback(tf.keras.callbacks.Callback): - - def __init__(self): - self.batch_begin = [] # (batch, logs) - self.batch_end = [] # (batch, logs) - self.epoch_begin = [] # (epoch, logs) - self.epoch_end = [] # (epoch, logs) - - def on_batch_begin(self, batch, logs=None): - self.batch_begin.append((batch, logs)) - - def on_batch_end(self, batch, logs=None): - self.batch_end.append((batch, logs)) - - def on_epoch_begin(self, epoch, logs=None): - self.epoch_begin.append((epoch, logs)) - - def on_epoch_end(self, epoch, logs=None): - self.epoch_end.append((epoch, logs)) - - -class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super(ModelTrainingUtilsTest, self).setUp() - self._model_fn = create_model_fn(input_shape=[128], num_classes=3) - - @flagsaver.flagsaver - def run_training(self, strategy, model_dir, steps_per_loop, run_eagerly): - input_fn = create_fake_data_input_fn( - batch_size=8, features_shape=[128], num_classes=3) - model_training_utils.run_customized_training_loop( - strategy=strategy, - model_fn=self._model_fn, - loss_fn=tf.keras.losses.categorical_crossentropy, - model_dir=model_dir, - steps_per_epoch=20, - steps_per_loop=steps_per_loop, - epochs=2, - train_input_fn=input_fn, - eval_input_fn=input_fn, - eval_steps=10, - init_checkpoint=None, - sub_model_export_name='my_submodel_name', - metric_fn=metric_fn, - custom_callbacks=None, - run_eagerly=run_eagerly) - - @combinations.generate(eager_strategy_combinations()) - def test_train_eager_single_step(self, distribution): - model_dir = self.create_tempdir().full_path - if isinstance( - distribution, - (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)): - with self.assertRaises(ValueError): - self.run_training( - 
distribution, model_dir, steps_per_loop=1, run_eagerly=True) - else: - self.run_training( - distribution, model_dir, steps_per_loop=1, run_eagerly=True) - - @combinations.generate(eager_gpu_strategy_combinations()) - def test_train_eager_mixed_precision(self, distribution): - model_dir = self.create_tempdir().full_path - tf.keras.mixed_precision.set_global_policy('mixed_float16') - self._model_fn = create_model_fn( - input_shape=[128], num_classes=3, use_float16=True) - self.run_training( - distribution, model_dir, steps_per_loop=1, run_eagerly=True) - - @combinations.generate(eager_strategy_combinations()) - def test_train_check_artifacts(self, distribution): - model_dir = self.create_tempdir().full_path - self.run_training( - distribution, model_dir, steps_per_loop=10, run_eagerly=False) - - # Two checkpoints should be saved after two epochs. - files = map(os.path.basename, - tf.io.gfile.glob(os.path.join(model_dir, 'ctl_step_*index'))) - self.assertCountEqual( - ['ctl_step_20.ckpt-1.index', 'ctl_step_40.ckpt-2.index'], files) - - # Three submodel checkpoints should be saved after two epochs (one after - # each epoch plus one final). - files = map( - os.path.basename, - tf.io.gfile.glob(os.path.join(model_dir, 'my_submodel_name*index'))) - self.assertCountEqual([ - 'my_submodel_name.ckpt-3.index', - 'my_submodel_name_step_20.ckpt-1.index', - 'my_submodel_name_step_40.ckpt-2.index' - ], files) - - self.assertNotEmpty( - tf.io.gfile.glob( - os.path.join(model_dir, 'summaries/training_summary*'))) - - # Loss and accuracy values should be written into summaries. - self.assertTrue( - check_eventfile_for_keyword('loss', - os.path.join(model_dir, 'summaries/train'))) - self.assertTrue( - check_eventfile_for_keyword('accuracy', - os.path.join(model_dir, 'summaries/train'))) - self.assertTrue( - check_eventfile_for_keyword('mean_input', - os.path.join(model_dir, 'summaries/train'))) - self.assertTrue( - check_eventfile_for_keyword('accuracy', - os.path.join(model_dir, 'summaries/eval'))) - self.assertTrue( - check_eventfile_for_keyword('mean_input', - os.path.join(model_dir, 'summaries/eval'))) - - @combinations.generate(eager_strategy_combinations()) - def test_train_check_callbacks(self, distribution): - model_dir = self.create_tempdir().full_path - callback = RecordingCallback() - callbacks = [callback] - input_fn = create_fake_data_input_fn( - batch_size=8, features_shape=[128], num_classes=3) - model_training_utils.run_customized_training_loop( - strategy=distribution, - model_fn=self._model_fn, - loss_fn=tf.keras.losses.categorical_crossentropy, - model_dir=model_dir, - steps_per_epoch=20, - num_eval_per_epoch=4, - steps_per_loop=10, - epochs=2, - train_input_fn=input_fn, - eval_input_fn=input_fn, - eval_steps=10, - init_checkpoint=None, - metric_fn=metric_fn, - custom_callbacks=callbacks, - run_eagerly=False) - self.assertEqual(callback.epoch_begin, [(1, {}), (2, {})]) - epoch_ends, epoch_end_infos = zip(*callback.epoch_end) - self.assertEqual(list(epoch_ends), [1, 2, 2]) - for info in epoch_end_infos: - self.assertIn('accuracy', info) - - self.assertEqual(callback.batch_begin, [(0, {}), (5, {}), (10, {}), - (15, {}), (20, {}), (25, {}), - (30, {}), (35, {})]) - batch_ends, batch_end_infos = zip(*callback.batch_end) - self.assertEqual(list(batch_ends), [4, 9, 14, 19, 24, 29, 34, 39]) - for info in batch_end_infos: - self.assertIn('loss', info) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.one_device_strategy_gpu, - ],)) - def 
test_train_check_artifacts_non_chief(self, distribution): - # We shouldn't export artifacts on non-chief workers. Since there's no easy - # way to test with real MultiWorkerMirroredStrategy, we patch the strategy - # to make it as if it's MultiWorkerMirroredStrategy on non-chief workers. - extended = distribution.extended - with mock.patch.object(extended.__class__, 'should_checkpoint', - new_callable=mock.PropertyMock, return_value=False), \ - mock.patch.object(extended.__class__, 'should_save_summary', - new_callable=mock.PropertyMock, return_value=False): - model_dir = self.create_tempdir().full_path - self.run_training( - distribution, model_dir, steps_per_loop=10, run_eagerly=False) - self.assertEmpty(tf.io.gfile.listdir(model_dir)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/npu_convert_dropout.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/npu_convert_dropout.py deleted file mode 100644 index 95f8689ce4da26c08f18a0fcb49c42eb7f1c8b06..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/npu_convert_dropout.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-from keras import backend
-from keras.utils import control_flow_util
-from keras.layers.core import Dropout
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import nn
-import npu_ops
-
-def dropout_call(self, inputs, training=None):
-  """Make Keras Dropout execute the NPU dropout op."""
-  if training is None:
-    training = backend.learning_phase()
-
-  def dropped_inputs():
-    return npu_ops.dropout(
-        inputs,
-        noise_shape=self._get_noise_shape(inputs),
-        seed=self.seed,
-        keep_prob=1 - self.rate)
-
-  output = control_flow_util.smart_cond(training,
-                                        dropped_inputs,
-                                        lambda: array_ops.identity(inputs))
-
-  return output
-
-Dropout.call = dropout_call
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/npu_ops.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/npu_ops.py
deleted file mode 100644
index 9de214f8b8eb307ef743bdb29dc0488a61ce32a1..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/npu_ops.py
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-"""Ops for collective operations implemented using hccl."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-import numbers
-from tensorflow.python.ops import array_ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import ops
-from tensorflow.python.eager import context
-
-from npu_device import gen_npu_ops
-
-
-DEFAULT_GRAPH_SEED = 87654321
-_MAXINT32 = 2**31 - 1
-def LARSV2(input_weight,
-           input_grad,
-           weight_decay,
-           learning_rate,
-           hyperpara=0.001,
-           epsilon=0.00001,
-           use_clip=False,
-           name=None):
-  if context.executing_eagerly():
-    raise RuntimeError("tf.LARSV2() is not compatible with "
-                       "eager execution.")
-
-  return gen_npu_ops.lars_v2(input_weight=input_weight,
-                             input_grad=input_grad,
-                             weight_decay=weight_decay,
-                             learning_rate=learning_rate,
-                             hyperpara=hyperpara,
-                             epsilon=epsilon,
-                             use_clip=use_clip,
-                             name=name)
-
-
-def _truncate_seed(seed):
-  return seed % _MAXINT32  # Truncate to fit into 32-bit integer
-
-def get_seed(op_seed):
-  global_seed = ops.get_default_graph().seed
-
-  if global_seed is not None:
-    if op_seed is None:
-      op_seed = ops.get_default_graph()._last_id
-
-    seeds = _truncate_seed(global_seed), _truncate_seed(op_seed)
-  else:
-    if op_seed is not None:
-      seeds = DEFAULT_GRAPH_SEED, _truncate_seed(op_seed)
-    else:
-      seeds = None, None
-  # Avoid (0, 0) as the C++ ops interpret it as nondeterminism, which would
-  # be unexpected since Python docs say nondeterminism is (None, None).
-  if seeds == (0, 0):
-    return (0, _MAXINT32)
-  return seeds
-
-def _get_noise_shape(x, noise_shape):
-  # If noise_shape is None, return immediately.
-  if noise_shape is None:
-    return array_ops.shape(x)
-
-  try:
-    # Best effort to figure out the intended shape.
-    # If not possible, let the op handle it.
-    # In eager mode an exception will show up.
-    noise_shape_ = tensor_shape.as_shape(noise_shape)
-  except (TypeError, ValueError):
-    return noise_shape
-
-  if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims):
-    new_dims = []
-    for i, dim in enumerate(x.shape.dims):
-      if noise_shape_.dims[i].value is None and dim.value is not None:
-        new_dims.append(dim.value)
-      else:
-        new_dims.append(noise_shape_.dims[i].value)
-    return tensor_shape.TensorShape(new_dims)
-
-  return noise_shape
-
-def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
-  """NPU implementation of dropout.
-
-  Args:
-    x: A float tensor.
-    keep_prob: A float, the probability that each element is kept.
-    noise_shape: A 1-D int32 tensor, the shape of the randomly generated
-      keep/drop mask.
-    seed: Random seed.
-    name: Layer name.
-
-  Returns:
-    A tensor.
-  """
-  if context.executing_eagerly():
-    raise RuntimeError("tf.dropout() is not compatible with "
-                       "eager execution.")
-  x = ops.convert_to_tensor(x, name="x")
-  if not x.dtype.is_floating:
-    raise ValueError("x has to be a floating point tensor since it's going to"
-                     " be scaled. Got a %s tensor instead."
% x.dtype) - if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1: - raise ValueError("keep_prob must be a scalar tensor or a float in the " - "range (0, 1], got %g" % keep_prob) - if isinstance(keep_prob, float) and keep_prob == 1: - return x - seed, seed2 = get_seed(seed) - noise_shape = _get_noise_shape(x, noise_shape) - gen_out = gen_npu_ops.drop_out_gen_mask(noise_shape, keep_prob, seed, seed2, name) - result = gen_npu_ops.drop_out_do_mask(x, gen_out, keep_prob, name) - return result - -#@ops.RegisterGradient("DropOutDoMask") -def _DropOutDoMaskGrad(op, grad): - result = gen_npu_ops.drop_out_do_mask(grad, op.inputs[1], op.inputs[2]) - return [result, None, None] - -grad_registry_list = ops.gradient_registry.list() -if "DropOutDoMask" not in grad_registry_list: - ops.RegisterGradient("DropOutDoMask")(_DropOutDoMaskGrad) - -def basic_lstm_cell(x, h, c, w, b, keep_prob, forget_bias, state_is_tuple, - activation, name=None): - if context.executing_eagerly(): - raise RuntimeError("tf.basic_lstm_cell() is not compatible with " - "eager execution.") - x = ops.convert_to_tensor(x, name="x") - h = ops.convert_to_tensor(h, name="h") - c = ops.convert_to_tensor(c, name="c") - w = ops.convert_to_tensor(w, name="w") - b = ops.convert_to_tensor(b, name="b") - result = gen_npu_ops.basic_lstm_cell(x, h, c, w, b, keep_prob, forget_bias, state_is_tuple, - activation, name) - return result - -@ops.RegisterGradient("BasicLSTMCell") -def basic_lstm_cell_grad(op, dct, dht, dit, djt, dft, dot, dtanhct): - - dgate, dct_1 = gen_npu_ops.basic_lstm_cell_c_state_grad(op.inputs[2], dht, dct, op.outputs[2], op.outputs[3], op.outputs[4], op.outputs[5], op.outputs[6], forget_bias=op.get_attr("forget_bias"), activation=op.get_attr("activation")) - dw, db = gen_npu_ops.basic_lstm_cell_weight_grad(op.inputs[0], op.inputs[1], dgate) - dxt, dht = gen_npu_ops.basic_lstm_cell_input_grad(dgate, op.inputs[3], keep_prob=op.get_attr("keep_prob")) - - return [dxt, dht, dct_1, dw, db] - -def adam_apply_one_assign(input0, input1, input2, input3, input4, - mul0_x, mul1_x, mul2_x, mul3_x, add2_y, name=None): - if context.executing_eagerly(): - raise RuntimeError("tf.adam_apply_one_assign() is not compatible with " - "eager execution.") - result = gen_npu_ops.adam_apply_one_assign(input0, input1, input2, input3, input4, - mul0_x, mul1_x, mul2_x, mul3_x, add2_y,name) - return result - -def adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4, - mul0_x, mul1_x, mul2_x, mul3_x, mul4_x, add2_y, name=None): - if context.executing_eagerly(): - raise RuntimeError("tf.adam_apply_one_with_decay_assign() is not compatible with " - "eager execution.") - result = gen_npu_ops.adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4, - mul0_x, mul1_x, mul2_x, mul3_x, mul4_x, add2_y, name) - return result - -@ops.RegisterGradient("DynamicGruV2") -def dynamic_gru_v2_grad(op, dy, doutput_h, dupdate, dreset, dnew, dhidden_new): - (x, weight_input, weight_hidden, bias_input, bias_hidden, seq_length, init_h) = op.inputs - (y, output_h, update, reset, new, hidden_new) = op.outputs - (dw_input, dw_hidden, db_input, db_hidden, dx, dh_prev) = gen_npu_ops.dynamic_gru_v2_grad(x, weight_input, weight_hidden, y, init_h, output_h, dy, doutput_h, update, reset, new, hidden_new, direction=op.get_attr("direction"), cell_depth=op.get_attr("cell_depth"), keep_prob=op.get_attr("keep_prob"), cell_clip=op.get_attr("cell_clip"), num_proj=op.get_attr("num_proj"), time_major=op.get_attr("time_major"), 
gate_order=op.get_attr("gate_order"), reset_after=op.get_attr("reset_after"))
-
-  return (dx, dw_input, dw_hidden, db_input, db_hidden, seq_length, dh_prev)
-
-@ops.RegisterGradient("DynamicRnn")
-def dynamic_rnn_grad(op, dy, dh, dc, di, dj, df, do, dtanhc):
-  (x, w, b, seq_length, init_h, init_c) = op.inputs
-  (y, output_h, output_c, i, j, f, o, tanhc) = op.outputs
-  (dw, db, dx, dh_prev, dc_prev) = gen_npu_ops.dynamic_rnn_grad(x, w, b, y, init_h[-1], init_c[-1], output_h, output_c, dy, dh[-1], dc[-1], i, j, f, o, tanhc, cell_type=op.get_attr("cell_type"), direction=op.get_attr("direction"), cell_depth=op.get_attr("cell_depth"), use_peephole=op.get_attr("use_peephole"), keep_prob=op.get_attr("keep_prob"), cell_clip=op.get_attr("cell_clip"), num_proj=op.get_attr("num_proj"), time_major=op.get_attr("time_major"), forget_bias=op.get_attr("forget_bias"))
-
-  return (dx, dw, db, seq_length, dh_prev, dc_prev)
-
-def lamb_apply_optimizer_assign(input0,input1,input2,input3,mul0_x,mul1_x,mul2_x,
-                                mul3_x,add2_y,steps,do_use_weight,weight_decay_rate,name=None):
-  if context.executing_eagerly():
-    raise RuntimeError("tf.lamb_apply_optimizer_assign() is not compatible with eager execution")
-  update,nextv,nextm=gen_npu_ops.lamb_apply_optimizer_assign(input0,input1,input2,input3,mul0_x,mul1_x,mul2_x,
-                                                             mul3_x,add2_y,steps,do_use_weight,weight_decay_rate,name)
-  return update,nextv,nextm
-
-def lamb_apply_weight_assign(input0,input1,input2,input3,input4,name=None):
-  if context.executing_eagerly():
-    raise RuntimeError("tf.lamb_apply_weight_assign() is not compatible with eager execution")
-  result = gen_npu_ops.lamb_apply_weight_assign(input0,input1,input2,input3,input4,name)
-  return result
-
-def dropout_v3(x, keep_prob, noise_shape=None, seed=None, name=None):
-  """NPU implementation of dropout (v3).
-
-  Args:
-    x: A float tensor.
-    keep_prob: A float, the probability that each element is kept.
-    noise_shape: A 1-D int32 tensor, the shape of the randomly generated
-      keep/drop mask.
-    seed: Random seed.
-    name: Layer name.
-
-  Returns:
-    A tensor.
-  """
-  x = ops.convert_to_tensor(x, name="x")
-  if not x.dtype.is_floating:
-    raise ValueError("x has to be a floating point tensor since it's going to be scaled. Got a %s tensor instead." % x.dtype)
-
-  if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
-    raise ValueError("keep_prob must be a scalar tensor or a float in the range (0,1], got %g" % keep_prob)
-
-  if isinstance(keep_prob, float) and keep_prob == 1:
-    return x
-
-  seed, seed2 = get_seed(seed)
-  noise_shape = _get_noise_shape(x, noise_shape)
-  gen_out = gen_npu_ops.drop_out_gen_mask_v3(noise_shape, keep_prob, seed, seed2, name)
-  result = gen_npu_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name)
-  return result
-
-@ops.RegisterGradient("DropOutDoMaskV3")
-def _DropOutDoMaskV3Grad(op, grad):
-  result = gen_npu_ops.drop_out_do_mask_v3(grad, op.inputs[1], op.inputs[2])
-  return [result, None, None]
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_classifier.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_classifier.py
deleted file mode 100644
index 1796ddca9cd8778ee5d1001d49922022849163a2..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_classifier.py
+++ /dev/null
@@ -1,532 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""BERT classification or regression finetuning runner in TF 2.x.""" - -import functools -import json -import math -import os - -# Import libraries -from absl import app -from absl import flags -from absl import logging -import gin -import tensorflow as tf -from official.common import distribute_utils -from official.modeling import performance -from official.nlp import optimization -from official.nlp.bert import bert_models -from official.nlp.bert import common_flags -from official.nlp.bert import configs as bert_configs -from official.nlp.bert import input_pipeline -from official.nlp.bert import model_saving_utils -from official.utils.misc import keras_utils - -flags.DEFINE_enum( - 'mode', 'train_and_eval', ['train_and_eval', 'export_only', 'predict'], - 'One of {"train_and_eval", "export_only", "predict"}. `train_and_eval`: ' - 'trains the model and evaluates in the meantime. ' - '`export_only`: will take the latest checkpoint inside ' - 'model_dir and export a `SavedModel`. `predict`: takes a checkpoint and ' - 'restores the model to output predictions on the test set.') -flags.DEFINE_string('train_data_path', None, - 'Path to training data for BERT classifier.') -flags.DEFINE_string('eval_data_path', None, - 'Path to evaluation data for BERT classifier.') -flags.DEFINE_string( - 'input_meta_data_path', None, - 'Path to file that contains meta data about input ' - 'to be used for training and evaluation.') -flags.DEFINE_integer('train_data_size', None, 'Number of training samples ' - 'to use. If None, uses the full train data. ' - '(default: None).') -flags.DEFINE_string('predict_checkpoint_path', None, - 'Path to the checkpoint for predictions.') -flags.DEFINE_integer( - 'num_eval_per_epoch', 1, - 'Number of evaluations per epoch. The purpose of this flag is to provide ' - 'more granular evaluation scores and checkpoints. 
For example, if original ' - 'data has N samples and num_eval_per_epoch is n, then each epoch will be ' - 'evaluated every N/n samples.') -flags.DEFINE_integer('train_batch_size', 32, 'Batch size for training.') -flags.DEFINE_integer('eval_batch_size', 32, 'Batch size for evaluation.') - -common_flags.define_common_bert_flags() - -FLAGS = flags.FLAGS - -LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32} - - -def get_loss_fn(num_classes): - """Gets the classification loss function.""" - - def classification_loss_fn(labels, logits): - """Classification loss.""" - labels = tf.squeeze(labels) - log_probs = tf.nn.log_softmax(logits, axis=-1) - one_hot_labels = tf.one_hot( - tf.cast(labels, dtype=tf.int32), depth=num_classes, dtype=tf.float32) - per_example_loss = -tf.reduce_sum( - tf.cast(one_hot_labels, dtype=tf.float32) * log_probs, axis=-1) - return tf.reduce_mean(per_example_loss) - - return classification_loss_fn - - -def get_dataset_fn(input_file_pattern, - max_seq_length, - global_batch_size, - is_training, - label_type=tf.int64, - include_sample_weights=False, - num_samples=None): - """Gets a closure to create a dataset.""" - - def _dataset_fn(ctx=None): - """Returns tf.data.Dataset for distributed BERT pretraining.""" - batch_size = ctx.get_per_replica_batch_size( - global_batch_size) if ctx else global_batch_size - dataset = input_pipeline.create_classifier_dataset( - tf.io.gfile.glob(input_file_pattern), - max_seq_length, - batch_size, - is_training=is_training, - input_pipeline_context=ctx, - label_type=label_type, - include_sample_weights=include_sample_weights, - num_samples=num_samples) - return dataset - - return _dataset_fn - - -def run_bert_classifier(strategy, - bert_config, - input_meta_data, - model_dir, - epochs, - steps_per_epoch, - steps_per_loop, - eval_steps, - warmup_steps, - initial_lr, - init_checkpoint, - train_input_fn, - eval_input_fn, - training_callbacks=True, - custom_callbacks=None, - custom_metrics=None): - """Run BERT classifier training using low-level API.""" - max_seq_length = input_meta_data['max_seq_length'] - num_classes = input_meta_data.get('num_labels', 1) - is_regression = num_classes == 1 - - def _get_classifier_model(): - """Gets a classifier model.""" - classifier_model, core_model = ( - bert_models.classifier_model( - bert_config, - num_classes, - max_seq_length, - hub_module_url=FLAGS.hub_module_url, - hub_module_trainable=FLAGS.hub_module_trainable)) - optimizer = optimization.create_optimizer(initial_lr, - steps_per_epoch * epochs, - warmup_steps, FLAGS.end_lr, - FLAGS.optimizer_type) - classifier_model.optimizer = performance.configure_optimizer( - optimizer, - use_float16=common_flags.use_float16(), - use_graph_rewrite=common_flags.use_graph_rewrite()) - return classifier_model, core_model - - # tf.keras.losses objects accept optional sample_weight arguments (eg. coming - # from the dataset) to compute weighted loss, as used for the regression - # tasks. The classification tasks, using the custom get_loss_fn don't accept - # sample weights though. - loss_fn = (tf.keras.losses.MeanSquaredError() if is_regression - else get_loss_fn(num_classes)) - - # Defines evaluation metrics function, which will create metrics in the - # correct device and strategy scope. 
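As a sanity check on `get_loss_fn` above: the hand-rolled log-softmax/one-hot reduction is exactly sparse categorical cross-entropy. A small self-contained comparison with toy logits (not tied to the training pipeline):

```python
import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]])
labels = tf.constant([0, 2])

log_probs = tf.nn.log_softmax(logits, axis=-1)
one_hot = tf.one_hot(labels, depth=3)
manual = tf.reduce_mean(-tf.reduce_sum(one_hot * log_probs, axis=-1))

builtin = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(
    labels, logits, from_logits=True))
# manual and builtin agree to within float tolerance.
```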
- if custom_metrics: - metric_fn = custom_metrics - elif is_regression: - metric_fn = functools.partial( - tf.keras.metrics.MeanSquaredError, - 'mean_squared_error', - dtype=tf.float32) - else: - metric_fn = functools.partial( - tf.keras.metrics.SparseCategoricalAccuracy, - 'accuracy', - dtype=tf.float32) - - # Start training using Keras compile/fit API. - logging.info('Training using TF 2.x Keras compile/fit API with ' - 'distribution strategy.') - return run_keras_compile_fit( - model_dir, - strategy, - _get_classifier_model, - train_input_fn, - eval_input_fn, - loss_fn, - metric_fn, - init_checkpoint, - epochs, - steps_per_epoch, - steps_per_loop, - eval_steps, - training_callbacks=training_callbacks, - custom_callbacks=custom_callbacks) - - -def run_keras_compile_fit(model_dir, - strategy, - model_fn, - train_input_fn, - eval_input_fn, - loss_fn, - metric_fn, - init_checkpoint, - epochs, - steps_per_epoch, - steps_per_loop, - eval_steps, - training_callbacks=True, - custom_callbacks=None): - """Runs BERT classifier model using Keras compile/fit API.""" - - with strategy.scope(): - training_dataset = train_input_fn() - evaluation_dataset = eval_input_fn() if eval_input_fn else None - bert_model, sub_model = model_fn() - optimizer = bert_model.optimizer - - if init_checkpoint: - checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model) - checkpoint.restore(init_checkpoint).expect_partial().assert_existing_objects_matched() - - if not isinstance(metric_fn, (list, tuple)): - metric_fn = [metric_fn] - bert_model.compile( - optimizer=optimizer, - loss=loss_fn, - metrics=[fn() for fn in metric_fn], - steps_per_execution=steps_per_loop) - - summary_dir = os.path.join(model_dir, 'summaries') - summary_callback = tf.keras.callbacks.TensorBoard(summary_dir) - checkpoint = tf.train.Checkpoint(model=bert_model, optimizer=optimizer) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - directory=model_dir, - max_to_keep=None, - step_counter=optimizer.iterations, - checkpoint_interval=0) - checkpoint_callback = keras_utils.SimpleCheckpoint(checkpoint_manager) - - if training_callbacks: - if custom_callbacks is not None: - custom_callbacks += [summary_callback, checkpoint_callback] - else: - custom_callbacks = [summary_callback, checkpoint_callback] - - history = bert_model.fit( - x=training_dataset, - validation_data=evaluation_dataset, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - validation_steps=eval_steps, - callbacks=custom_callbacks) - stats = {'total_training_steps': steps_per_epoch * epochs} - if 'loss' in history.history: - stats['train_loss'] = history.history['loss'][-1] - if 'val_accuracy' in history.history: - stats['eval_metrics'] = history.history['val_accuracy'][-1] - return bert_model, stats - - -def get_predictions_and_labels(strategy, - trained_model, - eval_input_fn, - is_regression=False, - return_probs=False): - """Obtains predictions of trained model on evaluation data. - - Note that list of labels is returned along with the predictions because the - order changes on distributing dataset over TPU pods. - - Args: - strategy: Distribution strategy. - trained_model: Trained model with preloaded weights. - eval_input_fn: Input function for evaluation data. - is_regression: Whether it is a regression task. - return_probs: Whether to return probabilities of classes. - - Returns: - predictions: List of predictions. - labels: List of gold labels corresponding to predictions. 
- """ - - @tf.function - def test_step(iterator): - """Computes predictions on distributed devices.""" - - def _test_step_fn(inputs): - """Replicated predictions.""" - inputs, labels = inputs - logits = trained_model(inputs, training=False) - if not is_regression: - probabilities = tf.nn.softmax(logits) - return probabilities, labels - else: - return logits, labels - - outputs, labels = strategy.run(_test_step_fn, args=(next(iterator),)) - # outputs: current batch logits as a tuple of shard logits - outputs = tf.nest.map_structure(strategy.experimental_local_results, - outputs) - labels = tf.nest.map_structure(strategy.experimental_local_results, labels) - return outputs, labels - - def _run_evaluation(test_iterator): - """Runs evaluation steps.""" - preds, golds = list(), list() - try: - with tf.experimental.async_scope(): - while True: - probabilities, labels = test_step(test_iterator) - for cur_probs, cur_labels in zip(probabilities, labels): - if return_probs: - preds.extend(cur_probs.numpy().tolist()) - else: - preds.extend(tf.math.argmax(cur_probs, axis=1).numpy()) - golds.extend(cur_labels.numpy().tolist()) - except (StopIteration, tf.errors.OutOfRangeError): - tf.experimental.async_clear_error() - return preds, golds - - test_iter = iter(strategy.distribute_datasets_from_function(eval_input_fn)) - predictions, labels = _run_evaluation(test_iter) - - return predictions, labels - - -def export_classifier(model_export_path, input_meta_data, bert_config, - model_dir): - """Exports a trained model as a `SavedModel` for inference. - - Args: - model_export_path: a string specifying the path to the SavedModel directory. - input_meta_data: dictionary containing meta data about input and model. - bert_config: Bert configuration file to define core bert layers. - model_dir: The directory where the model weights and training/evaluation - summaries are stored. - - Raises: - Export path is not specified, got an empty string or None. - """ - if not model_export_path: - raise ValueError('Export path is not specified: %s' % model_export_path) - if not model_dir: - raise ValueError('Export path is not specified: %s' % model_dir) - - # Export uses float32 for now, even if training uses mixed precision. - tf.keras.mixed_precision.set_global_policy('float32') - classifier_model = bert_models.classifier_model( - bert_config, - input_meta_data.get('num_labels', 1), - hub_module_url=FLAGS.hub_module_url, - hub_module_trainable=False)[0] - - model_saving_utils.export_bert_model( - model_export_path, model=classifier_model, checkpoint_dir=model_dir) - - -def run_bert(strategy, - input_meta_data, - model_config, - train_input_fn=None, - eval_input_fn=None, - init_checkpoint=None, - custom_callbacks=None, - custom_metrics=None): - """Run BERT training.""" - # Enables XLA in Session Config. Should not be set for TPU. 
- keras_utils.set_session_config(FLAGS.enable_xla) - performance.set_mixed_precision_policy(common_flags.dtype()) - - epochs = FLAGS.num_train_epochs * FLAGS.num_eval_per_epoch - train_data_size = ( - input_meta_data['train_data_size'] // FLAGS.num_eval_per_epoch) - if FLAGS.train_data_size: - train_data_size = min(train_data_size, FLAGS.train_data_size) - logging.info('Updated train_data_size: %s', train_data_size) - steps_per_epoch = int(train_data_size / FLAGS.train_batch_size) - warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size) - eval_steps = int( - math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size)) - - if not strategy: - raise ValueError('Distribution strategy has not been specified.') - - if not custom_callbacks: - custom_callbacks = [] - - if FLAGS.log_steps: - custom_callbacks.append( - keras_utils.TimeHistory( - batch_size=FLAGS.train_batch_size, - log_steps=FLAGS.log_steps, - logdir=FLAGS.model_dir)) - - trained_model, _ = run_bert_classifier( - strategy, - model_config, - input_meta_data, - FLAGS.model_dir, - epochs, - steps_per_epoch, - FLAGS.steps_per_loop, - eval_steps, - warmup_steps, - FLAGS.learning_rate, - init_checkpoint or FLAGS.init_checkpoint, - train_input_fn, - eval_input_fn, - custom_callbacks=custom_callbacks, - custom_metrics=custom_metrics) - - if FLAGS.model_export_path: - model_saving_utils.export_bert_model( - FLAGS.model_export_path, model=trained_model) - return trained_model - - -def custom_main(custom_callbacks=None, custom_metrics=None): - """Run classification or regression. - - Args: - custom_callbacks: list of tf.keras.Callbacks passed to training loop. - custom_metrics: list of metrics passed to the training loop. - """ - gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param) - - with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: - input_meta_data = json.loads(reader.read().decode('utf-8')) - label_type = LABEL_TYPES_MAP[input_meta_data.get('label_type', 'int')] - include_sample_weights = input_meta_data.get('has_sample_weights', False) - - if not FLAGS.model_dir: - FLAGS.model_dir = '/tmp/bert20/' - - bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file) - - if FLAGS.mode == 'export_only': - export_classifier(FLAGS.model_export_path, input_meta_data, bert_config, - FLAGS.model_dir) - return - - strategy = distribute_utils.get_distribution_strategy( - distribution_strategy=FLAGS.distribution_strategy, - num_gpus=FLAGS.num_gpus, - tpu_address=FLAGS.tpu) - eval_input_fn = get_dataset_fn( - FLAGS.eval_data_path, - input_meta_data['max_seq_length'], - FLAGS.eval_batch_size, - is_training=False, - label_type=label_type, - include_sample_weights=include_sample_weights) - - if FLAGS.mode == 'predict': - num_labels = input_meta_data.get('num_labels', 1) - with strategy.scope(): - classifier_model = bert_models.classifier_model( - bert_config, num_labels)[0] - checkpoint = tf.train.Checkpoint(model=classifier_model) - latest_checkpoint_file = ( - FLAGS.predict_checkpoint_path or - tf.train.latest_checkpoint(FLAGS.model_dir)) - assert latest_checkpoint_file - logging.info('Checkpoint file %s found and restoring from ' - 'checkpoint', latest_checkpoint_file) - checkpoint.restore( - latest_checkpoint_file).assert_existing_objects_matched() - preds, _ = get_predictions_and_labels( - strategy, - classifier_model, - eval_input_fn, - is_regression=(num_labels == 1), - return_probs=True) - output_predict_file = os.path.join(FLAGS.model_dir, 'test_results.tsv') - with 
tf.io.gfile.GFile(output_predict_file, 'w') as writer: - logging.info('***** Predict results *****') - for probabilities in preds: - output_line = '\t'.join( - str(class_probability) - for class_probability in probabilities) + '\n' - writer.write(output_line) - return - - if FLAGS.mode != 'train_and_eval': - raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode) - train_input_fn = get_dataset_fn( - FLAGS.train_data_path, - input_meta_data['max_seq_length'], - FLAGS.train_batch_size, - is_training=True, - label_type=label_type, - include_sample_weights=include_sample_weights, - num_samples=FLAGS.train_data_size) - run_bert( - strategy, - input_meta_data, - bert_config, - train_input_fn, - eval_input_fn, - custom_callbacks=custom_callbacks, - custom_metrics=custom_metrics) - - -def main(_): - custom_main(custom_callbacks=None, custom_metrics=None) - - -if __name__ == '__main__': - flags.mark_flag_as_required('bert_config_file') - flags.mark_flag_as_required('input_meta_data_path') - flags.mark_flag_as_required('model_dir') - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_pretraining.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_pretraining.py deleted file mode 100644 index 864964a301d83cdd52d6a6ecd7a9c57d07190dde..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_pretraining.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Run masked LM/next sentence pre-training for BERT in TF 2.x.""" - -# Import libraries -from absl import app -from absl import flags -from absl import logging -import gin -import tensorflow as tf -from official.common import distribute_utils -from official.modeling import performance -from official.nlp import optimization -from official.nlp.bert import bert_models -from official.nlp.bert import common_flags -from official.nlp.bert import configs -from official.nlp.bert import input_pipeline -from official.nlp.bert import model_training_utils - - -flags.DEFINE_string('input_files', None, - 'File path to retrieve training data for pre-training.') -# Model training specific flags. 
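The schedule arithmetic in `run_bert` (run_classifier.py above) is easy to misread: warmup is 10% of the total number of optimizer steps across all epochs. Worked through with illustrative numbers (not taken from any config in this repo):

```python
train_data_size = 392_702   # e.g. an MNLI-sized training set (illustrative)
train_batch_size = 32
num_train_epochs = 3
num_eval_per_epoch = 1

epochs = num_train_epochs * num_eval_per_epoch
steps_per_epoch = train_data_size // train_batch_size                  # 12271
total_steps = steps_per_epoch * epochs                                 # 36813
warmup_steps = int(epochs * train_data_size * 0.1 / train_batch_size)  # 3681
```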
-flags.DEFINE_integer(
-    'max_seq_length', 128,
-    'The maximum total input sequence length after WordPiece tokenization. '
-    'Sequences longer than this will be truncated, and sequences shorter '
-    'than this will be padded.')
-flags.DEFINE_integer('max_predictions_per_seq', 20,
-                     'Maximum predictions per sequence_output.')
-flags.DEFINE_integer('train_batch_size', 32, 'Total batch size for training.')
-flags.DEFINE_integer('num_steps_per_epoch', 1000,
-                     'Total number of training steps to run per epoch.')
-flags.DEFINE_float('warmup_steps', 10000,
-                   'Warmup steps for Adam weight decay optimizer.')
-flags.DEFINE_bool('use_next_sentence_label', True,
-                  'Whether to use next sentence label to compute final loss.')
-flags.DEFINE_integer('train_summary_interval', 0,
-                     'Step interval for training summaries. If the value is '
-                     'a negative number, then training summaries are not '
-                     'enabled.')
-
-common_flags.define_common_bert_flags()
-
-FLAGS = flags.FLAGS
-
-
-def get_pretrain_dataset_fn(input_file_pattern, seq_length,
-                            max_predictions_per_seq, global_batch_size,
-                            use_next_sentence_label=True):
-  """Returns input dataset from input file string."""
-  def _dataset_fn(ctx=None):
-    """Returns tf.data.Dataset for distributed BERT pretraining."""
-    input_patterns = input_file_pattern.split(',')
-    batch_size = ctx.get_per_replica_batch_size(global_batch_size)
-    train_dataset = input_pipeline.create_pretrain_dataset(
-        input_patterns,
-        seq_length,
-        max_predictions_per_seq,
-        batch_size,
-        is_training=True,
-        input_pipeline_context=ctx,
-        use_next_sentence_label=use_next_sentence_label)
-    return train_dataset
-
-  return _dataset_fn
-
-
-def get_loss_fn():
-  """Returns loss function for BERT pretraining."""
-
-  def _bert_pretrain_loss_fn(unused_labels, losses, **unused_args):
-    return tf.reduce_mean(losses)
-
-  return _bert_pretrain_loss_fn
-
-
-def run_customized_training(strategy,
-                            bert_config,
-                            init_checkpoint,
-                            max_seq_length,
-                            max_predictions_per_seq,
-                            model_dir,
-                            steps_per_epoch,
-                            steps_per_loop,
-                            epochs,
-                            initial_lr,
-                            warmup_steps,
-                            end_lr,
-                            optimizer_type,
-                            input_files,
-                            train_batch_size,
-                            use_next_sentence_label=True,
-                            train_summary_interval=0,
-                            custom_callbacks=None,
-                            explicit_allreduce=False,
-                            pre_allreduce_callbacks=None,
-                            post_allreduce_callbacks=None,
-                            allreduce_bytes_per_pack=0):
-  """Run BERT pretrain model training using low-level API."""
-
-  train_input_fn = get_pretrain_dataset_fn(input_files, max_seq_length,
-                                           max_predictions_per_seq,
-                                           train_batch_size,
-                                           use_next_sentence_label)
-
-  def _get_pretrain_model():
-    """Gets a pretraining model."""
-    pretrain_model, core_model = bert_models.pretrain_model(
-        bert_config, max_seq_length, max_predictions_per_seq,
-        use_next_sentence_label=use_next_sentence_label)
-    optimizer = optimization.create_optimizer(
-        initial_lr, steps_per_epoch * epochs, warmup_steps,
-        end_lr, optimizer_type)
-    pretrain_model.optimizer = performance.configure_optimizer(
-        optimizer,
-        use_float16=common_flags.use_float16(),
-        use_graph_rewrite=common_flags.use_graph_rewrite())
-    return pretrain_model, core_model
-
-  trained_model = model_training_utils.run_customized_training_loop(
-      strategy=strategy,
-      model_fn=_get_pretrain_model,
-      loss_fn=get_loss_fn(),
-      scale_loss=FLAGS.scale_loss,
-      model_dir=model_dir,
-      init_checkpoint=init_checkpoint,
-      train_input_fn=train_input_fn,
-      steps_per_epoch=steps_per_epoch,
-      steps_per_loop=steps_per_loop,
-      epochs=epochs,
-      sub_model_export_name='pretrained/bert_model',
-
explicit_allreduce=explicit_allreduce, - pre_allreduce_callbacks=pre_allreduce_callbacks, - post_allreduce_callbacks=post_allreduce_callbacks, - allreduce_bytes_per_pack=allreduce_bytes_per_pack, - train_summary_interval=train_summary_interval, - custom_callbacks=custom_callbacks) - - return trained_model - - -def run_bert_pretrain(strategy, custom_callbacks=None): - """Runs BERT pre-training.""" - - bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - if not strategy: - raise ValueError('Distribution strategy is not specified.') - - # Runs customized training loop. - logging.info('Training using customized training loop TF 2.0 with distributed' - 'strategy.') - - performance.set_mixed_precision_policy(common_flags.dtype()) - - # Only when explicit_allreduce = True, post_allreduce_callbacks and - # allreduce_bytes_per_pack will take effect. optimizer.apply_gradients() no - # longer implicitly allreduce gradients, users manually allreduce gradient and - # pass the allreduced grads_and_vars to apply_gradients(). - # With explicit_allreduce = True, clip_by_global_norm is moved to after - # allreduce. - return run_customized_training( - strategy, - bert_config, - FLAGS.init_checkpoint, # Used to initialize only the BERT submodel. - FLAGS.max_seq_length, - FLAGS.max_predictions_per_seq, - FLAGS.model_dir, - FLAGS.num_steps_per_epoch, - FLAGS.steps_per_loop, - FLAGS.num_train_epochs, - FLAGS.learning_rate, - FLAGS.warmup_steps, - FLAGS.end_lr, - FLAGS.optimizer_type, - FLAGS.input_files, - FLAGS.train_batch_size, - FLAGS.use_next_sentence_label, - FLAGS.train_summary_interval, - custom_callbacks=custom_callbacks, - explicit_allreduce=FLAGS.explicit_allreduce, - pre_allreduce_callbacks=[ - model_training_utils.clip_by_global_norm_callback - ], - allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack) - - -def main(_): - gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param) - if not FLAGS.model_dir: - FLAGS.model_dir = '/tmp/bert20/' - # Configures cluster spec for multi-worker distribution strategy. - if FLAGS.num_gpus > 0: - _ = distribute_utils.configure_cluster(FLAGS.worker_hosts, FLAGS.task_index) - strategy = distribute_utils.get_distribution_strategy( - distribution_strategy=FLAGS.distribution_strategy, - num_gpus=FLAGS.num_gpus, - all_reduce_alg=FLAGS.all_reduce_alg, - tpu_address=FLAGS.tpu) - if strategy: - print('***** Number of cores used : ', strategy.num_replicas_in_sync) - - run_bert_pretrain(strategy) - - -if __name__ == '__main__': - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_squad.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_squad.py deleted file mode 100644 index 31521003835f9695b3163b5fc13059f86d20cfa1..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_squad.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Run BERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x."""
-
-import json
-import os
-import time
-
-import npu_device
-
-
-# Import libraries
-from absl import app
-from absl import flags
-from absl import logging
-import gin
-import tensorflow as tf
-from official.common import distribute_utils
-from official.nlp.bert import configs as bert_configs
-from official.nlp.bert import run_squad_helper
-from official.nlp.bert import tokenization
-from official.nlp.data import squad_lib as squad_lib_wp
-from official.utils.misc import keras_utils
-
-
-flags.DEFINE_string('vocab_file', None,
-                    'The vocabulary file that the BERT model was trained on.')
-flags.DEFINE_integer(name='train_steps', default=0,
-                     help='Number of training steps; 0 means the value is '
-                          'derived from the training data size.')
-flags.DEFINE_boolean(name='use_fastgelu', default=True,
-                     help='Whether to enable fastgelu; default is True.')
-flags.DEFINE_string(name='precision_mode', default='allow_fp32_to_fp16',
-                    help='One of allow_fp32_to_fp16/force_fp16/'
-                         'must_keep_origin_dtype/allow_mix_precision.')
-flags.DEFINE_boolean(name='over_dump', default=False,
-                     help='Whether to enable overflow detection; default is False.')
-flags.DEFINE_boolean(name='data_dump_flag', default=False,
-                     help='Whether to dump data; default is False.')
-flags.DEFINE_string(name='data_dump_step', default="10",
-                    help='Step(s) at which to dump data; default is 10.')
-flags.DEFINE_boolean(name='profiling', default=False,
-                     help='Whether to enable profiling for performance '
-                          'debugging; default is False.')
-flags.DEFINE_string(name='profiling_dump_path', default="/home/data",
-                    help='The path to save profiling data.')
-flags.DEFINE_string(name='over_dump_path', default="/home/data",
-                    help='The path to save overflow dump data.')
-flags.DEFINE_string(name='data_dump_path', default="/home/data",
-                    help='The path to save dump data.')
-flags.DEFINE_boolean(name='use_mixlist', default=False,
-                     help='Whether to enable the mixlist; default is False.')
-flags.DEFINE_boolean(name='fusion_off_flag', default=False,
-                     help='Whether to disable operator fusion via a switch '
-                          'file; default is False.')
-flags.DEFINE_string(name='mixlist_file', default='ops_info.json',
-                    help='Mixlist file name; default is ops_info.json.')
-flags.DEFINE_string(name='fusion_off_file', default='fusion_switch.cfg',
-                    help='Fusion-off file name; default is fusion_switch.cfg.')
-flags.DEFINE_boolean(name='auto_tune', default=False,
-                     help='Whether to enable auto_tune; default is False.')
-# More flags can be found in run_squad_helper.
-run_squad_helper.define_common_squad_flags() - -FLAGS = flags.FLAGS - -def npu_config(): - FLAGS = flags.FLAGS - npu_config = {} - - if FLAGS.data_dump_flag: - npu_device.global_options().dump_config.enable_dump = True - npu_device.global_options().dump_config.dump_path = FLAGS.data_dump_path - npu_device.global_options().dump_config.dump_step = FLAGS.data_dump_step - npu_device.global_options().dump_config.dump_mode = "all" - - if FLAGS.over_dump: - npu_device.global_options().dump_config.enable_dump_debug = True - npu_device.global_options().dump_config.dump_path = FLAGS.over_dump_path - npu_device.global_options().dump_config.dump_debug_mode = "all" - - if FLAGS.profiling: - npu_device.global_options().profiling_config.enable_profiling = True - profiling_options = '{"output":"' + FLAGS.profiling_dump_path + '", \ - "training_trace":"on", \ - "task_trace":"on", \ - "aicpu":"on", \ - "aic_metrics":"PipeUtilization",\ - "fp_point":"", \ - "bp_point":""}' - npu_device.global_options().profiling_config.profiling_options = profiling_options - npu_device.global_options().precision_mode=FLAGS.precision_mode - if FLAGS.use_mixlist and FLAGS.precision_mode=='allow_mix_precision': - npu_device.global_options().modify_mixlist=FLAGS.mixlist_file - if FLAGS.fusion_off_flag: - npu_device.global_options().fusion_switch_file=FLAGS.fusion_off_file - if FLAGS.auto_tune: - npu_device.global_options().auto_tune_mode="RL,GA" - npu_device.open().as_default() - -def train_squad(strategy, - input_meta_data, - custom_callbacks=None, - run_eagerly=False, - init_checkpoint=None, - sub_model_export_name=None): - """Run bert squad training.""" - bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file) - init_checkpoint = init_checkpoint or FLAGS.init_checkpoint - run_squad_helper.train_squad(strategy, input_meta_data, bert_config, - custom_callbacks, run_eagerly, init_checkpoint, - sub_model_export_name=sub_model_export_name) - - -def predict_squad(strategy, input_meta_data): - """Makes predictions for the squad dataset.""" - bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file) - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - run_squad_helper.predict_squad( - strategy, input_meta_data, tokenizer, bert_config, squad_lib_wp) - - -def eval_squad(strategy, input_meta_data): - """Evaluate on the squad dataset.""" - bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file) - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - eval_metrics = run_squad_helper.eval_squad( - strategy, input_meta_data, tokenizer, bert_config, squad_lib_wp) - return eval_metrics - - -def export_squad(model_export_path, input_meta_data): - """Exports a trained model as a `SavedModel` for inference. - - Args: - model_export_path: a string specifying the path to the SavedModel directory. - input_meta_data: dictionary containing meta data about input and model. - - Raises: - Export path is not specified, got an empty string or None. 
- """ - bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file) - run_squad_helper.export_squad(model_export_path, input_meta_data, bert_config) - - -def main(_): - gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param) - npu_config() - - with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: - input_meta_data = json.loads(reader.read().decode('utf-8')) - - if FLAGS.mode == 'export_only': - export_squad(FLAGS.model_export_path, input_meta_data) - return - - # Configures cluster spec for multi-worker distribution strategy. - if FLAGS.num_gpus > 0: - _ = distribute_utils.configure_cluster(FLAGS.worker_hosts, FLAGS.task_index) - strategy = distribute_utils.get_distribution_strategy( - distribution_strategy=FLAGS.distribution_strategy, - num_gpus=FLAGS.num_gpus, - all_reduce_alg=FLAGS.all_reduce_alg, - tpu_address=FLAGS.tpu) - - if 'train' in FLAGS.mode: - if FLAGS.log_steps: - custom_callbacks = [keras_utils.TimeHistory( - batch_size=FLAGS.train_batch_size, - log_steps=FLAGS.log_steps, - logdir=FLAGS.model_dir, - )] - else: - custom_callbacks = None - - train_squad( - strategy, - input_meta_data, - custom_callbacks=custom_callbacks, - run_eagerly=FLAGS.run_eagerly, - sub_model_export_name=FLAGS.sub_model_export_name, - ) - if 'predict' in FLAGS.mode: - predict_squad(strategy, input_meta_data) - if 'eval' in FLAGS.mode: - eval_metrics = eval_squad(strategy, input_meta_data) - f1_score = eval_metrics['final_f1'] - logging.info('SQuAD eval F1-score: %f', f1_score) - summary_dir = os.path.join(FLAGS.model_dir, 'summaries', 'eval') - summary_writer = tf.summary.create_file_writer(summary_dir) - with summary_writer.as_default(): - # TODO(lehou): write to the correct step number. - tf.summary.scalar('F1-score', f1_score, step=0) - summary_writer.flush() - # Also write eval_metrics to json file. - squad_lib_wp.write_to_json_files( - eval_metrics, os.path.join(summary_dir, 'eval_metrics.json')) - time.sleep(60) - - -if __name__ == '__main__': - flags.mark_flag_as_required('bert_config_file') - flags.mark_flag_as_required('model_dir') - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_squad_helper.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_squad_helper.py deleted file mode 100644 index 385f94ca7a3b891b540041252d1858f42a254294..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/run_squad_helper.py +++ /dev/null @@ -1,489 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Library for running BERT family models on SQuAD 1.1/2.0 in TF 2.x.""" - -import collections -import json -import os - -from absl import flags -from absl import logging -import tensorflow as tf -from official.modeling import performance -from official.nlp import optimization -from official.nlp.bert import bert_models -from official.nlp.bert import common_flags -from official.nlp.bert import input_pipeline -from official.nlp.bert import model_saving_utils -from official.nlp.bert import model_training_utils -from official.nlp.bert import squad_evaluate_v1_1 -from official.nlp.bert import squad_evaluate_v2_0 -from official.nlp.data import squad_lib_sp -from official.utils.misc import keras_utils - - -def define_common_squad_flags(): - """Defines common flags used by SQuAD tasks.""" - flags.DEFINE_enum( - 'mode', 'train_and_eval', [ - 'train_and_eval', 'train_and_predict', 'train', 'eval', 'predict', - 'export_only' - ], 'One of {"train_and_eval", "train_and_predict", ' - '"train", "eval", "predict", "export_only"}. ' - '`train_and_eval`: train & predict to json files & compute eval metrics. ' - '`train_and_predict`: train & predict to json files. ' - '`train`: only trains the model. ' - '`eval`: predict answers from squad json file & compute eval metrics. ' - '`predict`: predict answers from the squad json file. ' - '`export_only`: will take the latest checkpoint inside ' - 'model_dir and export a `SavedModel`.') - flags.DEFINE_string('train_data_path', '', - 'Training data path with train tfrecords.') - flags.DEFINE_string( - 'input_meta_data_path', None, - 'Path to file that contains meta data about input ' - 'to be used for training and evaluation.') - # Model training specific flags. - flags.DEFINE_integer('train_batch_size', 32, 'Total batch size for training.') - # Predict processing related. - flags.DEFINE_string( - 'predict_file', None, 'SQuAD prediction json file path. ' - '`predict` mode supports multiple files: one can use ' - 'wildcard to specify multiple files and it can also be ' - 'multiple file patterns separated by comma. Note that ' - '`eval` mode only supports a single predict file.') - flags.DEFINE_bool( - 'do_lower_case', True, - 'Whether to lower case the input text. Should be True for uncased ' - 'models and False for cased models.') - flags.DEFINE_float( - 'null_score_diff_threshold', 0.0, - 'If null_score - best_non_null is greater than the threshold, ' - 'predict null. This is only used for SQuAD v2.') - flags.DEFINE_bool( - 'verbose_logging', False, - 'If true, all of the warnings related to data processing will be ' - 'printed. A number of warnings are expected for a normal SQuAD ' - 'evaluation.') - flags.DEFINE_integer('predict_batch_size', 8, - 'Total batch size for prediction.') - flags.DEFINE_integer( - 'n_best_size', 20, - 'The total number of n-best predictions to generate in the ' - 'nbest_predictions.json output file.') - flags.DEFINE_integer( - 'max_answer_length', 30, - 'The maximum length of an answer that can be generated. 
This is needed ' - 'because the start and end predictions are not conditioned on one ' - 'another.') - - common_flags.define_common_bert_flags() - - -FLAGS = flags.FLAGS - - -def squad_loss_fn(start_positions, end_positions, start_logits, end_logits): - """Returns sparse categorical crossentropy for start/end logits.""" - start_loss = tf.keras.losses.sparse_categorical_crossentropy( - start_positions, start_logits, from_logits=True) - end_loss = tf.keras.losses.sparse_categorical_crossentropy( - end_positions, end_logits, from_logits=True) - - total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2 - return total_loss - - -def get_loss_fn(): - """Gets a loss function for squad task.""" - - def _loss_fn(labels, model_outputs): - start_positions = labels['start_positions'] - end_positions = labels['end_positions'] - start_logits, end_logits = model_outputs - return squad_loss_fn(start_positions, end_positions, start_logits, - end_logits) - - return _loss_fn - - -RawResult = collections.namedtuple('RawResult', - ['unique_id', 'start_logits', 'end_logits']) - - -def get_raw_results(predictions): - """Converts multi-replica predictions to RawResult.""" - for unique_ids, start_logits, end_logits in zip(predictions['unique_ids'], - predictions['start_logits'], - predictions['end_logits']): - for values in zip(unique_ids.numpy(), start_logits.numpy(), - end_logits.numpy()): - yield RawResult( - unique_id=values[0], - start_logits=values[1].tolist(), - end_logits=values[2].tolist()) - - -def get_dataset_fn(input_file_pattern, max_seq_length, global_batch_size, - is_training): - """Gets a closure to create a dataset..""" - - def _dataset_fn(ctx=None): - """Returns tf.data.Dataset for distributed BERT pretraining.""" - batch_size = ctx.get_per_replica_batch_size( - global_batch_size) if ctx else global_batch_size - dataset = input_pipeline.create_squad_dataset( - input_file_pattern, - max_seq_length, - batch_size, - is_training=is_training, - input_pipeline_context=ctx) - return dataset - - return _dataset_fn - - -def get_squad_model_to_predict(strategy, bert_config, checkpoint_path, - input_meta_data): - """Gets a squad model to make predictions.""" - with strategy.scope(): - # Prediction always uses float32, even if training uses mixed precision. 
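A quick sketch of how `squad_loss_fn` above is fed; shapes and label values are toy stand-ins, with `seq_len` playing the role of `max_seq_length`:

```python
import tensorflow as tf

batch, seq_len = 2, 384
start_logits = tf.random.normal([batch, seq_len])
end_logits = tf.random.normal([batch, seq_len])
start_positions = tf.constant([5, 10])
end_positions = tf.constant([7, 12])

# Scalar loss: mean of the start and end cross-entropies.
loss = squad_loss_fn(start_positions, end_positions, start_logits, end_logits)
```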
- tf.keras.mixed_precision.set_global_policy('float32') - squad_model, _ = bert_models.squad_model( - bert_config, - input_meta_data['max_seq_length'], - hub_module_url=FLAGS.hub_module_url) - - if checkpoint_path is None: - checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir) - logging.info('Restoring checkpoints from %s', checkpoint_path) - checkpoint = tf.train.Checkpoint(model=squad_model) - checkpoint.restore(checkpoint_path).expect_partial() - return squad_model - - -def predict_squad_customized(strategy, input_meta_data, predict_tfrecord_path, - num_steps, squad_model): - """Make predictions using a Bert-based squad model.""" - predict_dataset_fn = get_dataset_fn( - predict_tfrecord_path, - input_meta_data['max_seq_length'], - FLAGS.predict_batch_size, - is_training=False) - predict_iterator = iter( - strategy.distribute_datasets_from_function(predict_dataset_fn)) - - @tf.function - def predict_step(iterator): - """Predicts on distributed devices.""" - - def _replicated_step(inputs): - """Replicated prediction calculation.""" - x, _ = inputs - unique_ids = x.pop('unique_ids') - start_logits, end_logits = squad_model(x, training=False) - return dict( - unique_ids=unique_ids, - start_logits=start_logits, - end_logits=end_logits) - - outputs = strategy.run(_replicated_step, args=(next(iterator),)) - return tf.nest.map_structure(strategy.experimental_local_results, outputs) - - all_results = [] - for _ in range(num_steps): - predictions = predict_step(predict_iterator) - for result in get_raw_results(predictions): - all_results.append(result) - if len(all_results) % 100 == 0: - logging.info('Made predictions for %d records.', len(all_results)) - return all_results - - -def train_squad(strategy, - input_meta_data, - bert_config, - custom_callbacks=None, - run_eagerly=False, - init_checkpoint=None, - sub_model_export_name=None): - """Run bert squad training.""" - if strategy: - logging.info('Training using customized training loop with distribution' - ' strategy.') - # Enables XLA in Session Config. Should not be set for TPU. - keras_utils.set_session_config(FLAGS.enable_xla) - performance.set_mixed_precision_policy(common_flags.dtype()) - - epochs = FLAGS.num_train_epochs - num_train_examples = input_meta_data['train_data_size'] - max_seq_length = input_meta_data['max_seq_length'] - steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size) if not FLAGS.train_steps else FLAGS.train_steps - logging.info('steps_per_epoch: %d', steps_per_epoch) - warmup_steps = int(epochs * num_train_examples * 0.1 / FLAGS.train_batch_size) - train_input_fn = get_dataset_fn( - FLAGS.train_data_path, - max_seq_length, - FLAGS.train_batch_size, - is_training=True) - - def _get_squad_model(): - """Get Squad model and optimizer.""" - squad_model, core_model = bert_models.squad_model( - bert_config, - max_seq_length, - hub_module_url=FLAGS.hub_module_url, - hub_module_trainable=FLAGS.hub_module_trainable) - optimizer = optimization.create_optimizer(FLAGS.learning_rate, - steps_per_epoch * epochs, - warmup_steps, FLAGS.end_lr, - FLAGS.optimizer_type) - - squad_model.optimizer = performance.configure_optimizer( - optimizer, - use_float16=common_flags.use_float16(), - use_graph_rewrite=common_flags.use_graph_rewrite()) - return squad_model, core_model - - # Only when explicit_allreduce = True, post_allreduce_callbacks and - # allreduce_bytes_per_pack will take effect. 
optimizer.apply_gradients() no - # longer implicitly allreduce gradients, users manually allreduce gradient and - # pass the allreduced grads_and_vars to apply_gradients(). - # With explicit_allreduce = True, clip_by_global_norm is moved to after - # allreduce. - model_training_utils.run_customized_training_loop( - strategy=strategy, - model_fn=_get_squad_model, - loss_fn=get_loss_fn(), - model_dir=FLAGS.model_dir, - steps_per_epoch=steps_per_epoch, - steps_per_loop=FLAGS.steps_per_loop, - epochs=epochs, - train_input_fn=train_input_fn, - init_checkpoint=init_checkpoint or FLAGS.init_checkpoint, - sub_model_export_name=sub_model_export_name, - run_eagerly=run_eagerly, - custom_callbacks=custom_callbacks, - explicit_allreduce=FLAGS.explicit_allreduce, - pre_allreduce_callbacks=[ - model_training_utils.clip_by_global_norm_callback - ], - allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack) - - -def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib, - predict_file, squad_model): - """Makes predictions for a squad dataset.""" - doc_stride = input_meta_data['doc_stride'] - max_query_length = input_meta_data['max_query_length'] - # Whether data should be in Ver 2.0 format. - version_2_with_negative = input_meta_data.get('version_2_with_negative', - False) - eval_examples = squad_lib.read_squad_examples( - input_file=predict_file, - is_training=False, - version_2_with_negative=version_2_with_negative) - - eval_writer = squad_lib.FeatureWriter( - filename=os.path.join(FLAGS.model_dir, 'eval.tf_record'), - is_training=False) - eval_features = [] - - def _append_feature(feature, is_padding): - if not is_padding: - eval_features.append(feature) - eval_writer.process_feature(feature) - - # TPU requires a fixed batch size for all batches, therefore the number - # of examples must be a multiple of the batch size, or else examples - # will get dropped. So we pad with fake examples which are ignored - # later on. - kwargs = dict( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=input_meta_data['max_seq_length'], - doc_stride=doc_stride, - max_query_length=max_query_length, - is_training=False, - output_fn=_append_feature, - batch_size=FLAGS.predict_batch_size) - - # squad_lib_sp requires one more argument 'do_lower_case'. 
- if squad_lib == squad_lib_sp: - kwargs['do_lower_case'] = FLAGS.do_lower_case - dataset_size = squad_lib.convert_examples_to_features(**kwargs) - eval_writer.close() - - logging.info('***** Running predictions *****') - logging.info(' Num orig examples = %d', len(eval_examples)) - logging.info(' Num split examples = %d', len(eval_features)) - logging.info(' Batch size = %d', FLAGS.predict_batch_size) - - num_steps = int(dataset_size / FLAGS.predict_batch_size) - all_results = predict_squad_customized(strategy, input_meta_data, - eval_writer.filename, num_steps, - squad_model) - - all_predictions, all_nbest_json, scores_diff_json = ( - squad_lib.postprocess_output( - eval_examples, - eval_features, - all_results, - FLAGS.n_best_size, - FLAGS.max_answer_length, - FLAGS.do_lower_case, - version_2_with_negative=version_2_with_negative, - null_score_diff_threshold=FLAGS.null_score_diff_threshold, - verbose=FLAGS.verbose_logging)) - - return all_predictions, all_nbest_json, scores_diff_json - - -def dump_to_files(all_predictions, - all_nbest_json, - scores_diff_json, - squad_lib, - version_2_with_negative, - file_prefix=''): - """Save output to json files.""" - output_prediction_file = os.path.join(FLAGS.model_dir, - '%spredictions.json' % file_prefix) - output_nbest_file = os.path.join(FLAGS.model_dir, - '%snbest_predictions.json' % file_prefix) - output_null_log_odds_file = os.path.join(FLAGS.model_dir, file_prefix, - '%snull_odds.json' % file_prefix) - logging.info('Writing predictions to: %s', (output_prediction_file)) - logging.info('Writing nbest to: %s', (output_nbest_file)) - - squad_lib.write_to_json_files(all_predictions, output_prediction_file) - squad_lib.write_to_json_files(all_nbest_json, output_nbest_file) - if version_2_with_negative: - squad_lib.write_to_json_files(scores_diff_json, output_null_log_odds_file) - - -def _get_matched_files(input_path): - """Returns all files that matches the input_path.""" - input_patterns = input_path.strip().split(',') - all_matched_files = [] - for input_pattern in input_patterns: - input_pattern = input_pattern.strip() - if not input_pattern: - continue - matched_files = tf.io.gfile.glob(input_pattern) - if not matched_files: - raise ValueError('%s does not match any files.' 
% input_pattern) - else: - all_matched_files.extend(matched_files) - return sorted(all_matched_files) - - -def predict_squad(strategy, - input_meta_data, - tokenizer, - bert_config, - squad_lib, - init_checkpoint=None): - """Get prediction results and evaluate them to hard drive.""" - if init_checkpoint is None: - init_checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir) - - all_predict_files = _get_matched_files(FLAGS.predict_file) - squad_model = get_squad_model_to_predict(strategy, bert_config, - init_checkpoint, input_meta_data) - for idx, predict_file in enumerate(all_predict_files): - all_predictions, all_nbest_json, scores_diff_json = prediction_output_squad( - strategy, input_meta_data, tokenizer, squad_lib, predict_file, - squad_model) - if len(all_predict_files) == 1: - file_prefix = '' - else: - # if predict_file is /path/xquad.ar.json, the `file_prefix` may be - # "xquad.ar-0-" - file_prefix = '%s-' % os.path.splitext( - os.path.basename(all_predict_files[idx]))[0] - dump_to_files(all_predictions, all_nbest_json, scores_diff_json, squad_lib, - input_meta_data.get('version_2_with_negative', False), - file_prefix) - - -def eval_squad(strategy, - input_meta_data, - tokenizer, - bert_config, - squad_lib, - init_checkpoint=None): - """Get prediction results and evaluate them against ground truth.""" - if init_checkpoint is None: - init_checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir) - - all_predict_files = _get_matched_files(FLAGS.predict_file) - if len(all_predict_files) != 1: - raise ValueError('`eval_squad` only supports one predict file, ' - 'but got %s' % all_predict_files) - - squad_model = get_squad_model_to_predict(strategy, bert_config, - init_checkpoint, input_meta_data) - all_predictions, all_nbest_json, scores_diff_json = prediction_output_squad( - strategy, input_meta_data, tokenizer, squad_lib, all_predict_files[0], - squad_model) - dump_to_files(all_predictions, all_nbest_json, scores_diff_json, squad_lib, - input_meta_data.get('version_2_with_negative', False)) - - with tf.io.gfile.GFile(FLAGS.predict_file, 'r') as reader: - dataset_json = json.load(reader) - pred_dataset = dataset_json['data'] - if input_meta_data.get('version_2_with_negative', False): - eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, all_predictions, - scores_diff_json) - else: - eval_metrics = squad_evaluate_v1_1.evaluate(pred_dataset, all_predictions) - return eval_metrics - - -def export_squad(model_export_path, input_meta_data, bert_config): - """Exports a trained model as a `SavedModel` for inference. - - Args: - model_export_path: a string specifying the path to the SavedModel directory. - input_meta_data: dictionary containing meta data about input and model. - bert_config: Bert configuration file to define core bert layers. - - Raises: - Export path is not specified, got an empty string or None. - """ - if not model_export_path: - raise ValueError('Export path is not specified: %s' % model_export_path) - # Export uses float32 for now, even if training uses mixed precision. 
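`_get_matched_files` above accepts a comma-separated list of glob patterns; a hypothetical call to show the contract (paths are placeholders):

```python
files = _get_matched_files('data/xquad.*.json, extra/dev.json')
# -> sorted union of both globs; a pattern matching no files raises
#    ValueError instead of being silently dropped.
```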
- tf.keras.mixed_precision.set_global_policy('float32') - squad_model, _ = bert_models.squad_model(bert_config, - input_meta_data['max_seq_length']) - model_saving_utils.export_bert_model( - model_export_path, model=squad_model, checkpoint_dir=FLAGS.model_dir) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/serving.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/serving.py deleted file mode 100644 index cac5425123504666b7721659be25c64ae52ea024..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/serving.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Examples of SavedModel export for tf-serving.""" - -from absl import app -from absl import flags -import tensorflow as tf - -from official.nlp.bert import bert_models -from official.nlp.bert import configs - -flags.DEFINE_integer( - "sequence_length", None, "Sequence length to parse the tf.Example. If " - "sequence_length > 0, add a signature for serialized " - "tf.Example and define the parsing specification by the " - "sequence_length.") -flags.DEFINE_string("bert_config_file", None, - "Bert configuration file to define core bert layers.") -flags.DEFINE_string("model_checkpoint_path", None, - "File path to TF model checkpoint.") -flags.DEFINE_string("export_path", None, - "Destination folder to export the serving SavedModel.") - -FLAGS = flags.FLAGS - - -class BertServing(tf.keras.Model): - """Bert transformer encoder model for serving.""" - - def __init__(self, bert_config, name_to_features=None, name="serving_model"): - super(BertServing, self).__init__(name=name) - self.encoder = bert_models.get_transformer_encoder( - bert_config, sequence_length=None) - self.name_to_features = name_to_features - - def call(self, inputs): - input_word_ids = inputs["input_ids"] - input_mask = inputs["input_mask"] - input_type_ids = inputs["segment_ids"] - - encoder_outputs, _ = self.encoder( - [input_word_ids, input_mask, input_type_ids]) - return encoder_outputs - - def serve_body(self, input_ids, input_mask=None, segment_ids=None): - if segment_ids is None: - # Requires CLS token is the first token of inputs. 
- segment_ids = tf.zeros_like(input_ids) - if input_mask is None: - # The mask has 1 for real tokens and 0 for padding tokens. - input_mask = tf.where( - tf.equal(input_ids, 0), tf.zeros_like(input_ids), - tf.ones_like(input_ids)) - - inputs = dict( - input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids) - return self.call(inputs) - - @tf.function - def serve(self, input_ids, input_mask=None, segment_ids=None): - outputs = self.serve_body(input_ids, input_mask, segment_ids) - # Returns a dictionary to control SignatureDef output signature. - return {"outputs": outputs[-1]} - - @tf.function - def serve_examples(self, inputs): - features = tf.io.parse_example(inputs, self.name_to_features) - for key in list(features.keys()): - t = features[key] - if t.dtype == tf.int64: - t = tf.cast(t, tf.int32) - features[key] = t - return self.serve( - features["input_ids"], - input_mask=features["input_mask"] if "input_mask" in features else None, - segment_ids=features["segment_ids"] - if "segment_ids" in features else None) - - @classmethod - def export(cls, model, export_dir): - if not isinstance(model, cls): - raise ValueError("Invalid model instance: %s, it should be a %s" % - (model, cls)) - - signatures = { - "serving_default": - model.serve.get_concrete_function( - input_ids=tf.TensorSpec( - shape=[None, None], dtype=tf.int32, name="inputs")), - } - if model.name_to_features: - signatures[ - "serving_examples"] = model.serve_examples.get_concrete_function( - tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")) - tf.saved_model.save(model, export_dir=export_dir, signatures=signatures) - - -def main(_): - sequence_length = FLAGS.sequence_length - if sequence_length is not None and sequence_length > 0: - name_to_features = { - "input_ids": tf.io.FixedLenFeature([sequence_length], tf.int64), - "input_mask": tf.io.FixedLenFeature([sequence_length], tf.int64), - "segment_ids": tf.io.FixedLenFeature([sequence_length], tf.int64), - } - else: - name_to_features = None - bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - serving_model = BertServing( - bert_config=bert_config, name_to_features=name_to_features) - checkpoint = tf.train.Checkpoint(model=serving_model.encoder) - checkpoint.restore(FLAGS.model_checkpoint_path - ).assert_existing_objects_matched().run_restore_ops() - BertServing.export(serving_model, FLAGS.export_path) - - -if __name__ == "__main__": - flags.mark_flag_as_required("bert_config_file") - flags.mark_flag_as_required("model_checkpoint_path") - flags.mark_flag_as_required("export_path") - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/squad_evaluate_v1_1.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/squad_evaluate_v1_1.py deleted file mode 100644 index cada87b5d88c9303ed8ef395ad0eddd34d27c6bf..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/squad_evaluate_v1_1.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
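`serve_body` in serving.py above derives the padding mask directly from the ids whenever the caller omits it; concretely:

```python
import tensorflow as tf

input_ids = tf.constant([[101, 2023, 2003, 102, 0, 0]])
input_mask = tf.where(tf.equal(input_ids, 0),
                      tf.zeros_like(input_ids), tf.ones_like(input_ids))
# -> [[1, 1, 1, 1, 0, 0]]: 1 for real tokens, 0 for padding.
```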
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Evaluation of SQuAD predictions (version 1.1).
-
-The functions are copied from
-https://worksheets.codalab.org/rest/bundles/0xbcd57bee090b421c982906709c8c27e1/contents/blob/.
-
-The SQuAD dataset is described in this paper:
-SQuAD: 100,000+ Questions for Machine Comprehension of Text
-Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, Percy Liang
-https://nlp.stanford.edu/pubs/rajpurkar2016squad.pdf
-"""
-
-import collections
-import re
-import string
-
-# pylint: disable=g-bad-import-order
-
-from absl import logging
-# pylint: enable=g-bad-import-order
-
-
-def _normalize_answer(s):
-  """Lowers text and removes punctuation, articles and extra whitespace."""
-
-  def remove_articles(text):
-    return re.sub(r"\b(a|an|the)\b", " ", text)
-
-  def white_space_fix(text):
-    return " ".join(text.split())
-
-  def remove_punc(text):
-    exclude = set(string.punctuation)
-    return "".join(ch for ch in text if ch not in exclude)
-
-  def lower(text):
-    return text.lower()
-
-  return white_space_fix(remove_articles(remove_punc(lower(s))))
-
-
-def _f1_score(prediction, ground_truth):
-  """Computes F1 score by comparing prediction to ground truth."""
-  prediction_tokens = _normalize_answer(prediction).split()
-  ground_truth_tokens = _normalize_answer(ground_truth).split()
-  prediction_counter = collections.Counter(prediction_tokens)
-  ground_truth_counter = collections.Counter(ground_truth_tokens)
-  common = prediction_counter & ground_truth_counter
-  num_same = sum(common.values())
-  if num_same == 0:
-    return 0
-  precision = 1.0 * num_same / len(prediction_tokens)
-  recall = 1.0 * num_same / len(ground_truth_tokens)
-  f1 = (2 * precision * recall) / (precision + recall)
-  return f1
-
-
-def _exact_match_score(prediction, ground_truth):
-  """Checks if predicted answer exactly matches ground truth answer."""
-  return _normalize_answer(prediction) == _normalize_answer(ground_truth)
-
-
-def _metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
-  """Computes the max over all metric scores."""
-  scores_for_ground_truths = []
-  for ground_truth in ground_truths:
-    score = metric_fn(prediction, ground_truth)
-    scores_for_ground_truths.append(score)
-  return max(scores_for_ground_truths)
-
-
-def evaluate(dataset, predictions):
-  """Evaluates predictions for a dataset."""
-  f1 = exact_match = total = 0
-  for article in dataset:
-    for paragraph in article["paragraphs"]:
-      for qa in paragraph["qas"]:
-        total += 1
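-        # Each prediction is scored against every reference answer; only the
-        # best (maximum) exact-match and F1 scores contribute to the totals.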
- if qa["id"] not in predictions: - message = "Unanswered question " + qa["id"] + " will receive score 0." - logging.error(message) - continue - ground_truths = [entry["text"] for entry in qa["answers"]] - prediction = predictions[qa["id"]] - exact_match += _metric_max_over_ground_truths(_exact_match_score, - prediction, ground_truths) - f1 += _metric_max_over_ground_truths(_f1_score, prediction, - ground_truths) - - exact_match = exact_match / total - f1 = f1 / total - - return {"exact_match": exact_match, "final_f1": f1} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/squad_evaluate_v2_0.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/squad_evaluate_v2_0.py deleted file mode 100644 index 1a325f7ec569a8a15526a693b0f087283b04854f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/squad_evaluate_v2_0.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Evaluation script for SQuAD version 2.0. - -The functions are copied and modified from -https://raw.githubusercontent.com/white127/SQUAD-2.0-bidaf/master/evaluate-v2.0.py - -In addition to basic functionality, we also compute additional statistics and -plot precision-recall curves if an additional na_prob.json file is provided. -This file is expected to map question ID's to the model's predicted probability -that a question is unanswerable. 
-""" - -import collections -import re -import string - -from absl import logging - - -def _make_qid_to_has_ans(dataset): - qid_to_has_ans = {} - for article in dataset: - for p in article['paragraphs']: - for qa in p['qas']: - qid_to_has_ans[qa['id']] = bool(qa['answers']) - return qid_to_has_ans - - -def _normalize_answer(s): - """Lower text and remove punctuation, articles and extra whitespace.""" - def remove_articles(text): - regex = re.compile(r'\b(a|an|the)\b', re.UNICODE) - return re.sub(regex, ' ', text) - def white_space_fix(text): - return ' '.join(text.split()) - def remove_punc(text): - exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) - def lower(text): - return text.lower() - return white_space_fix(remove_articles(remove_punc(lower(s)))) - - -def _get_tokens(s): - if not s: return [] - return _normalize_answer(s).split() - - -def _compute_exact(a_gold, a_pred): - return int(_normalize_answer(a_gold) == _normalize_answer(a_pred)) - - -def _compute_f1(a_gold, a_pred): - """Compute F1-score.""" - gold_toks = _get_tokens(a_gold) - pred_toks = _get_tokens(a_pred) - common = collections.Counter(gold_toks) & collections.Counter(pred_toks) - num_same = sum(common.values()) - if not gold_toks or not pred_toks: - # If either is no-answer, then F1 is 1 if they agree, 0 otherwise - return int(gold_toks == pred_toks) - if num_same == 0: - return 0 - precision = 1.0 * num_same / len(pred_toks) - recall = 1.0 * num_same / len(gold_toks) - f1 = (2 * precision * recall) / (precision + recall) - return f1 - - -def _get_raw_scores(dataset, predictions): - """Compute raw scores.""" - exact_scores = {} - f1_scores = {} - for article in dataset: - for p in article['paragraphs']: - for qa in p['qas']: - qid = qa['id'] - gold_answers = [a['text'] for a in qa['answers'] - if _normalize_answer(a['text'])] - if not gold_answers: - # For unanswerable questions, only correct answer is empty string - gold_answers = [''] - if qid not in predictions: - logging.error('Missing prediction for %s', qid) - continue - a_pred = predictions[qid] - # Take max over all gold answers - exact_scores[qid] = max(_compute_exact(a, a_pred) for a in gold_answers) - f1_scores[qid] = max(_compute_f1(a, a_pred) for a in gold_answers) - return exact_scores, f1_scores - - -def _apply_no_ans_threshold( - scores, na_probs, qid_to_has_ans, na_prob_thresh=1.0): - new_scores = {} - for qid, s in scores.items(): - pred_na = na_probs[qid] > na_prob_thresh - if pred_na: - new_scores[qid] = float(not qid_to_has_ans[qid]) - else: - new_scores[qid] = s - return new_scores - - -def _make_eval_dict(exact_scores, f1_scores, qid_list=None): - """Make evaluation result dictionary.""" - if not qid_list: - total = len(exact_scores) - return collections.OrderedDict([ - ('exact', 100.0 * sum(exact_scores.values()) / total), - ('f1', 100.0 * sum(f1_scores.values()) / total), - ('total', total), - ]) - else: - total = len(qid_list) - return collections.OrderedDict([ - ('exact', 100.0 * sum(exact_scores[k] for k in qid_list) / total), - ('f1', 100.0 * sum(f1_scores[k] for k in qid_list) / total), - ('total', total), - ]) - - -def _merge_eval(main_eval, new_eval, prefix): - for k in new_eval: - main_eval['%s_%s' % (prefix, k)] = new_eval[k] - - -def _make_precision_recall_eval(scores, na_probs, num_true_pos, qid_to_has_ans): - """Make evaluation dictionary containing average recision recall.""" - qid_list = sorted(na_probs, key=lambda k: na_probs[k]) - true_pos = 0.0 - cur_p = 1.0 - cur_r = 0.0 - precisions = 
[1.0] - recalls = [0.0] - avg_prec = 0.0 - for i, qid in enumerate(qid_list): - if qid_to_has_ans[qid]: - true_pos += scores[qid] - cur_p = true_pos / float(i+1) - cur_r = true_pos / float(num_true_pos) - if i == len(qid_list) - 1 or na_probs[qid] != na_probs[qid_list[i+1]]: - # i.e., if we can put a threshold after this point - avg_prec += cur_p * (cur_r - recalls[-1]) - precisions.append(cur_p) - recalls.append(cur_r) - return {'ap': 100.0 * avg_prec} - - -def _run_precision_recall_analysis( - main_eval, exact_raw, f1_raw, na_probs, qid_to_has_ans): - """Run precision recall analysis and return result dictionary.""" - num_true_pos = sum(1 for v in qid_to_has_ans.values() if v) - if num_true_pos == 0: - return - pr_exact = _make_precision_recall_eval( - exact_raw, na_probs, num_true_pos, qid_to_has_ans) - pr_f1 = _make_precision_recall_eval( - f1_raw, na_probs, num_true_pos, qid_to_has_ans) - oracle_scores = {k: float(v) for k, v in qid_to_has_ans.items()} - pr_oracle = _make_precision_recall_eval( - oracle_scores, na_probs, num_true_pos, qid_to_has_ans) - _merge_eval(main_eval, pr_exact, 'pr_exact') - _merge_eval(main_eval, pr_f1, 'pr_f1') - _merge_eval(main_eval, pr_oracle, 'pr_oracle') - - -def _find_best_thresh(predictions, scores, na_probs, qid_to_has_ans): - """Find the best threshold for no answer probability.""" - num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k]) - cur_score = num_no_ans - best_score = cur_score - best_thresh = 0.0 - qid_list = sorted(na_probs, key=lambda k: na_probs[k]) - for qid in qid_list: - if qid not in scores: continue - if qid_to_has_ans[qid]: - diff = scores[qid] - else: - if predictions[qid]: - diff = -1 - else: - diff = 0 - cur_score += diff - if cur_score > best_score: - best_score = cur_score - best_thresh = na_probs[qid] - return 100.0 * best_score / len(scores), best_thresh - - -def _find_all_best_thresh( - main_eval, predictions, exact_raw, f1_raw, na_probs, qid_to_has_ans): - best_exact, exact_thresh = _find_best_thresh( - predictions, exact_raw, na_probs, qid_to_has_ans) - best_f1, f1_thresh = _find_best_thresh( - predictions, f1_raw, na_probs, qid_to_has_ans) - main_eval['final_exact'] = best_exact - main_eval['final_exact_thresh'] = exact_thresh - main_eval['final_f1'] = best_f1 - main_eval['final_f1_thresh'] = f1_thresh - - -def evaluate(dataset, predictions, na_probs=None): - """Evaluate prediction results.""" - new_orig_data = [] - for article in dataset: - for p in article['paragraphs']: - for qa in p['qas']: - if qa['id'] in predictions: - new_para = {'qas': [qa]} - new_article = {'paragraphs': [new_para]} - new_orig_data.append(new_article) - dataset = new_orig_data - - if na_probs is None: - na_probs = {k: 0.0 for k in predictions} - qid_to_has_ans = _make_qid_to_has_ans(dataset) # maps qid to True/False - has_ans_qids = [k for k, v in qid_to_has_ans.items() if v] - no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v] - exact_raw, f1_raw = _get_raw_scores(dataset, predictions) - exact_thresh = _apply_no_ans_threshold(exact_raw, na_probs, qid_to_has_ans) - f1_thresh = _apply_no_ans_threshold(f1_raw, na_probs, qid_to_has_ans) - out_eval = _make_eval_dict(exact_thresh, f1_thresh) - if has_ans_qids: - has_ans_eval = _make_eval_dict( - exact_thresh, f1_thresh, qid_list=has_ans_qids) - _merge_eval(out_eval, has_ans_eval, 'HasAns') - if no_ans_qids: - no_ans_eval = _make_eval_dict(exact_thresh, f1_thresh, qid_list=no_ans_qids) - _merge_eval(out_eval, no_ans_eval, 'NoAns') - - _find_all_best_thresh( - out_eval, 
predictions, exact_raw, f1_raw, na_probs, qid_to_has_ans) - _run_precision_recall_analysis( - out_eval, exact_raw, f1_raw, na_probs, qid_to_has_ans) - return out_eval diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tf1_checkpoint_converter_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tf1_checkpoint_converter_lib.py deleted file mode 100644 index ba6e593be16db3e6396a62f226171bbd1be0db97..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tf1_checkpoint_converter_lib.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible.""" - -import numpy as np -import tensorflow.compat.v1 as tf # TF 1.x - -# Mapping between old <=> new names. The source pattern in original variable -# name will be replaced by destination pattern. 
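-# For example, applying the pairs below in order rewrites the TF1 variable
-# "bert/embeddings/word_embeddings" to "bert_model/word_embeddings/embeddings".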
-BERT_NAME_REPLACEMENTS = ( - ("bert", "bert_model"), - ("embeddings/word_embeddings", "word_embeddings/embeddings"), - ("embeddings/token_type_embeddings", - "embedding_postprocessor/type_embeddings"), - ("embeddings/position_embeddings", - "embedding_postprocessor/position_embeddings"), - ("embeddings/LayerNorm", "embedding_postprocessor/layer_norm"), - ("attention/self", "self_attention"), - ("attention/output/dense", "self_attention_output"), - ("attention/output/LayerNorm", "self_attention_layer_norm"), - ("intermediate/dense", "intermediate"), - ("output/dense", "output"), - ("output/LayerNorm", "output_layer_norm"), - ("pooler/dense", "pooler_transform"), -) - -BERT_V2_NAME_REPLACEMENTS = ( - ("bert/", ""), - ("encoder", "transformer"), - ("embeddings/word_embeddings", "word_embeddings/embeddings"), - ("embeddings/token_type_embeddings", "type_embeddings/embeddings"), - ("embeddings/position_embeddings", "position_embedding/embeddings"), - ("embeddings/LayerNorm", "embeddings/layer_norm"), - ("attention/self", "self_attention"), - ("attention/output/dense", "self_attention/attention_output"), - ("attention/output/LayerNorm", "self_attention_layer_norm"), - ("intermediate/dense", "intermediate"), - ("output/dense", "output"), - ("output/LayerNorm", "output_layer_norm"), - ("pooler/dense", "pooler_transform"), - ("cls/predictions", "bert/cls/predictions"), - ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"), - ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"), - ("cls/seq_relationship/output_weights", - "predictions/transform/logits/kernel"), -) - -BERT_PERMUTATIONS = () - -BERT_V2_PERMUTATIONS = (("cls/seq_relationship/output_weights", (1, 0)),) - - -def _bert_name_replacement(var_name, name_replacements): - """Gets the variable name replacement.""" - for src_pattern, tgt_pattern in name_replacements: - if src_pattern in var_name: - old_var_name = var_name - var_name = var_name.replace(src_pattern, tgt_pattern) - tf.logging.info("Converted: %s --> %s", old_var_name, var_name) - return var_name - - -def _has_exclude_patterns(name, exclude_patterns): - """Checks if a string contains substrings that match patterns to exclude.""" - for p in exclude_patterns: - if p in name: - return True - return False - - -def _get_permutation(name, permutations): - """Checks whether a variable requires transposition by pattern matching.""" - for src_pattern, permutation in permutations: - if src_pattern in name: - tf.logging.info("Permuted: %s --> %s", name, permutation) - return permutation - - return None - - -def _get_new_shape(name, shape, num_heads): - """Checks whether a variable requires reshape by pattern matching.""" - if "self_attention/attention_output/kernel" in name: - return tuple([num_heads, shape[0] // num_heads, shape[1]]) - if "self_attention/attention_output/bias" in name: - return shape - - patterns = [ - "self_attention/query", "self_attention/value", "self_attention/key" - ] - for pattern in patterns: - if pattern in name: - if "kernel" in name: - return tuple([shape[0], num_heads, shape[1] // num_heads]) - if "bias" in name: - return tuple([num_heads, shape[0] // num_heads]) - return None - - -def create_v2_checkpoint(model, - src_checkpoint, - output_path, - checkpoint_model_name="model"): - """Converts a name-based matched TF V1 checkpoint to TF V2 checkpoint.""" - # Uses streaming-restore in eager model to read V1 name-based checkpoints. 
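-  # assert_existing_objects_matched() raises if any variable that the model
-  # has already created is missing from the source checkpoint.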
-  model.load_weights(src_checkpoint).assert_existing_objects_matched()
-  if hasattr(model, "checkpoint_items"):
-    checkpoint_items = model.checkpoint_items
-  else:
-    checkpoint_items = {}
-
-  checkpoint_items[checkpoint_model_name] = model
-  checkpoint = tf.train.Checkpoint(**checkpoint_items)
-  checkpoint.save(output_path)
-
-
-def convert(checkpoint_from_path,
-            checkpoint_to_path,
-            num_heads,
-            name_replacements,
-            permutations,
-            exclude_patterns=None):
-  """Migrates the names of variables within a checkpoint.
-
-  Args:
-    checkpoint_from_path: Path to source checkpoint to be read in.
-    checkpoint_to_path: Path to checkpoint to be written out.
-    num_heads: The number of heads of the model.
-    name_replacements: A list of tuples of the form (match_str, replace_str)
-      describing variable names to adjust.
-    permutations: A list of tuples of the form (match_str, permutation)
-      describing permutations to apply to given variables. Note that match_str
-      should match the original variable name, not the replaced one.
-    exclude_patterns: A list of string patterns to exclude variables from
-      checkpoint conversion.
-
-  Returns:
-    A dictionary that maps the new variable names to the Variable objects.
-    A dictionary that maps the old variable names to the new variable names.
-  """
-  with tf.Graph().as_default():
-    tf.logging.info("Reading checkpoint_from_path %s", checkpoint_from_path)
-    reader = tf.train.NewCheckpointReader(checkpoint_from_path)
-    name_shape_map = reader.get_variable_to_shape_map()
-    new_variable_map = {}
-    conversion_map = {}
-    for var_name in name_shape_map:
-      if exclude_patterns and _has_exclude_patterns(var_name, exclude_patterns):
-        continue
-      # Get the original tensor data.
-      tensor = reader.get_tensor(var_name)
-
-      # Look up the new variable name, if any.
-      new_var_name = _bert_name_replacement(var_name, name_replacements)
-
-      # See if we need to reshape the underlying tensor.
-      new_shape = None
-      if num_heads > 0:
-        new_shape = _get_new_shape(new_var_name, tensor.shape, num_heads)
-      if new_shape:
-        tf.logging.info("Variable %s has a shape change from %s to %s",
-                        var_name, tensor.shape, new_shape)
-        tensor = np.reshape(tensor, new_shape)
-
-      # See if we need to permute the underlying tensor.
-      permutation = _get_permutation(var_name, permutations)
-      if permutation:
-        tensor = np.transpose(tensor, permutation)
-
-      # Create a new variable with the possibly-reshaped or transposed tensor.
-      var = tf.Variable(tensor, name=var_name)
-
-      # Save the variable into the new variable map.
-      new_variable_map[new_var_name] = var
-
-      # Keep a list of converter variables for sanity checking.
- if new_var_name != var_name: - conversion_map[var_name] = new_var_name - - saver = tf.train.Saver(new_variable_map) - - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - tf.logging.info("Writing checkpoint_to_path %s", checkpoint_to_path) - saver.save(sess, checkpoint_to_path, write_meta_graph=False) - - tf.logging.info("Summary:") - tf.logging.info(" Converted %d variable name(s).", len(new_variable_map)) - tf.logging.info(" Converted: %s", str(conversion_map)) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tf2_encoder_checkpoint_converter.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tf2_encoder_checkpoint_converter.py deleted file mode 100644 index caec572d8a66888046596f27f02b5bd1d276d699..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tf2_encoder_checkpoint_converter.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""A converter from a V1 BERT encoder checkpoint to a V2 encoder checkpoint. - -The conversion will yield an object-oriented checkpoint that can be used -to restore a BertEncoder or BertPretrainerV2 object (see the `converted_model` -FLAG below). 
-""" - -import os - -from absl import app -from absl import flags - -import tensorflow as tf -from official.modeling import tf_utils -from official.nlp.bert import configs -from official.nlp.bert import tf1_checkpoint_converter_lib -from official.nlp.modeling import models -from official.nlp.modeling import networks - -FLAGS = flags.FLAGS - -flags.DEFINE_string("bert_config_file", None, - "Bert configuration file to define core bert layers.") -flags.DEFINE_string( - "checkpoint_to_convert", None, - "Initial checkpoint from a pretrained BERT model core (that is, only the " - "BertModel, with no task heads.)") -flags.DEFINE_string("converted_checkpoint_path", None, - "Name for the created object-based V2 checkpoint.") -flags.DEFINE_string("checkpoint_model_name", "encoder", - "The name of the model when saving the checkpoint, i.e., " - "the checkpoint will be saved using: " - "tf.train.Checkpoint(FLAGS.checkpoint_model_name=model).") -flags.DEFINE_enum( - "converted_model", "encoder", ["encoder", "pretrainer"], - "Whether to convert the checkpoint to a `BertEncoder` model or a " - "`BertPretrainerV2` model (with mlm but without classification heads).") - - -def _create_bert_model(cfg): - """Creates a BERT keras core model from BERT configuration. - - Args: - cfg: A `BertConfig` to create the core model. - - Returns: - A BertEncoder network. - """ - bert_encoder = networks.BertEncoder( - vocab_size=cfg.vocab_size, - hidden_size=cfg.hidden_size, - num_layers=cfg.num_hidden_layers, - num_attention_heads=cfg.num_attention_heads, - intermediate_size=cfg.intermediate_size, - activation=tf_utils.get_activation(cfg.hidden_act), - dropout_rate=cfg.hidden_dropout_prob, - attention_dropout_rate=cfg.attention_probs_dropout_prob, - max_sequence_length=cfg.max_position_embeddings, - type_vocab_size=cfg.type_vocab_size, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=cfg.initializer_range), - embedding_width=cfg.embedding_size) - - return bert_encoder - - -def _create_bert_pretrainer_model(cfg): - """Creates a BERT keras core model from BERT configuration. - - Args: - cfg: A `BertConfig` to create the core model. - - Returns: - A BertPretrainerV2 model. - """ - bert_encoder = _create_bert_model(cfg) - pretrainer = models.BertPretrainerV2( - encoder_network=bert_encoder, - mlm_activation=tf_utils.get_activation(cfg.hidden_act), - mlm_initializer=tf.keras.initializers.TruncatedNormal( - stddev=cfg.initializer_range)) - # Makes sure the pretrainer variables are created. - _ = pretrainer(pretrainer.inputs) - return pretrainer - - -def convert_checkpoint(bert_config, - output_path, - v1_checkpoint, - checkpoint_model_name="model", - converted_model="encoder"): - """Converts a V1 checkpoint into an OO V2 checkpoint.""" - output_dir, _ = os.path.split(output_path) - tf.io.gfile.makedirs(output_dir) - - # Create a temporary V1 name-converted checkpoint in the output directory. 
- temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1") - temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt") - - tf1_checkpoint_converter_lib.convert( - checkpoint_from_path=v1_checkpoint, - checkpoint_to_path=temporary_checkpoint, - num_heads=bert_config.num_attention_heads, - name_replacements=tf1_checkpoint_converter_lib.BERT_V2_NAME_REPLACEMENTS, - permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS, - exclude_patterns=["adam", "Adam"]) - - if converted_model == "encoder": - model = _create_bert_model(bert_config) - elif converted_model == "pretrainer": - model = _create_bert_pretrainer_model(bert_config) - else: - raise ValueError("Unsupported converted_model: %s" % converted_model) - - # Create a V2 checkpoint from the temporary checkpoint. - tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint, - output_path, - checkpoint_model_name) - - # Clean up the temporary checkpoint, if it exists. - try: - tf.io.gfile.rmtree(temporary_checkpoint_dir) - except tf.errors.OpError: - # If it doesn't exist, we don't need to clean it up; continue. - pass - - -def main(argv): - if len(argv) > 1: - raise app.UsageError("Too many command-line arguments.") - - output_path = FLAGS.converted_checkpoint_path - v1_checkpoint = FLAGS.checkpoint_to_convert - checkpoint_model_name = FLAGS.checkpoint_model_name - converted_model = FLAGS.converted_model - bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - convert_checkpoint( - bert_config=bert_config, - output_path=output_path, - v1_checkpoint=v1_checkpoint, - checkpoint_model_name=checkpoint_model_name, - converted_model=converted_model) - - -if __name__ == "__main__": - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tokenization.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tokenization.py deleted file mode 100644 index 6cc777ce8240a0f7ea7364cd72db7af42ebe2dc7..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tokenization.py +++ /dev/null @@ -1,557 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# coding=utf-8 -"""Tokenization classes implementation. 
-
-The file is forked from:
-https://github.com/google-research/bert/blob/master/tokenization.py.
-"""
-
-import collections
-import re
-import unicodedata
-
-import six
-import tensorflow as tf
-
-import sentencepiece as spm
-
-SPIECE_UNDERLINE = "▁"
-
-
-def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
-  """Checks whether the casing config is consistent with the checkpoint name."""
-
-  # The casing has to be passed in by the user and there is no explicit check
-  # as to whether it matches the checkpoint. The casing information probably
-  # should have been stored in the bert_config.json file, but it's not, so
-  # we have to heuristically detect it to validate.
-
-  if not init_checkpoint:
-    return
-
-  m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
-  if m is None:
-    return
-
-  model_name = m.group(1)
-
-  lower_models = [
-      "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
-      "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
-  ]
-
-  cased_models = [
-      "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
-      "multi_cased_L-12_H-768_A-12"
-  ]
-
-  is_bad_config = False
-  if model_name in lower_models and not do_lower_case:
-    is_bad_config = True
-    actual_flag = "False"
-    case_name = "lowercased"
-    opposite_flag = "True"
-
-  if model_name in cased_models and do_lower_case:
-    is_bad_config = True
-    actual_flag = "True"
-    case_name = "cased"
-    opposite_flag = "False"
-
-  if is_bad_config:
-    raise ValueError(
-        "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
-        "However, `%s` seems to be a %s model, so you "
-        "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
-        "how the model was pre-trained. If this error is wrong, please "
-        "just comment out this check." %
-        (actual_flag, init_checkpoint, model_name, case_name, opposite_flag))
-
-
-def convert_to_unicode(text):
-  """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
-  if six.PY3:
-    if isinstance(text, str):
-      return text
-    elif isinstance(text, bytes):
-      return text.decode("utf-8", "ignore")
-    else:
-      raise ValueError("Unsupported string type: %s" % (type(text)))
-  elif six.PY2:
-    if isinstance(text, str):
-      return text.decode("utf-8", "ignore")
-    elif isinstance(text, unicode):
-      return text
-    else:
-      raise ValueError("Unsupported string type: %s" % (type(text)))
-  else:
-    raise ValueError("Not running on Python2 or Python 3?")
-
-
-def printable_text(text):
-  """Returns text encoded in a way suitable for print or `tf.logging`."""
-
-  # These functions want `str` for both Python2 and Python3, but in one case
-  # it's a Unicode string and in the other it's a byte string.
-  if six.PY3:
-    if isinstance(text, str):
-      return text
-    elif isinstance(text, bytes):
-      return text.decode("utf-8", "ignore")
-    else:
-      raise ValueError("Unsupported string type: %s" % (type(text)))
-  elif six.PY2:
-    if isinstance(text, str):
-      return text
-    elif isinstance(text, unicode):
-      return text.encode("utf-8")
-    else:
-      raise ValueError("Unsupported string type: %s" % (type(text)))
-  else:
-    raise ValueError("Not running on Python2 or Python 3?")
-
-
-def load_vocab(vocab_file):
-  """Loads a vocabulary file into a dictionary."""
-  vocab = collections.OrderedDict()
-  index = 0
-  with tf.io.gfile.GFile(vocab_file, "r") as reader:
-    while True:
-      token = convert_to_unicode(reader.readline())
-      if not token:
-        break
-      token = token.strip()
-      vocab[token] = index
-      index += 1
-  return vocab
-
-
-def convert_by_vocab(vocab, items):
-  """Converts a sequence of [tokens|ids] using the vocab."""
-  output = []
-  for item in items:
-    output.append(vocab[item])
-  return output
-
-
-def convert_tokens_to_ids(vocab, tokens):
-  return convert_by_vocab(vocab, tokens)
-
-
-def convert_ids_to_tokens(inv_vocab, ids):
-  return convert_by_vocab(inv_vocab, ids)
-
-
-def whitespace_tokenize(text):
-  """Runs basic whitespace cleaning and splitting on a piece of text."""
-  text = text.strip()
-  if not text:
-    return []
-  tokens = text.split()
-  return tokens
-
-
-class FullTokenizer(object):
-  """Runs end-to-end tokenization."""
-
-  def __init__(self, vocab_file, do_lower_case=True, split_on_punc=True):
-    self.vocab = load_vocab(vocab_file)
-    self.inv_vocab = {v: k for k, v in self.vocab.items()}
-    self.basic_tokenizer = BasicTokenizer(
-        do_lower_case=do_lower_case, split_on_punc=split_on_punc)
-    self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
-
-  def tokenize(self, text):
-    split_tokens = []
-    for token in self.basic_tokenizer.tokenize(text):
-      for sub_token in self.wordpiece_tokenizer.tokenize(token):
-        split_tokens.append(sub_token)
-
-    return split_tokens
-
-  def convert_tokens_to_ids(self, tokens):
-    return convert_by_vocab(self.vocab, tokens)
-
-  def convert_ids_to_tokens(self, ids):
-    return convert_by_vocab(self.inv_vocab, ids)
-
-
-class BasicTokenizer(object):
-  """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
-
-  def __init__(self, do_lower_case=True, split_on_punc=True):
-    """Constructs a BasicTokenizer.
-
-    Args:
-      do_lower_case: Whether to lower case the input.
-      split_on_punc: Whether to apply split on punctuations. By default BERT
-        starts a new token for punctuations. This makes detokenization difficult
-        for tasks like seq2seq decoding.
-    """
-    self.do_lower_case = do_lower_case
-    self.split_on_punc = split_on_punc
-
-  def tokenize(self, text):
-    """Tokenizes a piece of text."""
-    text = convert_to_unicode(text)
-    text = self._clean_text(text)
-
-    # This was added on November 1st, 2018 for the multilingual and Chinese
-    # models. This is also applied to the English models now, but it doesn't
-    # matter since the English models were not trained on any Chinese data
-    # and generally don't have any Chinese data in them (there are Chinese
-    # characters in the vocabulary because Wikipedia does have some Chinese
-    # words in the English Wikipedia.).
-    text = self._tokenize_chinese_chars(text)
-
-    orig_tokens = whitespace_tokenize(text)
-    split_tokens = []
-    for token in orig_tokens:
-      if self.do_lower_case:
-        token = token.lower()
-        token = self._run_strip_accents(token)
-      if self.split_on_punc:
-        split_tokens.extend(self._run_split_on_punc(token))
-      else:
-        split_tokens.append(token)
-
-    output_tokens = whitespace_tokenize(" ".join(split_tokens))
-    return output_tokens
-
-  def _run_strip_accents(self, text):
-    """Strips accents from a piece of text."""
-    text = unicodedata.normalize("NFD", text)
-    output = []
-    for char in text:
-      cat = unicodedata.category(char)
-      if cat == "Mn":
-        continue
-      output.append(char)
-    return "".join(output)
-
-  def _run_split_on_punc(self, text):
-    """Splits punctuation on a piece of text."""
-    chars = list(text)
-    i = 0
-    start_new_word = True
-    output = []
-    while i < len(chars):
-      char = chars[i]
-      if _is_punctuation(char):
-        output.append([char])
-        start_new_word = True
-      else:
-        if start_new_word:
-          output.append([])
-        start_new_word = False
-        output[-1].append(char)
-      i += 1
-
-    return ["".join(x) for x in output]
-
-  def _tokenize_chinese_chars(self, text):
-    """Adds whitespace around any CJK character."""
-    output = []
-    for char in text:
-      cp = ord(char)
-      if self._is_chinese_char(cp):
-        output.append(" ")
-        output.append(char)
-        output.append(" ")
-      else:
-        output.append(char)
-    return "".join(output)
-
-  def _is_chinese_char(self, cp):
-    """Checks whether CP is the codepoint of a CJK character."""
-    # This defines a "chinese character" as anything in the CJK Unicode block:
-    #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
-    #
-    # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
-    # despite its name. The modern Korean Hangul alphabet is a different block,
-    # as is Japanese Hiragana and Katakana. Those alphabets are used to write
-    # space-separated words, so they are not treated specially and handled
-    # like all of the other languages.
-    if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
-        (cp >= 0x3400 and cp <= 0x4DBF) or  #
-        (cp >= 0x20000 and cp <= 0x2A6DF) or  #
-        (cp >= 0x2A700 and cp <= 0x2B73F) or  #
-        (cp >= 0x2B740 and cp <= 0x2B81F) or  #
-        (cp >= 0x2B820 and cp <= 0x2CEAF) or
-        (cp >= 0xF900 and cp <= 0xFAFF) or  #
-        (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
-      return True
-
-    return False
-
-  def _clean_text(self, text):
-    """Performs invalid character removal and whitespace cleanup on text."""
-    output = []
-    for char in text:
-      cp = ord(char)
-      if cp == 0 or cp == 0xfffd or _is_control(char):
-        continue
-      if _is_whitespace(char):
-        output.append(" ")
-      else:
-        output.append(char)
-    return "".join(output)
-
-
-class WordpieceTokenizer(object):
-  """Runs WordPiece tokenization."""
-
-  def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=400):
-    self.vocab = vocab
-    self.unk_token = unk_token
-    self.max_input_chars_per_word = max_input_chars_per_word
-
-  def tokenize(self, text):
-    """Tokenizes a piece of text into its word pieces.
-
-    This uses a greedy longest-match-first algorithm to perform tokenization
-    using the given vocabulary.
-
-    For example:
-      input = "unaffable"
-      output = ["un", "##aff", "##able"]
-
-    Args:
-      text: A single token or whitespace separated tokens. This should have
-        already been passed through `BasicTokenizer`.
-
-    Returns:
-      A list of wordpiece tokens.
- """ - - text = convert_to_unicode(text) - - output_tokens = [] - for token in whitespace_tokenize(text): - chars = list(token) - if len(chars) > self.max_input_chars_per_word: - output_tokens.append(self.unk_token) - continue - - is_bad = False - start = 0 - sub_tokens = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start > 0: - substr = "##" + substr - if substr in self.vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - start = end - - if is_bad: - output_tokens.append(self.unk_token) - else: - output_tokens.extend(sub_tokens) - return output_tokens - - -def _is_whitespace(char): - """Checks whether `chars` is a whitespace character.""" - # \t, \n, and \r are technically control characters but we treat them - # as whitespace since they are generally considered as such. - if char == " " or char == "\t" or char == "\n" or char == "\r": - return True - cat = unicodedata.category(char) - if cat == "Zs": - return True - return False - - -def _is_control(char): - """Checks whether `chars` is a control character.""" - # These are technically control characters but we count them as whitespace - # characters. - if char == "\t" or char == "\n" or char == "\r": - return False - cat = unicodedata.category(char) - if cat in ("Cc", "Cf"): - return True - return False - - -def _is_punctuation(char): - """Checks whether `chars` is a punctuation character.""" - cp = ord(char) - # We treat all non-letter/number ASCII as punctuation. - # Characters such as "^", "$", and "`" are not in the Unicode - # Punctuation class but we treat them as punctuation anyways, for - # consistency. - if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or - (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): - return True - cat = unicodedata.category(char) - if cat.startswith("P"): - return True - return False - - -def preprocess_text(inputs, remove_space=True, lower=False): - """Preprocesses data by removing extra space and normalize data. - - This method is used together with sentence piece tokenizer and is forked from: - https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py - - Args: - inputs: The input text. - remove_space: Whether to remove the extra space. - lower: Whether to lowercase the text. - - Returns: - The preprocessed text. - - """ - outputs = inputs - if remove_space: - outputs = " ".join(inputs.strip().split()) - - if six.PY2 and isinstance(outputs, str): - try: - outputs = six.ensure_text(outputs, "utf-8") - except UnicodeDecodeError: - outputs = six.ensure_text(outputs, "latin-1") - - outputs = unicodedata.normalize("NFKD", outputs) - outputs = "".join([c for c in outputs if not unicodedata.combining(c)]) - if lower: - outputs = outputs.lower() - - return outputs - - -def encode_pieces(sp_model, text, sample=False): - """Segements text into pieces. - - This method is used together with sentence piece tokenizer and is forked from: - https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py - - - Args: - sp_model: A spm.SentencePieceProcessor object. - text: The input text to be segemented. - sample: Whether to randomly sample a segmentation output or return a - deterministic one. - - Returns: - A list of token pieces. 
- """ - if six.PY2 and isinstance(text, six.text_type): - text = six.ensure_binary(text, "utf-8") - - if not sample: - pieces = sp_model.EncodeAsPieces(text) - else: - pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1) - new_pieces = [] - for piece in pieces: - piece = printable_text(piece) - if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit(): - cur_pieces = sp_model.EncodeAsPieces(piece[:-1].replace( - SPIECE_UNDERLINE, "")) - if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE: - if len(cur_pieces[0]) == 1: - cur_pieces = cur_pieces[1:] - else: - cur_pieces[0] = cur_pieces[0][1:] - cur_pieces.append(piece[-1]) - new_pieces.extend(cur_pieces) - else: - new_pieces.append(piece) - - return new_pieces - - -def encode_ids(sp_model, text, sample=False): - """Segments text and return token ids. - - This method is used together with sentence piece tokenizer and is forked from: - https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py - - Args: - sp_model: A spm.SentencePieceProcessor object. - text: The input text to be segemented. - sample: Whether to randomly sample a segmentation output or return a - deterministic one. - - Returns: - A list of token ids. - """ - pieces = encode_pieces(sp_model, text, sample=sample) - ids = [sp_model.PieceToId(piece) for piece in pieces] - return ids - - -class FullSentencePieceTokenizer(object): - """Runs end-to-end sentence piece tokenization. - - The interface of this class is intended to keep the same as above - `FullTokenizer` class for easier usage. - """ - - def __init__(self, sp_model_file): - """Inits FullSentencePieceTokenizer. - - Args: - sp_model_file: The path to the sentence piece model file. - """ - self.sp_model = spm.SentencePieceProcessor() - self.sp_model.Load(sp_model_file) - self.vocab = { - self.sp_model.IdToPiece(i): i - for i in six.moves.range(self.sp_model.GetPieceSize()) - } - - def tokenize(self, text): - """Tokenizes text into pieces.""" - return encode_pieces(self.sp_model, text) - - def convert_tokens_to_ids(self, tokens): - """Converts a list of tokens to a list of ids.""" - return [self.sp_model.PieceToId(printable_text(token)) for token in tokens] - - def convert_ids_to_tokens(self, ids): - """Converts a list of ids ot a list of tokens.""" - return [self.sp_model.IdToPiece(id_) for id_ in ids] diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tokenization_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tokenization_test.py deleted file mode 100644 index 43fdf9854c077d592dd56cbe52ae8b57be4d0add..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/bert/tokenization_test.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import os
-import tempfile
-
-import six
-import tensorflow as tf
-
-from official.nlp.bert import tokenization
-
-
-class TokenizationTest(tf.test.TestCase):
-  """Tokenization test.
-
-  The implementation is forked from
-  https://github.com/google-research/bert/blob/master/tokenization_test.py.
-  """
-
-  def test_full_tokenizer(self):
-    vocab_tokens = [
-        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
-        "##ing", ","
-    ]
-    with tempfile.NamedTemporaryFile(delete=False) as vocab_writer:
-      if six.PY2:
-        vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
-      else:
-        vocab_writer.write("".join([x + "\n" for x in vocab_tokens
-                                   ]).encode("utf-8"))
-
-      vocab_file = vocab_writer.name
-
-    tokenizer = tokenization.FullTokenizer(vocab_file)
-    os.unlink(vocab_file)
-
-    tokens = tokenizer.tokenize(u"UNwant\u00E9d,running")
-    self.assertAllEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
-
-    self.assertAllEqual(
-        tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
-
-  def test_chinese(self):
-    tokenizer = tokenization.BasicTokenizer()
-
-    self.assertAllEqual(
-        tokenizer.tokenize(u"ah\u535A\u63A8zz"),
-        [u"ah", u"\u535A", u"\u63A8", u"zz"])
-
-  def test_basic_tokenizer_lower(self):
-    tokenizer = tokenization.BasicTokenizer(do_lower_case=True)
-
-    self.assertAllEqual(
-        tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
-        ["hello", "!", "how", "are", "you", "?"])
-    self.assertAllEqual(tokenizer.tokenize(u"H\u00E9llo"), ["hello"])
-
-  def test_basic_tokenizer_no_lower(self):
-    tokenizer = tokenization.BasicTokenizer(do_lower_case=False)
-
-    self.assertAllEqual(
-        tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
-        ["HeLLo", "!", "how", "Are", "yoU", "?"])
-
-  def test_basic_tokenizer_no_split_on_punc(self):
-    tokenizer = tokenization.BasicTokenizer(
-        do_lower_case=True, split_on_punc=False)
-
-    self.assertAllEqual(
-        tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
-        ["hello!how", "are", "you?"])
-
-  def test_wordpiece_tokenizer(self):
-    vocab_tokens = [
-        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
-        "##ing", "##!", "!"
- ] - - vocab = {} - for (i, token) in enumerate(vocab_tokens): - vocab[token] = i - tokenizer = tokenization.WordpieceTokenizer(vocab=vocab) - - self.assertAllEqual(tokenizer.tokenize(""), []) - - self.assertAllEqual( - tokenizer.tokenize("unwanted running"), - ["un", "##want", "##ed", "runn", "##ing"]) - - self.assertAllEqual( - tokenizer.tokenize("unwanted running !"), - ["un", "##want", "##ed", "runn", "##ing", "!"]) - - self.assertAllEqual( - tokenizer.tokenize("unwanted running!"), - ["un", "##want", "##ed", "runn", "##ing", "##!"]) - - self.assertAllEqual( - tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"]) - - def test_convert_tokens_to_ids(self): - vocab_tokens = [ - "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", - "##ing" - ] - - vocab = {} - for (i, token) in enumerate(vocab_tokens): - vocab[token] = i - - self.assertAllEqual( - tokenization.convert_tokens_to_ids( - vocab, ["un", "##want", "##ed", "runn", "##ing"]), [7, 4, 5, 8, 9]) - - def test_is_whitespace(self): - self.assertTrue(tokenization._is_whitespace(u" ")) - self.assertTrue(tokenization._is_whitespace(u"\t")) - self.assertTrue(tokenization._is_whitespace(u"\r")) - self.assertTrue(tokenization._is_whitespace(u"\n")) - self.assertTrue(tokenization._is_whitespace(u"\u00A0")) - - self.assertFalse(tokenization._is_whitespace(u"A")) - self.assertFalse(tokenization._is_whitespace(u"-")) - - def test_is_control(self): - self.assertTrue(tokenization._is_control(u"\u0005")) - - self.assertFalse(tokenization._is_control(u"A")) - self.assertFalse(tokenization._is_control(u" ")) - self.assertFalse(tokenization._is_control(u"\t")) - self.assertFalse(tokenization._is_control(u"\r")) - self.assertFalse(tokenization._is_control(u"\U0001F4A9")) - - def test_is_punctuation(self): - self.assertTrue(tokenization._is_punctuation(u"-")) - self.assertTrue(tokenization._is_punctuation(u"$")) - self.assertTrue(tokenization._is_punctuation(u"`")) - self.assertTrue(tokenization._is_punctuation(u".")) - - self.assertFalse(tokenization._is_punctuation(u"A")) - self.assertFalse(tokenization._is_punctuation(u" ")) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/__init__.py deleted file mode 100644 index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/bert.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/bert.py deleted file mode 100644 index 0dc63ca03eae0bb74570194a794bbbcf777e855d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/bert.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Multi-head BERT encoder network with classification heads. - -Includes configurations and instantiation methods. -""" -from typing import List, Optional, Text - -import dataclasses - -from official.modeling.hyperparams import base_config -from official.nlp.configs import encoders - - -@dataclasses.dataclass -class ClsHeadConfig(base_config.Config): - inner_dim: int = 0 - num_classes: int = 2 - activation: Optional[Text] = "tanh" - dropout_rate: float = 0.0 - cls_token_idx: int = 0 - name: Optional[Text] = None - - -@dataclasses.dataclass -class PretrainerConfig(base_config.Config): - """Pretrainer configuration.""" - encoder: encoders.EncoderConfig = encoders.EncoderConfig() - cls_heads: List[ClsHeadConfig] = dataclasses.field(default_factory=list) - mlm_activation: str = "gelu" - mlm_initializer_range: float = 0.02 diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/electra.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/electra.py deleted file mode 100644 index 10dd5fa08780c8fc208a99a0821f1d93429475f6..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/electra.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""ELECTRA model configurations and instantiation methods.""" -from typing import List - -import dataclasses - -from official.modeling.hyperparams import base_config -from official.nlp.configs import bert -from official.nlp.configs import encoders - - -@dataclasses.dataclass -class ElectraPretrainerConfig(base_config.Config): - """ELECTRA pretrainer configuration.""" - num_masked_tokens: int = 76 - sequence_length: int = 512 - num_classes: int = 2 - discriminator_loss_weight: float = 50.0 - tie_embeddings: bool = True - disallow_correct: bool = False - generator_encoder: encoders.EncoderConfig = encoders.EncoderConfig() - discriminator_encoder: encoders.EncoderConfig = encoders.EncoderConfig() - cls_heads: List[bert.ClsHeadConfig] = dataclasses.field(default_factory=list) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/encoders.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/encoders.py deleted file mode 100644 index f4d759c908363e7ad07038e25fe9bbeb86c0d1be..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/encoders.py +++ /dev/null @@ -1,446 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Transformer Encoders.
-
-Includes configurations and factory methods.
-"""
-from typing import Optional
-
-import dataclasses
-import gin
-import tensorflow as tf
-
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.nlp.modeling import layers
-from official.nlp.modeling import networks
-
-
-@dataclasses.dataclass
-class BertEncoderConfig(hyperparams.Config):
-  """BERT encoder configuration."""
-  vocab_size: int = 30522
-  hidden_size: int = 768
-  num_layers: int = 12
-  num_attention_heads: int = 12
-  hidden_activation: str = "gelu"
-  intermediate_size: int = 3072
-  dropout_rate: float = 0.1
-  attention_dropout_rate: float = 0.1
-  max_position_embeddings: int = 512
-  type_vocab_size: int = 2
-  initializer_range: float = 0.02
-  embedding_size: Optional[int] = None
-  output_range: Optional[int] = None
-  return_all_encoder_outputs: bool = False
-
-
-@dataclasses.dataclass
-class MobileBertEncoderConfig(hyperparams.Config):
-  """MobileBERT encoder configuration.
-
-  Attributes:
-    word_vocab_size: number of words in the vocabulary.
-    word_embed_size: word embedding size.
-    type_vocab_size: number of word types.
-    max_sequence_length: maximum length of the input sequence.
-    num_blocks: number of transformer blocks in the encoder model.
-    hidden_size: the hidden size for the transformer block.
-    num_attention_heads: number of attention heads in the transformer block.
-    intermediate_size: the size of the "intermediate" (a.k.a., feed forward)
-      layer.
-    hidden_activation: the non-linear activation function to apply to the
-      output of the intermediate/feed-forward layer.
-    hidden_dropout_prob: dropout probability for the hidden layers.
-    attention_probs_dropout_prob: dropout probability of the attention
-      probabilities.
-    intra_bottleneck_size: the size of the bottleneck.
-    initializer_range: the stddev of the truncated_normal_initializer for
-      initializing all weight matrices.
-    use_bottleneck_attention: whether to use attention inputs from the
-      bottleneck transformation. If True, the following
-      `key_query_shared_bottleneck` is ignored.
-    key_query_shared_bottleneck: whether to share the linear transformation
-      for keys and queries.
-    num_feedforward_networks: number of stacked feed-forward networks.
-    normalization_type: the type of normalization; only 'no_norm' and
-      'layer_norm' are supported. 'no_norm' represents the element-wise linear
-      transformation for the student model, as suggested by the original
-      MobileBERT paper. 'layer_norm' is used for the teacher model.
-    classifier_activation: whether to use the tanh activation for the final
-      representation of the [CLS] token in fine-tuning.
- """ - word_vocab_size: int = 30522 - word_embed_size: int = 128 - type_vocab_size: int = 2 - max_sequence_length: int = 512 - num_blocks: int = 24 - hidden_size: int = 512 - num_attention_heads: int = 4 - intermediate_size: int = 4096 - hidden_activation: str = "gelu" - hidden_dropout_prob: float = 0.1 - attention_probs_dropout_prob: float = 0.1 - intra_bottleneck_size: int = 1024 - initializer_range: float = 0.02 - use_bottleneck_attention: bool = False - key_query_shared_bottleneck: bool = False - num_feedforward_networks: int = 1 - normalization_type: str = "layer_norm" - classifier_activation: bool = True - input_mask_dtype: str = "int32" - - -@dataclasses.dataclass -class AlbertEncoderConfig(hyperparams.Config): - """ALBERT encoder configuration.""" - vocab_size: int = 30000 - embedding_width: int = 128 - hidden_size: int = 768 - num_layers: int = 12 - num_attention_heads: int = 12 - hidden_activation: str = "gelu" - intermediate_size: int = 3072 - dropout_rate: float = 0.0 - attention_dropout_rate: float = 0.0 - max_position_embeddings: int = 512 - type_vocab_size: int = 2 - initializer_range: float = 0.02 - - -@dataclasses.dataclass -class BigBirdEncoderConfig(hyperparams.Config): - """BigBird encoder configuration.""" - vocab_size: int = 50358 - hidden_size: int = 768 - num_layers: int = 12 - num_attention_heads: int = 12 - hidden_activation: str = "gelu" - intermediate_size: int = 3072 - dropout_rate: float = 0.1 - attention_dropout_rate: float = 0.1 - max_position_embeddings: int = 4096 - num_rand_blocks: int = 3 - block_size: int = 64 - type_vocab_size: int = 16 - initializer_range: float = 0.02 - embedding_width: Optional[int] = None - use_gradient_checkpointing: bool = False - - -@dataclasses.dataclass -class KernelEncoderConfig(hyperparams.Config): - """Linear encoder configuration.""" - vocab_size: int = 30522 - hidden_size: int = 768 - num_layers: int = 12 - num_attention_heads: int = 12 - hidden_activation: str = "gelu" - intermediate_size: int = 3072 - dropout_rate: float = 0.1 - attention_dropout_rate: float = 0.1 - max_position_embeddings: int = 512 - type_vocab_size: int = 2 - initializer_range: float = 0.02 - embedding_size: Optional[int] = None - feature_transform: str = "exp" - num_random_features: int = 256 - redraw: bool = False - is_short_seq: bool = False - begin_kernel: int = 0 - - -@dataclasses.dataclass -class XLNetEncoderConfig(hyperparams.Config): - """XLNet encoder configuration.""" - vocab_size: int = 32000 - num_layers: int = 24 - hidden_size: int = 1024 - num_attention_heads: int = 16 - head_size: int = 64 - inner_size: int = 4096 - inner_activation: str = "gelu" - dropout_rate: float = 0.1 - attention_dropout_rate: float = 0.1 - attention_type: str = "bi" - bi_data: bool = False - tie_attention_biases: bool = False - memory_length: int = 0 - same_length: bool = False - clamp_length: int = -1 - reuse_length: int = 0 - use_cls_mask: bool = False - embedding_width: int = 1024 - initializer_range: float = 0.02 - two_stream: bool = False - - -@dataclasses.dataclass -class EncoderConfig(hyperparams.OneOfConfig): - """Encoder configuration.""" - type: Optional[str] = "bert" - albert: AlbertEncoderConfig = AlbertEncoderConfig() - bert: BertEncoderConfig = BertEncoderConfig() - bigbird: BigBirdEncoderConfig = BigBirdEncoderConfig() - kernel: KernelEncoderConfig = KernelEncoderConfig() - mobilebert: MobileBertEncoderConfig = MobileBertEncoderConfig() - xlnet: XLNetEncoderConfig = XLNetEncoderConfig() - - -@gin.configurable -def build_encoder(config: 
EncoderConfig,
-                  embedding_layer: Optional[tf.keras.layers.Layer] = None,
-                  encoder_cls=None,
-                  bypass_config: bool = False):
-  """Instantiate a Transformer encoder network from EncoderConfig.
-
-  Args:
-    config: the one-of encoder config, which provides encoder parameters for
-      the chosen encoder.
-    embedding_layer: an external embedding layer passed to the encoder.
-    encoder_cls: an external encoder class not included in the supported
-      encoders, usually used by gin.configurable.
-    bypass_config: whether to ignore the config instance and create the object
-      directly with `encoder_cls`.
-
-  Returns:
-    An encoder instance.
-  """
-  if bypass_config:
-    return encoder_cls()
-  encoder_type = config.type
-  encoder_cfg = config.get()
-  if encoder_cls and encoder_cls.__name__ == "EncoderScaffold":
-    embedding_cfg = dict(
-        vocab_size=encoder_cfg.vocab_size,
-        type_vocab_size=encoder_cfg.type_vocab_size,
-        hidden_size=encoder_cfg.hidden_size,
-        max_seq_length=encoder_cfg.max_position_embeddings,
-        initializer=tf.keras.initializers.TruncatedNormal(
-            stddev=encoder_cfg.initializer_range),
-        dropout_rate=encoder_cfg.dropout_rate,
-    )
-    hidden_cfg = dict(
-        num_attention_heads=encoder_cfg.num_attention_heads,
-        intermediate_size=encoder_cfg.intermediate_size,
-        intermediate_activation=tf_utils.get_activation(
-            encoder_cfg.hidden_activation),
-        dropout_rate=encoder_cfg.dropout_rate,
-        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
-        kernel_initializer=tf.keras.initializers.TruncatedNormal(
-            stddev=encoder_cfg.initializer_range),
-    )
-    kwargs = dict(
-        embedding_cfg=embedding_cfg,
-        hidden_cfg=hidden_cfg,
-        num_hidden_instances=encoder_cfg.num_layers,
-        pooled_output_dim=encoder_cfg.hidden_size,
-        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
-            stddev=encoder_cfg.initializer_range),
-        return_all_layer_outputs=encoder_cfg.return_all_encoder_outputs,
-        dict_outputs=True)
-    return encoder_cls(**kwargs)
-
-  if encoder_type == "mobilebert":
-    return networks.MobileBERTEncoder(
-        word_vocab_size=encoder_cfg.word_vocab_size,
-        word_embed_size=encoder_cfg.word_embed_size,
-        type_vocab_size=encoder_cfg.type_vocab_size,
-        max_sequence_length=encoder_cfg.max_sequence_length,
-        num_blocks=encoder_cfg.num_blocks,
-        hidden_size=encoder_cfg.hidden_size,
-        num_attention_heads=encoder_cfg.num_attention_heads,
-        intermediate_size=encoder_cfg.intermediate_size,
-        intermediate_act_fn=encoder_cfg.hidden_activation,
-        hidden_dropout_prob=encoder_cfg.hidden_dropout_prob,
-        attention_probs_dropout_prob=encoder_cfg.attention_probs_dropout_prob,
-        intra_bottleneck_size=encoder_cfg.intra_bottleneck_size,
-        initializer_range=encoder_cfg.initializer_range,
-        use_bottleneck_attention=encoder_cfg.use_bottleneck_attention,
-        key_query_shared_bottleneck=encoder_cfg.key_query_shared_bottleneck,
-        num_feedforward_networks=encoder_cfg.num_feedforward_networks,
-        normalization_type=encoder_cfg.normalization_type,
-        classifier_activation=encoder_cfg.classifier_activation,
-        input_mask_dtype=encoder_cfg.input_mask_dtype)
-
-  if encoder_type == "albert":
-    return networks.AlbertEncoder(
-        vocab_size=encoder_cfg.vocab_size,
-        embedding_width=encoder_cfg.embedding_width,
-        hidden_size=encoder_cfg.hidden_size,
-        num_layers=encoder_cfg.num_layers,
-        num_attention_heads=encoder_cfg.num_attention_heads,
-        max_sequence_length=encoder_cfg.max_position_embeddings,
-        type_vocab_size=encoder_cfg.type_vocab_size,
-        intermediate_size=encoder_cfg.intermediate_size,
-        activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
-
dropout_rate=encoder_cfg.dropout_rate, - attention_dropout_rate=encoder_cfg.attention_dropout_rate, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - dict_outputs=True) - - if encoder_type == "bigbird": - # TODO(frederickliu): Support use_gradient_checkpointing. - if encoder_cfg.use_gradient_checkpointing: - raise ValueError("Gradient checkpointing unsupported at the moment.") - embedding_cfg = dict( - vocab_size=encoder_cfg.vocab_size, - type_vocab_size=encoder_cfg.type_vocab_size, - hidden_size=encoder_cfg.hidden_size, - max_seq_length=encoder_cfg.max_position_embeddings, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - dropout_rate=encoder_cfg.dropout_rate) - attention_cfg = dict( - num_heads=encoder_cfg.num_attention_heads, - key_dim=int(encoder_cfg.hidden_size // encoder_cfg.num_attention_heads), - kernel_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - max_rand_mask_length=encoder_cfg.max_position_embeddings, - num_rand_blocks=encoder_cfg.num_rand_blocks, - from_block_size=encoder_cfg.block_size, - to_block_size=encoder_cfg.block_size, - ) - hidden_cfg = dict( - num_attention_heads=encoder_cfg.num_attention_heads, - intermediate_size=encoder_cfg.intermediate_size, - intermediate_activation=tf_utils.get_activation( - encoder_cfg.hidden_activation), - dropout_rate=encoder_cfg.dropout_rate, - attention_dropout_rate=encoder_cfg.attention_dropout_rate, - kernel_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - attention_cls=layers.BigBirdAttention, - attention_cfg=attention_cfg) - kwargs = dict( - embedding_cfg=embedding_cfg, - hidden_cls=layers.TransformerScaffold, - hidden_cfg=hidden_cfg, - num_hidden_instances=encoder_cfg.num_layers, - mask_cls=layers.BigBirdMasks, - mask_cfg=dict(block_size=encoder_cfg.block_size), - pooled_output_dim=encoder_cfg.hidden_size, - pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - return_all_layer_outputs=False, - dict_outputs=True, - layer_idx_as_attention_seed=True) - return networks.EncoderScaffold(**kwargs) - - if encoder_type == "kernel": - embedding_cfg = dict( - vocab_size=encoder_cfg.vocab_size, - type_vocab_size=encoder_cfg.type_vocab_size, - hidden_size=encoder_cfg.hidden_size, - max_seq_length=encoder_cfg.max_position_embeddings, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - dropout_rate=encoder_cfg.dropout_rate) - attention_cfg = dict( - num_heads=encoder_cfg.num_attention_heads, - key_dim=int(encoder_cfg.hidden_size // encoder_cfg.num_attention_heads), - kernel_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - feature_transform=encoder_cfg.feature_transform, - num_random_features=encoder_cfg.num_random_features, - redraw=encoder_cfg.redraw, - is_short_seq=encoder_cfg.is_short_seq, - begin_kernel=encoder_cfg.begin_kernel, - ) - hidden_cfg = dict( - num_attention_heads=encoder_cfg.num_attention_heads, - intermediate_size=encoder_cfg.intermediate_size, - intermediate_activation=tf_utils.get_activation( - encoder_cfg.hidden_activation), - dropout_rate=encoder_cfg.dropout_rate, - attention_dropout_rate=encoder_cfg.attention_dropout_rate, - kernel_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - attention_cls=layers.KernelAttention, - attention_cfg=attention_cfg) - kwargs = 
dict( - embedding_cfg=embedding_cfg, - hidden_cls=layers.TransformerScaffold, - hidden_cfg=hidden_cfg, - num_hidden_instances=encoder_cfg.num_layers, - mask_cls=layers.KernelMask, - pooled_output_dim=encoder_cfg.hidden_size, - pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - return_all_layer_outputs=False, - dict_outputs=True, - layer_idx_as_attention_seed=True) - return networks.EncoderScaffold(**kwargs) - - if encoder_type == "xlnet": - return networks.XLNetBase( - vocab_size=encoder_cfg.vocab_size, - num_layers=encoder_cfg.num_layers, - hidden_size=encoder_cfg.hidden_size, - num_attention_heads=encoder_cfg.num_attention_heads, - head_size=encoder_cfg.head_size, - inner_size=encoder_cfg.inner_size, - dropout_rate=encoder_cfg.dropout_rate, - attention_dropout_rate=encoder_cfg.attention_dropout_rate, - attention_type=encoder_cfg.attention_type, - bi_data=encoder_cfg.bi_data, - two_stream=encoder_cfg.two_stream, - tie_attention_biases=encoder_cfg.tie_attention_biases, - memory_length=encoder_cfg.memory_length, - clamp_length=encoder_cfg.clamp_length, - reuse_length=encoder_cfg.reuse_length, - inner_activation=encoder_cfg.inner_activation, - use_cls_mask=encoder_cfg.use_cls_mask, - embedding_width=encoder_cfg.embedding_width, - initializer=tf.keras.initializers.RandomNormal( - stddev=encoder_cfg.initializer_range)) - - # Uses the default BERTEncoder configuration schema to create the encoder. - # If it does not match, please add a switch branch by the encoder type. - return networks.BertEncoder( - vocab_size=encoder_cfg.vocab_size, - hidden_size=encoder_cfg.hidden_size, - num_layers=encoder_cfg.num_layers, - num_attention_heads=encoder_cfg.num_attention_heads, - intermediate_size=encoder_cfg.intermediate_size, - activation=tf_utils.get_activation(encoder_cfg.hidden_activation), - dropout_rate=encoder_cfg.dropout_rate, - attention_dropout_rate=encoder_cfg.attention_dropout_rate, - max_sequence_length=encoder_cfg.max_position_embeddings, - type_vocab_size=encoder_cfg.type_vocab_size, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - output_range=encoder_cfg.output_range, - embedding_width=encoder_cfg.embedding_size, - embedding_layer=embedding_layer, - return_all_encoder_outputs=encoder_cfg.return_all_encoder_outputs, - dict_outputs=True) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/encoders_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/encoders_test.py deleted file mode 100644 index 89ba49c4474ee922a6bdf14e7bf4d0290f06621a..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/encoders_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
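A minimal sketch of how the one-of config above drives `build_encoder`: the `type` field selects which sub-config `config.get()` returns, and types without a dedicated branch fall through to the default `BertEncoder` construction at the end. Values here are illustrative:

from official.nlp.configs import encoders

config = encoders.EncoderConfig(
    type="albert",
    albert=encoders.AlbertEncoderConfig(num_layers=6))
# Dispatches on config.type and reads fields from config.get().
encoder = encoders.build_encoder(config)  # a networks.AlbertEncoder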
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Tests for official.nlp.configs.encoders."""
-import os
-
-import tensorflow as tf
-
-from official.modeling import hyperparams
-from official.nlp.configs import encoders
-
-
-class EncodersTest(tf.test.TestCase):
-
-  def test_encoder_from_yaml(self):
-    config = encoders.EncoderConfig(
-        type="bert", bert=encoders.BertEncoderConfig(num_layers=1))
-    encoder = encoders.build_encoder(config)
-    ckpt = tf.train.Checkpoint(encoder=encoder)
-    ckpt_path = ckpt.save(self.get_temp_dir() + "/ckpt")
-    params_save_path = os.path.join(self.get_temp_dir(), "params.yaml")
-    hyperparams.save_params_dict_to_yaml(config, params_save_path)
-
-    restored_cfg = encoders.EncoderConfig.from_yaml(params_save_path)
-    restored_encoder = encoders.build_encoder(restored_cfg)
-    status = tf.train.Checkpoint(encoder=restored_encoder).restore(ckpt_path)
-    status.assert_consumed()
-
-
-if __name__ == "__main__":
-  tf.test.main()
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiment_configs.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiment_configs.py
deleted file mode 100644
index 1185d17d329b5011bdf730c4c5d14360cb2e29e0..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiment_configs.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# - -"""Experiments definition.""" -# pylint: disable=unused-import -from official.nlp.configs import finetuning_experiments -from official.nlp.configs import pretraining_experiments -from official.nlp.configs import wmt_transformer_experiments diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiments/glue_mnli_matched.yaml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiments/glue_mnli_matched.yaml deleted file mode 100644 index 29dfcb68b9c314d309239c321dde4ec4f439da1d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiments/glue_mnli_matched.yaml +++ /dev/null @@ -1,49 +0,0 @@ -task: - hub_module_url: '' - model: - num_classes: 3 - init_checkpoint: '' - metric_type: 'accuracy' - train_data: - drop_remainder: true - global_batch_size: 32 - input_path: '' - is_training: true - seq_length: 128 - label_type: 'int' - validation_data: - drop_remainder: false - global_batch_size: 32 - input_path: '' - is_training: false - seq_length: 128 - label_type: 'int' -trainer: - checkpoint_interval: 3000 - optimizer_config: - learning_rate: - polynomial: - # 100% of train_steps. - decay_steps: 36813 - end_learning_rate: 0.0 - initial_learning_rate: 3.0e-05 - power: 1.0 - type: polynomial - optimizer: - type: adamw - warmup: - polynomial: - power: 1 - # ~10% of train_steps. - warmup_steps: 3681 - type: polynomial - steps_per_loop: 1000 - summary_interval: 1000 - # Training data size 392,702 examples, 3 epochs. - train_steps: 36813 - validation_interval: 6135 - # Eval data size = 9815 examples. - validation_steps: 307 - best_checkpoint_export_subdir: 'best_ckpt' - best_checkpoint_eval_metric: 'cls_accuracy' - best_checkpoint_metric_comp: 'higher' diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiments/squad_v1.yaml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiments/squad_v1.yaml deleted file mode 100644 index a69710a58f7dfa4e044bceb73c5870701ca39189..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/experiments/squad_v1.yaml +++ /dev/null @@ -1,50 +0,0 @@ -task: - hub_module_url: '' - max_answer_length: 30 - n_best_size: 20 - null_score_diff_threshold: 0.0 - init_checkpoint: '' - train_data: - drop_remainder: true - global_batch_size: 48 - input_path: '' - is_training: true - seq_length: 384 - validation_data: - do_lower_case: true - doc_stride: 128 - drop_remainder: false - global_batch_size: 48 - input_path: '' - is_training: false - query_length: 64 - seq_length: 384 - tokenization: WordPiece - version_2_with_negative: false - vocab_file: '' -trainer: - checkpoint_interval: 1000 - max_to_keep: 5 - optimizer_config: - learning_rate: - polynomial: - decay_steps: 3699 - end_learning_rate: 0.0 - initial_learning_rate: 8.0e-05 - power: 1.0 - type: polynomial - optimizer: - type: adamw - warmup: - polynomial: - power: 1 - warmup_steps: 370 - type: polynomial - steps_per_loop: 1000 - summary_interval: 1000 - train_steps: 3699 - validation_interval: 1000 - validation_steps: 226 - best_checkpoint_export_subdir: 'best_ckpt' - best_checkpoint_eval_metric: 'final_f1' - best_checkpoint_metric_comp: 'higher' diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/finetuning_experiments.py 
b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/finetuning_experiments.py deleted file mode 100644 index 6aef1fcc5f12d3aaf95ce2d072f969e3c445df4f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/finetuning_experiments.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Finetuning experiment configurations.""" -# pylint: disable=g-doc-return-or-yield,line-too-long -from official.core import config_definitions as cfg -from official.core import exp_factory -from official.modeling import optimization -from official.nlp.data import question_answering_dataloader -from official.nlp.data import sentence_prediction_dataloader -from official.nlp.data import tagging_dataloader -from official.nlp.tasks import question_answering -from official.nlp.tasks import sentence_prediction -from official.nlp.tasks import tagging - - -@exp_factory.register_config_factory('bert/sentence_prediction') -def bert_sentence_prediction() -> cfg.ExperimentConfig: - r"""BERT GLUE.""" - config = cfg.ExperimentConfig( - task=sentence_prediction.SentencePredictionConfig( - train_data=sentence_prediction_dataloader - .SentencePredictionDataConfig(), - validation_data=sentence_prediction_dataloader - .SentencePredictionDataConfig( - is_training=False, drop_remainder=False)), - trainer=cfg.TrainerConfig( - optimizer_config=optimization.OptimizationConfig({ - 'optimizer': { - 'type': 'adamw', - 'adamw': { - 'weight_decay_rate': - 0.01, - 'exclude_from_weight_decay': - ['LayerNorm', 'layer_norm', 'bias'], - } - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': { - 'initial_learning_rate': 3e-5, - 'end_learning_rate': 0.0, - } - }, - 'warmup': { - 'type': 'polynomial' - } - })), - restrictions=[ - 'task.train_data.is_training != None', - 'task.validation_data.is_training != None' - ]) - config.task.model.encoder.type = 'bert' - return config - - -@exp_factory.register_config_factory('bert/squad') -def bert_squad() -> cfg.ExperimentConfig: - """BERT Squad V1/V2.""" - config = cfg.ExperimentConfig( - task=question_answering.QuestionAnsweringConfig( - train_data=question_answering_dataloader.QADataConfig(), - 
validation_data=question_answering_dataloader.QADataConfig()), - trainer=cfg.TrainerConfig( - optimizer_config=optimization.OptimizationConfig({ - 'optimizer': { - 'type': 'adamw', - 'adamw': { - 'weight_decay_rate': - 0.01, - 'exclude_from_weight_decay': - ['LayerNorm', 'layer_norm', 'bias'], - } - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': { - 'initial_learning_rate': 8e-5, - 'end_learning_rate': 0.0, - } - }, - 'warmup': { - 'type': 'polynomial' - } - })), - restrictions=[ - 'task.train_data.is_training != None', - 'task.validation_data.is_training != None' - ]) - config.task.model.encoder.type = 'bert' - return config - - -@exp_factory.register_config_factory('bert/tagging') -def bert_tagging() -> cfg.ExperimentConfig: - """BERT tagging task.""" - config = cfg.ExperimentConfig( - task=tagging.TaggingConfig( - train_data=tagging_dataloader.TaggingDataConfig(), - validation_data=tagging_dataloader.TaggingDataConfig( - is_training=False, drop_remainder=False)), - trainer=cfg.TrainerConfig( - optimizer_config=optimization.OptimizationConfig({ - 'optimizer': { - 'type': 'adamw', - 'adamw': { - 'weight_decay_rate': - 0.01, - 'exclude_from_weight_decay': - ['LayerNorm', 'layer_norm', 'bias'], - } - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': { - 'initial_learning_rate': 8e-5, - 'end_learning_rate': 0.0, - } - }, - 'warmup': { - 'type': 'polynomial' - } - })), - restrictions=[ - 'task.train_data.is_training != None', - 'task.validation_data.is_training != None', - ]) - return config diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/models/bert_en_uncased_base.yaml b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/models/bert_en_uncased_base.yaml deleted file mode 100644 index 1e49bc5430ed0135aa6d981421aad623f4f1fac9..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/models/bert_en_uncased_base.yaml +++ /dev/null @@ -1,16 +0,0 @@ -task: - model: - encoder: - type: bert - bert: - attention_dropout_rate: 0.1 - dropout_rate: 0.1 - hidden_activation: gelu - hidden_size: 768 - initializer_range: 0.02 - intermediate_size: 3072 - max_position_embeddings: 512 - num_attention_heads: 12 - num_layers: 12 - type_vocab_size: 2 - vocab_size: 30522 diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/pretraining_experiments.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/pretraining_experiments.py deleted file mode 100644 index 7ef200989a5e773d6722d13aacd81c439ad9aebf..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/pretraining_experiments.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
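Because the factories above are registered by name with `exp_factory`, a training driver can look an experiment up and override fields before training. A minimal sketch, assuming the registrations in this file have been imported (for example via `official.common.registry_imports`); the data paths are hypothetical:

from official.core import exp_factory

config = exp_factory.get_exp_config('bert/squad')
# Point the task at training/eval data and set the schedule.
config.task.train_data.input_path = '/data/squad_train.tf_record'  # hypothetical path
config.task.validation_data.input_path = '/data/squad_eval.tf_record'  # hypothetical path
config.trainer.train_steps = 3699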
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Pretraining experiment configurations.""" -# pylint: disable=g-doc-return-or-yield,line-too-long -from official.core import config_definitions as cfg -from official.core import exp_factory -from official.modeling import optimization -from official.nlp.data import pretrain_dataloader -from official.nlp.data import pretrain_dynamic_dataloader -from official.nlp.tasks import masked_lm - -_TRAINER = cfg.TrainerConfig( - train_steps=1000000, - optimizer_config=optimization.OptimizationConfig({ - 'optimizer': { - 'type': 'adamw', - 'adamw': { - 'weight_decay_rate': - 0.01, - 'exclude_from_weight_decay': [ - 'LayerNorm', 'layer_norm', 'bias' - ], - } - }, - 'learning_rate': { - 'type': 'polynomial', - 'polynomial': { - 'initial_learning_rate': 1e-4, - 'end_learning_rate': 0.0, - } - }, - 'warmup': { - 'type': 'polynomial' - } - })) - - -@exp_factory.register_config_factory('bert/pretraining') -def bert_pretraining() -> cfg.ExperimentConfig: - """BERT pretraining experiment.""" - config = cfg.ExperimentConfig( - task=masked_lm.MaskedLMConfig( - train_data=pretrain_dataloader.BertPretrainDataConfig(), - validation_data=pretrain_dataloader.BertPretrainDataConfig( - is_training=False)), - trainer=_TRAINER, - restrictions=[ - 'task.train_data.is_training != None', - 'task.validation_data.is_training != None' - ]) - return config - - -@exp_factory.register_config_factory('bert/pretraining_dynamic') -def bert_dynamic() -> cfg.ExperimentConfig: - """BERT base with dynamic input sequences. - - TPU needs to run with tf.data service with round-robin behavior. - """ - config = cfg.ExperimentConfig( - task=masked_lm.MaskedLMConfig( - train_data=pretrain_dynamic_dataloader.BertPretrainDataConfig(), - validation_data=pretrain_dataloader.BertPretrainDataConfig( - is_training=False)), - trainer=_TRAINER, - restrictions=[ - 'task.train_data.is_training != None', - 'task.validation_data.is_training != None' - ]) - return config diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/wmt_transformer_experiments.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/wmt_transformer_experiments.py deleted file mode 100644 index bddce16bf2d3a72c039271283ae9debbcbd528b5..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/configs/wmt_transformer_experiments.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Lint as: python3 -# pylint: disable=g-doc-return-or-yield,line-too-long -"""WMT translation configurations.""" - -from official.core import config_definitions as cfg -from official.core import exp_factory -from official.modeling import optimization -from official.nlp.data import wmt_dataloader -from official.nlp.tasks import translation - - -@exp_factory.register_config_factory('wmt_transformer/large') -def wmt_transformer_large() -> cfg.ExperimentConfig: - """WMT Transformer Large. - - Please refer to - tensorflow_models/official/nlp/data/train_sentencepiece.py - to generate sentencepiece_model - and pass - --params_override=task.sentencepiece_model_path='YOUR_PATH' - to the train script. 
- """ - learning_rate = 2.0 - hidden_size = 1024 - learning_rate *= (hidden_size**-0.5) - warmup_steps = 16000 - train_steps = 300000 - token_batch_size = 24576 - encdecoder = translation.EncDecoder( - num_attention_heads=16, intermediate_size=hidden_size * 4) - config = cfg.ExperimentConfig( - task=translation.TranslationConfig( - model=translation.ModelConfig( - encoder=encdecoder, - decoder=encdecoder, - embedding_width=hidden_size, - padded_decode=True, - decode_max_length=100), - train_data=wmt_dataloader.WMTDataConfig( - tfds_name='wmt14_translate/de-en', - tfds_split='train', - src_lang='en', - tgt_lang='de', - is_training=True, - global_batch_size=token_batch_size, - static_batch=True, - max_seq_length=64 - ), - validation_data=wmt_dataloader.WMTDataConfig( - tfds_name='wmt14_translate/de-en', - tfds_split='test', - src_lang='en', - tgt_lang='de', - is_training=False, - global_batch_size=32, - static_batch=True, - max_seq_length=100, - ), - sentencepiece_model_path=None, - ), - trainer=cfg.TrainerConfig( - train_steps=train_steps, - validation_steps=-1, - steps_per_loop=1000, - summary_interval=1000, - checkpoint_interval=5000, - validation_interval=5000, - max_to_keep=1, - optimizer_config=optimization.OptimizationConfig({ - 'optimizer': { - 'type': 'adam', - 'adam': { - 'beta_2': 0.997, - 'epsilon': 1e-9, - }, - }, - 'learning_rate': { - 'type': 'power', - 'power': { - 'initial_learning_rate': learning_rate, - 'power': -0.5, - } - }, - 'warmup': { - 'type': 'linear', - 'linear': { - 'warmup_steps': warmup_steps, - 'warmup_learning_rate': 0.0 - } - } - })), - restrictions=[ - 'task.train_data.is_training != None', - 'task.sentencepiece_model_path != None', - ]) - return config diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/continuous_finetune_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/continuous_finetune_lib.py deleted file mode 100644 index 5274e4b720110cf8116224c564b2d80b3790b2b8..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/continuous_finetune_lib.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-"""TFM continuous finetuning+eval training driver library."""
-import gc
-import os
-import time
-from typing import Any, Mapping, Optional
-
-from absl import logging
-import tensorflow as tf
-
-from official.common import distribute_utils
-from official.core import config_definitions
-from official.core import task_factory
-from official.core import train_lib
-from official.core import train_utils
-from official.modeling import performance
-from official.modeling.multitask import configs
-from official.modeling.multitask import multitask
-from official.modeling.multitask import train_lib as multitask_train_lib
-
-
-def _flatten_dict(xs):
-  """Flatten a nested dictionary.
-
-  The nested keys are flattened to a tuple.
-
-  Example::
-
-    xs = {'foo': 1, 'bar': {'a': 2, 'b': {}}}
-    flat_xs = flatten_dict(xs)
-    print(flat_xs)
-    # {
-    #   ('foo',): 1,
-    #   ('bar', 'a'): 2,
-    # }
-
-  Note that empty dictionaries are ignored and
-  will not be restored by `unflatten_dict`.
-
-  Args:
-    xs: a nested dictionary
-
-  Returns:
-    The flattened dictionary.
-  """
-  assert isinstance(xs, dict), 'input is not a dict'
-
-  def _flatten(xs, prefix):
-    if not isinstance(xs, dict):
-      return {prefix: xs}
-    result = {}
-    for key, value in xs.items():
-      path = prefix + (key,)
-      result.update(_flatten(value, path))
-    return result
-
-  return _flatten(xs, ())
-
-
-def run_continuous_finetune(
-    mode: str,
-    params: config_definitions.ExperimentConfig,
-    model_dir: str,
-    run_post_eval: bool = False,
-    pretrain_steps: Optional[int] = None,
-) -> Mapping[str, Any]:
-  """Run modes with continuous training.
-
-  Currently only supports continuous_train_and_eval.
-
-  Args:
-    mode: A 'str', specifying the mode. continuous_train_and_eval monitors a
-      checkpoint directory. Once a new checkpoint is discovered, it loads the
-      checkpoint, finetunes the model by training it (possibly on another
-      dataset or with another task), then evaluates the finetuned model.
-    params: ExperimentConfig instance.
-    model_dir: A 'str', a path to store model checkpoints and summaries.
-    run_post_eval: Whether to run post eval once after training; metrics logs
-      are returned.
-    pretrain_steps: Optional, the number of total training steps for the
-      pretraining job.
-
-  Returns:
-    eval logs: returns eval metrics logs when run_post_eval is set to True,
-    otherwise, returns {}.
-  """
-
-  assert mode == 'continuous_train_and_eval', (
-      'Only continuous_train_and_eval is supported by continuous_finetune. '
-      'Got mode: {}'.format(mode))
-
-  # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16'
-  # can have significant impact on model speeds by utilizing float16 in case of
-  # GPUs, and bfloat16 in the case of TPUs. loss_scale takes effect only when
-  # dtype is float16.
-  if params.runtime.mixed_precision_dtype:
-    performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype)
-  distribution_strategy = distribute_utils.get_distribution_strategy(
-      distribution_strategy=params.runtime.distribution_strategy,
-      all_reduce_alg=params.runtime.all_reduce_alg,
-      num_gpus=params.runtime.num_gpus,
-      tpu_address=params.runtime.tpu)
-
-  retry_times = 0
-  while not tf.io.gfile.isdir(params.task.init_checkpoint):
-    # Wait for the init_checkpoint directory to be created.
- if retry_times >= 60: - raise ValueError( - 'ExperimentConfig.task.init_checkpoint must be a directory for ' - 'continuous_train_and_eval mode.') - retry_times += 1 - time.sleep(60) - - summary_writer = tf.summary.create_file_writer( - os.path.join(model_dir, 'eval')) - - global_step = 0 - - def timeout_fn(): - if pretrain_steps and global_step < pretrain_steps: - # Keeps waiting for another timeout period. - logging.info( - 'Continue waiting for new checkpoint as current pretrain ' - 'global_step=%d and target is %d.', global_step, pretrain_steps) - return False - # Quits the loop. - return True - - for pretrain_ckpt in tf.train.checkpoints_iterator( - checkpoint_dir=params.task.init_checkpoint, - min_interval_secs=10, - timeout=params.trainer.continuous_eval_timeout, - timeout_fn=timeout_fn): - - # If there are checkpoints, they might be the finetune checkpoint of a - # different pretrained checkpoint. So we just remove all checkpoints. - train_utils.remove_ckpts(model_dir) - - with distribution_strategy.scope(): - global_step = train_utils.read_global_step_from_checkpoint(pretrain_ckpt) - # Replaces params.task.init_checkpoint to make sure that we load - # exactly this pretrain checkpoint. - if params.trainer.best_checkpoint_export_subdir: - best_ckpt_subdir = '{}_{}'.format( - params.trainer.best_checkpoint_export_subdir, global_step) - params_replaced = params.replace( - task={'init_checkpoint': pretrain_ckpt}, - trainer={'best_checkpoint_export_subdir': best_ckpt_subdir}) - else: - params_replaced = params.replace(task={'init_checkpoint': pretrain_ckpt}) - params_replaced.lock() - logging.info('Running finetuning with params: %s', params_replaced) - - with distribution_strategy.scope(): - if isinstance(params, configs.MultiEvalExperimentConfig): - task = task_factory.get_task(params_replaced.task) - eval_tasks = multitask.MultiTask.from_config(params_replaced.eval_tasks) - (_, - eval_metrics) = multitask_train_lib.run_experiment_with_multitask_eval( - distribution_strategy=distribution_strategy, - train_task=task, - eval_tasks=eval_tasks, - mode='train_and_eval', - params=params_replaced, - model_dir=model_dir, - run_post_eval=True, - save_summary=False) - else: - task = task_factory.get_task( - params_replaced.task, logging_dir=model_dir) - _, eval_metrics = train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode='train_and_eval', - params=params_replaced, - model_dir=model_dir, - run_post_eval=True, - save_summary=False) - logging.info('Evaluation finished. Pretrain global_step: %d', global_step) - train_utils.write_json_summary(model_dir, global_step, eval_metrics) - - if not os.path.basename(model_dir): # if model_dir.endswith('/') - summary_grp = os.path.dirname(model_dir) + '_' + task.name - else: - summary_grp = os.path.basename(model_dir) + '_' + task.name - summaries = {} - for name, value in _flatten_dict(eval_metrics).items(): - summaries[summary_grp + '/' + '-'.join(name)] = value - train_utils.write_summary(summary_writer, global_step, summaries) - - train_utils.remove_ckpts(model_dir) - # In TF2, the resource life cycle is bound with the python object life - # cycle. Force trigger python garbage collection here so those resources - # can be deallocated in time, so it doesn't cause OOM when allocating new - # objects. - # TODO(b/169178664): Fix cycle reference in Keras model and revisit to see - # if we need gc here. 
- gc.collect() - - if run_post_eval: - return eval_metrics - return {} diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/continuous_finetune_lib_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/continuous_finetune_lib_test.py deleted file mode 100644 index f43902f687c141a6789a8b6edd97ea3fb973616e..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/continuous_finetune_lib_test.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os - -from absl import flags -from absl.testing import flagsaver -from absl.testing import parameterized -import tensorflow as tf - -# pylint: disable=unused-import -from official.common import registry_imports -# pylint: enable=unused-import -from official.common import flags as tfm_flags -from official.core import task_factory -from official.core import train_lib -from official.core import train_utils -from official.nlp import continuous_finetune_lib - -FLAGS = flags.FLAGS - -tfm_flags.define_flags() - - -class ContinuousFinetuneTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super().setUp() - self._model_dir = os.path.join(self.get_temp_dir(), 'model_dir') - - def testContinuousFinetune(self): - pretrain_steps = 1 - src_model_dir = self.get_temp_dir() - flags_dict = dict( - experiment='mock', - mode='continuous_train_and_eval', - model_dir=self._model_dir, - params_override={ - 'task': { - 'init_checkpoint': src_model_dir, - }, - 'trainer': { - 'continuous_eval_timeout': 1, - 'steps_per_loop': 1, - 'train_steps': 1, - 'validation_steps': 1, - 'best_checkpoint_export_subdir': 'best_ckpt', - 'best_checkpoint_eval_metric': 'acc', - 'optimizer_config': { - 'optimizer': { - 'type': 'sgd' - }, - 'learning_rate': { - 'type': 'constant' - } - } - } - }) - - with flagsaver.flagsaver(**flags_dict): - # Train and save some checkpoints. 
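A minimal sketch of invoking the driver above, assuming `params` is a parsed ExperimentConfig whose `task.init_checkpoint` points at the pretraining job's checkpoint directory; the model_dir path and step count are hypothetical:

from official.nlp import continuous_finetune_lib

eval_metrics = continuous_finetune_lib.run_continuous_finetune(
    mode='continuous_train_and_eval',
    params=params,                        # parsed ExperimentConfig (assumed)
    model_dir='/tmp/finetune_model_dir',  # hypothetical
    run_post_eval=True,
    pretrain_steps=1000000)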
- params = train_utils.parse_configuration(flags.FLAGS) - distribution_strategy = tf.distribute.get_strategy() - with distribution_strategy.scope(): - task = task_factory.get_task(params.task, logging_dir=src_model_dir) - _ = train_lib.run_experiment( - distribution_strategy=distribution_strategy, - task=task, - mode='train', - params=params, - model_dir=src_model_dir) - - params = train_utils.parse_configuration(FLAGS) - eval_metrics = continuous_finetune_lib.run_continuous_finetune( - FLAGS.mode, - params, - FLAGS.model_dir, - run_post_eval=True, - pretrain_steps=pretrain_steps) - self.assertIn('best_acc', eval_metrics) - - self.assertFalse( - tf.io.gfile.exists(os.path.join(FLAGS.model_dir, 'checkpoint'))) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/__init__.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/__init__.py deleted file mode 100644 index a11b1ff79e891e0fcee5bf824718e75d9103e28f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/classifier_data_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/classifier_data_lib.py deleted file mode 100644 index 6936b7eb3a8740e7c48e5e82791f054c6acd48b7..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/classifier_data_lib.py +++ /dev/null @@ -1,1528 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""BERT library to process data for classification tasks."""
-
-import collections
-import csv
-import importlib
-import json
-import os
-
-from absl import logging
-import tensorflow as tf
-import tensorflow_datasets as tfds
-
-from official.nlp.bert import tokenization
-
-
-class InputExample(object):
-  """A single training/test example for simple seq regression/classification."""
-
-  def __init__(self,
-               guid,
-               text_a,
-               text_b=None,
-               label=None,
-               weight=None,
-               example_id=None):
-    """Constructs an InputExample.
-
-    Args:
-      guid: Unique id for the example.
-      text_a: string. The untokenized text of the first sequence. For single
-        sequence tasks, only this sequence must be specified.
-      text_b: (Optional) string. The untokenized text of the second sequence.
-        Must only be specified for sequence pair tasks.
-      label: (Optional) string for classification, float for regression. The
-        label of the example. This should be specified for train and dev
-        examples, but not for test examples.
-      weight: (Optional) float. The weight of the example to be used during
-        training.
-      example_id: (Optional) int. The int identification number of the
-        example in the corpus.
- """ - self.guid = guid - self.text_a = text_a - self.text_b = text_b - self.label = label - self.weight = weight - self.example_id = example_id - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - input_ids, - input_mask, - segment_ids, - label_id, - is_real_example=True, - weight=None, - example_id=None): - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.label_id = label_id - self.is_real_example = is_real_example - self.weight = weight - self.example_id = example_id - - -class DataProcessor(object): - """Base class for converters for seq regression/classification datasets.""" - - def __init__(self, process_text_fn=tokenization.convert_to_unicode): - self.process_text_fn = process_text_fn - self.is_regression = False - self.label_type = None - - def get_train_examples(self, data_dir): - """Gets a collection of `InputExample`s for the train set.""" - raise NotImplementedError() - - def get_dev_examples(self, data_dir): - """Gets a collection of `InputExample`s for the dev set.""" - raise NotImplementedError() - - def get_test_examples(self, data_dir): - """Gets a collection of `InputExample`s for prediction.""" - raise NotImplementedError() - - def get_labels(self): - """Gets the list of labels for this data set.""" - raise NotImplementedError() - - @staticmethod - def get_processor_name(): - """Gets the string identifier of the processor.""" - raise NotImplementedError() - - @classmethod - def _read_tsv(cls, input_file, quotechar=None): - """Reads a tab separated value file.""" - with tf.io.gfile.GFile(input_file, "r") as f: - reader = csv.reader(f, delimiter="\t", quotechar=quotechar) - lines = [] - for line in reader: - lines.append(line) - return lines - - @classmethod - def _read_jsonl(cls, input_file): - """Reads a json line file.""" - with tf.io.gfile.GFile(input_file, "r") as f: - lines = [] - for json_str in f: - lines.append(json.loads(json_str)) - return lines - - -class AxProcessor(DataProcessor): - """Processor for the AX dataset (GLUE diagnostics dataset).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") - - def get_labels(self): - """See base class.""" - return ["contradiction", "entailment", "neutral"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "AX" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - text_a_index = 1 if set_type == "test" else 8 - text_b_index = 2 if set_type == "test" else 9 - examples = [] - for i, line in enumerate(lines): - # Skip header. 
- if i == 0: - continue - guid = "%s-%s" % (set_type, self.process_text_fn(line[0])) - text_a = self.process_text_fn(line[text_a_index]) - text_b = self.process_text_fn(line[text_b_index]) - if set_type == "test": - label = "contradiction" - else: - label = self.process_text_fn(line[-1]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class ColaProcessor(DataProcessor): - """Processor for the CoLA data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "COLA" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - # Only the test set has a header. - if set_type == "test" and i == 0: - continue - guid = "%s-%s" % (set_type, i) - if set_type == "test": - text_a = self.process_text_fn(line[1]) - label = "0" - else: - text_a = self.process_text_fn(line[3]) - label = self.process_text_fn(line[1]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) - return examples - - -class ImdbProcessor(DataProcessor): - """Processor for the IMDb dataset.""" - - def get_labels(self): - return ["neg", "pos"] - - def get_train_examples(self, data_dir): - return self._create_examples(os.path.join(data_dir, "train")) - - def get_dev_examples(self, data_dir): - return self._create_examples(os.path.join(data_dir, "test")) - - @staticmethod - def get_processor_name(): - """See base class.""" - return "IMDB" - - def _create_examples(self, data_dir): - """Creates examples.""" - examples = [] - for label in ["neg", "pos"]: - cur_dir = os.path.join(data_dir, label) - for filename in tf.io.gfile.listdir(cur_dir): - if not filename.endswith("txt"): - continue - - if len(examples) % 1000 == 0: - logging.info("Loading dev example %d", len(examples)) - - path = os.path.join(cur_dir, filename) - with tf.io.gfile.GFile(path, "r") as f: - text = f.read().strip().replace("
", " ") - examples.append( - InputExample( - guid="unused_id", text_a=text, text_b=None, label=label)) - return examples - - -class MnliProcessor(DataProcessor): - """Processor for the MultiNLI data set (GLUE version).""" - - def __init__(self, - mnli_type="matched", - process_text_fn=tokenization.convert_to_unicode): - super(MnliProcessor, self).__init__(process_text_fn) - if mnli_type not in ("matched", "mismatched"): - raise ValueError("Invalid `mnli_type`: %s" % mnli_type) - self.mnli_type = mnli_type - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - if self.mnli_type == "matched": - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")), - "dev_matched") - else: - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")), - "dev_mismatched") - - def get_test_examples(self, data_dir): - """See base class.""" - if self.mnli_type == "matched": - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test") - else: - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test_mismatched.tsv")), "test") - - def get_labels(self): - """See base class.""" - return ["contradiction", "entailment", "neutral"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "MNLI" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, self.process_text_fn(line[0])) - text_a = self.process_text_fn(line[8]) - text_b = self.process_text_fn(line[9]) - if set_type == "test": - label = "contradiction" - else: - label = self.process_text_fn(line[-1]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class MrpcProcessor(DataProcessor): - """Processor for the MRPC data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "MRPC" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = self.process_text_fn(line[3]) - text_b = self.process_text_fn(line[4]) - if set_type == "test": - label = "0" - else: - label = self.process_text_fn(line[0]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class PawsxProcessor(DataProcessor): - """Processor for the PAWS-X data set.""" - supported_languages = ["de", "en", "es", "fr", "ja", "ko", "zh"] - - def __init__(self, - language="en", - process_text_fn=tokenization.convert_to_unicode): - super(PawsxProcessor, 
 self).__init__(process_text_fn)
-    if language == "all":
-      self.languages = PawsxProcessor.supported_languages
-    elif language not in PawsxProcessor.supported_languages:
-      raise ValueError("language %s is not supported for PAWS-X task." %
-                       language)
-    else:
-      self.languages = [language]
-
-  def get_train_examples(self, data_dir):
-    """See base class."""
-    lines = []
-    for language in self.languages:
-      if language == "en":
-        train_tsv = "train.tsv"
-      else:
-        train_tsv = "translated_train.tsv"
-      # Skips the header.
-      lines.extend(
-          self._read_tsv(os.path.join(data_dir, language, train_tsv))[1:])
-
-    examples = []
-    for i, line in enumerate(lines):
-      guid = "train-%d" % i
-      text_a = self.process_text_fn(line[1])
-      text_b = self.process_text_fn(line[2])
-      label = self.process_text_fn(line[3])
-      examples.append(
-          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
-    return examples
-
-  def get_dev_examples(self, data_dir):
-    """See base class."""
-    lines = []
-    for lang in PawsxProcessor.supported_languages:
-      lines.extend(
-          self._read_tsv(os.path.join(data_dir, lang, "dev_2k.tsv"))[1:])
-
-    examples = []
-    for i, line in enumerate(lines):
-      guid = "dev-%d" % i
-      text_a = self.process_text_fn(line[1])
-      text_b = self.process_text_fn(line[2])
-      label = self.process_text_fn(line[3])
-      examples.append(
-          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
-    return examples
-
-  def get_test_examples(self, data_dir):
-    """See base class."""
-    examples_by_lang = {k: [] for k in self.supported_languages}
-    for lang in self.supported_languages:
-      lines = self._read_tsv(os.path.join(data_dir, lang, "test_2k.tsv"))[1:]
-      for i, line in enumerate(lines):
-        guid = "test-%d" % i
-        text_a = self.process_text_fn(line[1])
-        text_b = self.process_text_fn(line[2])
-        label = self.process_text_fn(line[3])
-        examples_by_lang[lang].append(
-            InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
-    return examples_by_lang
-
-  def get_labels(self):
-    """See base class."""
-    return ["0", "1"]
-
-  @staticmethod
-  def get_processor_name():
-    """See base class."""
-    return "XTREME-PAWS-X"
-
-
-class QnliProcessor(DataProcessor):
-  """Processor for the QNLI data set (GLUE version)."""
-
-  def get_train_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
-
-  def get_dev_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev_matched")
-
-  def get_test_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
-
-  def get_labels(self):
-    """See base class."""
-    return ["entailment", "not_entailment"]
-
-  @staticmethod
-  def get_processor_name():
-    """See base class."""
-    return "QNLI"
-
-  def _create_examples(self, lines, set_type):
-    """Creates examples for the training/dev/test sets."""
-    examples = []
-    for i, line in enumerate(lines):
-      if i == 0:
-        continue
-      guid = "%s-%s" % (set_type, i)
-      if set_type == "test":
-        text_a = tokenization.convert_to_unicode(line[1])
-        text_b = tokenization.convert_to_unicode(line[2])
-        label = "entailment"
-      else:
-        text_a = tokenization.convert_to_unicode(line[1])
-        text_b = tokenization.convert_to_unicode(line[2])
-        label = tokenization.convert_to_unicode(line[-1])
-      examples.append(
-          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
-    return examples
-
-
-class QqpProcessor(DataProcessor):
-  """Processor for the QQP data set (GLUE version)."""
-
-  def get_train_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
-
-  def get_dev_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
-
-  def get_test_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
-
-  def get_labels(self):
-    """See base class."""
-    return ["0", "1"]
-
-  @staticmethod
-  def get_processor_name():
-    """See base class."""
-    return "QQP"
-
-  def _create_examples(self, lines, set_type):
-    """Creates examples for the training/dev/test sets."""
-    examples = []
-    for i, line in enumerate(lines):
-      if i == 0:
-        continue
-      guid = "%s-%s" % (set_type, line[0])
-      if set_type == "test":
-        text_a = line[1]
-        text_b = line[2]
-        label = "0"
-      else:
-        # There appear to be some garbage lines in the train dataset.
-        try:
-          text_a = line[3]
-          text_b = line[4]
-          label = line[5]
-        except IndexError:
-          continue
-      examples.append(
-          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
-    return examples
-
-
-class RteProcessor(DataProcessor):
-  """Processor for the RTE data set (GLUE version)."""
-
-  def get_train_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
-
-  def get_dev_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
-
-  def get_test_examples(self, data_dir):
-    """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
-
-  def get_labels(self):
-    """See base class."""
-    # All datasets are converted to 2-class split, where for 3-class datasets we
-    # collapse neutral and contradiction into not_entailment.
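-    # (GLUE RTE is itself a 2-class task, so its labels pass through as-is.)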
- return ["entailment", "not_entailment"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "RTE" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = tokenization.convert_to_unicode(line[1]) - text_b = tokenization.convert_to_unicode(line[2]) - if set_type == "test": - label = "entailment" - else: - label = tokenization.convert_to_unicode(line[3]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class SstProcessor(DataProcessor): - """Processor for the SST-2 data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "SST-2" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - if set_type == "test": - text_a = tokenization.convert_to_unicode(line[1]) - label = "0" - else: - text_a = tokenization.convert_to_unicode(line[0]) - label = tokenization.convert_to_unicode(line[1]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) - return examples - - -class StsBProcessor(DataProcessor): - """Processor for the STS-B data set (GLUE version).""" - - def __init__(self, process_text_fn=tokenization.convert_to_unicode): - super(StsBProcessor, self).__init__(process_text_fn=process_text_fn) - self.is_regression = True - self.label_type = float - self._labels = None - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") - - def get_labels(self): - """See base class.""" - return self._labels - - @staticmethod - def get_processor_name(): - """See base class.""" - return "STS-B" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = tokenization.convert_to_unicode(line[7]) - text_b = tokenization.convert_to_unicode(line[8]) - if set_type == "test": - label = 0.0 - else: - label = self.label_type(tokenization.convert_to_unicode(line[9])) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class TfdsProcessor(DataProcessor): - """Processor for generic text classification and regression TFDS data set. 
-
-  The TFDS parameters are expected to be provided in the tfds_params string,
-  in a comma-separated list of parameter assignments.
-  Examples:
-    tfds_params="dataset=scicite,text_key=string"
-    tfds_params="dataset=imdb_reviews,test_split=,dev_split=test"
-    tfds_params="dataset=glue/cola,text_key=sentence"
-    tfds_params="dataset=glue/sst2,text_key=sentence"
-    tfds_params="dataset=glue/qnli,text_key=question,text_b_key=sentence"
-    tfds_params="dataset=glue/mrpc,text_key=sentence1,text_b_key=sentence2"
-    tfds_params="dataset=glue/stsb,text_key=sentence1,text_b_key=sentence2,"
-                "is_regression=true,label_type=float"
-    tfds_params="dataset=snli,text_key=premise,text_b_key=hypothesis,"
-                "skip_label=-1"
-  Possible parameters (please refer to the documentation of TensorFlow
-  Datasets (TFDS) for the meaning of individual parameters):
-    dataset: Required dataset name (potentially with subset and version
-      number).
-    data_dir: Optional TFDS source root directory.
-    module_import: Optional Dataset module to import.
-    train_split: Name of the train split (defaults to `train`).
-    dev_split: Name of the dev split (defaults to `validation`).
-    test_split: Name of the test split (defaults to `test`).
-    text_key: Key of the text_a feature (defaults to `text`).
-    text_b_key: Key of the second text feature if available.
-    label_key: Key of the label feature (defaults to `label`).
-    test_text_key: Key of the text feature to use in the test set.
-    test_text_b_key: Key of the second text feature to use in the test set.
-    test_label: String to be used as the label for all test examples.
-    label_type: Type of the label key (defaults to `int`).
-    weight_key: Key of the float sample weight (not used if not provided).
-    is_regression: Whether the task is a regression problem (defaults to
-      False).
-    skip_label: Skip examples with the given label (defaults to None).
-  """
-
-  def __init__(self,
-               tfds_params,
-               process_text_fn=tokenization.convert_to_unicode):
-    super(TfdsProcessor, self).__init__(process_text_fn)
-    self._process_tfds_params_str(tfds_params)
-    if self.module_import:
-      importlib.import_module(self.module_import)
-
-    self.dataset, info = tfds.load(
-        self.dataset_name, data_dir=self.data_dir, with_info=True)
-    if self.is_regression:
-      self._labels = None
-    else:
-      self._labels = list(range(info.features[self.label_key].num_classes))
-
-  def _process_tfds_params_str(self, params_str):
-    """Extracts TFDS parameters from a comma-separated assignments string."""
-    dtype_map = {"int": int, "float": float}
-    cast_str_to_bool = lambda s: s.lower() not in ["false", "0"]
-
-    tuples = [x.split("=") for x in params_str.split(",")]
-    d = {k.strip(): v.strip() for k, v in tuples}
-    self.dataset_name = d["dataset"]  # Required.
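-    # For example, tfds_params="dataset=glue/cola,text_key=sentence" parses
-    # to d = {"dataset": "glue/cola", "text_key": "sentence"}; every other
-    # parameter below then falls back to its default.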
- self.data_dir = d.get("data_dir", None) - self.module_import = d.get("module_import", None) - self.train_split = d.get("train_split", "train") - self.dev_split = d.get("dev_split", "validation") - self.test_split = d.get("test_split", "test") - self.text_key = d.get("text_key", "text") - self.text_b_key = d.get("text_b_key", None) - self.label_key = d.get("label_key", "label") - self.test_text_key = d.get("test_text_key", self.text_key) - self.test_text_b_key = d.get("test_text_b_key", self.text_b_key) - self.test_label = d.get("test_label", "test_example") - self.label_type = dtype_map[d.get("label_type", "int")] - self.is_regression = cast_str_to_bool(d.get("is_regression", "False")) - self.weight_key = d.get("weight_key", None) - self.skip_label = d.get("skip_label", None) - if self.skip_label is not None: - self.skip_label = self.label_type(self.skip_label) - - def get_train_examples(self, data_dir): - assert data_dir is None - return self._create_examples(self.train_split, "train") - - def get_dev_examples(self, data_dir): - assert data_dir is None - return self._create_examples(self.dev_split, "dev") - - def get_test_examples(self, data_dir): - assert data_dir is None - return self._create_examples(self.test_split, "test") - - def get_labels(self): - return self._labels - - def get_processor_name(self): - return "TFDS_" + self.dataset_name - - def _create_examples(self, split_name, set_type): - """Creates examples for the training/dev/test sets.""" - if split_name not in self.dataset: - raise ValueError("Split {} not available.".format(split_name)) - dataset = self.dataset[split_name].as_numpy_iterator() - examples = [] - text_b, weight = None, None - for i, example in enumerate(dataset): - guid = "%s-%s" % (set_type, i) - if set_type == "test": - text_a = self.process_text_fn(example[self.test_text_key]) - if self.test_text_b_key: - text_b = self.process_text_fn(example[self.test_text_b_key]) - label = self.test_label - else: - text_a = self.process_text_fn(example[self.text_key]) - if self.text_b_key: - text_b = self.process_text_fn(example[self.text_b_key]) - label = self.label_type(example[self.label_key]) - if self.skip_label is not None and label == self.skip_label: - continue - if self.weight_key: - weight = float(example[self.weight_key]) - examples.append( - InputExample( - guid=guid, - text_a=text_a, - text_b=text_b, - label=label, - weight=weight)) - return examples - - -class WnliProcessor(DataProcessor): - """Processor for the WNLI data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "WNLI" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = tokenization.convert_to_unicode(line[1]) - text_b = tokenization.convert_to_unicode(line[2]) - if set_type == "test": - label = "0" - else: - label = 
tokenization.convert_to_unicode(line[3]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class XnliProcessor(DataProcessor): - """Processor for the XNLI data set.""" - supported_languages = [ - "ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", - "ur", "vi", "zh" - ] - - def __init__(self, - language="en", - process_text_fn=tokenization.convert_to_unicode): - super(XnliProcessor, self).__init__(process_text_fn) - if language == "all": - self.languages = XnliProcessor.supported_languages - elif language not in XnliProcessor.supported_languages: - raise ValueError("language %s is not supported for XNLI task." % language) - else: - self.languages = [language] - - def get_train_examples(self, data_dir): - """See base class.""" - lines = [] - for language in self.languages: - # Skips the header. - lines.extend( - self._read_tsv( - os.path.join(data_dir, "multinli", - "multinli.train.%s.tsv" % language))[1:]) - - examples = [] - for i, line in enumerate(lines): - guid = "train-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - if label == self.process_text_fn("contradictory"): - label = self.process_text_fn("contradiction") - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - def get_dev_examples(self, data_dir): - """See base class.""" - lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv")) - examples = [] - for i, line in enumerate(lines): - if i == 0: - continue - guid = "dev-%d" % i - text_a = self.process_text_fn(line[6]) - text_b = self.process_text_fn(line[7]) - label = self.process_text_fn(line[1]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - def get_test_examples(self, data_dir): - """See base class.""" - lines = self._read_tsv(os.path.join(data_dir, "xnli.test.tsv")) - examples_by_lang = {k: [] for k in XnliProcessor.supported_languages} - for i, line in enumerate(lines): - if i == 0: - continue - guid = "test-%d" % i - language = self.process_text_fn(line[0]) - text_a = self.process_text_fn(line[6]) - text_b = self.process_text_fn(line[7]) - label = self.process_text_fn(line[1]) - examples_by_lang[language].append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples_by_lang - - def get_labels(self): - """See base class.""" - return ["contradiction", "entailment", "neutral"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "XNLI" - - -class XtremePawsxProcessor(DataProcessor): - """Processor for the XTREME PAWS-X data set.""" - supported_languages = ["de", "en", "es", "fr", "ja", "ko", "zh"] - - def __init__(self, - process_text_fn=tokenization.convert_to_unicode, - translated_data_dir=None, - only_use_en_dev=True): - """See base class. - - Args: - process_text_fn: See base class. - translated_data_dir: If specified, will also include translated data in - the training and testing data. - only_use_en_dev: If True, only use english dev data. Otherwise, use dev - data from all languages. 
- """ - super(XtremePawsxProcessor, self).__init__(process_text_fn) - self.translated_data_dir = translated_data_dir - self.only_use_en_dev = only_use_en_dev - - def get_train_examples(self, data_dir): - """See base class.""" - examples = [] - if self.translated_data_dir is None: - lines = self._read_tsv(os.path.join(data_dir, "train-en.tsv")) - for i, line in enumerate(lines): - guid = "train-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - else: - for lang in self.supported_languages: - lines = self._read_tsv( - os.path.join(self.translated_data_dir, "translate-train", - f"en-{lang}-translated.tsv")) - for i, line in enumerate(lines): - guid = f"train-{lang}-{i}" - text_a = self.process_text_fn(line[2]) - text_b = self.process_text_fn(line[3]) - label = self.process_text_fn(line[4]) - examples.append( - InputExample( - guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - def get_dev_examples(self, data_dir): - """See base class.""" - examples = [] - if self.only_use_en_dev: - lines = self._read_tsv(os.path.join(data_dir, "dev-en.tsv")) - for i, line in enumerate(lines): - guid = "dev-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - else: - for lang in self.supported_languages: - lines = self._read_tsv(os.path.join(data_dir, f"dev-{lang}.tsv")) - for i, line in enumerate(lines): - guid = f"dev-{lang}-{i}" - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample( - guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - def get_test_examples(self, data_dir): - """See base class.""" - examples_by_lang = {} - for lang in self.supported_languages: - examples_by_lang[lang] = [] - lines = self._read_tsv(os.path.join(data_dir, f"test-{lang}.tsv")) - for i, line in enumerate(lines): - guid = f"test-{lang}-{i}" - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = "0" - examples_by_lang[lang].append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - if self.translated_data_dir is not None: - for lang in self.supported_languages: - if lang == "en": - continue - examples_by_lang[f"{lang}-en"] = [] - lines = self._read_tsv( - os.path.join(self.translated_data_dir, "translate-test", - f"test-{lang}-en-translated.tsv")) - for i, line in enumerate(lines): - guid = f"test-{lang}-en-{i}" - text_a = self.process_text_fn(line[2]) - text_b = self.process_text_fn(line[3]) - label = "0" - examples_by_lang[f"{lang}-en"].append( - InputExample( - guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples_by_lang - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "XTREME-PAWS-X" - - -class XtremeXnliProcessor(DataProcessor): - """Processor for the XTREME XNLI data set.""" - supported_languages = [ - "ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", - "ur", "vi", "zh" - ] - - def __init__(self, - process_text_fn=tokenization.convert_to_unicode, - translated_data_dir=None, - only_use_en_dev=True): - """See base class. 
- - Args: - process_text_fn: See base class. - translated_data_dir: If specified, will also include translated data in - the training data. - only_use_en_dev: If True, only use english dev data. Otherwise, use dev - data from all languages. - """ - super(XtremeXnliProcessor, self).__init__(process_text_fn) - self.translated_data_dir = translated_data_dir - self.only_use_en_dev = only_use_en_dev - - def get_train_examples(self, data_dir): - """See base class.""" - lines = self._read_tsv(os.path.join(data_dir, "train-en.tsv")) - - examples = [] - if self.translated_data_dir is None: - for i, line in enumerate(lines): - guid = "train-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - if label == self.process_text_fn("contradictory"): - label = self.process_text_fn("contradiction") - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - else: - for lang in self.supported_languages: - lines = self._read_tsv( - os.path.join(self.translated_data_dir, "translate-train", - f"en-{lang}-translated.tsv")) - for i, line in enumerate(lines): - guid = f"train-{lang}-{i}" - text_a = self.process_text_fn(line[2]) - text_b = self.process_text_fn(line[3]) - label = self.process_text_fn(line[4]) - if label == self.process_text_fn("contradictory"): - label = self.process_text_fn("contradiction") - examples.append( - InputExample( - guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - def get_dev_examples(self, data_dir): - """See base class.""" - examples = [] - if self.only_use_en_dev: - lines = self._read_tsv(os.path.join(data_dir, "dev-en.tsv")) - for i, line in enumerate(lines): - guid = "dev-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - else: - for lang in self.supported_languages: - lines = self._read_tsv(os.path.join(data_dir, f"dev-{lang}.tsv")) - for i, line in enumerate(lines): - guid = f"dev-{lang}-{i}" - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - if label == self.process_text_fn("contradictory"): - label = self.process_text_fn("contradiction") - examples.append( - InputExample( - guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - def get_test_examples(self, data_dir): - """See base class.""" - examples_by_lang = {} - for lang in self.supported_languages: - examples_by_lang[lang] = [] - lines = self._read_tsv(os.path.join(data_dir, f"test-{lang}.tsv")) - for i, line in enumerate(lines): - guid = f"test-{lang}-{i}" - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = "contradiction" - examples_by_lang[lang].append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - if self.translated_data_dir is not None: - for lang in self.supported_languages: - if lang == "en": - continue - examples_by_lang[f"{lang}-en"] = [] - lines = self._read_tsv( - os.path.join(self.translated_data_dir, "translate-test", - f"test-{lang}-en-translated.tsv")) - for i, line in enumerate(lines): - guid = f"test-{lang}-en-{i}" - text_a = self.process_text_fn(line[2]) - text_b = self.process_text_fn(line[3]) - label = "contradiction" - examples_by_lang[f"{lang}-en"].append( - InputExample( - guid=guid, text_a=text_a, text_b=text_b, label=label)) - return 
examples_by_lang - - def get_labels(self): - """See base class.""" - return ["contradiction", "entailment", "neutral"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "XTREME-XNLI" - - -def convert_single_example(ex_index, example, label_list, max_seq_length, - tokenizer): - """Converts a single `InputExample` into a single `InputFeatures`.""" - label_map = {} - if label_list: - for (i, label) in enumerate(label_list): - label_map[label] = i - - tokens_a = tokenizer.tokenize(example.text_a) - tokens_b = None - if example.text_b: - tokens_b = tokenizer.tokenize(example.text_b) - - if tokens_b: - # Modifies `tokens_a` and `tokens_b` in place so that the total - # length is less than the specified length. - # Account for [CLS], [SEP], [SEP] with "- 3" - _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) - else: - # Account for [CLS] and [SEP] with "- 2" - if len(tokens_a) > max_seq_length - 2: - tokens_a = tokens_a[0:(max_seq_length - 2)] - - seg_id_a = 0 - seg_id_b = 1 - seg_id_cls = 0 - seg_id_pad = 0 - - # The convention in BERT is: - # (a) For sequence pairs: - # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] - # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 - # (b) For single sequences: - # tokens: [CLS] the dog is hairy . [SEP] - # type_ids: 0 0 0 0 0 0 0 - # - # Where "type_ids" are used to indicate whether this is the first - # sequence or the second sequence. The embedding vectors for `type=0` and - # `type=1` were learned during pre-training and are added to the wordpiece - # embedding vector (and position vector). This is not *strictly* necessary - # since the [SEP] token unambiguously separates the sequences, but it makes - # it easier for the model to learn the concept of sequences. - # - # For classification tasks, the first vector (corresponding to [CLS]) is - # used as the "sentence vector". Note that this only makes sense because - # the entire model is fine-tuned. - tokens = [] - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(seg_id_cls) - for token in tokens_a: - tokens.append(token) - segment_ids.append(seg_id_a) - tokens.append("[SEP]") - segment_ids.append(seg_id_a) - - if tokens_b: - for token in tokens_b: - tokens.append(token) - segment_ids.append(seg_id_b) - tokens.append("[SEP]") - segment_ids.append(seg_id_b) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. 
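-  # For instance, with max_seq_length=8 and five real tokens (including
-  # [CLS]/[SEP]), input_mask becomes [1, 1, 1, 1, 1, 0, 0, 0] and the padded
-  # positions of input_ids/segment_ids are all 0.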
- while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(seg_id_pad) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - label_id = label_map[example.label] if label_map else example.label - if ex_index < 5: - logging.info("*** Example ***") - logging.info("guid: %s", (example.guid)) - logging.info("tokens: %s", - " ".join([tokenization.printable_text(x) for x in tokens])) - logging.info("input_ids: %s", " ".join([str(x) for x in input_ids])) - logging.info("input_mask: %s", " ".join([str(x) for x in input_mask])) - logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids])) - logging.info("label: %s (id = %s)", example.label, str(label_id)) - logging.info("weight: %s", example.weight) - logging.info("example_id: %s", example.example_id) - - feature = InputFeatures( - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - label_id=label_id, - is_real_example=True, - weight=example.weight, - example_id=example.example_id) - - return feature - - -class AXgProcessor(DataProcessor): - """Processor for the AXg dataset (SuperGLUE diagnostics dataset).""" - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "AX-g.jsonl")), "test") - - def get_labels(self): - """See base class.""" - return ["entailment", "not_entailment"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "AXg" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for line in lines: - guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"]))) - text_a = self.process_text_fn(line["premise"]) - text_b = self.process_text_fn(line["hypothesis"]) - label = self.process_text_fn(line["label"]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class SuperGLUERTEProcessor(DataProcessor): - """Processor for the RTE dataset (SuperGLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "train.jsonl")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "val.jsonl")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") - - def get_labels(self): - """See base class.""" - # All datasets are converted to 2-class split, where for 3-class datasets we - # collapse neutral and contradiction into not_entailment. 
- return ["entailment", "not_entailment"] - - @staticmethod - def get_processor_name(): - """See base class.""" - return "RTESuperGLUE" - - def _create_examples(self, lines, set_type): - """Creates examples for the training/dev/test sets.""" - examples = [] - for i, line in enumerate(lines): - guid = "%s-%s" % (set_type, i) - text_a = self.process_text_fn(line["premise"]) - text_b = self.process_text_fn(line["hypothesis"]) - if set_type == "test": - label = "entailment" - else: - label = self.process_text_fn(line["label"]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -def file_based_convert_examples_to_features(examples, - label_list, - max_seq_length, - tokenizer, - output_file, - label_type=None): - """Convert a set of `InputExample`s to a TFRecord file.""" - - tf.io.gfile.makedirs(os.path.dirname(output_file)) - writer = tf.io.TFRecordWriter(output_file) - - for ex_index, example in enumerate(examples): - if ex_index % 10000 == 0: - logging.info("Writing example %d of %d", ex_index, len(examples)) - - feature = convert_single_example(ex_index, example, label_list, - max_seq_length, tokenizer) - - def create_int_feature(values): - f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return f - - def create_float_feature(values): - f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) - return f - - features = collections.OrderedDict() - features["input_ids"] = create_int_feature(feature.input_ids) - features["input_mask"] = create_int_feature(feature.input_mask) - features["segment_ids"] = create_int_feature(feature.segment_ids) - if label_type is not None and label_type == float: - features["label_ids"] = create_float_feature([feature.label_id]) - elif feature.label_id is not None: - features["label_ids"] = create_int_feature([feature.label_id]) - features["is_real_example"] = create_int_feature( - [int(feature.is_real_example)]) - if feature.weight is not None: - features["weight"] = create_float_feature([feature.weight]) - if feature.example_id is not None: - features["example_id"] = create_int_feature([feature.example_id]) - else: - features["example_id"] = create_int_feature([ex_index]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(tf_example.SerializeToString()) - writer.close() - - -def _truncate_seq_pair(tokens_a, tokens_b, max_length): - """Truncates a sequence pair in place to the maximum length.""" - - # This is a simple heuristic which will always truncate the longer sequence - # one token at a time. This makes more sense than truncating an equal percent - # of tokens from each, since if one sequence is very short then each token - # that's truncated likely contains more information than a longer sequence. - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_length: - break - if len(tokens_a) > len(tokens_b): - tokens_a.pop() - else: - tokens_b.pop() - - -def generate_tf_record_from_data_file(processor, - data_dir, - tokenizer, - train_data_output_path=None, - eval_data_output_path=None, - test_data_output_path=None, - max_seq_length=128): - """Generates and saves training data into a tf record file. - - Args: - processor: Input processor object to be used for generating data. Subclass - of `DataProcessor`. - data_dir: Directory that contains train/eval/test data to process. - tokenizer: The tokenizer to be applied on the data. 
-    train_data_output_path: Output to which processed tf record for training
-      will be saved.
-    eval_data_output_path: Output to which processed tf record for evaluation
-      will be saved.
-    test_data_output_path: Output to which processed tf record for testing
-      will be saved. Must be a pattern template with {} if processor has
-      language-specific test data.
-    max_seq_length: Maximum sequence length of the training/eval data to be
-      generated.
-
-  Returns:
-    A dictionary containing input meta data.
-  """
-  assert train_data_output_path or eval_data_output_path
-
-  label_list = processor.get_labels()
-  label_type = getattr(processor, "label_type", None)
-  is_regression = getattr(processor, "is_regression", False)
-  has_sample_weights = getattr(processor, "weight_key", False)
-
-  num_training_data = 0
-  if train_data_output_path:
-    train_input_data_examples = processor.get_train_examples(data_dir)
-    file_based_convert_examples_to_features(train_input_data_examples,
-                                            label_list, max_seq_length,
-                                            tokenizer, train_data_output_path,
-                                            label_type)
-    num_training_data = len(train_input_data_examples)
-
-  if eval_data_output_path:
-    eval_input_data_examples = processor.get_dev_examples(data_dir)
-    file_based_convert_examples_to_features(eval_input_data_examples,
-                                            label_list, max_seq_length,
-                                            tokenizer, eval_data_output_path,
-                                            label_type)
-
-  meta_data = {
-      "processor_type": processor.get_processor_name(),
-      "train_data_size": num_training_data,
-      "max_seq_length": max_seq_length,
-  }
-
-  if test_data_output_path:
-    test_input_data_examples = processor.get_test_examples(data_dir)
-    if isinstance(test_input_data_examples, dict):
-      for language, examples in test_input_data_examples.items():
-        file_based_convert_examples_to_features(
-            examples, label_list, max_seq_length, tokenizer,
-            test_data_output_path.format(language), label_type)
-        meta_data["test_{}_data_size".format(language)] = len(examples)
-    else:
-      file_based_convert_examples_to_features(test_input_data_examples,
-                                              label_list, max_seq_length,
-                                              tokenizer, test_data_output_path,
-                                              label_type)
-      meta_data["test_data_size"] = len(test_input_data_examples)
-
-  if is_regression:
-    meta_data["task_type"] = "bert_regression"
-    meta_data["label_type"] = {int: "int", float: "float"}[label_type]
-  else:
-    meta_data["task_type"] = "bert_classification"
-    meta_data["num_labels"] = len(processor.get_labels())
-  if has_sample_weights:
-    meta_data["has_sample_weights"] = True
-
-  if eval_data_output_path:
-    meta_data["eval_data_size"] = len(eval_input_data_examples)
-
-  return meta_data
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_finetuning_data.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_finetuning_data.py
deleted file mode 100644
index 246c1ad219b9e83ccb2f289dfe1d2f788a40aec9..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_finetuning_data.py
+++ /dev/null
@@ -1,434 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""BERT finetuning task dataset generator."""
-
-import functools
-import json
-import os
-
-# Import libraries
-from absl import app
-from absl import flags
-import tensorflow as tf
-from official.nlp.bert import tokenization
-from official.nlp.data import classifier_data_lib
-from official.nlp.data import sentence_retrieval_lib
-# word-piece tokenizer based squad_lib
-from official.nlp.data import squad_lib as squad_lib_wp
-# sentence-piece tokenizer based squad_lib
-from official.nlp.data import squad_lib_sp
-from official.nlp.data import tagging_data_lib
-
-FLAGS = flags.FLAGS
-
-# TODO(chendouble): consider moving each task to its own binary.
-flags.DEFINE_enum(
-    "fine_tuning_task_type", "classification",
-    ["classification", "regression", "squad", "retrieval", "tagging"],
-    "The name of the BERT fine-tuning task for which data "
-    "will be generated.")
-
-# BERT classification specific flags.
-flags.DEFINE_string(
-    "input_data_dir", None,
-    "The input data dir. Should contain the .tsv files (or other data files) "
-    "for the task.")
-
-flags.DEFINE_enum(
-    "classification_task_name", "MNLI", [
-        "AX", "COLA", "IMDB", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE",
-        "SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", "XTREME-PAWS-X",
-        "AX-g", "SUPERGLUE-RTE"
-    ], "The name of the task on which to train the BERT classifier. The "
-    "difference between XTREME-XNLI and XNLI is: 1. the format "
-    "of input tsv files; 2. the dev set for XTREME is English "
-    "only and for XNLI is all languages combined. Same for "
-    "PAWS-X.")
-
-# MNLI task-specific flag.
-flags.DEFINE_enum("mnli_type", "matched", ["matched", "mismatched"],
-                  "The type of MNLI dataset.")
-
-# XNLI task-specific flag.
-flags.DEFINE_string(
-    "xnli_language", "en",
-    "Language of training data for the XNLI task. If the value is 'all', the "
-    "data of all languages will be used for training.")
-
-# PAWS-X task-specific flag.
-flags.DEFINE_string(
-    "pawsx_language", "en",
-    "Language of training data for the PAWS-X task. If the value is 'all', "
-    "the data of all languages will be used for training.")
-
-# XTREME classification specific flags. Only used in XtremePawsx and XtremeXnli.
-flags.DEFINE_string(
-    "translated_input_data_dir", None,
-    "The translated input data dir. Should contain the .tsv files (or other "
-    "data files) for the task.")
-
-# Retrieval task-specific flags.
-flags.DEFINE_enum("retrieval_task_name", "bucc", ["bucc", "tatoeba"], - "The name of sentence retrieval task for scoring") - -# Tagging task-specific flags. -flags.DEFINE_enum("tagging_task_name", "panx", ["panx", "udpos"], - "The name of BERT tagging (token classification) task.") - -flags.DEFINE_bool("tagging_only_use_en_train", True, - "Whether only use english training data in tagging.") - -# BERT Squad task-specific flags. -flags.DEFINE_string( - "squad_data_file", None, - "The input data file in for generating training data for BERT squad task.") - -flags.DEFINE_string( - "translated_squad_data_folder", None, - "The translated data folder for generating training data for BERT squad " - "task.") - -flags.DEFINE_integer( - "doc_stride", 128, - "When splitting up a long document into chunks, how much stride to " - "take between chunks.") - -flags.DEFINE_integer( - "max_query_length", 64, - "The maximum number of tokens for the question. Questions longer than " - "this will be truncated to this length.") - -flags.DEFINE_bool( - "version_2_with_negative", False, - "If true, the SQuAD examples contain some that do not have an answer.") - -flags.DEFINE_bool( - "xlnet_format", False, - "If true, then data will be preprocessed in a paragraph, query, class order" - " instead of the BERT-style class, paragraph, query order.") - -# XTREME specific flags. -flags.DEFINE_bool("only_use_en_dev", True, "Whether only use english dev data.") - -# Shared flags across BERT fine-tuning tasks. -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") - -flags.DEFINE_string( - "train_data_output_path", None, - "The path in which generated training input data will be written as tf" - " records.") - -flags.DEFINE_string( - "eval_data_output_path", None, - "The path in which generated evaluation input data will be written as tf" - " records.") - -flags.DEFINE_string( - "test_data_output_path", None, - "The path in which generated test input data will be written as tf" - " records. If None, do not generate test data. Must be a pattern template" - " as test_{}.tfrecords if processor has language specific test data.") - -flags.DEFINE_string("meta_data_file_path", None, - "The path in which input meta data will be written.") - -flags.DEFINE_bool( - "do_lower_case", True, - "Whether to lower case the input text. Should be True for uncased " - "models and False for cased models.") - -flags.DEFINE_integer( - "max_seq_length", 128, - "The maximum total input sequence length after WordPiece tokenization. " - "Sequences longer than this will be truncated, and sequences shorter " - "than this will be padded.") - -flags.DEFINE_string("sp_model_file", "", - "The path to the model used by sentence piece tokenizer.") - -flags.DEFINE_enum( - "tokenization", "WordPiece", ["WordPiece", "SentencePiece"], - "Specifies the tokenizer implementation, i.e., whether to use WordPiece " - "or SentencePiece tokenizer. 
Canonical BERT uses WordPiece tokenizer, " - "while ALBERT uses SentencePiece tokenizer.") - -flags.DEFINE_string( - "tfds_params", "", "Comma-separated list of TFDS parameter assigments for " - "generic classfication data import (for more details " - "see the TfdsProcessor class documentation).") - - -def generate_classifier_dataset(): - """Generates classifier dataset and returns input meta data.""" - assert (FLAGS.input_data_dir and FLAGS.classification_task_name or - FLAGS.tfds_params) - - if FLAGS.tokenization == "WordPiece": - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - processor_text_fn = tokenization.convert_to_unicode - else: - assert FLAGS.tokenization == "SentencePiece" - tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) - processor_text_fn = functools.partial( - tokenization.preprocess_text, lower=FLAGS.do_lower_case) - - if FLAGS.tfds_params: - processor = classifier_data_lib.TfdsProcessor( - tfds_params=FLAGS.tfds_params, process_text_fn=processor_text_fn) - return classifier_data_lib.generate_tf_record_from_data_file( - processor, - None, - tokenizer, - train_data_output_path=FLAGS.train_data_output_path, - eval_data_output_path=FLAGS.eval_data_output_path, - test_data_output_path=FLAGS.test_data_output_path, - max_seq_length=FLAGS.max_seq_length) - else: - processors = { - "ax": - classifier_data_lib.AxProcessor, - "cola": - classifier_data_lib.ColaProcessor, - "imdb": - classifier_data_lib.ImdbProcessor, - "mnli": - functools.partial( - classifier_data_lib.MnliProcessor, mnli_type=FLAGS.mnli_type), - "mrpc": - classifier_data_lib.MrpcProcessor, - "qnli": - classifier_data_lib.QnliProcessor, - "qqp": - classifier_data_lib.QqpProcessor, - "rte": - classifier_data_lib.RteProcessor, - "sst-2": - classifier_data_lib.SstProcessor, - "sts-b": - classifier_data_lib.StsBProcessor, - "xnli": - functools.partial( - classifier_data_lib.XnliProcessor, - language=FLAGS.xnli_language), - "paws-x": - functools.partial( - classifier_data_lib.PawsxProcessor, - language=FLAGS.pawsx_language), - "wnli": - classifier_data_lib.WnliProcessor, - "xtreme-xnli": - functools.partial( - classifier_data_lib.XtremeXnliProcessor, - translated_data_dir=FLAGS.translated_input_data_dir, - only_use_en_dev=FLAGS.only_use_en_dev), - "xtreme-paws-x": - functools.partial( - classifier_data_lib.XtremePawsxProcessor, - translated_data_dir=FLAGS.translated_input_data_dir, - only_use_en_dev=FLAGS.only_use_en_dev), - "ax-g": - classifier_data_lib.AXgProcessor, - "superglue-rte": - classifier_data_lib.SuperGLUERTEProcessor - } - task_name = FLAGS.classification_task_name.lower() - if task_name not in processors: - raise ValueError("Task not found: %s" % (task_name)) - - processor = processors[task_name](process_text_fn=processor_text_fn) - return classifier_data_lib.generate_tf_record_from_data_file( - processor, - FLAGS.input_data_dir, - tokenizer, - train_data_output_path=FLAGS.train_data_output_path, - eval_data_output_path=FLAGS.eval_data_output_path, - test_data_output_path=FLAGS.test_data_output_path, - max_seq_length=FLAGS.max_seq_length) - - -def generate_regression_dataset(): - """Generates regression dataset and returns input meta data.""" - if FLAGS.tokenization == "WordPiece": - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - processor_text_fn = tokenization.convert_to_unicode - else: - assert FLAGS.tokenization == "SentencePiece" - tokenizer = 
tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) - processor_text_fn = functools.partial( - tokenization.preprocess_text, lower=FLAGS.do_lower_case) - - if FLAGS.tfds_params: - processor = classifier_data_lib.TfdsProcessor( - tfds_params=FLAGS.tfds_params, process_text_fn=processor_text_fn) - return classifier_data_lib.generate_tf_record_from_data_file( - processor, - None, - tokenizer, - train_data_output_path=FLAGS.train_data_output_path, - eval_data_output_path=FLAGS.eval_data_output_path, - test_data_output_path=FLAGS.test_data_output_path, - max_seq_length=FLAGS.max_seq_length) - else: - raise ValueError("No data processor found for the given regression task.") - - -def generate_squad_dataset(): - """Generates squad training dataset and returns input meta data.""" - assert FLAGS.squad_data_file - if FLAGS.tokenization == "WordPiece": - return squad_lib_wp.generate_tf_record_from_json_file( - input_file_path=FLAGS.squad_data_file, - vocab_file_path=FLAGS.vocab_file, - output_path=FLAGS.train_data_output_path, - translated_input_folder=FLAGS.translated_squad_data_folder, - max_seq_length=FLAGS.max_seq_length, - do_lower_case=FLAGS.do_lower_case, - max_query_length=FLAGS.max_query_length, - doc_stride=FLAGS.doc_stride, - version_2_with_negative=FLAGS.version_2_with_negative, - xlnet_format=FLAGS.xlnet_format) - else: - assert FLAGS.tokenization == "SentencePiece" - return squad_lib_sp.generate_tf_record_from_json_file( - input_file_path=FLAGS.squad_data_file, - sp_model_file=FLAGS.sp_model_file, - output_path=FLAGS.train_data_output_path, - translated_input_folder=FLAGS.translated_squad_data_folder, - max_seq_length=FLAGS.max_seq_length, - do_lower_case=FLAGS.do_lower_case, - max_query_length=FLAGS.max_query_length, - doc_stride=FLAGS.doc_stride, - xlnet_format=FLAGS.xlnet_format, - version_2_with_negative=FLAGS.version_2_with_negative) - - -def generate_retrieval_dataset(): - """Generate retrieval test and dev dataset and returns input meta data.""" - assert (FLAGS.input_data_dir and FLAGS.retrieval_task_name) - if FLAGS.tokenization == "WordPiece": - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - processor_text_fn = tokenization.convert_to_unicode - else: - assert FLAGS.tokenization == "SentencePiece" - tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) - processor_text_fn = functools.partial( - tokenization.preprocess_text, lower=FLAGS.do_lower_case) - - processors = { - "bucc": sentence_retrieval_lib.BuccProcessor, - "tatoeba": sentence_retrieval_lib.TatoebaProcessor, - } - - task_name = FLAGS.retrieval_task_name.lower() - if task_name not in processors: - raise ValueError("Task not found: %s" % task_name) - - processor = processors[task_name](process_text_fn=processor_text_fn) - - return sentence_retrieval_lib.generate_sentence_retrevial_tf_record( - processor, FLAGS.input_data_dir, tokenizer, FLAGS.eval_data_output_path, - FLAGS.test_data_output_path, FLAGS.max_seq_length) - - -def generate_tagging_dataset(): - """Generates tagging dataset.""" - processors = { - "panx": - functools.partial( - tagging_data_lib.PanxProcessor, - only_use_en_train=FLAGS.tagging_only_use_en_train, - only_use_en_dev=FLAGS.only_use_en_dev), - "udpos": - functools.partial( - tagging_data_lib.UdposProcessor, - only_use_en_train=FLAGS.tagging_only_use_en_train, - only_use_en_dev=FLAGS.only_use_en_dev), - } - task_name = FLAGS.tagging_task_name.lower() - if task_name not in processors: - raise ValueError("Task not 
found: %s" % task_name) - - if FLAGS.tokenization == "WordPiece": - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - processor_text_fn = tokenization.convert_to_unicode - elif FLAGS.tokenization == "SentencePiece": - tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) - processor_text_fn = functools.partial( - tokenization.preprocess_text, lower=FLAGS.do_lower_case) - else: - raise ValueError("Unsupported tokenization: %s" % FLAGS.tokenization) - - processor = processors[task_name]() - return tagging_data_lib.generate_tf_record_from_data_file( - processor, FLAGS.input_data_dir, tokenizer, FLAGS.max_seq_length, - FLAGS.train_data_output_path, FLAGS.eval_data_output_path, - FLAGS.test_data_output_path, processor_text_fn) - - -def main(_): - if FLAGS.tokenization == "WordPiece": - if not FLAGS.vocab_file: - raise ValueError( - "FLAG vocab_file for word-piece tokenizer is not specified.") - else: - assert FLAGS.tokenization == "SentencePiece" - if not FLAGS.sp_model_file: - raise ValueError( - "FLAG sp_model_file for sentence-piece tokenizer is not specified.") - - if FLAGS.fine_tuning_task_type != "retrieval": - flags.mark_flag_as_required("train_data_output_path") - - if FLAGS.fine_tuning_task_type == "classification": - input_meta_data = generate_classifier_dataset() - elif FLAGS.fine_tuning_task_type == "regression": - input_meta_data = generate_regression_dataset() - elif FLAGS.fine_tuning_task_type == "retrieval": - input_meta_data = generate_retrieval_dataset() - elif FLAGS.fine_tuning_task_type == "squad": - input_meta_data = generate_squad_dataset() - else: - assert FLAGS.fine_tuning_task_type == "tagging" - input_meta_data = generate_tagging_dataset() - - tf.io.gfile.makedirs(os.path.dirname(FLAGS.meta_data_file_path)) - with tf.io.gfile.GFile(FLAGS.meta_data_file_path, "w") as writer: - writer.write(json.dumps(input_meta_data, indent=4) + "\n") - - -if __name__ == "__main__": - flags.mark_flag_as_required("meta_data_file_path") - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_pretraining_data.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_pretraining_data.py deleted file mode 100644 index e7086bdcb9236ee25a420083beb5d9ec45dbcc88..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_pretraining_data.py +++ /dev/null @@ -1,685 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Create masked LM/next sentence masked_lm TF examples for BERT.""" - -import collections -import itertools -import random - -# Import libraries -from absl import app -from absl import flags -from absl import logging -import tensorflow as tf - -from official.nlp.bert import tokenization - -FLAGS = flags.FLAGS - -flags.DEFINE_string("input_file", None, - "Input raw text file (or comma-separated list of files).") - -flags.DEFINE_string( - "output_file", None, - "Output TF example file (or comma-separated list of files).") - -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") - -flags.DEFINE_bool( - "do_lower_case", True, - "Whether to lower case the input text. Should be True for uncased " - "models and False for cased models.") - -flags.DEFINE_bool( - "do_whole_word_mask", False, - "Whether to use whole word masking rather than per-WordPiece masking.") - -flags.DEFINE_integer( - "max_ngram_size", None, - "Mask contiguous whole words (n-grams) of up to `max_ngram_size` using a " - "weighting scheme to favor shorter n-grams. " - "Note: `--do_whole_word_mask=True` must also be set when n-gram masking.") - -flags.DEFINE_bool( - "gzip_compress", False, - "Whether to use `GZIP` compress option to get compressed TFRecord files.") - -flags.DEFINE_bool( - "use_v2_feature_names", False, - "Whether to use the feature names consistent with the models.") - -flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.") - -flags.DEFINE_integer("max_predictions_per_seq", 20, - "Maximum number of masked LM predictions per sequence.") - -flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.") - -flags.DEFINE_integer( - "dupe_factor", 10, - "Number of times to duplicate the input data (with different masks).") - -flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.") - -flags.DEFINE_float( - "short_seq_prob", 0.1, - "Probability of creating sequences which are shorter than the " - "maximum length.") - - -class TrainingInstance(object): - """A single training instance (sentence pair).""" - - def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels, - is_random_next): - self.tokens = tokens - self.segment_ids = segment_ids - self.is_random_next = is_random_next - self.masked_lm_positions = masked_lm_positions - self.masked_lm_labels = masked_lm_labels - - def __str__(self): - s = "" - s += "tokens: %s\n" % (" ".join( - [tokenization.printable_text(x) for x in self.tokens])) - s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids])) - s += "is_random_next: %s\n" % self.is_random_next - s += "masked_lm_positions: %s\n" % (" ".join( - [str(x) for x in self.masked_lm_positions])) - s += "masked_lm_labels: %s\n" % (" ".join( - [tokenization.printable_text(x) for x in self.masked_lm_labels])) - s += "\n" - return s - - def __repr__(self): - return self.__str__() - - -def write_instance_to_example_files(instances, tokenizer, max_seq_length, - max_predictions_per_seq, output_files, - gzip_compress, use_v2_feature_names): - """Creates TF example files from 
`TrainingInstance`s.""" - writers = [] - for output_file in output_files: - writers.append( - tf.io.TFRecordWriter( - output_file, options="GZIP" if gzip_compress else "")) - - writer_index = 0 - - total_written = 0 - for (inst_index, instance) in enumerate(instances): - input_ids = tokenizer.convert_tokens_to_ids(instance.tokens) - input_mask = [1] * len(input_ids) - segment_ids = list(instance.segment_ids) - assert len(input_ids) <= max_seq_length - - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - masked_lm_positions = list(instance.masked_lm_positions) - masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels) - masked_lm_weights = [1.0] * len(masked_lm_ids) - - while len(masked_lm_positions) < max_predictions_per_seq: - masked_lm_positions.append(0) - masked_lm_ids.append(0) - masked_lm_weights.append(0.0) - - next_sentence_label = 1 if instance.is_random_next else 0 - - features = collections.OrderedDict() - if use_v2_feature_names: - features["input_word_ids"] = create_int_feature(input_ids) - features["input_type_ids"] = create_int_feature(segment_ids) - else: - features["input_ids"] = create_int_feature(input_ids) - features["segment_ids"] = create_int_feature(segment_ids) - - features["input_mask"] = create_int_feature(input_mask) - features["masked_lm_positions"] = create_int_feature(masked_lm_positions) - features["masked_lm_ids"] = create_int_feature(masked_lm_ids) - features["masked_lm_weights"] = create_float_feature(masked_lm_weights) - features["next_sentence_labels"] = create_int_feature([next_sentence_label]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - - writers[writer_index].write(tf_example.SerializeToString()) - writer_index = (writer_index + 1) % len(writers) - - total_written += 1 - - if inst_index < 20: - logging.info("*** Example ***") - logging.info("tokens: %s", " ".join( - [tokenization.printable_text(x) for x in instance.tokens])) - - for feature_name in features.keys(): - feature = features[feature_name] - values = [] - if feature.int64_list.value: - values = feature.int64_list.value - elif feature.float_list.value: - values = feature.float_list.value - logging.info("%s: %s", feature_name, " ".join([str(x) for x in values])) - - for writer in writers: - writer.close() - - logging.info("Wrote %d total instances", total_written) - - -def create_int_feature(values): - feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return feature - - -def create_float_feature(values): - feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) - return feature - - -def create_training_instances(input_files, - tokenizer, - max_seq_length, - dupe_factor, - short_seq_prob, - masked_lm_prob, - max_predictions_per_seq, - rng, - do_whole_word_mask=False, - max_ngram_size=None): - """Create `TrainingInstance`s from raw text.""" - all_documents = [[]] - - # Input file format: - # (1) One sentence per line. These should ideally be actual sentences, not - # entire paragraphs or arbitrary spans of text. (Because we use the - # sentence boundaries for the "next sentence prediction" task). - # (2) Blank lines between documents. Document boundaries are needed so - # that the "next sentence prediction" task doesn't span between documents. 
- for input_file in input_files: - with tf.io.gfile.GFile(input_file, "rb") as reader: - while True: - line = tokenization.convert_to_unicode(reader.readline()) - if not line: - break - line = line.strip() - - # Empty lines are used as document delimiters - if not line: - all_documents.append([]) - tokens = tokenizer.tokenize(line) - if tokens: - all_documents[-1].append(tokens) - - # Remove empty documents - all_documents = [x for x in all_documents if x] - rng.shuffle(all_documents) - - vocab_words = list(tokenizer.vocab.keys()) - instances = [] - for _ in range(dupe_factor): - for document_index in range(len(all_documents)): - instances.extend( - create_instances_from_document( - all_documents, document_index, max_seq_length, short_seq_prob, - masked_lm_prob, max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask, max_ngram_size)) - - rng.shuffle(instances) - return instances - - -def create_instances_from_document( - all_documents, document_index, max_seq_length, short_seq_prob, - masked_lm_prob, max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask=False, - max_ngram_size=None): - """Creates `TrainingInstance`s for a single document.""" - document = all_documents[document_index] - - # Account for [CLS], [SEP], [SEP] - max_num_tokens = max_seq_length - 3 - - # We *usually* want to fill up the entire sequence since we are padding - # to `max_seq_length` anyways, so short sequences are generally wasted - # computation. However, we *sometimes* - # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter - # sequences to minimize the mismatch between pre-training and fine-tuning. - # The `target_seq_length` is just a rough target however, whereas - # `max_seq_length` is a hard limit. - target_seq_length = max_num_tokens - if rng.random() < short_seq_prob: - target_seq_length = rng.randint(2, max_num_tokens) - - # We DON'T just concatenate all of the tokens from a document into a long - # sequence and choose an arbitrary split point because this would make the - # next sentence prediction task too easy. Instead, we split the input into - # segments "A" and "B" based on the actual "sentences" provided by the user - # input. - instances = [] - current_chunk = [] - current_length = 0 - i = 0 - while i < len(document): - segment = document[i] - current_chunk.append(segment) - current_length += len(segment) - if i == len(document) - 1 or current_length >= target_seq_length: - if current_chunk: - # `a_end` is how many segments from `current_chunk` go into the `A` - # (first) sentence. - a_end = 1 - if len(current_chunk) >= 2: - a_end = rng.randint(1, len(current_chunk) - 1) - - tokens_a = [] - for j in range(a_end): - tokens_a.extend(current_chunk[j]) - - tokens_b = [] - # Random next - is_random_next = False - if len(current_chunk) == 1 or rng.random() < 0.5: - is_random_next = True - target_b_length = target_seq_length - len(tokens_a) - - # This should rarely go for more than one iteration for large - # corpora. However, just to be careful, we try to make sure that - # the random document is not the same as the document - # we're processing. 
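  # Note that if the corpus holds only a single document, every draw below
  # returns `document_index` itself, the ten retries are exhausted, and the
  # "random" segment ends up being sampled from the current document anyway.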
- for _ in range(10): - random_document_index = rng.randint(0, len(all_documents) - 1) - if random_document_index != document_index: - break - - random_document = all_documents[random_document_index] - random_start = rng.randint(0, len(random_document) - 1) - for j in range(random_start, len(random_document)): - tokens_b.extend(random_document[j]) - if len(tokens_b) >= target_b_length: - break - # We didn't actually use these segments so we "put them back" so - # they don't go to waste. - num_unused_segments = len(current_chunk) - a_end - i -= num_unused_segments - # Actual next - else: - is_random_next = False - for j in range(a_end, len(current_chunk)): - tokens_b.extend(current_chunk[j]) - truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng) - - assert len(tokens_a) >= 1 - assert len(tokens_b) >= 1 - - tokens = [] - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in tokens_a: - tokens.append(token) - segment_ids.append(0) - - tokens.append("[SEP]") - segment_ids.append(0) - - for token in tokens_b: - tokens.append(token) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - (tokens, masked_lm_positions, - masked_lm_labels) = create_masked_lm_predictions( - tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask, max_ngram_size) - instance = TrainingInstance( - tokens=tokens, - segment_ids=segment_ids, - is_random_next=is_random_next, - masked_lm_positions=masked_lm_positions, - masked_lm_labels=masked_lm_labels) - instances.append(instance) - current_chunk = [] - current_length = 0 - i += 1 - - return instances - - -MaskedLmInstance = collections.namedtuple("MaskedLmInstance", - ["index", "label"]) - -# A _Gram is a [half-open) interval of token indices which form a word. -# E.g., -# words: ["The", "doghouse"] -# tokens: ["The", "dog", "##house"] -# grams: [(0,1), (1,3)] -_Gram = collections.namedtuple("_Gram", ["begin", "end"]) - - -def _window(iterable, size): - """Helper to create a sliding window iterator with a given size. - - E.g., - input = [1, 2, 3, 4] - _window(input, 1) => [1], [2], [3], [4] - _window(input, 2) => [1, 2], [2, 3], [3, 4] - _window(input, 3) => [1, 2, 3], [2, 3, 4] - _window(input, 4) => [1, 2, 3, 4] - _window(input, 5) => None - - Args: - iterable: elements to iterate over. - size: size of the window. - - Yields: - Elements of `iterable` batched into a sliding window of length `size`. - """ - i = iter(iterable) - window = [] - try: - for e in range(0, size): - window.append(next(i)) - yield window - except StopIteration: - # handle the case where iterable's length is less than the window size. - return - for e in i: - window = window[1:] + [e] - yield window - - -def _contiguous(sorted_grams): - """Test whether a sequence of grams is contiguous. - - Args: - sorted_grams: _Grams which are sorted in increasing order. - Returns: - True if `sorted_grams` are touching each other. - - E.g., - _contiguous([(1, 4), (4, 5), (5, 10)]) == True - _contiguous([(1, 2), (4, 5)]) == False - """ - for a, b in _window(sorted_grams, 2): - if a.end != b.begin: - return False - return True - - -def _masking_ngrams(grams, max_ngram_size, max_masked_tokens, rng): - """Create a list of masking {1, ..., n}-grams from a list of one-grams. - - This is an extension of 'whole word masking' to mask multiple, contiguous - words (e.g., "the red boat"). 
- - Each input gram represents the token indices of a single word, - words: ["the", "red", "boat"] - tokens: ["the", "red", "boa", "##t"] - grams: [(0,1), (1,2), (2,4)] - - For a `max_ngram_size` of three, possible output masks include: - 1-grams: (0,1), (1,2), (2,4) - 2-grams: (0,2), (1,4) - 3-grams: (0,4) - - Output masks will not overlap and contain at most `max_masked_tokens` total - tokens. E.g., for the example above with `max_masked_tokens` as three, - valid outputs are: - [(0,1), (1,2)] # "the", "red" covering two tokens - [(1,2), (2,4)] # "red", "boa", "##t" covering three tokens - - The length of the selected n-gram follows a zipf weighting to - favor shorter n-gram sizes (weight(1)=1, weight(2)=1/2, weight(3)=1/3, ...). - - Args: - grams: List of one-grams. - max_ngram_size: Maximum number of contiguous one-grams combined to create - an n-gram. - max_masked_tokens: Maximum total number of tokens to be masked. - rng: `random.Random` generator. - - Returns: - A list of n-grams to be used as masks. - """ - if not grams: - return None - - grams = sorted(grams) - num_tokens = grams[-1].end - - # Ensure our grams are valid (i.e., they don't overlap). - for a, b in _window(grams, 2): - if a.end > b.begin: - raise ValueError("overlapping grams: {}".format(grams)) - - # Build map from n-gram length to list of n-grams. - ngrams = {i: [] for i in range(1, max_ngram_size+1)} - for gram_size in range(1, max_ngram_size+1): - for g in _window(grams, gram_size): - if _contiguous(g): - # Add an n-gram which spans these one-grams. - ngrams[gram_size].append(_Gram(g[0].begin, g[-1].end)) - - # Shuffle each list of n-grams. - for v in ngrams.values(): - rng.shuffle(v) - - # Create the weighting for n-gram length selection. - # Stored cumulatively for `random.choices` below. - cumulative_weights = list( - itertools.accumulate([1./n for n in range(1, max_ngram_size+1)])) - - output_ngrams = [] - # Keep a bitmask of which tokens have been masked. - masked_tokens = [False] * num_tokens - # Loop until we have enough masked tokens or there are no more candidate - # n-grams of any length. - # Each code path should ensure one or more elements from `ngrams` are removed - # to guarantee this loop terminates. - while (sum(masked_tokens) < max_masked_tokens and - sum(len(s) for s in ngrams.values())): - # Pick an n-gram size based on our weights. - sz = random.choices(range(1, max_ngram_size+1), - cum_weights=cumulative_weights)[0] - - # Ensure this size doesn't result in too many masked tokens. - # E.g., a two-gram contains _at least_ two tokens. - if sum(masked_tokens) + sz > max_masked_tokens: - # All n-grams of this length are too long and can be removed from - # consideration. - ngrams[sz].clear() - continue - - # All of the n-grams of this size have been used. - if not ngrams[sz]: - continue - - # Choose a random n-gram of the given size. - gram = ngrams[sz].pop() - num_gram_tokens = gram.end-gram.begin - - # Check if this would add too many tokens. - if num_gram_tokens + sum(masked_tokens) > max_masked_tokens: - continue - - # Check if any of the tokens in this gram have already been masked. - if sum(masked_tokens[gram.begin:gram.end]): - continue - - # Found a usable n-gram! Mark its tokens as masked and add it to the output. - masked_tokens[gram.begin:gram.end] = [True] * (gram.end-gram.begin) - output_ngrams.append(gram) - return output_ngrams - - -def _wordpieces_to_grams(tokens): - """Reconstitute grams (words) from `tokens`. 
- - E.g., - tokens: ['[CLS]', 'That', 'lit', '##tle', 'blue', 'tru', '##ck', '[SEP]'] - grams: [ [1,2), [2, 4), [4,5) , [5, 6)] - - Args: - tokens: list of wordpieces - Returns: - List of _Grams representing spans of whole words - (without "[CLS]" and "[SEP]"). - """ - grams = [] - gram_start_pos = None - for i, token in enumerate(tokens): - if gram_start_pos is not None and token.startswith("##"): - continue - if gram_start_pos is not None: - grams.append(_Gram(gram_start_pos, i)) - if token not in ["[CLS]", "[SEP]"]: - gram_start_pos = i - else: - gram_start_pos = None - if gram_start_pos is not None: - grams.append(_Gram(gram_start_pos, len(tokens))) - return grams - - -def create_masked_lm_predictions(tokens, masked_lm_prob, - max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask, - max_ngram_size=None): - """Creates the predictions for the masked LM objective.""" - if do_whole_word_mask: - grams = _wordpieces_to_grams(tokens) - else: - # Here we consider each token to be a word to allow for sub-word masking. - if max_ngram_size: - raise ValueError("cannot use ngram masking without whole word masking") - grams = [_Gram(i, i+1) for i in range(0, len(tokens)) - if tokens[i] not in ["[CLS]", "[SEP]"]] - - num_to_predict = min(max_predictions_per_seq, - max(1, int(round(len(tokens) * masked_lm_prob)))) - # Generate masks. If `max_ngram_size` in [0, None] it means we're doing - # whole word masking or token level masking. Both of these can be treated - # as the `max_ngram_size=1` case. - masked_grams = _masking_ngrams(grams, max_ngram_size or 1, - num_to_predict, rng) - masked_lms = [] - output_tokens = list(tokens) - for gram in masked_grams: - # 80% of the time, replace all n-gram tokens with [MASK] - if rng.random() < 0.8: - replacement_action = lambda idx: "[MASK]" - else: - # 10% of the time, keep all the original n-gram tokens. - if rng.random() < 0.5: - replacement_action = lambda idx: tokens[idx] - # 10% of the time, replace each n-gram token with a random word. - else: - replacement_action = lambda idx: rng.choice(vocab_words) - - for idx in range(gram.begin, gram.end): - output_tokens[idx] = replacement_action(idx) - masked_lms.append(MaskedLmInstance(index=idx, label=tokens[idx])) - - assert len(masked_lms) <= num_to_predict - masked_lms = sorted(masked_lms, key=lambda x: x.index) - - masked_lm_positions = [] - masked_lm_labels = [] - for p in masked_lms: - masked_lm_positions.append(p.index) - masked_lm_labels.append(p.label) - - return (output_tokens, masked_lm_positions, masked_lm_labels) - - -def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng): - """Truncates a pair of sequences to a maximum sequence length.""" - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_num_tokens: - break - - trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b - assert len(trunc_tokens) >= 1 - - # We want to sometimes truncate from the front and sometimes from the - # back to add more randomness and avoid biases. 
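  # Worked example (illustrative values): with max_num_tokens=4,
  # tokens_a=["a", "b", "c"] and tokens_b=["d", "e"], the combined length is 5,
  # so one token is dropped from tokens_a (the longer side), from the front or
  # the back with equal probability, leaving exactly 4 tokens in total.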
- if rng.random() < 0.5: - del trunc_tokens[0] - else: - trunc_tokens.pop() - - -def main(_): - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - - input_files = [] - for input_pattern in FLAGS.input_file.split(","): - input_files.extend(tf.io.gfile.glob(input_pattern)) - - logging.info("*** Reading from input files ***") - for input_file in input_files: - logging.info(" %s", input_file) - - rng = random.Random(FLAGS.random_seed) - instances = create_training_instances( - input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor, - FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq, - rng, FLAGS.do_whole_word_mask, FLAGS.max_ngram_size) - - output_files = FLAGS.output_file.split(",") - logging.info("*** Writing to output files ***") - for output_file in output_files: - logging.info(" %s", output_file) - - write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length, - FLAGS.max_predictions_per_seq, output_files, - FLAGS.gzip_compress, - FLAGS.use_v2_feature_names) - - -if __name__ == "__main__": - flags.mark_flag_as_required("input_file") - flags.mark_flag_as_required("output_file") - flags.mark_flag_as_required("vocab_file") - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_pretraining_data_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_pretraining_data_test.py deleted file mode 100644 index fe7093064a4914e832c0057d7c3596c24090a444..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_pretraining_data_test.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for official.nlp.data.create_pretraining_data.""" -import random - -import tensorflow as tf - -from official.nlp.data import create_pretraining_data as cpd - -_VOCAB_WORDS = ["vocab_1", "vocab_2"] - - -class CreatePretrainingDataTest(tf.test.TestCase): - - def assertTokens(self, input_tokens, output_tokens, masked_positions, - masked_labels): - # Ensure the masked positions are unique. 
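    # `assertCountEqual` compares elements regardless of order, so checking
    # the list against its own de-duplicated set fails exactly when some
    # position appears more than once.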
- self.assertCountEqual(masked_positions, set(masked_positions)) - - # Ensure we can reconstruct the input from the output. - reconstructed_tokens = output_tokens - for pos, label in zip(masked_positions, masked_labels): - reconstructed_tokens[pos] = label - self.assertEqual(input_tokens, reconstructed_tokens) - - # Ensure each label is valid. - for pos, label in zip(masked_positions, masked_labels): - output_token = output_tokens[pos] - if (output_token == "[MASK]" or output_token in _VOCAB_WORDS or - output_token == input_tokens[pos]): - continue - self.fail("invalid mask value: {}".format(output_token)) - - def test_wordpieces_to_grams(self): - tests = [ - (["That", "cone"], [(0, 1), (1, 2)]), - (["That", "cone", "##s"], [(0, 1), (1, 3)]), - (["Swit", "##zer", "##land"], [(0, 3)]), - (["[CLS]", "Up", "##dog"], [(1, 3)]), - (["[CLS]", "Up", "##dog", "[SEP]", "Down"], [(1, 3), (4, 5)]), - ] - for inp, expected in tests: - output = cpd._wordpieces_to_grams(inp) - self.assertEqual(expected, output) - - def test_window(self): - input_list = [1, 2, 3, 4] - window_outputs = [ - (1, [[1], [2], [3], [4]]), - (2, [[1, 2], [2, 3], [3, 4]]), - (3, [[1, 2, 3], [2, 3, 4]]), - (4, [[1, 2, 3, 4]]), - (5, []), - ] - for window, expected in window_outputs: - output = cpd._window(input_list, window) - self.assertEqual(expected, list(output)) - - def test_create_masked_lm_predictions(self): - tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"] - rng = random.Random(123) - for _ in range(0, 5): - output_tokens, masked_positions, masked_labels = ( - cpd.create_masked_lm_predictions( - tokens=tokens, - masked_lm_prob=1.0, - max_predictions_per_seq=3, - vocab_words=_VOCAB_WORDS, - rng=rng, - do_whole_word_mask=False, - max_ngram_size=None)) - self.assertEqual(len(masked_positions), 3) - self.assertEqual(len(masked_labels), 3) - self.assertTokens(tokens, output_tokens, masked_positions, masked_labels) - - def test_create_masked_lm_predictions_whole_word(self): - tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"] - rng = random.Random(345) - for _ in range(0, 5): - output_tokens, masked_positions, masked_labels = ( - cpd.create_masked_lm_predictions( - tokens=tokens, - masked_lm_prob=1.0, - max_predictions_per_seq=3, - vocab_words=_VOCAB_WORDS, - rng=rng, - do_whole_word_mask=True, - max_ngram_size=None)) - # since we can't get exactly three tokens without breaking a word we - # only take two. - self.assertEqual(len(masked_positions), 2) - self.assertEqual(len(masked_labels), 2) - self.assertTokens(tokens, output_tokens, masked_positions, masked_labels) - # ensure that we took an entire word. 
- self.assertIn(masked_labels, [["a", "##a"], ["b", "##b"], ["c", "##c"]]) - - def test_create_masked_lm_predictions_ngram(self): - tokens = ["[CLS]"] + ["tok{}".format(i) for i in range(0, 512)] + ["[SEP]"] - rng = random.Random(345) - for _ in range(0, 5): - output_tokens, masked_positions, masked_labels = ( - cpd.create_masked_lm_predictions( - tokens=tokens, - masked_lm_prob=1.0, - max_predictions_per_seq=76, - vocab_words=_VOCAB_WORDS, - rng=rng, - do_whole_word_mask=True, - max_ngram_size=3)) - self.assertEqual(len(masked_positions), 76) - self.assertEqual(len(masked_labels), 76) - self.assertTokens(tokens, output_tokens, masked_positions, masked_labels) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_xlnet_pretraining_data.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_xlnet_pretraining_data.py deleted file mode 100644 index 3afbffaae4656a86e8b6f609840c9bd849e1dc46..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_xlnet_pretraining_data.py +++ /dev/null @@ -1,737 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Create LM TF examples for XLNet.""" - -import json -import math -import os - -import random -from typing import Iterable, Mapping, List, Optional, Tuple -import unicodedata - -# Import libraries - -from absl import app -from absl import flags -from absl import logging - -import dataclasses -import numpy as np -import tensorflow as tf - -from official.nlp.bert import tokenization - -special_symbols = { - "<unk>": 0, - "<s>": 1, - "</s>": 2, - "<cls>": 3, - "<sep>": 4, - "<pad>": 5, - "<mask>": 6, - "<eod>": 7, - "<eop>": 8, -} - -FLAGS = flags.FLAGS - -flags.DEFINE_integer("seq_length", 512, - help="Sequence length.") -flags.DEFINE_integer("reuse_length", 256, - help="Number of tokens that can be reused as memory. 
" - "Could be half of `seq_len`.") -flags.DEFINE_string("input_file", None, - "Input raw text file (or comma-separated list of files).") -flags.DEFINE_string( - "save_dir", None, - "Directory for saving processed data.") -flags.DEFINE_string("sp_model_file", "", - "The path to the model used by sentence piece tokenizer.") -flags.DEFINE_bool("use_eod_token", True, - "Whether or not to include EOD tokens.") -flags.DEFINE_bool("bi_data", True, "Whether or not to use bi-directional data.") -flags.DEFINE_bool( - "do_lower_case", True, - "Whether to lower case the input text. Should be True for uncased " - "models and False for cased models.") -flags.DEFINE_integer("per_host_batch_size", 32, "Batch size per host.") -flags.DEFINE_integer("num_cores_per_host", 16, - "The number of (TPU) cores per host.") -flags.DEFINE_string("prefix", "", "Filename prefix.") -flags.DEFINE_string("suffix", "", "Filename suffix.") - -flags.DEFINE_integer("task_id", None, - "The id of the current task.") -flags.DEFINE_integer("num_tasks", None, - "The total number of tasks.") -flags.DEFINE_integer("num_passes", 1, "The number of times to run the script.") - - -@dataclasses.dataclass -class TrainingInstance: - """Representation of a single XLNet Pretraining instance.""" - data: Iterable[int] - segment_ids: Iterable[int] - boundary_indices: Iterable[int] - label: int - - def to_feature(self) -> Mapping[str, tf.train.Feature]: - feat = lambda x: tf.train.Feature(int64_list=tf.train.Int64List(value=x)) - return dict( - input_word_ids=feat(self.data), - input_type_ids=feat(self.segment_ids), - boundary_indices=feat(self.boundary_indices), - label=feat([self.label])) - - def to_example(self) -> tf.train.Example: - return tf.train.Example( - features=tf.train.Features(feature=self.to_feature())) - - def __str__(self): - def seq_to_str(seq): - return " ".join([str(x) for x in seq]) - - s = "" - s += "tokens: %s\n" % seq_to_str(self.data) - s += "segment_ids: %s\n" % seq_to_str(self.segment_ids) - s += "boundary_indices: %s\n" % seq_to_str(self.boundary_indices) - s += "label: %s\n" % self.label - s += "\n" - return s - - def __repr__(self): - return self.__str__() - - -def _preprocess_line(line: str, do_lower_case: bool = False) -> str: - """Preprocesses an individual raw text line. - - This function will: - - Remove extraneous spaces. - - Replace `` with ", and '' with ". - - Replaces accents. - - Applies lower casing. - - Args: - line: The input line to preprocess. - do_lower_case: Whether or not to lower case the text. - - Returns: - The preprocessed line. - - """ - line = " ".join(line.split()) - line = line.replace("``", "\"").replace("''", "\"") - - # Replace accents. - line = unicodedata.normalize("NFKD", line) - line = "".join([c for c in line if not unicodedata.combining(c)]) - - if do_lower_case: - line = line.lower() - return line - - -def preprocess_and_tokenize_input_files( - input_files: Iterable[str], - tokenizer: tokenization.FullSentencePieceTokenizer, - use_eod: bool = True, - do_lower_case: bool = False, - log_example_freq: int = 100000) -> List[Tuple[np.array, np.array]]: - """Preprocesses and encodes raw text from input files. - - This function preprocesses raw text and encodes them into tokens using a - `SentencePieceModel` tokenization method. This also provides the sentence - indicator for each token. - - Args: - input_files: The list of input file names. - tokenizer: The SentencePiece tokenizer that has the attribute `sp_model`. - use_eod: Whether or not to use an EOD indicator. 
If `False`, then EOD is - not included. - do_lower_case: Whether or not to apply lower casing during raw text - preprocessing. - log_example_freq: The optional field for how many lines to process before - emitting an info log. - - Returns: - The preprocessed list. Each entry in the list is a tuple consisting of - the token IDs and the sentence IDs. - - """ - all_data = [] - eod_symbol = special_symbols["<eod>"] - - total_number_of_lines = 0 - - # Input file format: - # (1) One sentence per line. These should ideally be actual sentences, not - # entire paragraphs or arbitrary spans of text. (Because we use the - # sentence boundaries for the "next sentence prediction" task). - # (2) Blank lines between documents. Document boundaries are needed so - # that the "next sentence prediction" task doesn't span between documents. - for input_file in input_files: - line_count = 0 - logging.info("Preprocessing %s", input_file) - - all_tokens = [] - all_sentence_ids = [] - - sentence_id = True - - with tf.io.gfile.GFile(input_file, "rb") as reader: - while True: - line = tokenization.convert_to_unicode(reader.readline()) - if not line: - break - - line_count += 1 - if line_count % log_example_freq == 0: - logging.info("Loading line %d", line_count) - - line = line.strip() - - if not line: - if use_eod: - token_ids = [eod_symbol] - sentence_id = not sentence_id - else: - continue - else: - preprocessed_line = _preprocess_line( - line=line, do_lower_case=do_lower_case) - token_ids = tokenization.encode_ids( - sp_model=tokenizer.sp_model, text=preprocessed_line) - - all_tokens.extend(token_ids) - all_sentence_ids.extend([sentence_id] * len(token_ids)) - sentence_id = not sentence_id - logging.info("Finished processing %s. Number of lines: %d", - input_file, line_count) - if line_count == 0: - continue - total_number_of_lines += line_count - all_tokens = np.array(all_tokens, dtype=np.int64) - all_sentence_ids = np.array(all_sentence_ids, dtype=np.bool) - all_data.append((all_tokens, all_sentence_ids)) - - logging.info("Completed text preprocessing. Total number of lines: %d", - total_number_of_lines) - return all_data - - -def _reshape_to_batch_dimensions( - tokens: np.array, - sentence_ids: np.array, - per_host_batch_size: int) -> Tuple[np.array, np.array]: - """Truncates and reshapes input data with a batch major dimension. - - Args: - tokens: The input token ids. This should have the same shape as - `sentence_ids`. - sentence_ids: The input sentence ids. This should have the same shape as - `token_ids`. - per_host_batch_size: The target per-host batch size. - - Returns: - The tuple of reshaped tokens and sentence_ids. - """ - num_steps = len(tokens) // per_host_batch_size - truncated_data_length = num_steps * per_host_batch_size - - logging.info("per_host_batch_size: %d", per_host_batch_size) - logging.info("num_steps: %d", num_steps) - def truncate_and_reshape(a): - return a[:truncated_data_length].reshape((per_host_batch_size, num_steps)) - - return (truncate_and_reshape(tokens), truncate_and_reshape(sentence_ids)) - - -def _create_a_and_b_segments( - tokens: np.array, - sentence_ids: np.array, - begin_index: int, - total_length: int, - no_cut_probability: float = 0.5): - """Splits segments A and B from a single instance of tokens and sentence ids. - - Args: - tokens: The 1D input token ids. This represents an individual entry within a - batch. - sentence_ids: The 1D input sentence ids. This represents an individual entry - within a batch. This should be the same length as `tokens`. 
- begin_index: The reference beginning index to split data. - total_length: The target combined length of segments A and B. - no_cut_probability: The probability of not cutting a segment despite - a cut possibly existing. - - Returns: - A tuple consisting of A data, B data, and label. - - """ - data_length = tokens.shape[0] - if begin_index + total_length >= data_length: - logging.info("[_create_segments]: begin_index %d + total_length %d >= " - "data_length %d", begin_index, total_length, data_length) - return None - - end_index = begin_index + 1 - cut_indices = [] - - # Identify all indices where sentence IDs change from one to the next. - while end_index < data_length: - if sentence_ids[end_index] != sentence_ids[end_index - 1]: - if end_index - begin_index >= total_length: - break - cut_indices.append(end_index) - end_index += 1 - - a_begin = begin_index - - if not cut_indices or random.random() < no_cut_probability: - # Segments A and B are contained within the same sentence. - label = 0 - if not cut_indices: - a_end = end_index - else: - a_end = random.choice(cut_indices) - b_length = max(1, total_length - (a_end - a_begin)) - b_begin = random.randint(0, data_length - 1 - b_length) - b_end = b_begin + b_length - - while b_begin > 0 and sentence_ids[b_begin - 1] == sentence_ids[b_begin]: - b_begin -= 1 - while (b_end < data_length - 1 and - sentence_ids[b_end - 1] == sentence_ids[b_end]): - b_end += 1 - else: - # Segments A and B are different sentences. - label = 1 - a_end = random.choice(cut_indices) - b_begin = a_end - b_end = end_index - - while a_end - a_begin + b_end - b_begin > total_length: - if a_end - a_begin > b_end - b_begin: - # Delete only the right side for the LM objective. - a_end -= 1 - else: - b_end -= 1 - if a_end >= data_length or b_end >= data_length: - logging.info("[_create_segments]: a_end %d or b_end %d >= data_length %d", - a_end, b_end, data_length) - return None - - a_data = tokens[a_begin: a_end] - b_data = tokens[b_begin: b_end] - return a_data, b_data, label - - -def _is_functional_piece(piece: str) -> bool: - return piece != "<unk>" and piece.startswith("<") and piece.endswith(">") - - -def _is_start_piece(piece: str) -> bool: - special_pieces = set(list('!"#$%&\"()*+,-./:;?@[\\]^_`{|}~')) - if (piece.startswith("▁") or piece in special_pieces): - return True - else: - return False - - -def _get_boundary_indices( - data: np.array, - tokenizer: tokenization.FullSentencePieceTokenizer) -> np.array: - """Gets the boundary indices of whole words.""" - seq_length = len(data) - boundary_indices = [] - for index, piece in enumerate(tokenizer.convert_ids_to_tokens(data.tolist())): - if _is_start_piece(piece) and not _is_functional_piece(piece): - boundary_indices.append(index) - boundary_indices.append(seq_length) - return boundary_indices - - -def _convert_tokens_to_instances( - tokens: np.array, - sentence_ids: np.array, - per_host_batch_size: int, - seq_length: int, - reuse_length: int, - bi_data: bool, - tokenizer: tokenization.FullSentencePieceTokenizer, - num_cores_per_host: int = 0, - logging_frequency: int = 500) -> List[TrainingInstance]: - """Converts tokens and sentence IDs into individual training instances. - - The format of data in the XLNet pretraining task is very similar to the - BERT pretraining task. Two segments A and B are randomly sampled, and the - concatenation of A and B into a single sequence is used to perform - language modeling. 
- - To create an XLNet Pretraining instance from a single long sequence, S: - - Create a segment of length `reuse_length`. This first segment represents - past tokens. During modeling, this segment is used to cache obtained - content representations for the segment recurrence mechanism. - - Similar to BERT, create a segment of length `seq_length` - `reuse_length` - composed of A and B segments. - For XLNet, the order is "A", "SEP", "B", "SEP", "CLS". - - Args: - tokens: All tokens concatenated into a single list. - sentence_ids: All sentence IDs concatenated into a single list. - per_host_batch_size: The target batch size per host. - seq_length: The max sequence length. - reuse_length: The number of tokens to use from the previous segment. - bi_data: Whether or not to use bidirectional data. - tokenizer: The SentencePiece tokenizer that has the attribute `sp_model`. - num_cores_per_host: The number of cores per host. This is required if - `bi_data` = `True`. - logging_frequency: The frequency at which to log status updates. - - Returns: - A list of `TrainingInstance` objects. - """ - instances = [] - - per_core_batch_size = (per_host_batch_size // num_cores_per_host - if bi_data else None) - - if bi_data: - logging.info("Bi-directional data enabled.") - assert per_host_batch_size % (2 * num_cores_per_host) == 0 - forward_tokens, forward_sentence_ids = _reshape_to_batch_dimensions( - tokens=tokens, - sentence_ids=sentence_ids, - per_host_batch_size=per_host_batch_size // 2) - forward_data_shape = (num_cores_per_host, 1, per_core_batch_size // 2, -1) - - forward_tokens = forward_tokens.reshape(forward_data_shape) - forward_sentence_ids = forward_sentence_ids.reshape(forward_data_shape) - - backwards_tokens = forward_tokens[:, :, :, ::-1] - backwards_sentence_ids = forward_sentence_ids[:, :, :, ::-1] - - tokens = np.concatenate([forward_tokens, backwards_tokens], 1).reshape( - per_host_batch_size, -1) - sentence_ids = np.concatenate( - [forward_sentence_ids, backwards_sentence_ids], 1).reshape( - per_host_batch_size, -1) - else: - logging.info("Bi-directional data disabled.") - tokens, sentence_ids = _reshape_to_batch_dimensions( - tokens=tokens, - sentence_ids=sentence_ids, - per_host_batch_size=per_host_batch_size) - - logging.info("Tokens shape: %s", tokens.shape) - - data_length = tokens.shape[1] - sep = np.array([special_symbols["<sep>"]], dtype=np.int64) - cls = np.array([special_symbols["<cls>"]], dtype=np.int64) - # 2 sep, 1 cls - num_special_tokens = 3 - - data_index = 0 - batch_number = 0 - step_size = reuse_length if reuse_length else seq_length - num_batches = math.ceil(data_length / step_size) - - while data_index + seq_length <= data_length: - if batch_number % logging_frequency == 0: - logging.info("Processing batch %d of %d", batch_number, num_batches) - - for batch_index in range(per_host_batch_size): - previous_segment_tokens = tokens[ - batch_index, data_index: data_index + reuse_length] - - results = _create_a_and_b_segments( - tokens=tokens[batch_index], - sentence_ids=sentence_ids[batch_index], - begin_index=data_index + reuse_length, - total_length=seq_length - reuse_length - num_special_tokens) - - if results is None: - logging.info("Stopping at data index: %d", data_index) - break - a_data, b_data, label = results - - data = np.concatenate( - [previous_segment_tokens, a_data, sep, b_data, sep, cls]) - a_length = a_data.shape[0] - b_length = b_data.shape[0] - segment_ids = ([0] * (reuse_length + a_length) + [0] - + [1] * b_length + [1] + [2]) - boundary_indices = 
_get_boundary_indices(tokenizer=tokenizer, - data=data) - assert len(data) == seq_length - assert len(segment_ids) == seq_length - assert len(boundary_indices) > 0 # pylint: disable=g-explicit-length-test - - instances.append(TrainingInstance( - data=data, - segment_ids=segment_ids, - boundary_indices=boundary_indices, - label=label)) - batch_number += 1 - data_index += step_size - return instances - - -def write_instances_to_tfrecord( - instances: Iterable[TrainingInstance], - save_path: str): - """Writes instances to TFRecord.""" - record_writer = tf.io.TFRecordWriter(save_path) - logging.info("Start writing to %s.", save_path) - - for i, instance in enumerate(instances): - if i < 5: - logging.info("Instance %d: %s", i, str(instance)) - record_writer.write(instance.to_example().SerializeToString()) - - record_writer.close() - logging.info("Done writing %s.", save_path) - - -def shuffle_and_combine_preprocessed_data( - all_data: List[Tuple[np.array, np.array]]) -> Tuple[np.array, np.array]: - """Shuffles and combines preprocessed token/sentence IDs from documents.""" - document_permutation = np.random.permutation(len(all_data)) - - previous_sentence_id = None - - all_tokens, all_sentence_ids = [], [] - for document_index in document_permutation: - tokens, sentence_ids = all_data[document_index] - # pylint: disable=g-explicit-length-test - if len(tokens) == 0: - continue - if (previous_sentence_id is not None and - sentence_ids[0] == previous_sentence_id): - sentence_ids = np.logical_not(sentence_ids) - - all_tokens.append(tokens) - all_sentence_ids.append(sentence_ids) - - previous_sentence_id = sentence_ids[-1] - - return np.concatenate(all_tokens), np.concatenate(all_sentence_ids) - - -def get_tfrecord_name( - per_host_batch_size: int, - num_cores_per_host: int, - seq_length: int, - bi_data: bool, - reuse_length: int, - do_lower_case: bool, - use_eod_token: bool, - prefix: str = "", - suffix: str = "", - pass_id: int = 0, - num_passes: int = 1, - task_id: int = None, - num_tasks: int = None) -> str: - """Formats the resulting TFRecord name based on provided inputs.""" - components = [] - if prefix: - components.append(prefix) - components.append("seqlen-{}".format(seq_length)) - if reuse_length == 0: - components.append("memless") - else: - components.append("reuse-{}".format(reuse_length)) - components.append("bs-{}".format(per_host_batch_size)) - components.append("cores-{}".format(num_cores_per_host)) - - if do_lower_case: - components.append("uncased") - else: - components.append("cased") - if use_eod_token: - components.append("eod") - if bi_data: - components.append("bi") - else: - components.append("uni") - - if suffix: - components.append(suffix) - - s = "_".join(components) + ".tfrecord" - if num_passes == 1 and task_id is None: - return s - - if task_id is None: - num_tasks = 1 - task_id = 0 - - current_shard = task_id * num_passes + pass_id - total_shards = num_tasks * num_passes - return s + "-{}-of-{}".format(current_shard, total_shards) - - -def create_tfrecords( - tokenizer: tokenization.FullSentencePieceTokenizer, - input_file_or_files: str, - use_eod_token: bool, - do_lower_case: bool, - per_host_batch_size: int, - seq_length: int, - reuse_length: int, - bi_data: bool, - num_cores_per_host: int, - save_dir: str, - prefix: str = "", - suffix: str = "", - num_tasks: Optional[int] = None, - task_id: Optional[int] = None, - num_passes: int = 1): - """Runs the end-to-end preprocessing pipeline.""" - - logging.info("Input configuration:") - logging.info("input file(s): %s", 
input_file_or_files) - logging.info("use_eod_token: %s", use_eod_token) - logging.info("do_lower_case: %s", do_lower_case) - logging.info("per_host_batch_size: %d", per_host_batch_size) - logging.info("seq_length: %d", seq_length) - logging.info("reuse_length: %d", reuse_length) - logging.info("bi_data: %s", bi_data) - logging.info("num_cores_per_host: %d", num_cores_per_host) - logging.info("save_dir: %s", save_dir) - if task_id is not None and num_tasks is not None: - logging.info("task_id: %d", task_id) - logging.info("num_tasks: %d", num_tasks) - - input_files = [] - for input_pattern in input_file_or_files.split(","): - input_files.extend(tf.io.gfile.glob(input_pattern)) - - logging.info("*** Reading from input files ***") - for input_file in input_files: - logging.info(" %s", input_file) - - logging.info("Shuffling the files with a fixed random seed.") - np.random.shuffle(input_files) - if num_tasks is not None: - assert task_id is not None - logging.info("Total number of input files: %d", len(input_files)) - logging.info("Splitting into %d shards of %d files each.", - num_tasks, len(input_files) // num_tasks) - input_files = input_files[task_id::num_tasks] - - all_data = preprocess_and_tokenize_input_files( - input_files=input_files, - tokenizer=tokenizer, - use_eod=use_eod_token, - do_lower_case=do_lower_case) - for pass_id in range(num_passes): - logging.info("Beginning pass %d of %d", pass_id, num_passes) - tokens, sentence_ids = shuffle_and_combine_preprocessed_data(all_data) - - assert len(tokens) == len(sentence_ids) - - filename = get_tfrecord_name( - per_host_batch_size=per_host_batch_size, - num_cores_per_host=num_cores_per_host, - seq_length=seq_length, - bi_data=bi_data, - use_eod_token=use_eod_token, - reuse_length=reuse_length, - do_lower_case=do_lower_case, - prefix=prefix, - suffix=suffix, - pass_id=pass_id, - num_passes=num_passes, - num_tasks=num_tasks, - task_id=task_id) - save_path = os.path.join(save_dir, filename) - if os.path.exists(save_path): - # If the path already exists, then we were probably preempted but - # previously wrote this file. 
- logging.info("%s already exists, skipping this batch.", save_path) - else: - instances = _convert_tokens_to_instances( - tokenizer=tokenizer, - tokens=tokens, - sentence_ids=sentence_ids, - per_host_batch_size=per_host_batch_size, - seq_length=seq_length, - reuse_length=reuse_length, - bi_data=bi_data, - num_cores_per_host=num_cores_per_host) - write_instances_to_tfrecord(instances=instances, save_path=save_path) - - if task_id is None or task_id == 0: - corpus_info = { - "vocab_size": 32000, - "per_host_batch_size": per_host_batch_size, - "num_cores_per_host": num_cores_per_host, - "seq_length": seq_length, - "reuse_length": reuse_length, - "do_lower_case": do_lower_case, - "bi_data": bi_data, - "use_eod_token": use_eod_token, - } - corpus_fname = os.path.basename(filename) + ".json" - corpus_destination = os.path.join(save_dir, corpus_fname) - logging.info("Saving corpus info to %s", corpus_destination) - - with tf.io.gfile.GFile(corpus_destination, "w") as fp: - json.dump(corpus_info, fp) - - -def main(_): - tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) - create_tfrecords( - tokenizer=tokenizer, - input_file_or_files=FLAGS.input_file, - use_eod_token=FLAGS.use_eod_token, - do_lower_case=FLAGS.do_lower_case, - per_host_batch_size=FLAGS.per_host_batch_size, - seq_length=FLAGS.seq_length, - reuse_length=FLAGS.reuse_length, - bi_data=FLAGS.bi_data, - num_cores_per_host=FLAGS.num_cores_per_host, - save_dir=FLAGS.save_dir, - prefix=FLAGS.prefix, - suffix=FLAGS.suffix, - num_tasks=FLAGS.num_tasks, - task_id=FLAGS.task_id, - num_passes=FLAGS.num_passes) - - -if __name__ == "__main__": - np.random.seed(0) - logging.set_verbosity(logging.INFO) - app.run(main) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_xlnet_pretraining_data_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_xlnet_pretraining_data_test.py deleted file mode 100644 index 94cf00843489eb6edac8f5133d296ff3cd27a913..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/create_xlnet_pretraining_data_test.py +++ /dev/null @@ -1,371 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -"""Tests for official.nlp.data.create_xlnet_pretraining_data.""" -import os -import tempfile -from typing import List - -from absl import logging -from absl.testing import parameterized - -import numpy as np -import tensorflow as tf - -from official.nlp.data import create_xlnet_pretraining_data as cpd - -_VOCAB_WORDS = ["vocab_1", "vocab_2"] - - -# pylint: disable=invalid-name -def _create_files( - temp_dir: str, file_contents: List[List[str]]) -> List[str]: - """Writes arbitrary documents into files.""" - root_dir = tempfile.mkdtemp(dir=temp_dir) - files = [] - - for i, file_content in enumerate(file_contents): - destination = os.path.join(root_dir, "%d.txt" % i) - with open(destination, "wb") as f: - for line in file_content: - f.write(line.encode("utf-8")) - files.append(destination) - return files - - -def _get_mock_tokenizer(): - """Creates a mock tokenizer.""" - - class MockSpieceModel: - """Mock Spiece model for testing.""" - - def __init__(self): - self._special_piece_to_id = { - "": 0, - } - for piece in set(list('!"#$%&\"()*+,-./:;?@[\\]^_`{|}~')): - self._special_piece_to_id[piece] = 1 - - def EncodeAsPieces(self, inputs: str) -> List[str]: - return inputs - - def SampleEncodeAsPieces(self, - inputs: str, - nbest_size: int, - theta: float) -> List[str]: - del nbest_size, theta - return inputs - - def PieceToId(self, piece: str) -> int: - return ord(piece[0]) - - def IdToPiece(self, id_: int) -> str: - return chr(id_) * 3 - - class Tokenizer: - """Mock Tokenizer for testing.""" - - def __init__(self): - self.sp_model = MockSpieceModel() - - def convert_ids_to_tokens(self, ids: List[int]) -> List[str]: - return [self.sp_model.IdToPiece(id_) for id_ in ids] - - return Tokenizer() - - -class PreprocessDataTest(tf.test.TestCase): - - def test_remove_extraneous_space(self): - line = " abc " - output = cpd._preprocess_line(line) - self.assertEqual(output, "abc") - - def test_symbol_replacements(self): - self.assertEqual(cpd._preprocess_line("``abc``"), "\"abc\"") - self.assertEqual(cpd._preprocess_line("''abc''"), "\"abc\"") - - def test_accent_replacements(self): - self.assertEqual(cpd._preprocess_line("氓bc"), "abc") - - def test_lower_case(self): - self.assertEqual(cpd._preprocess_line("ABC", do_lower_case=True), "abc") - - def test_end_to_end(self): - self.assertEqual( - cpd._preprocess_line("HelLo ``w贸rLd``", do_lower_case=True), - "hello \"world\"") - - -class PreprocessAndTokenizeFilesTest(tf.test.TestCase): - - def test_basic_end_to_end(self): - documents = [ - [ - "This is sentence 1.\n", - "This is sentence 2.\n", - "Sentence 3 is what this is.\n", - ], - [ - "This is the second document.\n", - "This is the second line of the second document.\n" - ], - ] - input_files = _create_files(temp_dir=self.get_temp_dir(), - file_contents=documents) - all_data = cpd.preprocess_and_tokenize_input_files( - input_files=input_files, - tokenizer=_get_mock_tokenizer(), - log_example_freq=1) - - self.assertEqual(len(all_data), len(documents)) - for token_ids, sentence_ids in all_data: - self.assertEqual(len(token_ids), len(sentence_ids)) - - def test_basic_correctness(self): - documents = [["a\n", "b\n", "c\n"]] - input_files = _create_files(temp_dir=self.get_temp_dir(), - file_contents=documents) - all_data = cpd.preprocess_and_tokenize_input_files( - input_files=input_files, - tokenizer=_get_mock_tokenizer(), - log_example_freq=1) - - token_ids, sentence_ids = all_data[0] - - self.assertAllClose(token_ids, [97, 98, 99]) - self.assertAllClose(sentence_ids, [True, False, True]) - - def 
test_correctness_with_spaces_and_accents(self):
-    documents = [[
-        " å \n",
-        "b \n",
-        " c \n",
-    ]]
-    input_files = _create_files(temp_dir=self.get_temp_dir(),
-                                file_contents=documents)
-    all_data = cpd.preprocess_and_tokenize_input_files(
-        input_files=input_files,
-        tokenizer=_get_mock_tokenizer(),
-        log_example_freq=1)
-
-    token_ids, sentence_ids = all_data[0]
-
-    self.assertAllClose(token_ids, [97, 98, 99])
-    self.assertAllClose(sentence_ids, [True, False, True])
-
-
-class BatchReshapeTests(tf.test.TestCase):
-
-  def test_basic_functionality(self):
-    per_host_batch_size = 3
-    mock_shape = (20,)
-
-    # Should truncate and reshape.
-    expected_result_shape = (3, 6)
-
-    tokens = np.zeros(mock_shape)
-    sentence_ids = np.zeros(mock_shape)
-
-    reshaped_data = cpd._reshape_to_batch_dimensions(
-        tokens=tokens,
-        sentence_ids=sentence_ids,
-        per_host_batch_size=per_host_batch_size)
-    for values in reshaped_data:
-      self.assertEqual(len(values.flatten()) % per_host_batch_size, 0)
-      self.assertAllClose(values.shape, expected_result_shape)
-
-
-class CreateSegmentsTest(tf.test.TestCase):
-
-  def test_basic_functionality(self):
-    data_length = 10
-    tokens = np.arange(data_length)
-    sentence_ids = np.concatenate([np.zeros(data_length // 2),
-                                   np.ones(data_length // 2)])
-    begin_index = 0
-    total_length = 8
-    a_data, b_data, label = cpd._create_a_and_b_segments(
-        tokens=tokens,
-        sentence_ids=sentence_ids,
-        begin_index=begin_index,
-        total_length=total_length,
-        no_cut_probability=0.)
-    self.assertAllClose(a_data, [0, 1, 2, 3])
-    self.assertAllClose(b_data, [5, 6, 7, 8])
-    self.assertEqual(label, 1)
-
-  def test_no_cut(self):
-    data_length = 10
-    tokens = np.arange(data_length)
-    sentence_ids = np.zeros(data_length)
-
-    begin_index = 0
-    total_length = 8
-    a_data, b_data, label = cpd._create_a_and_b_segments(
-        tokens=tokens,
-        sentence_ids=sentence_ids,
-        begin_index=begin_index,
-        total_length=total_length,
-        no_cut_probability=0.)
-    self.assertGreater(len(a_data), 0)
-    self.assertGreater(len(b_data), 0)
-    self.assertEqual(label, 0)
-
-  def test_no_cut_with_probability(self):
-    data_length = 10
-    tokens = np.arange(data_length)
-    sentence_ids = np.concatenate([np.zeros(data_length // 2),
-                                   np.ones(data_length // 2)])
-    begin_index = 0
-    total_length = 8
-    a_data, b_data, label = cpd._create_a_and_b_segments(
-        tokens=tokens,
-        sentence_ids=sentence_ids,
-        begin_index=begin_index,
-        total_length=total_length,
-        no_cut_probability=1.)
- self.assertGreater(len(a_data), 0) - self.assertGreater(len(b_data), 0) - self.assertEqual(label, 0) - - -class CreateInstancesTest(tf.test.TestCase): - """Tests conversions of Token/Sentence IDs to training instances.""" - - def test_basic(self): - data_length = 12 - tokens = np.arange(data_length) - sentence_ids = np.zeros(data_length) - seq_length = 8 - instances = cpd._convert_tokens_to_instances( - tokens=tokens, - sentence_ids=sentence_ids, - per_host_batch_size=2, - seq_length=seq_length, - reuse_length=4, - tokenizer=_get_mock_tokenizer(), - bi_data=False, - num_cores_per_host=1, - logging_frequency=1) - for instance in instances: - self.assertEqual(len(instance.data), seq_length) - self.assertEqual(len(instance.segment_ids), seq_length) - self.assertIsInstance(instance.label, int) - self.assertIsInstance(instance.boundary_indices, list) - - -class TFRecordPathTests(tf.test.TestCase): - - def test_basic(self): - base_kwargs = dict( - per_host_batch_size=1, - num_cores_per_host=1, - seq_length=2, - reuse_length=1) - - config1 = dict( - prefix="test", - suffix="", - bi_data=True, - use_eod_token=False, - do_lower_case=True) - config1.update(base_kwargs) - expectation1 = "test_seqlen-2_reuse-1_bs-1_cores-1_uncased_bi.tfrecord" - self.assertEqual(cpd.get_tfrecord_name(**config1), expectation1) - - config2 = dict( - prefix="", - suffix="test", - bi_data=False, - use_eod_token=False, - do_lower_case=False) - config2.update(base_kwargs) - expectation2 = "seqlen-2_reuse-1_bs-1_cores-1_cased_uni_test.tfrecord" - self.assertEqual(cpd.get_tfrecord_name(**config2), expectation2) - - config3 = dict( - prefix="", - suffix="", - use_eod_token=True, - bi_data=False, - do_lower_case=True) - config3.update(base_kwargs) - expectation3 = "seqlen-2_reuse-1_bs-1_cores-1_uncased_eod_uni.tfrecord" - self.assertEqual(cpd.get_tfrecord_name(**config3), expectation3) - - -class TestCreateTFRecords(parameterized.TestCase, tf.test.TestCase): - - @parameterized.named_parameters( - ("bi_data_only", True, False, False), - ("eod_token_only", False, True, True), - ("lower_case_only", False, False, True), - ("all_enabled", True, True, True), - ) - def test_end_to_end(self, - bi_data: bool, - use_eod_token: bool, - do_lower_case: bool): - tokenizer = _get_mock_tokenizer() - - num_documents = 5 - sentences_per_document = 10 - document_length = 50 - - documents = [ - ["a " * document_length for _ in range(sentences_per_document)] - for _ in range(num_documents)] - - save_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - files = _create_files(temp_dir=self.get_temp_dir(), file_contents=documents) - - cpd.create_tfrecords( - tokenizer=tokenizer, - input_file_or_files=",".join(files), - use_eod_token=use_eod_token, - do_lower_case=do_lower_case, - per_host_batch_size=8, - seq_length=8, - reuse_length=4, - bi_data=bi_data, - num_cores_per_host=2, - save_dir=save_dir) - - self.assertTrue(any(filter(lambda x: x.endswith(".json"), - os.listdir(save_dir)))) - self.assertTrue(any(filter(lambda x: x.endswith(".tfrecord"), - os.listdir(save_dir)))) - - -if __name__ == "__main__": - np.random.seed(0) - logging.set_verbosity(logging.INFO) - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader.py deleted file mode 100644 index 03657e83acff22f75c110832ff1d9da1cb344ed7..0000000000000000000000000000000000000000 --- 
a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""An abstraction through which NLP models define input pipelines."""
-
-import abc
-from typing import Optional
-
-import tensorflow as tf
-
-
-class DataLoader(metaclass=abc.ABCMeta):
-  """An abstract class defining the APIs for the tf.data input pipeline."""
-
-  @abc.abstractmethod
-  def load(
-      self,
-      input_context: Optional[tf.distribute.InputContext] = None
-  ) -> tf.data.Dataset:
-    """Implements the DataLoader load method.
-
-    Builds the entire input pipeline inside the load method. Users can define
-    state inside the DataLoader class and return a tf.data.Dataset
-    object.
-
-    Args:
-      input_context: This is a context class that is passed to the user's input
-        function and contains information about the compute replicas and input
-        pipelines. This object is used for multi-host inputs and passed by the
-        distribution strategy.
-
-    Returns:
-      A per-host tf.data.Dataset. Note that we usually create the distributed
-      dataset through the load method, so we should not directly return a
-      distributed dataset here.
-    """
-    pass
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader_factory.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader_factory.py
deleted file mode 100644
index d9506145efb3c73f979d179f8d3c311b92790434..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader_factory.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
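To make the contract concrete, here is a minimal sketch of a loader satisfying the `DataLoader` interface above. The class name, file pattern, and feature shape are illustrative, not part of the original sources:

from typing import Optional

import tensorflow as tf

from official.nlp.data import data_loader


class SentenceTFRecordLoader(data_loader.DataLoader):
  """Hypothetical loader reading fixed-length examples from TFRecords."""

  def __init__(self, file_pattern: str, seq_length: int, batch_size: int):
    self._file_pattern = file_pattern  # illustrative constructor state
    self._seq_length = seq_length
    self._per_host_batch_size = batch_size

  def _parse(self, record: tf.Tensor):
    features = {
        'input_word_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
    }
    return tf.io.parse_single_example(record, features)

  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
    files = tf.data.Dataset.list_files(self._file_pattern)
    if input_context:
      # Shard by input pipeline so each host reads a disjoint set of files.
      files = files.shard(input_context.num_input_pipelines,
                          input_context.input_pipeline_id)
    dataset = files.interleave(tf.data.TFRecordDataset)
    dataset = dataset.map(self._parse)
    # Return the per-host dataset; distribution wrapping happens outside load().
    return dataset.batch(self._per_host_batch_size)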
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""A global factory to access NLP registered data loaders.""" - -from official.core import registry - -_REGISTERED_DATA_LOADER_CLS = {} - - -def register_data_loader_cls(data_config_cls): - """Decorates a factory of DataLoader for lookup by a subclass of DataConfig. - - This decorator supports registration of data loaders as follows: - - ``` - @dataclasses.dataclass - class MyDataConfig(DataConfig): - # Add fields here. - pass - - @register_data_loader_cls(MyDataConfig) - class MyDataLoader: - # Inherits def __init__(self, data_config). - pass - - my_data_config = MyDataConfig() - - # Returns MyDataLoader(my_data_config). - my_loader = get_data_loader(my_data_config) - ``` - - Args: - data_config_cls: a subclass of DataConfig (*not* an instance - of DataConfig). - - Returns: - A callable for use as class decorator that registers the decorated class - for creation from an instance of data_config_cls. - """ - return registry.register(_REGISTERED_DATA_LOADER_CLS, data_config_cls) - - -def get_data_loader(data_config): - """Creates a data_loader from data_config.""" - return registry.lookup(_REGISTERED_DATA_LOADER_CLS, data_config.__class__)( - data_config) diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader_factory_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader_factory_test.py deleted file mode 100644 index 707f6107e5f9f193685f110d4450088f1bc6194d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/data_loader_factory_test.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for official.nlp.data.data_loader_factory.""" - -import dataclasses -import tensorflow as tf - -from official.core import config_definitions as cfg -from official.nlp.data import data_loader_factory - - -@dataclasses.dataclass -class MyDataConfig(cfg.DataConfig): - is_training: bool = True - - -@data_loader_factory.register_data_loader_cls(MyDataConfig) -class MyDataLoader: - - def __init__(self, params): - self.params = params - - -class DataLoaderFactoryTest(tf.test.TestCase): - - def test_register_and_load(self): - train_config = MyDataConfig() - train_loader = data_loader_factory.get_data_loader(train_config) - self.assertTrue(train_loader.params.is_training) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dataloader.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dataloader.py deleted file mode 100644 index efe864a083189ea75221bbb0073cef52f2c96c18..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dataloader.py +++ /dev/null @@ -1,620 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Loads dataset for the BERT pretraining task.""" -from typing import Mapping, Optional - -from absl import logging - -import dataclasses -import numpy as np -import tensorflow as tf -from official.core import config_definitions as cfg -from official.core import input_reader -from official.nlp.data import data_loader -from official.nlp.data import data_loader_factory - - -@dataclasses.dataclass -class BertPretrainDataConfig(cfg.DataConfig): - """Data config for BERT pretraining task (tasks/masked_lm).""" - input_path: str = '' - global_batch_size: int = 512 - is_training: bool = True - seq_length: int = 512 - max_predictions_per_seq: int = 76 - use_next_sentence_label: bool = True - use_position_id: bool = False - # Historically, BERT implementations take `input_ids` and `segment_ids` as - # feature names. 
Inside the TF Model Garden implementation, the Keras model
-  # inputs are set as `input_word_ids` and `input_type_ids`. When
-  # `use_v2_feature_names` is True, the data loader assumes the tf.Examples
-  # use `input_word_ids` and `input_type_ids` as keys.
-  use_v2_feature_names: bool = False
-
-
-@data_loader_factory.register_data_loader_cls(BertPretrainDataConfig)
-class BertPretrainDataLoader(data_loader.DataLoader):
-  """A class to load the dataset for the BERT pretraining task."""
-
-  def __init__(self, params):
-    """Inits `BertPretrainDataLoader` class.
-
-    Args:
-      params: A `BertPretrainDataConfig` object.
-    """
-    self._params = params
-    self._seq_length = params.seq_length
-    self._max_predictions_per_seq = params.max_predictions_per_seq
-    self._use_next_sentence_label = params.use_next_sentence_label
-    self._use_position_id = params.use_position_id
-
-  def _name_to_features(self):
-    name_to_features = {
-        'input_mask':
-            tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'masked_lm_positions':
-            tf.io.FixedLenFeature([self._max_predictions_per_seq], tf.int64),
-        'masked_lm_ids':
-            tf.io.FixedLenFeature([self._max_predictions_per_seq], tf.int64),
-        'masked_lm_weights':
-            tf.io.FixedLenFeature([self._max_predictions_per_seq], tf.float32),
-    }
-    if self._params.use_v2_feature_names:
-      name_to_features.update({
-          'input_word_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-          'input_type_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-      })
-    else:
-      name_to_features.update({
-          'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-          'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-      })
-    if self._use_next_sentence_label:
-      name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature([1],
-                                                                       tf.int64)
-    if self._use_position_id:
-      name_to_features['position_ids'] = tf.io.FixedLenFeature(
-          [self._seq_length], tf.int64)
-    return name_to_features
-
-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
-    name_to_features = self._name_to_features()
-    example = tf.io.parse_single_example(record, name_to_features)
-
-    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
-    # So cast all int64 to int32.
-    for name in list(example.keys()):
-      t = example[name]
-      if t.dtype == tf.int64:
-        t = tf.cast(t, tf.int32)
-      example[name] = t
-
-    return example
-
-  def _parse(self, record: Mapping[str, tf.Tensor]):
-    """Parses raw tensors into a dict of tensors to be consumed by the model."""
-    x = {
-        'input_mask': record['input_mask'],
-        'masked_lm_positions': record['masked_lm_positions'],
-        'masked_lm_ids': record['masked_lm_ids'],
-        'masked_lm_weights': record['masked_lm_weights'],
-    }
-    if self._params.use_v2_feature_names:
-      x['input_word_ids'] = record['input_word_ids']
-      x['input_type_ids'] = record['input_type_ids']
-    else:
-      x['input_word_ids'] = record['input_ids']
-      x['input_type_ids'] = record['segment_ids']
-    if self._use_next_sentence_label:
-      x['next_sentence_labels'] = record['next_sentence_labels']
-    if self._use_position_id:
-      x['position_ids'] = record['position_ids']
-
-    return x
-
-  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
-    """Returns a tf.data.Dataset."""
-    reader = input_reader.InputReader(
-        params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
-    return reader.read(input_context)
-
-
-@dataclasses.dataclass
-class XLNetPretrainDataConfig(cfg.DataConfig):
-  """Data config for XLNet pretraining task.
-
-  Attributes:
-    input_path: See base class.
-    global_batch_size: See base class.
-    is_training: See base class.
-    seq_length: The length of each sequence.
-    max_predictions_per_seq: The number of predictions per sequence.
-    reuse_length: The number of tokens in a previous segment to reuse. This
-      should be the same value used during pretrain data creation.
-    sample_strategy: The strategy used to sample factorization permutations.
-      Possible values: 'single_token', 'whole_word', 'token_span', 'word_span'.
-    min_num_tokens: The minimum number of tokens to sample in a span.
-      This is used when `sample_strategy` is 'token_span'.
-    max_num_tokens: The maximum number of tokens to sample in a span.
-      This is used when `sample_strategy` is 'token_span'.
-    min_num_words: The minimum number of words to sample in a span.
-      This is used when `sample_strategy` is 'word_span'.
-    max_num_words: The maximum number of words to sample in a span.
-      This is used when `sample_strategy` is 'word_span'.
-    permutation_size: The length of the longest permutation. This can be set
-      to `reuse_length`. This should NOT be greater than `reuse_length`,
-      otherwise this may introduce data leaks.
-    leak_ratio: The percentage of masked tokens that are leaked.
-    segment_sep_id: The ID of the SEP token used when preprocessing
-      the dataset.
-    segment_cls_id: The ID of the CLS token used when preprocessing
-      the dataset.
-
-  """
-  input_path: str = ''
-  global_batch_size: int = 512
-  is_training: bool = True
-  seq_length: int = 512
-  max_predictions_per_seq: int = 76
-  reuse_length: int = 256
-  sample_strategy: str = 'word_span'
-  min_num_tokens: int = 1
-  max_num_tokens: int = 5
-  min_num_words: int = 1
-  max_num_words: int = 5
-  permutation_size: int = 256
-  leak_ratio: float = 0.1
-  segment_sep_id: int = 4
-  segment_cls_id: int = 3
-
-
-@data_loader_factory.register_data_loader_cls(XLNetPretrainDataConfig)
-class XLNetPretrainDataLoader(data_loader.DataLoader):
-  """A class to load the dataset for the XLNet pretraining task."""
-
-  def __init__(self, params: XLNetPretrainDataConfig):
-    """Inits `XLNetPretrainDataLoader` class.
-
-    Args:
-      params: An `XLNetPretrainDataConfig` object.
-    """
-    self._params = params
-    self._seq_length = params.seq_length
-    self._max_predictions_per_seq = params.max_predictions_per_seq
-    self._reuse_length = params.reuse_length
-    self._num_replicas_in_sync = None
-    self._permutation_size = params.permutation_size
-    self._sep_id = params.segment_sep_id
-    self._cls_id = params.segment_cls_id
-    self._sample_strategy = params.sample_strategy
-    self._leak_ratio = params.leak_ratio
-
-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
-    name_to_features = {
-        'input_word_ids':
-            tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'input_type_ids':
-            tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'boundary_indices':
-            tf.io.VarLenFeature(tf.int64),
-    }
-    example = tf.io.parse_single_example(record, name_to_features)
-
-    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
-    # So cast all int64 to int32.
-    for name in list(example.keys()):
-      t = example[name]
-      if t.dtype == tf.int64:
-        t = tf.cast(t, tf.int32)
-      example[name] = t
-
-    return example
-
-  def _parse(self, record: Mapping[str, tf.Tensor]):
-    """Parses raw tensors into a dict of tensors to be consumed by the model."""
-    x = {}
-
-    inputs = record['input_word_ids']
-    x['input_type_ids'] = record['input_type_ids']
-
-    if self._sample_strategy in ['whole_word', 'word_span']:
-      boundary = tf.sparse.to_dense(record['boundary_indices'])
-    else:
-      boundary = None
-
-    input_mask = self._online_sample_mask(inputs=inputs, boundary=boundary)
-
-    if self._reuse_length > 0:
-      if self._permutation_size > self._reuse_length:
-        logging.warning(
-            '`permutation_size` is greater than `reuse_length` (%d > %d). '
-            'This may introduce data leakage.',
-            self._permutation_size, self._reuse_length)
-
-      # Enable the memory mechanism.
-      # Permute the reuse and non-reuse segments separately.
-      non_reuse_len = self._seq_length - self._reuse_length
-      if not (self._reuse_length % self._permutation_size == 0
-              and non_reuse_len % self._permutation_size == 0):
-        raise ValueError('`reuse_length` and `seq_length` should both be '
-                         'a multiple of `permutation_size`.')
-
-      # Creates permutation mask and target mask for the first reuse_len tokens.
-      # The tokens in this part are reused from the last sequence.
-      perm_mask_0, target_mask_0, tokens_0, masked_0 = self._get_factorization(
-          inputs=inputs[:self._reuse_length],
-          input_mask=input_mask[:self._reuse_length])
-
-      # Creates permutation mask and target mask for the rest of the tokens in
-      # the current example, which are the concatenation of two new segments.
-      perm_mask_1, target_mask_1, tokens_1, masked_1 = self._get_factorization(
-          inputs[self._reuse_length:], input_mask[self._reuse_length:])
-
-      perm_mask_0 = tf.concat(
-          [perm_mask_0,
-           tf.zeros([self._reuse_length, non_reuse_len], dtype=tf.int32)],
-          axis=1)
-      perm_mask_1 = tf.concat(
-          [tf.ones([non_reuse_len, self._reuse_length], dtype=tf.int32),
-           perm_mask_1], axis=1)
-      perm_mask = tf.concat([perm_mask_0, perm_mask_1], axis=0)
-      target_mask = tf.concat([target_mask_0, target_mask_1], axis=0)
-      tokens = tf.concat([tokens_0, tokens_1], axis=0)
-      masked_tokens = tf.concat([masked_0, masked_1], axis=0)
-    else:
-      # Disable the memory mechanism.
-      if self._seq_length % self._permutation_size != 0:
-        raise ValueError('`seq_length` should be a multiple of '
-                         '`permutation_size`.')
-      # Permute the entire sequence together.
-      perm_mask, target_mask, tokens, masked_tokens = self._get_factorization(
-          inputs=inputs, input_mask=input_mask)
-    x['permutation_mask'] = tf.reshape(
-        perm_mask, [self._seq_length, self._seq_length])
-    x['input_word_ids'] = tokens
-    x['masked_tokens'] = masked_tokens
-
-    target = tokens
-    if self._max_predictions_per_seq is not None:
-      indices = tf.range(self._seq_length, dtype=tf.int32)
-      bool_target_mask = tf.cast(target_mask, tf.bool)
-      indices = tf.boolean_mask(indices, bool_target_mask)
-
-      # Account for extra padding due to CLS/SEP.
- actual_num_predict = tf.shape(indices)[0] - pad_len = self._max_predictions_per_seq - actual_num_predict - - target_mapping = tf.one_hot(indices, self._seq_length, dtype=tf.int32) - paddings = tf.zeros([pad_len, self._seq_length], - dtype=target_mapping.dtype) - target_mapping = tf.concat([target_mapping, paddings], axis=0) - x['target_mapping'] = tf.reshape( - target_mapping, [self._max_predictions_per_seq, self._seq_length]) - - target = tf.boolean_mask(target, bool_target_mask) - paddings = tf.zeros([pad_len], dtype=target.dtype) - target = tf.concat([target, paddings], axis=0) - x['target'] = tf.reshape(target, [self._max_predictions_per_seq]) - - target_mask = tf.concat([ - tf.ones([actual_num_predict], dtype=tf.int32), - tf.zeros([pad_len], dtype=tf.int32) - ], axis=0) - x['target_mask'] = tf.reshape(target_mask, - [self._max_predictions_per_seq]) - else: - x['target'] = tf.reshape(target, [self._seq_length]) - x['target_mask'] = tf.reshape(target_mask, [self._seq_length]) - return x - - def _index_pair_to_mask(self, - begin_indices: tf.Tensor, - end_indices: tf.Tensor, - inputs: tf.Tensor) -> tf.Tensor: - """Converts beginning and end indices into an actual mask.""" - non_func_mask = tf.logical_and( - tf.not_equal(inputs, self._sep_id), tf.not_equal(inputs, self._cls_id)) - all_indices = tf.where( - non_func_mask, - tf.range(self._seq_length, dtype=tf.int32), - tf.constant(-1, shape=[self._seq_length], dtype=tf.int32)) - candidate_matrix = tf.cast( - tf.logical_and(all_indices[None, :] >= begin_indices[:, None], - all_indices[None, :] < end_indices[:, None]), tf.float32) - cumsum_matrix = tf.reshape( - tf.cumsum(tf.reshape(candidate_matrix, [-1])), [-1, self._seq_length]) - masked_matrix = tf.cast(cumsum_matrix <= self._max_predictions_per_seq, - tf.float32) - target_mask = tf.reduce_sum(candidate_matrix * masked_matrix, axis=0) - return tf.cast(target_mask, tf.bool) - - def _single_token_mask(self, inputs: tf.Tensor) -> tf.Tensor: - """Samples individual tokens as prediction targets.""" - all_indices = tf.range(self._seq_length, dtype=tf.int32) - non_func_mask = tf.logical_and( - tf.not_equal(inputs, self._sep_id), tf.not_equal(inputs, self._cls_id)) - non_func_indices = tf.boolean_mask(all_indices, non_func_mask) - - masked_pos = tf.random.shuffle(non_func_indices) - masked_pos = tf.sort(masked_pos[:self._max_predictions_per_seq]) - - sparse_indices = tf.stack( - [tf.zeros_like(masked_pos), masked_pos], axis=-1) - sparse_indices = tf.cast(sparse_indices, tf.int64) - - sparse_indices = tf.sparse.SparseTensor( - sparse_indices, - values=tf.ones_like(masked_pos), - dense_shape=(1, self._seq_length)) - - target_mask = tf.sparse.to_dense( - sp_input=sparse_indices, - default_value=0) - - return tf.squeeze(tf.cast(target_mask, tf.bool)) - - def _whole_word_mask(self, - inputs: tf.Tensor, - boundary: tf.Tensor) -> tf.Tensor: - """Samples whole words as prediction targets.""" - pair_indices = tf.concat([boundary[:-1, None], boundary[1:, None]], axis=1) - cand_pair_indices = tf.random.shuffle( - pair_indices)[:self._max_predictions_per_seq] - begin_indices = cand_pair_indices[:, 0] - end_indices = cand_pair_indices[:, 1] - - return self._index_pair_to_mask( - begin_indices=begin_indices, - end_indices=end_indices, - inputs=inputs) - - def _token_span_mask(self, inputs: tf.Tensor) -> tf.Tensor: - """Samples token spans as prediction targets.""" - min_num_tokens = self._params.min_num_tokens - max_num_tokens = self._params.max_num_tokens - - mask_alpha = self._seq_length / 
self._max_predictions_per_seq - round_to_int = lambda x: tf.cast(tf.round(x), tf.int32) - - # Sample span lengths from a zipf distribution - span_len_seq = np.arange(min_num_tokens, max_num_tokens + 1) - probs = np.array([1.0 / (i + 1) for i in span_len_seq]) - - probs /= np.sum(probs) - logits = tf.constant(np.log(probs), dtype=tf.float32) - span_lens = tf.random.categorical( - logits=logits[None], - num_samples=self._max_predictions_per_seq, - dtype=tf.int32, - )[0] + min_num_tokens - - # Sample the ratio [0.0, 1.0) of left context lengths - span_lens_float = tf.cast(span_lens, tf.float32) - left_ratio = tf.random.uniform( - shape=[self._max_predictions_per_seq], minval=0.0, maxval=1.0) - left_ctx_len = left_ratio * span_lens_float * (mask_alpha - 1) - left_ctx_len = round_to_int(left_ctx_len) - - # Compute the offset from left start to the right end - right_offset = round_to_int(span_lens_float * mask_alpha) - left_ctx_len - - # Get the actual begin and end indices - begin_indices = ( - tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)) - end_indices = begin_indices + span_lens - - # Remove out of range indices - valid_idx_mask = end_indices < self._seq_length - begin_indices = tf.boolean_mask(begin_indices, valid_idx_mask) - end_indices = tf.boolean_mask(end_indices, valid_idx_mask) - - # Shuffle valid indices - num_valid = tf.cast(tf.shape(begin_indices)[0], tf.int32) - order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int32)) - begin_indices = tf.gather(begin_indices, order) - end_indices = tf.gather(end_indices, order) - - return self._index_pair_to_mask( - begin_indices=begin_indices, - end_indices=end_indices, - inputs=inputs) - - def _word_span_mask(self, - inputs: tf.Tensor, - boundary: tf.Tensor): - """Sample whole word spans as prediction targets.""" - min_num_words = self._params.min_num_words - max_num_words = self._params.max_num_words - - # Note: 1.2 is the token-to-word ratio - mask_alpha = self._seq_length / self._max_predictions_per_seq / 1.2 - round_to_int = lambda x: tf.cast(tf.round(x), tf.int32) - - # Sample span lengths from a zipf distribution - span_len_seq = np.arange(min_num_words, max_num_words + 1) - probs = np.array([1.0 / (i + 1) for i in span_len_seq]) - probs /= np.sum(probs) - logits = tf.constant(np.log(probs), dtype=tf.float32) - - # Sample `num_predict` words here: note that this is over sampling - span_lens = tf.random.categorical( - logits=logits[None], - num_samples=self._max_predictions_per_seq, - dtype=tf.int32, - )[0] + min_num_words - - # Sample the ratio [0.0, 1.0) of left context lengths - span_lens_float = tf.cast(span_lens, tf.float32) - left_ratio = tf.random.uniform( - shape=[self._max_predictions_per_seq], minval=0.0, maxval=1.0) - left_ctx_len = left_ratio * span_lens_float * (mask_alpha - 1) - - left_ctx_len = round_to_int(left_ctx_len) - right_offset = round_to_int(span_lens_float * mask_alpha) - left_ctx_len - - begin_indices = ( - tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)) - end_indices = begin_indices + span_lens - - # Remove out of range indices - max_boundary_index = tf.cast(tf.shape(boundary)[0] - 1, tf.int32) - valid_idx_mask = end_indices < max_boundary_index - begin_indices = tf.boolean_mask(begin_indices, valid_idx_mask) - end_indices = tf.boolean_mask(end_indices, valid_idx_mask) - - begin_indices = tf.gather(boundary, begin_indices) - end_indices = tf.gather(boundary, end_indices) - - # Shuffle valid indices - num_valid = tf.cast(tf.shape(begin_indices)[0], tf.int32) - order = 
tf.random.shuffle(tf.range(num_valid, dtype=tf.int32)) - begin_indices = tf.gather(begin_indices, order) - end_indices = tf.gather(end_indices, order) - - return self._index_pair_to_mask( - begin_indices=begin_indices, - end_indices=end_indices, - inputs=inputs) - - def _online_sample_mask(self, - inputs: tf.Tensor, - boundary: tf.Tensor) -> tf.Tensor: - """Samples target positions for predictions. - - Descriptions of each strategy: - - 'single_token': Samples individual tokens as prediction targets. - - 'token_span': Samples spans of tokens as prediction targets. - - 'whole_word': Samples individual words as prediction targets. - - 'word_span': Samples spans of words as prediction targets. - - Args: - inputs: The input tokens. - boundary: The `int` Tensor of indices indicating whole word boundaries. - This is used in 'whole_word' and 'word_span' - - Returns: - The sampled `bool` input mask. - - Raises: - `ValueError`: if `max_predictions_per_seq` is not set or if boundary is - not provided for 'whole_word' and 'word_span' sample strategies. - """ - if self._max_predictions_per_seq is None: - raise ValueError('`max_predictions_per_seq` must be set.') - - if boundary is None and 'word' in self._sample_strategy: - raise ValueError('`boundary` must be provided for {} strategy'.format( - self._sample_strategy)) - - if self._sample_strategy == 'single_token': - return self._single_token_mask(inputs) - elif self._sample_strategy == 'token_span': - return self._token_span_mask(inputs) - elif self._sample_strategy == 'whole_word': - return self._whole_word_mask(inputs, boundary) - elif self._sample_strategy == 'word_span': - return self._word_span_mask(inputs, boundary) - else: - raise NotImplementedError('Invalid sample strategy.') - - def _get_factorization(self, - inputs: tf.Tensor, - input_mask: tf.Tensor): - """Samples a permutation of the factorization order. - - Args: - inputs: the input tokens. - input_mask: the `bool` Tensor of the same shape as `inputs`. - If `True`, then this means select for partial prediction. - - Returns: - perm_mask: An `int32` Tensor of shape [seq_length, seq_length] consisting - of 0s and 1s. If perm_mask[i][j] == 0, then this means that the i-th - token (in original order) cannot attend to the jth attention token. - target_mask: An `int32` Tensor of shape [seq_len] consisting of 0s and 1s. - If target_mask[i] == 1, then the i-th token needs to be predicted and - the mask will be used as input. This token will be included in the loss. - If target_mask[i] == 0, then the token (or [SEP], [CLS]) will be used as - input. This token will not be included in the loss. - tokens: int32 Tensor of shape [seq_length]. - masked_tokens: int32 Tensor of shape [seq_length]. 
-
-    """
-    factorization_length = tf.shape(inputs)[0]
-    # Generate permutation indices.
-    index = tf.range(factorization_length, dtype=tf.int32)
-    index = tf.transpose(tf.reshape(index, [-1, self._permutation_size]))
-    index = tf.random.shuffle(index)
-    index = tf.reshape(tf.transpose(index), [-1])
-
-    input_mask = tf.cast(input_mask, tf.bool)
-
-    # Non-functional tokens.
-    non_func_tokens = tf.logical_not(
-        tf.logical_or(
-            tf.equal(inputs, self._sep_id), tf.equal(inputs, self._cls_id)))
-    masked_tokens = tf.logical_and(input_mask, non_func_tokens)
-    non_masked_or_func_tokens = tf.logical_not(masked_tokens)
-
-    smallest_index = -2 * tf.ones([factorization_length], dtype=tf.int32)
-
-    # Similar to BERT, randomly leak some masked tokens.
-    if self._leak_ratio > 0:
-      leak_tokens = tf.logical_and(
-          masked_tokens,
-          tf.random.uniform([factorization_length],
-                            maxval=1.0) < self._leak_ratio)
-      can_attend_self = tf.logical_or(non_masked_or_func_tokens, leak_tokens)
-    else:
-      can_attend_self = non_masked_or_func_tokens
-    to_index = tf.where(can_attend_self, smallest_index, index)
-    from_index = tf.where(can_attend_self, to_index + 1, to_index)
-
-    # Masked tokens can attend only if i > j; context tokens can always
-    # attend to each other.
-    can_attend = from_index[:, None] > to_index[None, :]
-
-    perm_mask = tf.cast(can_attend, tf.int32)
-
-    # Only masked tokens are included in the loss.
-    target_mask = tf.cast(masked_tokens, tf.int32)
-
-    return perm_mask, target_mask, inputs, masked_tokens
-
-  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
-    """Returns a tf.data.Dataset."""
-    if input_context:
-      self._num_replicas_in_sync = input_context.num_replicas_in_sync
-    reader = input_reader.InputReader(
-        params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
-    return reader.read(input_context)
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dataloader_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dataloader_test.py
deleted file mode 100644
index 826cce8845f2af4d8b48e19d384ae99f52a69d0a..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dataloader_test.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Tests for official.nlp.data.pretrain_dataloader.""" -import itertools -import os - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from official.nlp.data import pretrain_dataloader - - -def create_int_feature(values): - f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return f - - -def _create_fake_bert_dataset( - output_path, - seq_length, - max_predictions_per_seq, - use_position_id, - use_next_sentence_label, - use_v2_feature_names=False): - """Creates a fake dataset.""" - writer = tf.io.TFRecordWriter(output_path) - - def create_float_feature(values): - f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) - return f - - for _ in range(100): - features = {} - input_ids = np.random.randint(100, size=(seq_length)) - features["input_mask"] = create_int_feature(np.ones_like(input_ids)) - if use_v2_feature_names: - features["input_word_ids"] = create_int_feature(input_ids) - features["input_type_ids"] = create_int_feature(np.ones_like(input_ids)) - else: - features["input_ids"] = create_int_feature(input_ids) - features["segment_ids"] = create_int_feature(np.ones_like(input_ids)) - - features["masked_lm_positions"] = create_int_feature( - np.random.randint(100, size=(max_predictions_per_seq))) - features["masked_lm_ids"] = create_int_feature( - np.random.randint(100, size=(max_predictions_per_seq))) - features["masked_lm_weights"] = create_float_feature( - [1.0] * max_predictions_per_seq) - - if use_next_sentence_label: - features["next_sentence_labels"] = create_int_feature([1]) - - if use_position_id: - features["position_ids"] = create_int_feature(range(0, seq_length)) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(tf_example.SerializeToString()) - writer.close() - - -def _create_fake_xlnet_dataset( - output_path, seq_length, max_predictions_per_seq): - """Creates a fake dataset.""" - writer = tf.io.TFRecordWriter(output_path) - for _ in range(100): - features = {} - input_ids = np.random.randint(100, size=(seq_length)) - num_boundary_indices = np.random.randint(1, seq_length) - - if max_predictions_per_seq is not None: - input_mask = np.zeros_like(input_ids) - input_mask[:max_predictions_per_seq] = 1 - np.random.shuffle(input_mask) - else: - input_mask = np.ones_like(input_ids) - - features["input_mask"] = create_int_feature(input_mask) - features["input_word_ids"] = create_int_feature(input_ids) - features["input_type_ids"] = create_int_feature(np.ones_like(input_ids)) - features["boundary_indices"] = create_int_feature( - sorted(np.random.randint(seq_length, size=(num_boundary_indices)))) - features["target"] = create_int_feature(input_ids + 1) - features["label"] = create_int_feature([1]) - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(tf_example.SerializeToString()) - writer.close() - - -class BertPretrainDataTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters(itertools.product( - (False, True), - (False, True), - )) - def test_load_data(self, use_next_sentence_label, use_position_id): - train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") - seq_length = 128 - max_predictions_per_seq = 20 - _create_fake_bert_dataset( - train_data_path, - seq_length, - max_predictions_per_seq, - use_next_sentence_label=use_next_sentence_label, - 
use_position_id=use_position_id) - data_config = pretrain_dataloader.BertPretrainDataConfig( - input_path=train_data_path, - max_predictions_per_seq=max_predictions_per_seq, - seq_length=seq_length, - global_batch_size=10, - is_training=True, - use_next_sentence_label=use_next_sentence_label, - use_position_id=use_position_id) - - dataset = pretrain_dataloader.BertPretrainDataLoader(data_config).load() - features = next(iter(dataset)) - self.assertLen(features, - 6 + int(use_next_sentence_label) + int(use_position_id)) - self.assertIn("input_word_ids", features) - self.assertIn("input_mask", features) - self.assertIn("input_type_ids", features) - self.assertIn("masked_lm_positions", features) - self.assertIn("masked_lm_ids", features) - self.assertIn("masked_lm_weights", features) - - self.assertEqual("next_sentence_labels" in features, - use_next_sentence_label) - self.assertEqual("position_ids" in features, use_position_id) - - def test_v2_feature_names(self): - train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") - seq_length = 128 - max_predictions_per_seq = 20 - _create_fake_bert_dataset( - train_data_path, - seq_length, - max_predictions_per_seq, - use_next_sentence_label=True, - use_position_id=False, - use_v2_feature_names=True) - data_config = pretrain_dataloader.BertPretrainDataConfig( - input_path=train_data_path, - max_predictions_per_seq=max_predictions_per_seq, - seq_length=seq_length, - global_batch_size=10, - is_training=True, - use_next_sentence_label=True, - use_position_id=False, - use_v2_feature_names=True) - - dataset = pretrain_dataloader.BertPretrainDataLoader(data_config).load() - features = next(iter(dataset)) - self.assertIn("input_word_ids", features) - self.assertIn("input_mask", features) - self.assertIn("input_type_ids", features) - self.assertIn("masked_lm_positions", features) - self.assertIn("masked_lm_ids", features) - self.assertIn("masked_lm_weights", features) - - -class XLNetPretrainDataTest(parameterized.TestCase, tf.test.TestCase): - - @parameterized.parameters(itertools.product( - ("single_token", "whole_word", "token_span"), - (0, 64), - (20, None), - )) - def test_load_data( - self, sample_strategy, reuse_length, max_predictions_per_seq): - train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") - seq_length = 128 - batch_size = 5 - - _create_fake_xlnet_dataset( - train_data_path, seq_length, max_predictions_per_seq) - - data_config = pretrain_dataloader.XLNetPretrainDataConfig( - input_path=train_data_path, - max_predictions_per_seq=max_predictions_per_seq, - seq_length=seq_length, - global_batch_size=batch_size, - is_training=True, - reuse_length=reuse_length, - sample_strategy=sample_strategy, - min_num_tokens=1, - max_num_tokens=2, - permutation_size=seq_length // 2, - leak_ratio=0.1) - - if max_predictions_per_seq is None: - with self.assertRaises(ValueError): - dataset = pretrain_dataloader.XLNetPretrainDataLoader( - data_config).load() - features = next(iter(dataset)) - else: - dataset = pretrain_dataloader.XLNetPretrainDataLoader(data_config).load() - features = next(iter(dataset)) - - self.assertIn("input_word_ids", features) - self.assertIn("input_type_ids", features) - self.assertIn("permutation_mask", features) - self.assertIn("masked_tokens", features) - self.assertIn("target", features) - self.assertIn("target_mask", features) - - self.assertAllClose(features["input_word_ids"].shape, - (batch_size, seq_length)) - self.assertAllClose(features["input_type_ids"].shape, - (batch_size, seq_length)) - 
self.assertAllClose(features["permutation_mask"].shape, - (batch_size, seq_length, seq_length)) - self.assertAllClose(features["masked_tokens"].shape, - (batch_size, seq_length,)) - if max_predictions_per_seq is not None: - self.assertIn("target_mapping", features) - self.assertAllClose(features["target_mapping"].shape, - (batch_size, max_predictions_per_seq, seq_length)) - self.assertAllClose(features["target_mask"].shape, - (batch_size, max_predictions_per_seq)) - self.assertAllClose(features["target"].shape, - (batch_size, max_predictions_per_seq)) - else: - self.assertAllClose(features["target_mask"].shape, - (batch_size, seq_length)) - self.assertAllClose(features["target"].shape, - (batch_size, seq_length)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dynamic_dataloader.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dynamic_dataloader.py deleted file mode 100644 index bde8e15b0e102e184c99d2681b40fe96799381d8..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dynamic_dataloader.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Dataset loader for the pre-training with dynamic sequence length.""" -from typing import Optional, Tuple - -import dataclasses -import tensorflow as tf - -from official.core import config_definitions as cfg -from official.core import input_reader -from official.nlp.data import data_loader_factory -from official.nlp.data import pretrain_dataloader - - -@dataclasses.dataclass -class BertPretrainDataConfig(cfg.DataConfig): - """Data config for BERT pretraining task (tasks/masked_lm).""" - input_path: str = '' - global_batch_size: int = 512 - is_training: bool = True - seq_bucket_lengths: Tuple[int, ...] = (128, 256, 384, 512,) - # TODO(rxsang): `seq_bucket_window_scale` is only useful when round robin - # tf.data service is disabled. Deprecate this flag once we always enable round - # robin tf.data service. 
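-  # `seq_bucket_window_scale` multiplies the group_by_window window size in
-  # _bucketize_and_batch when tf.data service runs without round-robin reads.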
-  seq_bucket_window_scale: int = 8
-  use_next_sentence_label: bool = True
-  use_position_id: bool = False
-  deterministic: bool = False
-  enable_tf_data_service: bool = False
-  enable_round_robin_tf_data_service: bool = False
-  tf_data_service_job_name: str = 'bert_pretrain'
-  use_v2_feature_names: bool = False
-
-
-@data_loader_factory.register_data_loader_cls(BertPretrainDataConfig)
-class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
-  """Dataset loader for BERT-style pretraining with dynamic sequence length.
-
-  Bucketizes the input id features by `seq_bucket_lengths` and pads features
-  to the bucket boundaries. The mask features are usually shorter than the
-  input id features and can also be dynamic. We require that the mask feature
-  lengths within a bucket be the same. For example, with [128, 256] buckets,
-  the mask features for bucket 128 should always have the same length X and
-  the features for bucket 256 should always have the same length Y.
-
-  The dataloader does not filter out empty masks. Make sure to handle this
-  in the model.
-  """
-
-  def __init__(self, params):
-    self._params = params
-    if len(params.seq_bucket_lengths) < 1:
-      raise ValueError('The seq_bucket_lengths cannot be empty.')
-    self._seq_bucket_lengths = params.seq_bucket_lengths
-    self._seq_bucket_window_scale = params.seq_bucket_window_scale
-    self._global_batch_size = params.global_batch_size
-    self._use_next_sentence_label = params.use_next_sentence_label
-    self._use_position_id = params.use_position_id
-    self._drop_remainder = params.drop_remainder
-    self._enable_tf_data_service = params.enable_tf_data_service
-    self._enable_round_robin_tf_data_service = (
-        params.enable_round_robin_tf_data_service)
-    self._mask_keys = [
-        'masked_lm_positions', 'masked_lm_ids', 'masked_lm_weights'
-    ]
-
-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
-    name_to_features = {
-        'input_ids': tf.io.VarLenFeature(tf.int64),
-        'input_mask': tf.io.VarLenFeature(tf.int64),
-        'segment_ids': tf.io.VarLenFeature(tf.int64),
-        'masked_lm_positions': tf.io.VarLenFeature(tf.int64),
-        'masked_lm_ids': tf.io.VarLenFeature(tf.int64),
-        'masked_lm_weights': tf.io.VarLenFeature(tf.float32),
-    }
-    if self._use_next_sentence_label:
-      name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature([1],
-                                                                       tf.int64)
-    dynamic_keys = ['input_ids', 'input_mask', 'segment_ids']
-    if self._use_position_id:
-      name_to_features['position_ids'] = tf.io.VarLenFeature(tf.int64)
-      dynamic_keys.append('position_ids')
-
-    example = tf.io.parse_single_example(record, name_to_features)
-    for key in dynamic_keys + self._mask_keys:
-      example[key] = tf.sparse.to_dense(example[key])
-
-    # Truncate the trailing padding after the last non-pad token along the
-    # sequence dimension; padding that appears before the last non-pad token
-    # is kept.
-    mask = tf.math.greater(
-        tf.math.cumsum(example['input_ids'], reverse=True), 0)
-    for key in dynamic_keys:
-      example[key] = tf.boolean_mask(example[key], mask)
-
-    # masked_lm_ids should be 0-padded. Change the mask features to -1 padding
-    # so that we can differentiate the padding added here from the padding
-    # added by bucketizing.
-    mask = tf.math.not_equal(example['masked_lm_ids'], 0)
-    example['masked_lm_ids'] = tf.where(
-        mask, example['masked_lm_ids'],
-        -tf.ones(
-            tf.shape(example['masked_lm_ids']),
-            dtype=example['masked_lm_ids'].dtype))
-
-    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
-    # So cast all int64 to int32.
-    # tf.data service uses the dataset graph fingerprint to distinguish input
-    # pipeline jobs, thus we sort the keys here to make sure they are generated
-    # in a deterministic order each time the dataset function is traced.
-    for name in sorted(list(example.keys())):
-      t = example[name]
-      if t.dtype == tf.int64:
-        t = tf.cast(t, tf.int32)
-      example[name] = t
-
-    return example
-
-  def _bucketize_and_batch(
-      self,
-      dataset,
-      input_context: Optional[tf.distribute.InputContext] = None):
-    """Bucketizes by sequence length and batches the datasets."""
-    per_replica_batch_size = input_context.get_per_replica_batch_size(
-        self._global_batch_size) if input_context else self._global_batch_size
-
-    def element_length_func(example, seq_len_dim):
-      return tf.shape(example['input_word_ids'])[seq_len_dim]
-
-    bucket_boundaries = [length + 1 for length in self._seq_bucket_lengths]
-    bucket_batch_sizes = [per_replica_batch_size] * (len(bucket_boundaries) + 1)
-
-    # Bucketize and batch the dataset with the per-replica batch size first.
-    dataset = dataset.apply(
-        tf.data.experimental.bucket_by_sequence_length(
-            lambda example: tf.cast(element_length_func(example, 0), tf.int32),
-            bucket_boundaries,
-            bucket_batch_sizes,
-            pad_to_bucket_boundary=True,
-            drop_remainder=self._drop_remainder))
-    if input_context:
-      window_size = input_context.num_replicas_in_sync
-      if self._enable_tf_data_service and (
-          not self._enable_round_robin_tf_data_service):
-        # If tf.data service is enabled but round-robin behavior is not,
-        # different TPU workers may fetch data from one tf.data service worker
-        # at different speeds. We make the window size
-        # `seq_bucket_window_scale` times larger to leave a buffer if some
-        # workers are fetching data faster than others, so all the data within
-        # the same global batch still has a better chance of landing in the
-        # same bucket.
-        window_size *= self._seq_bucket_window_scale
-
-      # Group `num_replicas_in_sync` batches from the same bucket together, so
-      # all replicas can get the same sequence length for one global step.
-      dataset = dataset.apply(
-          tf.data.experimental.group_by_window(
-              key_func=lambda example: tf.cast(  # pylint: disable=g-long-lambda
-                  element_length_func(example, 1), tf.int64),
-              reduce_func=lambda _, x: tf.data.Dataset.from_tensors(x),
-              window_size=window_size))
-      dataset = dataset.flat_map(lambda x: x)
-
-    def _remove_pads_from_bucketize(features):
-      # All mask features must have the same effective length.
-      # The real masked ids padding token is -1; the 0 padding comes from
-      # bucket_by_sequence_length.
-      mask = tf.math.not_equal(features['masked_lm_ids'], 0)
-
-      mask_per_example = tf.math.reduce_sum(tf.cast(mask, tf.int32), axis=1)
-      normalized = tf.cast(
-          mask_per_example / tf.math.reduce_max(mask_per_example), tf.int32)
-      assert_op = tf.debugging.assert_equal(
-          tf.math.reduce_sum(normalized), per_replica_batch_size,
-          'Number of non padded mask tokens is not the same for each example '
-          'in the same sequence length.')
-      with tf.control_dependencies([assert_op]):
-        for key in self._mask_keys:
-          features[key] = tf.reshape(
-              tf.boolean_mask(features[key], mask),
-              [per_replica_batch_size, -1])
-      # Revert masked_lm_ids to be 0-padded.
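-      # The -1 markers were introduced in _decode to tell real mask padding
-      # apart from the 0 padding added by bucketization; with the bucketizing
-      # pads removed, restore the original 0 padding for the model.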
-      mask = tf.math.not_equal(features['masked_lm_ids'], -1)
-      features['masked_lm_ids'] = tf.where(
-          mask, features['masked_lm_ids'],
-          tf.zeros(
-              tf.shape(features['masked_lm_ids']),
-              dtype=features['masked_lm_ids'].dtype))
-      return features
-
-    dataset = dataset.map(_remove_pads_from_bucketize)
-    return dataset
-
-  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
-    """Returns a tf.data.Dataset."""
-    reader = input_reader.InputReader(
-        params=self._params,
-        decoder_fn=self._decode,
-        parser_fn=self._parse,
-        transform_and_batch_fn=self._bucketize_and_batch)
-    return reader.read(input_context)
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dynamic_dataloader_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dynamic_dataloader_test.py
deleted file mode 100644
index 07ab60746b16f934fa29c2ef5434673047260149..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/pretrain_dynamic_dataloader_test.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# - -"""Tests for nlp.data.pretrain_dynamic_dataloader.""" -import os - -from absl import logging -from absl.testing import parameterized -import numpy as np -import orbit -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.nlp.configs import bert -from official.nlp.configs import encoders -from official.nlp.data import pretrain_dataloader -from official.nlp.data import pretrain_dynamic_dataloader -from official.nlp.tasks import masked_lm - - -def _create_fake_dataset(output_path, seq_length, num_masked_tokens, - max_seq_length, num_examples): - """Creates a fake dataset.""" - writer = tf.io.TFRecordWriter(output_path) - - def create_int_feature(values): - f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return f - - def create_float_feature(values): - f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) - return f - - for _ in range(num_examples): - features = {} - padding = np.zeros(shape=(max_seq_length - seq_length), dtype=np.int32) - input_ids = np.random.randint(low=1, high=100, size=(seq_length)) - features['input_ids'] = create_int_feature( - np.concatenate((input_ids, padding))) - features['input_mask'] = create_int_feature( - np.concatenate((np.ones_like(input_ids), padding))) - features['segment_ids'] = create_int_feature( - np.concatenate((np.ones_like(input_ids), padding))) - features['position_ids'] = create_int_feature( - np.concatenate((np.ones_like(input_ids), padding))) - features['masked_lm_positions'] = create_int_feature( - np.random.randint(60, size=(num_masked_tokens), dtype=np.int64)) - features['masked_lm_ids'] = create_int_feature( - np.random.randint(100, size=(num_masked_tokens), dtype=np.int64)) - features['masked_lm_weights'] = create_float_feature( - np.ones((num_masked_tokens,), dtype=np.float32)) - features['next_sentence_labels'] = create_int_feature(np.array([0])) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(tf_example.SerializeToString()) - writer.close() - - -class PretrainDynamicDataLoaderTest(tf.test.TestCase, parameterized.TestCase): - - @combinations.generate( - combinations.combine( - distribution_strategy=[ - strategy_combinations.cloud_tpu_strategy, - ], - mode='eager')) - def test_distribution_strategy(self, distribution_strategy): - max_seq_length = 128 - batch_size = 8 - input_path = os.path.join(self.get_temp_dir(), 'train.tf_record') - _create_fake_dataset( - input_path, - seq_length=60, - num_masked_tokens=20, - max_seq_length=max_seq_length, - num_examples=batch_size) - data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig( - is_training=False, - input_path=input_path, - seq_bucket_lengths=[64, 128], - global_batch_size=batch_size) - dataloader = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader( - data_config) - distributed_ds = orbit.utils.make_distributed_dataset( - distribution_strategy, dataloader.load) - train_iter = iter(distributed_ds) - with distribution_strategy.scope(): - config = masked_lm.MaskedLMConfig( - init_checkpoint=self.get_temp_dir(), - model=bert.PretrainerConfig( - encoders.EncoderConfig( - bert=encoders.BertEncoderConfig( - vocab_size=30522, num_layers=1)), - cls_heads=[ - bert.ClsHeadConfig( - inner_dim=10, num_classes=2, name='next_sentence') - ]), - train_data=data_config) - task = masked_lm.MaskedLMTask(config) - model = task.build_model() - metrics = task.build_metrics() - - @tf.function - def 
step_fn(features):
-        return task.validation_step(features, model, metrics=metrics)
-
-      distributed_outputs = distribution_strategy.run(
-          step_fn, args=(next(train_iter),))
-      local_results = tf.nest.map_structure(
-          distribution_strategy.experimental_local_results, distributed_outputs)
-      logging.info('Dynamic padding: local_results= %s', str(local_results))
-      dynamic_metrics = {}
-      for metric in metrics:
-        dynamic_metrics[metric.name] = metric.result()
-
-    data_config = pretrain_dataloader.BertPretrainDataConfig(
-        is_training=False,
-        input_path=input_path,
-        seq_length=max_seq_length,
-        max_predictions_per_seq=20,
-        global_batch_size=batch_size)
-    dataloader = pretrain_dataloader.BertPretrainDataLoader(data_config)
-    distributed_ds = orbit.utils.make_distributed_dataset(
-        distribution_strategy, dataloader.load)
-    train_iter = iter(distributed_ds)
-    with distribution_strategy.scope():
-      metrics = task.build_metrics()
-
-      @tf.function
-      def step_fn_b(features):
-        return task.validation_step(features, model, metrics=metrics)
-
-      distributed_outputs = distribution_strategy.run(
-          step_fn_b, args=(next(train_iter),))
-      local_results = tf.nest.map_structure(
-          distribution_strategy.experimental_local_results, distributed_outputs)
-      logging.info('Static padding: local_results= %s', str(local_results))
-      static_metrics = {}
-      for metric in metrics:
-        static_metrics[metric.name] = metric.result()
-    for key in static_metrics:
-      # We need to investigate the differences in the losses.
-      if key != 'next_sentence_loss':
-        self.assertEqual(dynamic_metrics[key], static_metrics[key])
-
-  def test_load_dataset(self):
-    max_seq_length = 128
-    batch_size = 2
-    input_path_1 = os.path.join(self.get_temp_dir(), 'train_1.tf_record')
-    _create_fake_dataset(
-        input_path_1,
-        seq_length=60,
-        num_masked_tokens=20,
-        max_seq_length=max_seq_length,
-        num_examples=batch_size)
-    input_path_2 = os.path.join(self.get_temp_dir(), 'train_2.tf_record')
-    _create_fake_dataset(
-        input_path_2,
-        seq_length=100,
-        num_masked_tokens=70,
-        max_seq_length=max_seq_length,
-        num_examples=batch_size)
-    input_paths = ','.join([input_path_1, input_path_2])
-    data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig(
-        is_training=False,
-        input_path=input_paths,
-        seq_bucket_lengths=[64, 128],
-        use_position_id=True,
-        global_batch_size=batch_size)
-    dataset = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader(
-        data_config).load()
-    dataset_it = iter(dataset)
-    features = next(dataset_it)
-    self.assertCountEqual([
-        'input_word_ids',
-        'input_mask',
-        'input_type_ids',
-        'next_sentence_labels',
-        'masked_lm_positions',
-        'masked_lm_ids',
-        'masked_lm_weights',
-        'position_ids',
-    ], features.keys())
-    # Sequence length dimension should be bucketized and padded to 64.
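-    # (For illustration: the fake examples have 60 real tokens, which fall
-    # into the first bucket; with bucket boundary 64 + 1 and
-    # pad_to_bucket_boundary=True they are padded to length 64, while the 20
-    # masked positions are kept as-is.)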
- self.assertEqual(features['input_word_ids'].shape, (batch_size, 64)) - self.assertEqual(features['input_mask'].shape, (batch_size, 64)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, 64)) - self.assertEqual(features['position_ids'].shape, (batch_size, 64)) - self.assertEqual(features['masked_lm_positions'].shape, (batch_size, 20)) - features = next(dataset_it) - self.assertEqual(features['input_word_ids'].shape, (batch_size, 128)) - self.assertEqual(features['input_mask'].shape, (batch_size, 128)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, 128)) - self.assertEqual(features['position_ids'].shape, (batch_size, 128)) - self.assertEqual(features['masked_lm_positions'].shape, (batch_size, 70)) - - def test_load_dataset_not_same_masks(self): - max_seq_length = 128 - batch_size = 2 - input_path_1 = os.path.join(self.get_temp_dir(), 'train_3.tf_record') - _create_fake_dataset( - input_path_1, - seq_length=60, - num_masked_tokens=20, - max_seq_length=max_seq_length, - num_examples=batch_size) - input_path_2 = os.path.join(self.get_temp_dir(), 'train_4.tf_record') - _create_fake_dataset( - input_path_2, - seq_length=60, - num_masked_tokens=15, - max_seq_length=max_seq_length, - num_examples=batch_size) - input_paths = ','.join([input_path_1, input_path_2]) - data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig( - is_training=False, - input_path=input_paths, - seq_bucket_lengths=[64, 128], - use_position_id=True, - global_batch_size=batch_size * 2) - dataset = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader( - data_config).load() - dataset_it = iter(dataset) - with self.assertRaisesRegex( - tf.errors.InvalidArgumentError, '.*Number of non padded mask tokens.*'): - next(dataset_it) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/question_answering_dataloader.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/question_answering_dataloader.py deleted file mode 100644 index 5798e2386707b0ef4a3fef47b36d45ec2ac49b39..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/question_answering_dataloader.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Loads dataset for the question answering (e.g., SQuAD) task."""
-from typing import Mapping, Optional
-
-import dataclasses
-import tensorflow as tf
-from official.core import config_definitions as cfg
-from official.core import input_reader
-from official.nlp.data import data_loader
-from official.nlp.data import data_loader_factory
-
-
-@dataclasses.dataclass
-class QADataConfig(cfg.DataConfig):
-  """Data config for question answering task (tasks/question_answering)."""
-  # For training, `input_path` is expected to be a pre-processed TFRecord file,
-  # while for evaluation, it is expected to be a raw JSON file (b/173814590).
-  input_path: str = ''
-  global_batch_size: int = 48
-  is_training: bool = True
-  seq_length: int = 384
-  # Settings below are question answering specific.
-  version_2_with_negative: bool = False
-  # Settings below are only used for eval mode.
-  input_preprocessed_data_path: str = ''
-  doc_stride: int = 128
-  query_length: int = 64
-  # The path to the vocab file of the word piece tokenizer or the model
-  # file of the sentence piece tokenizer.
-  vocab_file: str = ''
-  tokenization: str = 'WordPiece'  # WordPiece or SentencePiece
-  do_lower_case: bool = True
-  xlnet_format: bool = False
-
-
-@data_loader_factory.register_data_loader_cls(QADataConfig)
-class QuestionAnsweringDataLoader(data_loader.DataLoader):
-  """A class to load dataset for the question answering task."""
-
-  def __init__(self, params):
-    self._params = params
-    self._seq_length = params.seq_length
-    self._is_training = params.is_training
-    self._xlnet_format = params.xlnet_format
-
-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
-    name_to_features = {
-        'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-    }
-    if self._xlnet_format:
-      name_to_features['class_index'] = tf.io.FixedLenFeature([], tf.int64)
-      name_to_features['paragraph_mask'] = tf.io.FixedLenFeature(
-          [self._seq_length], tf.int64)
-      if self._is_training:
-        name_to_features['is_impossible'] = tf.io.FixedLenFeature([], tf.int64)
-
-    if self._is_training:
-      name_to_features['start_positions'] = tf.io.FixedLenFeature([], tf.int64)
-      name_to_features['end_positions'] = tf.io.FixedLenFeature([], tf.int64)
-    else:
-      name_to_features['unique_ids'] = tf.io.FixedLenFeature([], tf.int64)
-    example = tf.io.parse_single_example(record, name_to_features)
-
-    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
-    # So cast all int64 to int32.
-    for name in example:
-      t = example[name]
-      if t.dtype == tf.int64:
-        t = tf.cast(t, tf.int32)
-      example[name] = t
-
-    return example
-
-  def _parse(self, record: Mapping[str, tf.Tensor]):
-    """Parses raw tensors into a dict of tensors to be consumed by the model."""
-    x, y = {}, {}
-    for name, tensor in record.items():
-      if name in ('start_positions', 'end_positions', 'is_impossible'):
-        y[name] = tensor
-      elif name == 'input_ids':
-        x['input_word_ids'] = tensor
-      elif name == 'segment_ids':
-        x['input_type_ids'] = tensor
-      else:
-        x[name] = tensor
-      if name == 'start_positions' and self._xlnet_format:
-        x[name] = tensor
-    return (x, y)
-
-  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
-    """Returns a tf.data.Dataset."""
-    reader = input_reader.InputReader(
-        params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
-    return reader.read(input_context)
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/question_answering_dataloader_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/question_answering_dataloader_test.py
deleted file mode 100644
index d2fed7107123ace020891225856750f47ec26cdd..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/question_answering_dataloader_test.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# - -"""Tests for official.nlp.data.question_answering_dataloader.""" -import os - -import numpy as np -import tensorflow as tf - -from official.nlp.data import question_answering_dataloader - - -def _create_fake_dataset(output_path, seq_length): - """Creates a fake dataset.""" - writer = tf.io.TFRecordWriter(output_path) - - def create_int_feature(values): - f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return f - - for _ in range(100): - features = {} - input_ids = np.random.randint(100, size=(seq_length)) - features['input_ids'] = create_int_feature(input_ids) - features['input_mask'] = create_int_feature(np.ones_like(input_ids)) - features['segment_ids'] = create_int_feature(np.ones_like(input_ids)) - features['start_positions'] = create_int_feature(np.array([0])) - features['end_positions'] = create_int_feature(np.array([10])) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(tf_example.SerializeToString()) - writer.close() - - -class QuestionAnsweringDataTest(tf.test.TestCase): - - def test_load_dataset(self): - seq_length = 128 - batch_size = 10 - input_path = os.path.join(self.get_temp_dir(), 'train.tf_record') - _create_fake_dataset(input_path, seq_length) - data_config = question_answering_dataloader.QADataConfig( - is_training=True, - input_path=input_path, - seq_length=seq_length, - global_batch_size=batch_size) - dataset = question_answering_dataloader.QuestionAnsweringDataLoader( - data_config).load() - features, labels = next(iter(dataset)) - - self.assertCountEqual(['input_word_ids', 'input_mask', 'input_type_ids'], - features.keys()) - self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) - - self.assertCountEqual(['start_positions', 'end_positions'], labels.keys()) - self.assertEqual(labels['start_positions'].shape, (batch_size,)) - self.assertEqual(labels['end_positions'].shape, (batch_size,)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_prediction_dataloader.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_prediction_dataloader.py deleted file mode 100644 index 3c01e79e4aee26fc1005fb195067d9ea5066512d..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_prediction_dataloader.py +++ /dev/null @@ -1,255 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Loads dataset for the sentence prediction (classification) task."""
-import functools
-from typing import List, Mapping, Optional
-
-import dataclasses
-import tensorflow as tf
-import tensorflow_hub as hub
-
-from official.common import dataset_fn
-from official.core import config_definitions as cfg
-from official.core import input_reader
-from official.nlp import modeling
-from official.nlp.data import data_loader
-from official.nlp.data import data_loader_factory
-
-LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32}
-
-
-@dataclasses.dataclass
-class SentencePredictionDataConfig(cfg.DataConfig):
-  """Data config for sentence prediction task (tasks/sentence_prediction)."""
-  input_path: str = ''
-  global_batch_size: int = 32
-  is_training: bool = True
-  seq_length: int = 128
-  label_type: str = 'int'
-  # Whether to include the example id number.
-  include_example_id: bool = False
-
-
-@data_loader_factory.register_data_loader_cls(SentencePredictionDataConfig)
-class SentencePredictionDataLoader(data_loader.DataLoader):
-  """A class to load dataset for sentence prediction (classification) task."""
-
-  def __init__(self, params):
-    self._params = params
-    self._seq_length = params.seq_length
-    self._include_example_id = params.include_example_id
-
-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
-    label_type = LABEL_TYPES_MAP[self._params.label_type]
-    name_to_features = {
-        'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'label_ids': tf.io.FixedLenFeature([], label_type),
-    }
-    if self._include_example_id:
-      name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)
-
-    example = tf.io.parse_single_example(record, name_to_features)
-
-    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
-    # So cast all int64 to int32.
-    for name in example:
-      t = example[name]
-      if t.dtype == tf.int64:
-        t = tf.cast(t, tf.int32)
-      example[name] = t
-
-    return example
-
-  def _parse(self, record: Mapping[str, tf.Tensor]):
-    """Parses raw tensors into a dict of tensors to be consumed by the model."""
-    x = {
-        'input_word_ids': record['input_ids'],
-        'input_mask': record['input_mask'],
-        'input_type_ids': record['segment_ids']
-    }
-    if self._include_example_id:
-      x['example_id'] = record['example_id']
-
-    y = record['label_ids']
-    return (x, y)
-
-  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
-    """Returns a tf.data.Dataset."""
-    reader = input_reader.InputReader(
-        params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
-    return reader.read(input_context)
-
-
-@dataclasses.dataclass
-class SentencePredictionTextDataConfig(cfg.DataConfig):
-  """Data config for sentence prediction task with raw text."""
-  # Either set `input_path`...
-  input_path: str = ''
-  # Either `int` or `float`.
-  label_type: str = 'int'
-  # ...or `tfds_name` and `tfds_split` to specify input.
- tfds_name: str = '' - tfds_split: str = '' - # The name of the text feature fields. The text features will be - # concatenated in order. - text_fields: Optional[List[str]] = None - label_field: str = 'label' - global_batch_size: int = 32 - seq_length: int = 128 - is_training: bool = True - # Either build preprocessing with Python code by specifying these values - # for modeling.layers.BertTokenizer()/SentencepieceTokenizer().... - tokenization: str = 'WordPiece' # WordPiece or SentencePiece - # Text vocab file if tokenization is WordPiece, or sentencepiece.ModelProto - # file if tokenization is SentencePiece. - vocab_file: str = '' - lower_case: bool = True - # ...or load preprocessing from a SavedModel at this location. - preprocessing_hub_module_url: str = '' - # Either tfrecord or sstsable or recordio. - file_type: str = 'tfrecord' - include_example_id: bool = False - - -class TextProcessor(tf.Module): - """Text features processing for sentence prediction task.""" - - def __init__(self, - seq_length: int, - vocab_file: Optional[str] = None, - tokenization: Optional[str] = None, - lower_case: Optional[bool] = True, - preprocessing_hub_module_url: Optional[str] = None): - if preprocessing_hub_module_url: - self._preprocessing_hub_module = hub.load(preprocessing_hub_module_url) - self._tokenizer = self._preprocessing_hub_module.tokenize - self._pack_inputs = functools.partial( - self._preprocessing_hub_module.bert_pack_inputs, - seq_length=seq_length) - return - - if tokenization == 'WordPiece': - self._tokenizer = modeling.layers.BertTokenizer( - vocab_file=vocab_file, lower_case=lower_case) - elif tokenization == 'SentencePiece': - self._tokenizer = modeling.layers.SentencepieceTokenizer( - model_file_path=vocab_file, lower_case=lower_case, - strip_diacritics=True) # Strip diacritics to follow ALBERT model - else: - raise ValueError('Unsupported tokenization: %s' % tokenization) - - self._pack_inputs = modeling.layers.BertPackInputs( - seq_length=seq_length, - special_tokens_dict=self._tokenizer.get_special_tokens_dict()) - - def __call__(self, segments): - segments = [self._tokenizer(s) for s in segments] - # BertTokenizer returns a RaggedTensor with shape [batch, word, subword], - # and SentencepieceTokenizer returns a RaggedTensor with shape - # [batch, sentencepiece], - segments = [ - tf.cast(x.merge_dims(1, -1) if x.shape.rank > 2 else x, tf.int32) - for x in segments - ] - return self._pack_inputs(segments) - - -@data_loader_factory.register_data_loader_cls(SentencePredictionTextDataConfig) -class SentencePredictionTextDataLoader(data_loader.DataLoader): - """Loads dataset with raw text for sentence prediction task.""" - - def __init__(self, params): - if bool(params.tfds_name) != bool(params.tfds_split): - raise ValueError('`tfds_name` and `tfds_split` should be specified or ' - 'unspecified at the same time.') - if bool(params.tfds_name) == bool(params.input_path): - raise ValueError('Must specify either `tfds_name` and `tfds_split` ' - 'or `input_path`.') - if not params.text_fields: - raise ValueError('Unexpected empty text fields.') - if bool(params.vocab_file) == bool(params.preprocessing_hub_module_url): - raise ValueError('Must specify exactly one of vocab_file (with matching ' - 'lower_case flag) or preprocessing_hub_module_url.') - - self._params = params - self._text_fields = params.text_fields - self._label_field = params.label_field - self._label_type = params.label_type - self._include_example_id = params.include_example_id - self._text_processor = TextProcessor( - 
seq_length=params.seq_length,
-        vocab_file=params.vocab_file,
-        tokenization=params.tokenization,
-        lower_case=params.lower_case,
-        preprocessing_hub_module_url=params.preprocessing_hub_module_url)
-
-  def _bert_preprocess(self, record: Mapping[str, tf.Tensor]):
-    """Runs BERT preprocessing on the raw text fields."""
-    segments = [record[x] for x in self._text_fields]
-    model_inputs = self._text_processor(segments)
-    if self._include_example_id:
-      model_inputs['example_id'] = record['example_id']
-    y = record[self._label_field]
-    return model_inputs, y
-
-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
-    name_to_features = {}
-    for text_field in self._text_fields:
-      name_to_features[text_field] = tf.io.FixedLenFeature([], tf.string)
-
-    label_type = LABEL_TYPES_MAP[self._label_type]
-    name_to_features[self._label_field] = tf.io.FixedLenFeature([], label_type)
-    if self._include_example_id:
-      name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)
-    example = tf.io.parse_single_example(record, name_to_features)
-
-    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
-    # So cast all int64 to int32.
-    for name in example:
-      t = example[name]
-      if t.dtype == tf.int64:
-        t = tf.cast(t, tf.int32)
-      example[name] = t
-
-    return example
-
-  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
-    """Returns a tf.data.Dataset."""
-    reader = input_reader.InputReader(
-        dataset_fn=dataset_fn.pick_dataset_fn(self._params.file_type),
-        decoder_fn=self._decode if self._params.input_path else None,
-        params=self._params,
-        postprocess_fn=self._bert_preprocess)
-    return reader.read(input_context)
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_prediction_dataloader_test.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_prediction_dataloader_test.py
deleted file mode 100644
index e70261e6af96fc8e7987fca09a1cfd8dadaa974c..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_prediction_dataloader_test.py
+++ /dev/null
@@ -1,265 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Tests for official.nlp.data.sentence_prediction_dataloader."""
-import os
-
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-
-from sentencepiece import SentencePieceTrainer
-from official.nlp.data import sentence_prediction_dataloader as loader
-
-
-def _create_fake_preprocessed_dataset(output_path, seq_length, label_type):
-  """Creates a fake dataset."""
-  writer = tf.io.TFRecordWriter(output_path)
-
-  def create_int_feature(values):
-    f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
-    return f
-
-  def create_float_feature(values):
-    f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
-    return f
-
-  for _ in range(100):
-    features = {}
-    input_ids = np.random.randint(100, size=(seq_length))
-    features['input_ids'] = create_int_feature(input_ids)
-    features['input_mask'] = create_int_feature(np.ones_like(input_ids))
-    features['segment_ids'] = create_int_feature(np.ones_like(input_ids))
-
-    if label_type == 'int':
-      features['label_ids'] = create_int_feature([1])
-    elif label_type == 'float':
-      features['label_ids'] = create_float_feature([0.5])
-    else:
-      raise ValueError('Unsupported label_type: %s' % label_type)
-
-    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
-    writer.write(tf_example.SerializeToString())
-  writer.close()
-
-
-def _create_fake_raw_dataset(output_path, text_fields, label_type):
-  """Creates a fake tf record file."""
-  writer = tf.io.TFRecordWriter(output_path)
-
-  def create_str_feature(value):
-    f = tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-    return f
-
-  def create_int_feature(values):
-    f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
-    return f
-
-  def create_float_feature(values):
-    f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
-    return f
-
-  for _ in range(100):
-    features = {}
-    for text_field in text_fields:
-      features[text_field] = create_str_feature([b'hello world'])
-
-    if label_type == 'int':
-      features['label'] = create_int_feature([0])
-    elif label_type == 'float':
-      features['label'] = create_float_feature([0.5])
-    else:
-      raise ValueError('Unexpected label_type: %s' % label_type)
-    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
-    writer.write(tf_example.SerializeToString())
-  writer.close()
-
-
-def _create_fake_sentencepiece_model(output_dir):
-  vocab = ['a', 'b', 'c', 'd', 'e', 'abc', 'def', 'ABC', 'DEF']
-  model_prefix = os.path.join(output_dir, 'spm_model')
-  input_text_file_path = os.path.join(output_dir, 'train_input.txt')
-  with tf.io.gfile.GFile(input_text_file_path, 'w') as f:
-    f.write(' '.join(vocab + ['\n']))
-  # Add 7 more tokens: <pad>, <unk>, [CLS], [SEP], [MASK], <s>, </s>.
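-  # (For the count: pad_id=0 and unk_id=1 come from the flags below,
-  # [CLS]/[SEP]/[MASK] are the control_symbols, and <s>/</s> take the last
-  # two ids via bos_id/eos_id.)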
- full_vocab_size = len(vocab) + 7 - flags = dict( - model_prefix=model_prefix, - model_type='word', - input=input_text_file_path, - pad_id=0, - unk_id=1, - control_symbols='[CLS],[SEP],[MASK]', - vocab_size=full_vocab_size, - bos_id=full_vocab_size - 2, - eos_id=full_vocab_size - 1) - SentencePieceTrainer.Train(' '.join( - ['--{}={}'.format(k, v) for k, v in flags.items()])) - return model_prefix + '.model' - - -def _create_fake_vocab_file(vocab_file_path): - tokens = ['[PAD]'] - for i in range(1, 100): - tokens.append('[unused%d]' % i) - tokens.extend(['[UNK]', '[CLS]', '[SEP]', '[MASK]', 'hello', 'world']) - with tf.io.gfile.GFile(vocab_file_path, 'w') as outfile: - outfile.write('\n'.join(tokens)) - - -class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.parameters(('int', tf.int32), ('float', tf.float32)) - def test_load_dataset(self, label_type, expected_label_type): - input_path = os.path.join(self.get_temp_dir(), 'train.tf_record') - batch_size = 10 - seq_length = 128 - _create_fake_preprocessed_dataset(input_path, seq_length, label_type) - data_config = loader.SentencePredictionDataConfig( - input_path=input_path, - seq_length=seq_length, - global_batch_size=batch_size, - label_type=label_type) - dataset = loader.SentencePredictionDataLoader(data_config).load() - features, labels = next(iter(dataset)) - self.assertCountEqual(['input_word_ids', 'input_mask', 'input_type_ids'], - features.keys()) - self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) - self.assertEqual(labels.shape, (batch_size,)) - self.assertEqual(labels.dtype, expected_label_type) - - -class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase, - parameterized.TestCase): - - @parameterized.parameters(True, False) - def test_python_wordpiece_preprocessing(self, use_tfds): - batch_size = 10 - seq_length = 256 # Non-default value. - lower_case = True - - tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record') - text_fields = ['sentence1', 'sentence2'] - if not use_tfds: - _create_fake_raw_dataset(tf_record_path, text_fields, label_type='int') - - vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt') - _create_fake_vocab_file(vocab_file_path) - - data_config = loader.SentencePredictionTextDataConfig( - input_path='' if use_tfds else tf_record_path, - tfds_name='glue/mrpc' if use_tfds else '', - tfds_split='train' if use_tfds else '', - text_fields=text_fields, - global_batch_size=batch_size, - seq_length=seq_length, - is_training=True, - lower_case=lower_case, - vocab_file=vocab_file_path) - dataset = loader.SentencePredictionTextDataLoader(data_config).load() - features, labels = next(iter(dataset)) - self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'], - features.keys()) - self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) - self.assertEqual(labels.shape, (batch_size,)) - - @parameterized.parameters(True, False) - def test_python_sentencepiece_preprocessing(self, use_tfds): - batch_size = 10 - seq_length = 256 # Non-default value. 
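-    # (A non-default value is used so the test would catch the loader
-    # silently falling back to the default seq_length of 128.)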
- lower_case = True - - tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record') - text_fields = ['sentence1', 'sentence2'] - if not use_tfds: - _create_fake_raw_dataset(tf_record_path, text_fields, label_type='int') - - sp_model_file_path = _create_fake_sentencepiece_model(self.get_temp_dir()) - data_config = loader.SentencePredictionTextDataConfig( - input_path='' if use_tfds else tf_record_path, - tfds_name='glue/mrpc' if use_tfds else '', - tfds_split='train' if use_tfds else '', - text_fields=text_fields, - global_batch_size=batch_size, - seq_length=seq_length, - is_training=True, - lower_case=lower_case, - tokenization='SentencePiece', - vocab_file=sp_model_file_path, - ) - dataset = loader.SentencePredictionTextDataLoader(data_config).load() - features, labels = next(iter(dataset)) - self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'], - features.keys()) - self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) - self.assertEqual(labels.shape, (batch_size,)) - - @parameterized.parameters(True, False) - def test_saved_model_preprocessing(self, use_tfds): - batch_size = 10 - seq_length = 256 # Non-default value. - - tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record') - text_fields = ['sentence1', 'sentence2'] - if not use_tfds: - _create_fake_raw_dataset(tf_record_path, text_fields, label_type='float') - - vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt') - _create_fake_vocab_file(vocab_file_path) - data_config = loader.SentencePredictionTextDataConfig( - input_path='' if use_tfds else tf_record_path, - tfds_name='glue/mrpc' if use_tfds else '', - tfds_split='train' if use_tfds else '', - text_fields=text_fields, - global_batch_size=batch_size, - seq_length=seq_length, - is_training=True, - preprocessing_hub_module_url=( - 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'), - label_type='int' if use_tfds else 'float', - ) - dataset = loader.SentencePredictionTextDataLoader(data_config).load() - features, labels = next(iter(dataset)) - self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'], - features.keys()) - self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) - self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) - self.assertEqual(labels.shape, (batch_size,)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_retrieval_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_retrieval_lib.py deleted file mode 100644 index 9971d6e2ae6f3d13192efb15b1e5b289cba68293..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/sentence_retrieval_lib.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ============================================================================
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""BERT library to process data for cross-lingual sentence retrieval task."""
-
-import os
-
-from absl import logging
-from official.nlp.bert import tokenization
-from official.nlp.data import classifier_data_lib
-
-
-class BuccProcessor(classifier_data_lib.DataProcessor):
-  """Processor for the Xtreme BUCC data set."""
-  supported_languages = ["de", "fr", "ru", "zh"]
-
-  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
-    super(BuccProcessor, self).__init__(process_text_fn)
-    self.languages = BuccProcessor.supported_languages
-
-  def get_dev_examples(self, data_dir, file_pattern):
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, file_pattern.format("dev"))),
-        "sample")
-
-  def get_test_examples(self, data_dir, file_pattern):
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, file_pattern.format("test"))),
-        "test")
-
-  @staticmethod
-  def get_processor_name():
-    """See base class."""
-    return "BUCC"
-
-  def _create_examples(self, lines, set_type):
-    """Creates examples for the dev and test sets."""
-    examples = []
-    for (i, line) in enumerate(lines):
-      guid = "%s-%s" % (set_type, i)
-      example_id = int(line[0].split("-")[1])
-      text_a = self.process_text_fn(line[1])
-      examples.append(
-          classifier_data_lib.InputExample(
-              guid=guid, text_a=text_a, example_id=example_id))
-    return examples
-
-
-class TatoebaProcessor(classifier_data_lib.DataProcessor):
-  """Processor for the Xtreme Tatoeba data set."""
-  supported_languages = [
-      "af", "ar", "bg", "bn", "de", "el", "es", "et", "eu", "fa", "fi", "fr",
-      "he", "hi", "hu", "id", "it", "ja", "jv", "ka", "kk", "ko", "ml", "mr",
-      "nl", "pt", "ru", "sw", "ta", "te", "th", "tl", "tr", "ur", "vi", "zh"
-  ]
-
-  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
-    super(TatoebaProcessor, self).__init__(process_text_fn)
-    self.languages = TatoebaProcessor.supported_languages
-
-  def get_test_examples(self, data_dir, file_path):
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, file_path)), "test")
-
-  @staticmethod
-  def get_processor_name():
-    """See base class."""
-    return "TATOEBA"
-
-  def _create_examples(self, lines, set_type):
-    """Creates examples for the test set."""
-    examples = []
-    for (i, line) in enumerate(lines):
-      guid = "%s-%s" % (set_type, i)
-      text_a = self.process_text_fn(line[0])
-      examples.append(
-          classifier_data_lib.InputExample(
-              guid=guid, text_a=text_a, example_id=i))
-    return examples
-
-
-def generate_sentence_retrevial_tf_record(processor,
-                                          data_dir,
-                                          tokenizer,
-                                          eval_data_output_path=None,
-                                          test_data_output_path=None,
-                                          max_seq_length=128):
-  """Generates the tf records for retrieval tasks.
-
-  Args:
-    processor: Input processor object to be used for generating data. Subclass
-      of `DataProcessor`.
-    data_dir: Directory that contains train/eval data to process. Data files
-      should follow the naming pattern constructed below.
-    tokenizer: The tokenizer to be applied on the data.
-    eval_data_output_path: Output to which processed tf record for evaluation
-      will be saved.
-    test_data_output_path: Output to which processed tf record for testing
-      will be saved. Must be a pattern template with {} if processor has
-      language specific test data.
-    max_seq_length: Maximum sequence length of the training/eval data to be
-      generated.
-
-  Returns:
-    A dictionary containing input meta data.
-  """
-  assert eval_data_output_path or test_data_output_path
-
-  if processor.get_processor_name() == "BUCC":
-    path_pattern = "{}-en.{{}}.{}"
-
-  if processor.get_processor_name() == "TATOEBA":
-    path_pattern = "{}-en.{}"
-
-  meta_data = {
-      "processor_type": processor.get_processor_name(),
-      "max_seq_length": max_seq_length,
-      "number_eval_data": {},
-      "number_test_data": {},
-  }
-  logging.info("Start to process %s task data", processor.get_processor_name())
-
-  for lang_a in processor.languages:
-    for lang_b in [lang_a, "en"]:
-      if eval_data_output_path:
-        eval_input_data_examples = processor.get_dev_examples(
-            data_dir, os.path.join(path_pattern.format(lang_a, lang_b)))
-
-        num_eval_data = len(eval_input_data_examples)
-        logging.info("Processing %d dev examples of %s-en.%s", num_eval_data,
-                     lang_a, lang_b)
-        output_file = os.path.join(
-            eval_data_output_path,
-            "{}-en-{}.{}.tfrecords".format(lang_a, lang_b, "dev"))
-        classifier_data_lib.file_based_convert_examples_to_features(
-            eval_input_data_examples, None, max_seq_length, tokenizer,
-            output_file, None)
-        meta_data["number_eval_data"][f"{lang_a}-en.{lang_b}"] = num_eval_data
-
-      if test_data_output_path:
-        test_input_data_examples = processor.get_test_examples(
-            data_dir, os.path.join(path_pattern.format(lang_a, lang_b)))
-
-        num_test_data = len(test_input_data_examples)
-        logging.info("Processing %d test examples of %s-en.%s", num_test_data,
-                     lang_a, lang_b)
-        output_file = os.path.join(
-            test_data_output_path,
-            "{}-en-{}.{}.tfrecords".format(lang_a, lang_b, "test"))
-        classifier_data_lib.file_based_convert_examples_to_features(
-            test_input_data_examples, None, max_seq_length, tokenizer,
-            output_file, None)
-        meta_data["number_test_data"][f"{lang_a}-en.{lang_b}"] = num_test_data
-
-  return meta_data
diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/squad_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/squad_lib.py
deleted file mode 100644
index 407aeaac4f4df3711b1283b76c79cccb99d27da9..0000000000000000000000000000000000000000
--- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/squad_lib.py
+++ /dev/null
@@ -1,991 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Library to process data for SQuAD 1.1 and SQuAD 2.0.""" -# pylint: disable=g-bad-import-order -import collections -import copy -import json -import math -import os - -import six - -from absl import logging -import tensorflow as tf - -from official.nlp.bert import tokenization - - -class SquadExample(object): - """A single training/test example for simple sequence classification. - - For examples without an answer, the start and end position are -1. - - Attributes: - qas_id: ID of the question-answer pair. - question_text: Original text for the question. - doc_tokens: The list of tokens in the context obtained by splitting on - whitespace only. - orig_answer_text: Original text for the answer. - start_position: Starting index of the answer in `doc_tokens`. - end_position: Ending index of the answer in `doc_tokens`. - is_impossible: Whether the question is impossible to answer given the - context. Only used in SQuAD 2.0. 
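-
-  Example (illustrative values only, not from the original file):
-    For the context "BERT is a language model" and the answer "language
-    model", doc_tokens is ["BERT", "is", "a", "language", "model"],
-    start_position is 3 and end_position is 4.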
- """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=False): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) - s += ", question_text: %s" % ( - tokenization.printable_text(self.question_text)) - s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.start_position: - s += ", end_position: %d" % (self.end_position) - if self.start_position: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - paragraph_mask=None, - class_index=None, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - self.paragraph_mask = paragraph_mask - self.class_index = class_index - - -class FeatureWriter(object): - """Writes InputFeature to TF example file.""" - - def __init__(self, filename, is_training): - self.filename = filename - self.is_training = is_training - self.num_features = 0 - tf.io.gfile.makedirs(os.path.dirname(filename)) - self._writer = tf.io.TFRecordWriter(filename) - - def process_feature(self, feature): - """Write a InputFeature to the TFRecordWriter as a tf.train.Example.""" - self.num_features += 1 - - def create_int_feature(values): - feature = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(values))) - return feature - - features = collections.OrderedDict() - features["unique_ids"] = create_int_feature([feature.unique_id]) - features["input_ids"] = create_int_feature(feature.input_ids) - features["input_mask"] = create_int_feature(feature.input_mask) - features["segment_ids"] = create_int_feature(feature.segment_ids) - - if feature.paragraph_mask is not None: - features["paragraph_mask"] = create_int_feature(feature.paragraph_mask) - if feature.class_index is not None: - features["class_index"] = create_int_feature([feature.class_index]) - - if self.is_training: - features["start_positions"] = create_int_feature([feature.start_position]) - features["end_positions"] = create_int_feature([feature.end_position]) - impossible = 0 - if feature.is_impossible: - impossible = 1 - features["is_impossible"] = create_int_feature([impossible]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - self._writer.write(tf_example.SerializeToString()) - - def close(self): - self._writer.close() - - -def read_squad_examples(input_file, is_training, - version_2_with_negative, - translated_input_folder=None): - """Read a SQuAD json file into a list of 
SquadExample.""" - with tf.io.gfile.GFile(input_file, "r") as reader: - input_data = json.load(reader)["data"] - - if translated_input_folder is not None: - translated_files = tf.io.gfile.glob( - os.path.join(translated_input_folder, "*.json")) - for file in translated_files: - with tf.io.gfile.GFile(file, "r") as reader: - input_data.extend(json.load(reader)["data"]) - - def is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - doc_tokens = [] - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - end_position = None - orig_answer_text = None - is_impossible = False - if is_training: - - if version_2_with_negative: - is_impossible = qa["is_impossible"] - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - answer_offset = answer["answer_start"] - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + answer_length - - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. - actual_text = " ".join(doc_tokens[start_position:(end_position + - 1)]) - cleaned_answer_text = " ".join( - tokenization.whitespace_tokenize(orig_answer_text)) - if actual_text.find(cleaned_answer_text) == -1: - logging.warning("Could not find answer: '%s' vs. 
'%s'", - actual_text, cleaned_answer_text) - continue - else: - start_position = -1 - end_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - examples.append(example) - - return examples - - -def convert_examples_to_features(examples, - tokenizer, - max_seq_length, - doc_stride, - max_query_length, - is_training, - output_fn, - xlnet_format=False, - batch_size=None): - """Loads a data file into a list of `InputBatch`s.""" - - base_id = 1000000000 - unique_id = base_id - feature = None - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.tokenize(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.tokenize(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, - example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. - _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - - # Paragraph mask used in XLNet. - # 1 represents paragraph and class tokens. - # 0 represents query and other special tokens. 
- paragraph_mask = [] - - # pylint: disable=cell-var-from-loop - def process_query(seg_q): - for token in query_tokens: - tokens.append(token) - segment_ids.append(seg_q) - paragraph_mask.append(0) - tokens.append("[SEP]") - segment_ids.append(seg_q) - paragraph_mask.append(0) - - def process_paragraph(seg_p): - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(seg_p) - paragraph_mask.append(1) - tokens.append("[SEP]") - segment_ids.append(seg_p) - paragraph_mask.append(0) - - def process_class(seg_class): - class_index = len(segment_ids) - tokens.append("[CLS]") - segment_ids.append(seg_class) - paragraph_mask.append(1) - return class_index - - if xlnet_format: - seg_p, seg_q, seg_class, seg_pad = 0, 1, 2, 3 - process_paragraph(seg_p) - process_query(seg_q) - class_index = process_class(seg_class) - else: - seg_p, seg_q, seg_class, seg_pad = 1, 0, 0, 0 - class_index = process_class(seg_class) - process_query(seg_q) - process_paragraph(seg_p) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(seg_pad) - paragraph_mask.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - assert len(paragraph_mask) == max_seq_length - - start_position = 0 - end_position = 0 - span_contains_answer = False - - if is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. 
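-        # (The doc_offset below, len(query_tokens) + 2, accounts for the
-        # leading [CLS] and the [SEP] that follows the query in the default
-        # BERT layout; XLNet puts the paragraph first, so its offset is 0.)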
-        doc_start = doc_span.start
-        doc_end = doc_span.start + doc_span.length - 1
-        span_contains_answer = (tok_start_position >= doc_start and
-                                tok_end_position <= doc_end)
-        if span_contains_answer:
-          doc_offset = 0 if xlnet_format else len(query_tokens) + 2
-          start_position = tok_start_position - doc_start + doc_offset
-          end_position = tok_end_position - doc_start + doc_offset
-
-      if example_index < 20:
-        logging.info("*** Example ***")
-        logging.info("unique_id: %s", (unique_id))
-        logging.info("example_index: %s", (example_index))
-        logging.info("doc_span_index: %s", (doc_span_index))
-        logging.info("tokens: %s",
-                     " ".join([tokenization.printable_text(x) for x in tokens]))
-        logging.info(
-            "token_to_orig_map: %s", " ".join([
-                "%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)
-            ]))
-        logging.info(
-            "token_is_max_context: %s", " ".join([
-                "%d:%s" % (x, y)
-                for (x, y) in six.iteritems(token_is_max_context)
-            ]))
-        logging.info("input_ids: %s", " ".join([str(x) for x in input_ids]))
-        logging.info("input_mask: %s", " ".join([str(x) for x in input_mask]))
-        logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids]))
-        logging.info("paragraph_mask: %s", " ".join(
-            [str(x) for x in paragraph_mask]))
-        logging.info("class_index: %d", class_index)
-        if is_training:
-          if span_contains_answer:
-            answer_text = " ".join(tokens[start_position:(end_position + 1)])
-            logging.info("start_position: %d", (start_position))
-            logging.info("end_position: %d", (end_position))
-            logging.info("answer: %s", tokenization.printable_text(answer_text))
-          else:
-            logging.info("document span doesn't contain answer")
-
-      feature = InputFeatures(
-          unique_id=unique_id,
-          example_index=example_index,
-          doc_span_index=doc_span_index,
-          tokens=tokens,
-          paragraph_mask=paragraph_mask,
-          class_index=class_index,
-          token_to_orig_map=token_to_orig_map,
-          token_is_max_context=token_is_max_context,
-          input_ids=input_ids,
-          input_mask=input_mask,
-          segment_ids=segment_ids,
-          start_position=start_position,
-          end_position=end_position,
-          is_impossible=not span_contains_answer)
-
-      # Run callback
-      if is_training:
-        output_fn(feature)
-      else:
-        output_fn(feature, is_padding=False)
-
-      unique_id += 1
-
-  if not is_training and feature:
-    assert batch_size
-    num_padding = 0
-    num_examples = unique_id - base_id
-    if unique_id % batch_size != 0:
-      num_padding = batch_size - (num_examples % batch_size)
-    logging.info("Adding padding examples to make sure no partial batch.")
-    logging.info("Adds %d padding examples for inference.", num_padding)
-    dummy_feature = copy.deepcopy(feature)
-    for _ in range(num_padding):
-      dummy_feature.unique_id = unique_id
-
-      # Run callback
-      output_fn(dummy_feature, is_padding=True)
-      unique_id += 1
-  return unique_id - base_id
-
-
-def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
-                         orig_answer_text):
-  """Returns tokenized answer spans that better match the annotated answer."""
-
-  # The SQuAD annotations are character based. We first project them to
-  # whitespace-tokenized words. But then after WordPiece tokenization, we can
-  # often find a "better match". For example:
-  #
-  # Question: What year was John Smith born?
-  # Context: The leader was John Smith (1895-1943).
-  # Answer: 1895
-  #
-  # The original whitespace-tokenized answer will be "(1895-1943).". However
-  # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
-  # the exact answer, 1895.
-  #
-  # However, this is not always possible. Consider the following:
-  #
-  # Question: What country is the top exporter of electronics?
-  # Context: The Japanese electronics industry is the largest in the world.
-  # Answer: Japan
-  #
-  # In this case, the annotator chose "Japan" as a character sub-span of
-  # the word "Japanese". Since our WordPiece tokenizer does not split
-  # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
-  # in SQuAD, but does happen.
-  tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
-
-  for new_start in range(input_start, input_end + 1):
-    for new_end in range(input_end, new_start - 1, -1):
-      text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
-      if text_span == tok_answer_text:
-        return (new_start, new_end)
-
-  return (input_start, input_end)
-
-
-def _check_is_max_context(doc_spans, cur_span_index, position):
-  """Check if this is the 'max context' doc span for the token."""
-
-  # Because of the sliding window approach taken to scoring documents, a single
-  # token can appear in multiple documents. E.g.
-  # Doc: the man went to the store and bought a gallon of milk
-  # Span A: the man went to the
-  # Span B: to the store and bought
-  # Span C: and bought a gallon of
-  # ...
-  #
-  # Now the word 'bought' will have two scores from spans B and C. We only
-  # want to consider the score with "maximum context", which we define as
-  # the *minimum* of its left and right context (the *sum* of left and
-  # right context will always be the same, of course).
-  #
-  # In the example the maximum context for 'bought' would be span C since
-  # it has 1 left context and 3 right context, while span B has 4 left context
-  # and 0 right context.
-  best_score = None
-  best_span_index = None
-  for (span_index, doc_span) in enumerate(doc_spans):
-    end = doc_span.start + doc_span.length - 1
-    if position < doc_span.start:
-      continue
-    if position > end:
-      continue
-    num_left_context = position - doc_span.start
-    num_right_context = end - position
-    score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
-    if best_score is None or score > best_score:
-      best_score = score
-      best_span_index = span_index
-
-  return cur_span_index == best_span_index
-
-
-def write_predictions(all_examples,
-                      all_features,
-                      all_results,
-                      n_best_size,
-                      max_answer_length,
-                      do_lower_case,
-                      output_prediction_file,
-                      output_nbest_file,
-                      output_null_log_odds_file,
-                      version_2_with_negative=False,
-                      null_score_diff_threshold=0.0,
-                      verbose=False):
-  """Write final predictions to the json file and log-odds of null if needed."""
-  logging.info("Writing predictions to: %s", (output_prediction_file))
-  logging.info("Writing nbest to: %s", (output_nbest_file))
-
-  all_predictions, all_nbest_json, scores_diff_json = (
-      postprocess_output(
-          all_examples=all_examples,
-          all_features=all_features,
-          all_results=all_results,
-          n_best_size=n_best_size,
-          max_answer_length=max_answer_length,
-          do_lower_case=do_lower_case,
-          version_2_with_negative=version_2_with_negative,
-          null_score_diff_threshold=null_score_diff_threshold,
-          verbose=verbose))
-
-  write_to_json_files(all_predictions, output_prediction_file)
-  write_to_json_files(all_nbest_json, output_nbest_file)
-  if version_2_with_negative:
-    write_to_json_files(scores_diff_json, output_null_log_odds_file)
-
-
-def postprocess_output(all_examples,
-                       all_features,
-                       all_results,
-                       n_best_size,
-                       max_answer_length,
-                       do_lower_case,
-                       version_2_with_negative=False,
-                       null_score_diff_threshold=0.0,
-                       xlnet_format=False,
-                       verbose=False):
-  """Postprocess model output, to form prediction results."""
-
-  example_index_to_features = collections.defaultdict(list)
-  for feature in all_features:
-    example_index_to_features[feature.example_index].append(feature)
-  unique_id_to_result = {}
-  for result in all_results:
-    unique_id_to_result[result.unique_id] = result
-
-  _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
-      "PrelimPrediction",
-      ["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
-
-  all_predictions = collections.OrderedDict()
-  all_nbest_json = collections.OrderedDict()
-  scores_diff_json = collections.OrderedDict()
-
-  for (example_index, example) in enumerate(all_examples):
-    features = example_index_to_features[example_index]
-
-    prelim_predictions = []
-    # keep track of the minimum score of null start+end of position 0
-    score_null = 1000000  # large and positive
-    min_null_feature_index = 0  # the paragraph slice with min null score
-    null_start_logit = 0  # the start logit at the slice with min null score
-    null_end_logit = 0  # the end logit at the slice with min null score
-    for (feature_index, feature) in enumerate(features):
-      if feature.unique_id not in unique_id_to_result:
-        logging.info("Skip eval example %s, not in pred.", feature.unique_id)
-        continue
-      result = unique_id_to_result[feature.unique_id]
-
-      # if we could have irrelevant answers, get the min score of irrelevant
-      if version_2_with_negative:
-        if xlnet_format:
-          feature_null_score = result.class_logits
-        else:
-          feature_null_score = result.start_logits[0] + result.end_logits[0]
-        if feature_null_score < score_null:
-          score_null = feature_null_score
-          min_null_feature_index = feature_index
-          null_start_logit = result.start_logits[0]
-          null_end_logit = result.end_logits[0]
-      for (start_index, start_logit,
-           end_index, end_logit) in _get_best_indexes_and_logits(
-               result=result,
-               n_best_size=n_best_size,
-               xlnet_format=xlnet_format):
-        # We could hypothetically create invalid predictions, e.g., predict
-        # that the start of the span is in the question. We throw out all
-        # invalid predictions.
-        if start_index >= len(feature.tokens):
-          continue
-        if end_index >= len(feature.tokens):
-          continue
-        if start_index not in feature.token_to_orig_map:
-          continue
-        if end_index not in feature.token_to_orig_map:
-          continue
-        if not feature.token_is_max_context.get(start_index, False):
-          continue
-        if end_index < start_index:
-          continue
-        length = end_index - start_index + 1
-        if length > max_answer_length:
-          continue
-        prelim_predictions.append(
-            _PrelimPrediction(
-                feature_index=feature_index,
-                start_index=start_index,
-                end_index=end_index,
-                start_logit=start_logit,
-                end_logit=end_logit))
-
-    if version_2_with_negative and not xlnet_format:
-      prelim_predictions.append(
-          _PrelimPrediction(
-              feature_index=min_null_feature_index,
-              start_index=0,
-              end_index=0,
-              start_logit=null_start_logit,
-              end_logit=null_end_logit))
-    prelim_predictions = sorted(
-        prelim_predictions,
-        key=lambda x: (x.start_logit + x.end_logit),
-        reverse=True)
-
-    _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
-        "NbestPrediction", ["text", "start_logit", "end_logit"])
-
-    seen_predictions = {}
-    nbest = []
-    for pred in prelim_predictions:
-      if len(nbest) >= n_best_size:
-        break
-      feature = features[pred.feature_index]
-      if pred.start_index > 0 or xlnet_format:  # this is a non-null prediction
-        tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
-        orig_doc_start = feature.token_to_orig_map[pred.start_index]
-        orig_doc_end = feature.token_to_orig_map[pred.end_index]
-        orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
-        tok_text = " ".join(tok_tokens)
-
-        # De-tokenize WordPieces that have been split off.
-        tok_text = tok_text.replace(" ##", "")
-        tok_text = tok_text.replace("##", "")
-
-        # Clean whitespace
-        tok_text = tok_text.strip()
-        tok_text = " ".join(tok_text.split())
-        orig_text = " ".join(orig_tokens)
-
-        final_text = get_final_text(
-            tok_text, orig_text, do_lower_case, verbose=verbose)
-        if final_text in seen_predictions:
-          continue
-
-        seen_predictions[final_text] = True
-      else:
-        final_text = ""
-        seen_predictions[final_text] = True
-
-      nbest.append(
-          _NbestPrediction(
-              text=final_text,
-              start_logit=pred.start_logit,
-              end_logit=pred.end_logit))
-
-    # if we didn't include the empty option in the n-best, include it
-    if version_2_with_negative and not xlnet_format:
-      if "" not in seen_predictions:
-        nbest.append(
-            _NbestPrediction(
-                text="", start_logit=null_start_logit,
-                end_logit=null_end_logit))
-    # In very rare edge cases we could have no valid predictions. So we
-    # just create a nonce prediction in this case to avoid failure.
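-    # (This can occur when, e.g., every candidate span was filtered out
-    # above for exceeding max_answer_length or falling outside
-    # token_to_orig_map.)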
-    if not nbest:
-      nbest.append(
-          _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
-
-    assert len(nbest) >= 1
-
-    total_scores = []
-    best_non_null_entry = None
-    for entry in nbest:
-      total_scores.append(entry.start_logit + entry.end_logit)
-      if not best_non_null_entry:
-        if entry.text:
-          best_non_null_entry = entry
-
-    probs = _compute_softmax(total_scores)
-
-    nbest_json = []
-    for (i, entry) in enumerate(nbest):
-      output = collections.OrderedDict()
-      output["text"] = entry.text
-      output["probability"] = probs[i]
-      output["start_logit"] = entry.start_logit
-      output["end_logit"] = entry.end_logit
-      nbest_json.append(output)
-
-    assert len(nbest_json) >= 1
-
-    if not version_2_with_negative:
-      all_predictions[example.qas_id] = nbest_json[0]["text"]
-    else:
-      # pytype: disable=attribute-error
-      # predict "" iff the null score - the score of best non-null > threshold
-      if best_non_null_entry is not None:
-        if xlnet_format:
-          score_diff = score_null
-          scores_diff_json[example.qas_id] = score_diff
-          all_predictions[example.qas_id] = best_non_null_entry.text
-        else:
-          score_diff = score_null - best_non_null_entry.start_logit - (
-              best_non_null_entry.end_logit)
-          scores_diff_json[example.qas_id] = score_diff
-          if score_diff > null_score_diff_threshold:
-            all_predictions[example.qas_id] = ""
-          else:
-            all_predictions[example.qas_id] = best_non_null_entry.text
-      else:
-        logging.warning("best_non_null_entry is None")
-        scores_diff_json[example.qas_id] = score_null
-        all_predictions[example.qas_id] = ""
-      # pytype: enable=attribute-error
-
-    all_nbest_json[example.qas_id] = nbest_json
-
-  return all_predictions, all_nbest_json, scores_diff_json
-
-
-def write_to_json_files(json_records, json_file):
-  with tf.io.gfile.GFile(json_file, "w") as writer:
-    writer.write(json.dumps(json_records, indent=4) + "\n")
-
-
-def get_final_text(pred_text, orig_text, do_lower_case, verbose=False):
-  """Project the tokenized prediction back to the original text."""
-
-  # When we created the data, we kept track of the alignment between original
-  # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
-  # now `orig_text` contains the span of our original text corresponding to the
-  # span that we predicted.
-  #
-  # However, `orig_text` may contain extra characters that we don't want in
-  # our prediction.
-  #
-  # For example, let's say:
-  #   pred_text = steve smith
-  #   orig_text = Steve Smith's
-  #
-  # We don't want to return `orig_text` because it contains the extra "'s".
-  #
-  # We don't want to return `pred_text` because it's already been normalized
-  # (the SQuAD eval script also does punctuation stripping/lower casing but
-  # our tokenizer does additional normalization like stripping accent
-  # characters).
-  #
-  # What we really want to return is "Steve Smith".
-  #
-  # Therefore, we have to apply a semi-complicated alignment heuristic between
-  # `pred_text` and `orig_text` to get a character-to-character alignment. This
-  # can fail in certain cases in which case we just return `orig_text`.
-
-  def _strip_spaces(text):
-    ns_chars = []
-    ns_to_s_map = collections.OrderedDict()
-    for (i, c) in enumerate(text):
-      if c == " ":
-        continue
-      ns_to_s_map[len(ns_chars)] = i
-      ns_chars.append(c)
-    ns_text = "".join(ns_chars)
-    return (ns_text, ns_to_s_map)
-
-  # We first tokenize `orig_text`, strip whitespace from the result
-  # and `pred_text`, and check if they are the same length. If they are
-  # NOT the same length, the heuristic has failed.
If they are the same - # length, we assume the characters are one-to-one aligned. - tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case) - - tok_text = " ".join(tokenizer.tokenize(orig_text)) - - start_position = tok_text.find(pred_text) - if start_position == -1: - if verbose: - logging.info("Unable to find text: '%s' in '%s'", pred_text, orig_text) - return orig_text - end_position = start_position + len(pred_text) - 1 - - (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) - (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) - - if len(orig_ns_text) != len(tok_ns_text): - if verbose: - logging.info("Length not equal after stripping spaces: '%s' vs '%s'", - orig_ns_text, tok_ns_text) - return orig_text - - # We then project the characters in `pred_text` back to `orig_text` using - # the character-to-character alignment. - tok_s_to_ns_map = {} - for (i, tok_index) in six.iteritems(tok_ns_to_s_map): - tok_s_to_ns_map[tok_index] = i - - orig_start_position = None - if start_position in tok_s_to_ns_map: - ns_start_position = tok_s_to_ns_map[start_position] - if ns_start_position in orig_ns_to_s_map: - orig_start_position = orig_ns_to_s_map[ns_start_position] - - if orig_start_position is None: - if verbose: - logging.info("Couldn't map start position") - return orig_text - - orig_end_position = None - if end_position in tok_s_to_ns_map: - ns_end_position = tok_s_to_ns_map[end_position] - if ns_end_position in orig_ns_to_s_map: - orig_end_position = orig_ns_to_s_map[ns_end_position] - - if orig_end_position is None: - if verbose: - logging.info("Couldn't map end position") - return orig_text - - output_text = orig_text[orig_start_position:(orig_end_position + 1)] - return output_text - - -def _get_best_indexes_and_logits(result, - n_best_size, - xlnet_format=False): - """Generates the n-best indexes and logits from a list.""" - if xlnet_format: - for i in range(n_best_size): - for j in range(n_best_size): - j_index = i * n_best_size + j - yield (result.start_indexes[i], result.start_logits[i], - result.end_indexes[j_index], result.end_logits[j_index]) - else: - start_index_and_score = sorted(enumerate(result.start_logits), - key=lambda x: x[1], reverse=True) - end_index_and_score = sorted(enumerate(result.end_logits), - key=lambda x: x[1], reverse=True) - for i in range(len(start_index_and_score)): - if i >= n_best_size: - break - for j in range(len(end_index_and_score)): - if j >= n_best_size: - break - yield (start_index_and_score[i][0], start_index_and_score[i][1], - end_index_and_score[j][0], end_index_and_score[j][1]) - - -def _compute_softmax(scores): - """Compute softmax probability over raw logits.""" - if not scores: - return [] - - max_score = None - for score in scores: - if max_score is None or score > max_score: - max_score = score - - exp_scores = [] - total_sum = 0.0 - for score in scores: - x = math.exp(score - max_score) - exp_scores.append(x) - total_sum += x - - probs = [] - for score in exp_scores: - probs.append(score / total_sum) - return probs - - -def generate_tf_record_from_json_file(input_file_path, - vocab_file_path, - output_path, - translated_input_folder=None, - max_seq_length=384, - do_lower_case=True, - max_query_length=64, - doc_stride=128, - version_2_with_negative=False, - xlnet_format=False): - """Generates and saves training data into a tf record file.""" - train_examples = read_squad_examples( - input_file=input_file_path, - is_training=True, - version_2_with_negative=version_2_with_negative, - 
translated_input_folder=translated_input_folder) - tokenizer = tokenization.FullTokenizer( - vocab_file=vocab_file_path, do_lower_case=do_lower_case) - train_writer = FeatureWriter(filename=output_path, is_training=True) - number_of_examples = convert_examples_to_features( - examples=train_examples, - tokenizer=tokenizer, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - is_training=True, - output_fn=train_writer.process_feature, - xlnet_format=xlnet_format) - train_writer.close() - - meta_data = { - "task_type": "bert_squad", - "train_data_size": number_of_examples, - "max_seq_length": max_seq_length, - "max_query_length": max_query_length, - "doc_stride": doc_stride, - "version_2_with_negative": version_2_with_negative, - } - - return meta_data diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/squad_lib_sp.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/squad_lib_sp.py deleted file mode 100644 index b999bbdfa3a5bd811fe7d8ebbd73d8153805f4c5..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/squad_lib_sp.py +++ /dev/null @@ -1,992 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Run ALBERT on SQuAD 1.1 and SQuAD 2.0 using sentence piece tokenization. - -The file is forked from: - -https://github.com/google-research/ALBERT/blob/master/run_squad_sp.py -""" -import collections -import copy -import json -import math -import os - -from absl import logging -import numpy as np -import tensorflow as tf - -from official.nlp.bert import tokenization - - -class SquadExample(object): - """A single training/test example for simple sequence classification. - - For examples without an answer, the start and end position are -1. 
- """ - - def __init__(self, - qas_id, - question_text, - paragraph_text, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=False): - self.qas_id = qas_id - self.question_text = question_text - self.paragraph_text = paragraph_text - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) - s += ", question_text: %s" % ( - tokenization.printable_text(self.question_text)) - s += ", paragraph_text: [%s]" % (" ".join(self.paragraph_text)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.start_position: - s += ", end_position: %d" % (self.end_position) - if self.start_position: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tok_start_to_orig_index, - tok_end_to_orig_index, - token_is_max_context, - tokens, - input_ids, - input_mask, - segment_ids, - paragraph_len, - class_index=None, - paragraph_mask=None, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tok_start_to_orig_index = tok_start_to_orig_index - self.tok_end_to_orig_index = tok_end_to_orig_index - self.token_is_max_context = token_is_max_context - self.tokens = tokens - self.input_ids = input_ids - self.input_mask = input_mask - self.paragraph_mask = paragraph_mask - self.segment_ids = segment_ids - self.paragraph_len = paragraph_len - self.class_index = class_index - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def read_squad_examples(input_file, - is_training, - version_2_with_negative, - translated_input_folder=None): - """Read a SQuAD json file into a list of SquadExample.""" - del version_2_with_negative - with tf.io.gfile.GFile(input_file, "r") as reader: - input_data = json.load(reader)["data"] - - if translated_input_folder is not None: - translated_files = tf.io.gfile.glob( - os.path.join(translated_input_folder, "*.json")) - for file in translated_files: - with tf.io.gfile.GFile(file, "r") as reader: - input_data.extend(json.load(reader)["data"]) - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - orig_answer_text = None - is_impossible = False - - if is_training: - is_impossible = qa.get("is_impossible", False) - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - start_position = answer["answer_start"] - else: - start_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - paragraph_text=paragraph_text, - orig_answer_text=orig_answer_text, - start_position=start_position, - is_impossible=is_impossible) - examples.append(example) - - return examples - - -def _convert_index(index, pos, m=None, is_start=True): - """Converts index.""" - if 
index[pos] is not None: - return index[pos] - n = len(index) - rear = pos - while rear < n - 1 and index[rear] is None: - rear += 1 - front = pos - while front > 0 and index[front] is None: - front -= 1 - assert index[front] is not None or index[rear] is not None - if index[front] is None: - if index[rear] >= 1: - if is_start: - return 0 - else: - return index[rear] - 1 - return index[rear] - if index[rear] is None: - if m is not None and index[front] < m - 1: - if is_start: - return index[front] + 1 - else: - return m - 1 - return index[front] - if is_start: - if index[rear] > index[front] + 1: - return index[front] + 1 - else: - return index[rear] - else: - if index[rear] > index[front] + 1: - return index[rear] - 1 - else: - return index[front] - - -def convert_examples_to_features(examples, - tokenizer, - max_seq_length, - doc_stride, - max_query_length, - is_training, - output_fn, - do_lower_case, - xlnet_format=False, - batch_size=None): - """Loads a data file into a list of `InputBatch`s.""" - cnt_pos, cnt_neg = 0, 0 - base_id = 1000000000 - unique_id = base_id - max_n, max_m = 1024, 1024 - f = np.zeros((max_n, max_m), dtype=np.float32) - - for (example_index, example) in enumerate(examples): - - if example_index % 100 == 0: - logging.info("Converting %d/%d pos %d neg %d", example_index, - len(examples), cnt_pos, cnt_neg) - - query_tokens = tokenization.encode_ids( - tokenizer.sp_model, - tokenization.preprocess_text( - example.question_text, lower=do_lower_case)) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - paragraph_text = example.paragraph_text - para_tokens = tokenization.encode_pieces( - tokenizer.sp_model, - tokenization.preprocess_text( - example.paragraph_text, lower=do_lower_case)) - - chartok_to_tok_index = [] - tok_start_to_chartok_index = [] - tok_end_to_chartok_index = [] - char_cnt = 0 - for i, token in enumerate(para_tokens): - new_token = token.replace(tokenization.SPIECE_UNDERLINE, " ") - chartok_to_tok_index.extend([i] * len(new_token)) - tok_start_to_chartok_index.append(char_cnt) - char_cnt += len(new_token) - tok_end_to_chartok_index.append(char_cnt - 1) - - tok_cat_text = "".join(para_tokens).replace(tokenization.SPIECE_UNDERLINE, - " ") - n, m = len(paragraph_text), len(tok_cat_text) - - if n > max_n or m > max_m: - max_n = max(n, max_n) - max_m = max(m, max_m) - f = np.zeros((max_n, max_m), dtype=np.float32) - - g = {} - - # pylint: disable=cell-var-from-loop - def _lcs_match(max_dist, n=n, m=m): - """Longest-common-substring algorithm.""" - f.fill(0) - g.clear() - - ### longest common sub sequence - # f[i, j] = max(f[i - 1, j], f[i, j - 1], f[i - 1, j - 1] + match(i, j)) - for i in range(n): - - # unlike standard LCS, this is specifically optimized for the setting - # because the mismatch between sentence pieces and original text will - # be small - for j in range(i - max_dist, i + max_dist): - if j >= m or j < 0: - continue - - if i > 0: - g[(i, j)] = 0 - f[i, j] = f[i - 1, j] - - if j > 0 and f[i, j - 1] > f[i, j]: - g[(i, j)] = 1 - f[i, j] = f[i, j - 1] - - f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 - if (tokenization.preprocess_text( - paragraph_text[i], lower=do_lower_case, - remove_space=False) == tok_cat_text[j] and f_prev + 1 > f[i, j]): - g[(i, j)] = 2 - f[i, j] = f_prev + 1 - - # pylint: enable=cell-var-from-loop - - max_dist = abs(n - m) + 5 - for _ in range(2): - _lcs_match(max_dist) - if f[n - 1, m - 1] > 0.8 * n: - break - max_dist *= 2 - - orig_to_chartok_index = [None] * n - 
chartok_to_orig_index = [None] * m
-    i, j = n - 1, m - 1
-    while i >= 0 and j >= 0:
-      if (i, j) not in g:
-        break
-      if g[(i, j)] == 2:
-        orig_to_chartok_index[i] = j
-        chartok_to_orig_index[j] = i
-        i, j = i - 1, j - 1
-      elif g[(i, j)] == 1:
-        j = j - 1
-      else:
-        i = i - 1
-
-    if (all(v is None for v in orig_to_chartok_index) or
-        f[n - 1, m - 1] < 0.8 * n):
-      logging.info("MISMATCH DETECTED!")
-      continue
-
-    tok_start_to_orig_index = []
-    tok_end_to_orig_index = []
-    for i in range(len(para_tokens)):
-      start_chartok_pos = tok_start_to_chartok_index[i]
-      end_chartok_pos = tok_end_to_chartok_index[i]
-      start_orig_pos = _convert_index(
-          chartok_to_orig_index, start_chartok_pos, n, is_start=True)
-      end_orig_pos = _convert_index(
-          chartok_to_orig_index, end_chartok_pos, n, is_start=False)
-
-      tok_start_to_orig_index.append(start_orig_pos)
-      tok_end_to_orig_index.append(end_orig_pos)
-
-    if not is_training:
-      tok_start_position = tok_end_position = None
-
-    if is_training and example.is_impossible:
-      tok_start_position = 0
-      tok_end_position = 0
-
-    if is_training and not example.is_impossible:
-      start_position = example.start_position
-      end_position = start_position + len(example.orig_answer_text) - 1
-
-      start_chartok_pos = _convert_index(
-          orig_to_chartok_index, start_position, is_start=True)
-      tok_start_position = chartok_to_tok_index[start_chartok_pos]
-
-      end_chartok_pos = _convert_index(
-          orig_to_chartok_index, end_position, is_start=False)
-      tok_end_position = chartok_to_tok_index[end_chartok_pos]
-      assert tok_start_position <= tok_end_position
-
-    def _piece_to_id(x):
-      return tokenizer.sp_model.PieceToId(x)
-
-    all_doc_tokens = list(map(_piece_to_id, para_tokens))
-
-    # The -3 accounts for [CLS], [SEP] and [SEP]
-    max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
-
-    # We can have documents that are longer than the maximum sequence length.
-    # To deal with this we do a sliding window approach, where we take chunks
-    # of up to our max length with a stride of `doc_stride`.
-    _DocSpan = collections.namedtuple(  # pylint: disable=invalid-name
-        "DocSpan", ["start", "length"])
-    doc_spans = []
-    start_offset = 0
-
-    while start_offset < len(all_doc_tokens):
-      length = len(all_doc_tokens) - start_offset
-      if length > max_tokens_for_doc:
-        length = max_tokens_for_doc
-      doc_spans.append(_DocSpan(start=start_offset, length=length))
-      if start_offset + length == len(all_doc_tokens):
-        break
-      start_offset += min(length, doc_stride)
-
-    for (doc_span_index, doc_span) in enumerate(doc_spans):
-      tokens = []
-      token_is_max_context = {}
-      segment_ids = []
-
-      # Paragraph mask used in XLNet.
-      # 1 represents paragraph and class tokens.
-      # 0 represents query and other special tokens.
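-      # As an illustration (hypothetical pieces), the XLNet-style layout
-      # assembled below puts the paragraph first and [CLS] last:
-      #   tokens:         ▁john ▁smith [SEP] ▁what ▁year [SEP] [CLS]
-      #   segment_ids:      0     0     0     1     1     1     2
-      #   paragraph_mask:   1     1     0     0     0     0     1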
-      paragraph_mask = []
-
-      cur_tok_start_to_orig_index = []
-      cur_tok_end_to_orig_index = []
-
-      # pylint: disable=cell-var-from-loop
-      def process_query(seg_q):
-        for token in query_tokens:
-          tokens.append(token)
-          segment_ids.append(seg_q)
-          paragraph_mask.append(0)
-        tokens.append(tokenizer.sp_model.PieceToId("[SEP]"))
-        segment_ids.append(seg_q)
-        paragraph_mask.append(0)
-
-      def process_paragraph(seg_p):
-        for i in range(doc_span.length):
-          split_token_index = doc_span.start + i
-
-          cur_tok_start_to_orig_index.append(
-              tok_start_to_orig_index[split_token_index])
-          cur_tok_end_to_orig_index.append(
-              tok_end_to_orig_index[split_token_index])
-
-          is_max_context = _check_is_max_context(doc_spans, doc_span_index,
-                                                 split_token_index)
-          token_is_max_context[len(tokens)] = is_max_context
-          tokens.append(all_doc_tokens[split_token_index])
-          segment_ids.append(seg_p)
-          paragraph_mask.append(1)
-        tokens.append(tokenizer.sp_model.PieceToId("[SEP]"))
-        segment_ids.append(seg_p)
-        paragraph_mask.append(0)
-        return len(tokens)
-
-      def process_class(seg_class):
-        class_index = len(segment_ids)
-        tokens.append(tokenizer.sp_model.PieceToId("[CLS]"))
-        segment_ids.append(seg_class)
-        paragraph_mask.append(1)
-        return class_index
-
-      if xlnet_format:
-        seg_p, seg_q, seg_class, seg_pad = 0, 1, 2, 3
-        paragraph_len = process_paragraph(seg_p)
-        process_query(seg_q)
-        class_index = process_class(seg_class)
-      else:
-        seg_p, seg_q, seg_class, seg_pad = 1, 0, 0, 0
-        class_index = process_class(seg_class)
-        process_query(seg_q)
-        paragraph_len = process_paragraph(seg_p)
-
-      input_ids = tokens
-
-      # The mask has 1 for real tokens and 0 for padding tokens. Only real
-      # tokens are attended to.
-      input_mask = [1] * len(input_ids)
-
-      # Zero-pad up to the sequence length.
-      while len(input_ids) < max_seq_length:
-        input_ids.append(0)
-        input_mask.append(0)
-        segment_ids.append(seg_pad)
-        paragraph_mask.append(0)
-
-      assert len(input_ids) == max_seq_length
-      assert len(input_mask) == max_seq_length
-      assert len(segment_ids) == max_seq_length
-      assert len(paragraph_mask) == max_seq_length
-
-      span_is_impossible = example.is_impossible
-      start_position = None
-      end_position = None
-      if is_training and not span_is_impossible:
-        # For training, if our document chunk does not contain an annotation,
-        # there is nothing to predict in it; the span is marked impossible
-        # below and the answer is pointed at the class token.
-        doc_start = doc_span.start
-        doc_end = doc_span.start + doc_span.length - 1
-        out_of_span = False
-        if not (tok_start_position >= doc_start and
-                tok_end_position <= doc_end):
-          out_of_span = True
-        if out_of_span:
-          start_position = 0
-          end_position = 0
-          span_is_impossible = True
-        else:
-          doc_offset = 0 if xlnet_format else len(query_tokens) + 2
-          start_position = tok_start_position - doc_start + doc_offset
-          end_position = tok_end_position - doc_start + doc_offset
-
-      if is_training and span_is_impossible:
-        start_position = class_index
-        end_position = class_index
-
-      if example_index < 20:
-        logging.info("*** Example ***")
-        logging.info("unique_id: %s", (unique_id))
-        logging.info("example_index: %s", (example_index))
-        logging.info("doc_span_index: %s", (doc_span_index))
-        logging.info("tok_start_to_orig_index: %s",
-                     " ".join([str(x) for x in cur_tok_start_to_orig_index]))
-        logging.info("tok_end_to_orig_index: %s",
-                     " ".join([str(x) for x in cur_tok_end_to_orig_index]))
-        logging.info(
-            "token_is_max_context: %s", " ".join(
-                ["%d:%s" % (x, y) for (x, y) in token_is_max_context.items()]))
-        logging.info(
-            "input_pieces: %s",
-            " ".join([tokenizer.sp_model.IdToPiece(x) for x in tokens]))
-        logging.info("input_ids: %s", " ".join([str(x) for x in input_ids]))
-        logging.info("input_mask: %s", " ".join([str(x) for x in input_mask]))
-        logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids]))
-        logging.info("paragraph_mask: %s", " ".join(
-            [str(x) for x in paragraph_mask]))
-        logging.info("class_index: %d", class_index)
-
-        if is_training and span_is_impossible:
-          logging.info("impossible example span")
-
-        if is_training and not span_is_impossible:
-          pieces = [
-              tokenizer.sp_model.IdToPiece(token)
-              for token in tokens[start_position:(end_position + 1)]
-          ]
-          answer_text = tokenizer.sp_model.DecodePieces(pieces)
-          logging.info("start_position: %d", (start_position))
-          logging.info("end_position: %d", (end_position))
-          logging.info("answer: %s", (tokenization.printable_text(answer_text)))
-
-      # With multiprocessing, the example_index is actually the index within
-      # the current process, so we use example_index=None to prevent it from
-      # being reused downstream.
-      # The current code does not use example_index of training data.
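-      # E.g., at inference time example_index is preserved so that
-      # postprocess_output can regroup the doc spans of each example; training
-      # features are only consumed through the serialized tf.Examples.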
-      if is_training:
-        feat_example_index = None
-      else:
-        feat_example_index = example_index
-
-      feature = InputFeatures(
-          unique_id=unique_id,
-          example_index=feat_example_index,
-          doc_span_index=doc_span_index,
-          tok_start_to_orig_index=cur_tok_start_to_orig_index,
-          tok_end_to_orig_index=cur_tok_end_to_orig_index,
-          token_is_max_context=token_is_max_context,
-          tokens=[tokenizer.sp_model.IdToPiece(x) for x in tokens],
-          input_ids=input_ids,
-          input_mask=input_mask,
-          paragraph_mask=paragraph_mask,
-          segment_ids=segment_ids,
-          paragraph_len=paragraph_len,
-          class_index=class_index,
-          start_position=start_position,
-          end_position=end_position,
-          is_impossible=span_is_impossible)
-
-      # Run callback
-      if is_training:
-        output_fn(feature)
-      else:
-        output_fn(feature, is_padding=False)
-
-      unique_id += 1
-      if span_is_impossible:
-        cnt_neg += 1
-      else:
-        cnt_pos += 1
-
-  if not is_training and feature:
-    assert batch_size
-    num_padding = 0
-    num_examples = unique_id - base_id
-    if unique_id % batch_size != 0:
-      num_padding = batch_size - (num_examples % batch_size)
-    dummy_feature = copy.deepcopy(feature)
-    for _ in range(num_padding):
-      dummy_feature.unique_id = unique_id
-
-      # Run callback
-      output_fn(dummy_feature, is_padding=True)
-      unique_id += 1
-
-  logging.info("Total number of instances: %d = pos %d neg %d",
-               cnt_pos + cnt_neg, cnt_pos, cnt_neg)
-  return unique_id - base_id
-
-
-def _check_is_max_context(doc_spans, cur_span_index, position):
-  """Check if this is the 'max context' doc span for the token."""
-
-  # Because of the sliding window approach taken to scoring documents, a single
-  # token can appear in multiple documents. E.g.
-  # Doc: the man went to the store and bought a gallon of milk
-  # Span A: the man went to the
-  # Span B: to the store and bought
-  # Span C: and bought a gallon of
-  # ...
-  #
-  # Now the word 'bought' will have two scores from spans B and C. We only
-  # want to consider the score with "maximum context", which we define as
-  # the *minimum* of its left and right context (the *sum* of left and
-  # right context will always be the same, of course).
-  #
-  # In the example the maximum context for 'bought' would be span C since
-  # it has 1 left context and 3 right context, while span B has 4 left context
-  # and 0 right context.
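-  # Working through the example above (both spans have length 5):
-  #   span B: min(4 left, 0 right) + 0.01 * 5 = 0.05
-  #   span C: min(1 left, 3 right) + 0.01 * 5 = 1.05
-  # so span C is the max-context span for 'bought'.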
-  best_score = None
-  best_span_index = None
-  for (span_index, doc_span) in enumerate(doc_spans):
-    end = doc_span.start + doc_span.length - 1
-    if position < doc_span.start:
-      continue
-    if position > end:
-      continue
-    num_left_context = position - doc_span.start
-    num_right_context = end - position
-    score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
-    if best_score is None or score > best_score:
-      best_score = score
-      best_span_index = span_index
-
-  return cur_span_index == best_span_index
-
-
-def write_predictions(all_examples,
-                      all_features,
-                      all_results,
-                      n_best_size,
-                      max_answer_length,
-                      do_lower_case,
-                      output_prediction_file,
-                      output_nbest_file,
-                      output_null_log_odds_file,
-                      version_2_with_negative=False,
-                      null_score_diff_threshold=0.0,
-                      verbose=False):
-  """Write final predictions to the json file and log-odds of null if needed."""
-  logging.info("Writing predictions to: %s", (output_prediction_file))
-  logging.info("Writing nbest to: %s", (output_nbest_file))
-
-  all_predictions, all_nbest_json, scores_diff_json = (
-      postprocess_output(
-          all_examples=all_examples,
-          all_features=all_features,
-          all_results=all_results,
-          n_best_size=n_best_size,
-          max_answer_length=max_answer_length,
-          do_lower_case=do_lower_case,
-          version_2_with_negative=version_2_with_negative,
-          null_score_diff_threshold=null_score_diff_threshold,
-          verbose=verbose))
-
-  write_to_json_files(all_predictions, output_prediction_file)
-  write_to_json_files(all_nbest_json, output_nbest_file)
-  if version_2_with_negative:
-    write_to_json_files(scores_diff_json, output_null_log_odds_file)
-
-
-def postprocess_output(all_examples,
-                       all_features,
-                       all_results,
-                       n_best_size,
-                       max_answer_length,
-                       do_lower_case,
-                       version_2_with_negative=False,
-                       null_score_diff_threshold=0.0,
-                       xlnet_format=False,
-                       verbose=False):
-  """Postprocess model output, to form prediction results."""
-
-  del do_lower_case, verbose
-  example_index_to_features = collections.defaultdict(list)
-  for feature in all_features:
-    example_index_to_features[feature.example_index].append(feature)
-
-  unique_id_to_result = {}
-  for result in all_results:
-    unique_id_to_result[result.unique_id] = result
-
-  _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
-      "PrelimPrediction",
-      ["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
-
-  all_predictions = collections.OrderedDict()
-  all_nbest_json = collections.OrderedDict()
-  scores_diff_json = collections.OrderedDict()
-
-  for (example_index, example) in enumerate(all_examples):
-    features = example_index_to_features[example_index]
-
-    prelim_predictions = []
-    # keep track of the minimum score of null start+end of position 0
-    score_null = 1000000  # large and positive
-    min_null_feature_index = 0  # the paragraph slice with min null score
-    null_start_logit = 0  # the start logit at the slice with min null score
-    null_end_logit = 0  # the end logit at the slice with min null score
-    for (feature_index, feature) in enumerate(features):
-      if feature.unique_id not in unique_id_to_result:
-        logging.info("Skip eval example %s, not in pred.", feature.unique_id)
-        continue
-      result = unique_id_to_result[feature.unique_id]
-
-      # if we could have irrelevant answers, get the min score of irrelevant
-      if version_2_with_negative:
-        if xlnet_format:
-          feature_null_score = result.class_logits
-        else:
-          feature_null_score = result.start_logits[0] + result.end_logits[0]
-        if feature_null_score < score_null:
-          score_null = feature_null_score
-          min_null_feature_index = feature_index
-          null_start_logit = result.start_logits[0]
-          null_end_logit = result.end_logits[0]
-
-      doc_offset = 0 if xlnet_format else feature.tokens.index("[SEP]") + 1
-
-      for (start_index, start_logit,
-           end_index, end_logit) in _get_best_indexes_and_logits(
-               result=result,
-               n_best_size=n_best_size,
-               xlnet_format=xlnet_format):
-        # We could hypothetically create invalid predictions, e.g., predict
-        # that the start of the span is in the question. We throw out all
-        # invalid predictions.
-        if start_index - doc_offset >= len(feature.tok_start_to_orig_index):
-          continue
-        if end_index - doc_offset >= len(feature.tok_end_to_orig_index):
-          continue
-        if not feature.token_is_max_context.get(start_index, False):
-          continue
-        if end_index < start_index:
-          continue
-        length = end_index - start_index + 1
-        if length > max_answer_length:
-          continue
-        prelim_predictions.append(
-            _PrelimPrediction(
-                feature_index=feature_index,
-                start_index=start_index - doc_offset,
-                end_index=end_index - doc_offset,
-                start_logit=start_logit,
-                end_logit=end_logit))
-
-    if version_2_with_negative and not xlnet_format:
-      prelim_predictions.append(
-          _PrelimPrediction(
-              feature_index=min_null_feature_index,
-              start_index=-1,
-              end_index=-1,
-              start_logit=null_start_logit,
-              end_logit=null_end_logit))
-    prelim_predictions = sorted(
-        prelim_predictions,
-        key=lambda x: (x.start_logit + x.end_logit),
-        reverse=True)
-
-    _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
-        "NbestPrediction", ["text", "start_logit", "end_logit"])
-
-    seen_predictions = {}
-    nbest = []
-    for pred in prelim_predictions:
-      if len(nbest) >= n_best_size:
-        break
-      feature = features[pred.feature_index]
-      if pred.start_index >= 0 or xlnet_format:  # this is a non-null prediction
-        tok_start_to_orig_index = feature.tok_start_to_orig_index
-        tok_end_to_orig_index = feature.tok_end_to_orig_index
-        start_orig_pos = tok_start_to_orig_index[pred.start_index]
-        end_orig_pos = tok_end_to_orig_index[pred.end_index]
-
-        paragraph_text = example.paragraph_text
-        final_text = paragraph_text[start_orig_pos:end_orig_pos + 1].strip()
-        if final_text in seen_predictions:
-          continue
-
-        seen_predictions[final_text] = True
-      else:
-        final_text = ""
-        seen_predictions[final_text] = True
-
-      nbest.append(
-          _NbestPrediction(
-              text=final_text,
-              start_logit=pred.start_logit,
-              end_logit=pred.end_logit))
-
-    # if we didn't include the empty option in the n-best, include it
-    if version_2_with_negative and not xlnet_format:
-      if "" not in seen_predictions:
-        nbest.append(
-            _NbestPrediction(
-                text="", start_logit=null_start_logit,
-                end_logit=null_end_logit))
-    # In very rare edge cases we could have no valid predictions. So we
-    # just create a nonce prediction in this case to avoid failure.
- if not nbest: - nbest.append( - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) - - assert len(nbest) >= 1 - - total_scores = [] - best_non_null_entry = None - for entry in nbest: - total_scores.append(entry.start_logit + entry.end_logit) - if not best_non_null_entry: - if entry.text: - best_non_null_entry = entry - - probs = _compute_softmax(total_scores) - - nbest_json = [] - for (i, entry) in enumerate(nbest): - output = collections.OrderedDict() - output["text"] = entry.text - output["probability"] = probs[i] - output["start_logit"] = entry.start_logit - output["end_logit"] = entry.end_logit - nbest_json.append(output) - - assert len(nbest_json) >= 1 - - if not version_2_with_negative: - all_predictions[example.qas_id] = nbest_json[0]["text"] - else: - assert best_non_null_entry is not None - if xlnet_format: - score_diff = score_null - scores_diff_json[example.qas_id] = score_diff - all_predictions[example.qas_id] = best_non_null_entry.text - else: - # predict "" iff the null score - the score of best non-null > threshold - score_diff = score_null - best_non_null_entry.start_logit - ( - best_non_null_entry.end_logit) - scores_diff_json[example.qas_id] = score_diff - if score_diff > null_score_diff_threshold: - all_predictions[example.qas_id] = "" - else: - all_predictions[example.qas_id] = best_non_null_entry.text - - all_nbest_json[example.qas_id] = nbest_json - - return all_predictions, all_nbest_json, scores_diff_json - - -def write_to_json_files(json_records, json_file): - with tf.io.gfile.GFile(json_file, "w") as writer: - writer.write(json.dumps(json_records, indent=4) + "\n") - - -def _get_best_indexes_and_logits(result, - n_best_size, - xlnet_format=False): - """Generates the n-best indexes and logits from a list.""" - if xlnet_format: - for i in range(n_best_size): - for j in range(n_best_size): - j_index = i * n_best_size + j - yield (result.start_indexes[i], result.start_logits[i], - result.end_indexes[j_index], result.end_logits[j_index]) - else: - start_index_and_score = sorted(enumerate(result.start_logits), - key=lambda x: x[1], reverse=True) - end_index_and_score = sorted(enumerate(result.end_logits), - key=lambda x: x[1], reverse=True) - for i in range(len(start_index_and_score)): - if i >= n_best_size: - break - for j in range(len(end_index_and_score)): - if j >= n_best_size: - break - yield (start_index_and_score[i][0], start_index_and_score[i][1], - end_index_and_score[j][0], end_index_and_score[j][1]) - - -def _compute_softmax(scores): - """Compute softmax probability over raw logits.""" - if not scores: - return [] - - max_score = None - for score in scores: - if max_score is None or score > max_score: - max_score = score - - exp_scores = [] - total_sum = 0.0 - for score in scores: - x = math.exp(score - max_score) - exp_scores.append(x) - total_sum += x - - probs = [] - for score in exp_scores: - probs.append(score / total_sum) - return probs - - -class FeatureWriter(object): - """Writes InputFeature to TF example file.""" - - def __init__(self, filename, is_training): - self.filename = filename - self.is_training = is_training - self.num_features = 0 - tf.io.gfile.makedirs(os.path.dirname(filename)) - self._writer = tf.io.TFRecordWriter(filename) - - def process_feature(self, feature): - """Write a InputFeature to the TFRecordWriter as a tf.train.Example.""" - self.num_features += 1 - - def create_int_feature(values): - feature = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(values))) - return feature - - features = 
collections.OrderedDict() - features["unique_ids"] = create_int_feature([feature.unique_id]) - features["input_ids"] = create_int_feature(feature.input_ids) - features["input_mask"] = create_int_feature(feature.input_mask) - features["segment_ids"] = create_int_feature(feature.segment_ids) - if feature.paragraph_mask is not None: - features["paragraph_mask"] = create_int_feature(feature.paragraph_mask) - if feature.class_index is not None: - features["class_index"] = create_int_feature([feature.class_index]) - - if self.is_training: - features["start_positions"] = create_int_feature([feature.start_position]) - features["end_positions"] = create_int_feature([feature.end_position]) - impossible = 0 - if feature.is_impossible: - impossible = 1 - features["is_impossible"] = create_int_feature([impossible]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - self._writer.write(tf_example.SerializeToString()) - - def close(self): - self._writer.close() - - -def generate_tf_record_from_json_file(input_file_path, - sp_model_file, - output_path, - translated_input_folder=None, - max_seq_length=384, - do_lower_case=True, - max_query_length=64, - doc_stride=128, - xlnet_format=False, - version_2_with_negative=False): - """Generates and saves training data into a tf record file.""" - train_examples = read_squad_examples( - input_file=input_file_path, - is_training=True, - version_2_with_negative=version_2_with_negative, - translated_input_folder=translated_input_folder) - tokenizer = tokenization.FullSentencePieceTokenizer( - sp_model_file=sp_model_file) - train_writer = FeatureWriter( - filename=output_path, is_training=True) - number_of_examples = convert_examples_to_features( - examples=train_examples, - tokenizer=tokenizer, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - is_training=True, - output_fn=train_writer.process_feature, - xlnet_format=xlnet_format, - do_lower_case=do_lower_case) - train_writer.close() - - meta_data = { - "task_type": "bert_squad", - "train_data_size": number_of_examples, - "max_seq_length": max_seq_length, - "max_query_length": max_query_length, - "doc_stride": doc_stride, - "version_2_with_negative": version_2_with_negative, - } - - return meta_data diff --git a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/tagging_data_lib.py b/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/tagging_data_lib.py deleted file mode 100644 index 9550eadf2ce75265cd6c9512533b57c8b6432f1f..0000000000000000000000000000000000000000 --- a/TensorFlow/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/official/nlp/data/tagging_data_lib.py +++ /dev/null @@ -1,442 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Library to process data for tagging task such as NER/POS.""" -import collections -import os - -from absl import logging -import tensorflow as tf - -from official.nlp.bert import tokenization -from official.nlp.data import classifier_data_lib - -# A negative label id for the padding label, which will not contribute -# to loss/metrics in training. -_PADDING_LABEL_ID = -1 - -# The special unknown token, used to substitute a word which has too many -# subwords after tokenization. -_UNK_TOKEN = "[UNK]" - - -class InputExample(object): - """A single training/test example for token classification.""" - - def __init__(self, - sentence_id, - sub_sentence_id=0, - words=None, - label_ids=None): - """Constructs an InputExample.""" - self.sentence_id = sentence_id - self.sub_sentence_id = sub_sentence_id - self.words = words if words else [] - self.label_ids = label_ids if label_ids else [] - - def add_word_and_label_id(self, word, label_id): - """Adds word and label_id pair in the example.""" - self.words.append(word) - self.label_ids.append(label_id) - - -def _read_one_file(file_name, label_list): - """Reads one file and returns a list of `InputExample` instances.""" - lines = tf.io.gfile.GFile(file_name, "r").readlines() - examples = [] - label_id_map = {label: i for i, label in enumerate(label_list)} - sentence_id = 0 - example = InputExample(sentence_id=0) - for line in lines: - line = line.strip("\n") - if line: - # The format is: \t