diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/amoeba_net.iml b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/amoeba_net.iml new file mode 100644 index 0000000000000000000000000000000000000000..15fc560d0e9d4555616d7b92616edad2a31bb0a9 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/amoeba_net.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/misc.xml b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/misc.xml new file mode 100644 index 0000000000000000000000000000000000000000..44fd3cc6ebc6880186d43216b6590d40e2d56f44 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/modules.xml b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/modules.xml new file mode 100644 index 0000000000000000000000000000000000000000..933dff8b69bf62ce67e602ff3c8cbaa975c71612 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/workspace.xml b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/workspace.xml new file mode 100644 index 0000000000000000000000000000000000000000..cca82cf0786da4920555282a3af0a4c4cbd9e7c9 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.idea/workspace.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1633974512150 + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.keep b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/LICENSE b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..15ae42140452d32ccf929f59f7eca01a3c7b555f --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/LICENSE @@ -0,0 +1,203 @@ +Copyright 2017 The TensorFlow Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017, The TensorFlow Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/README.md b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f4cdfb8425ab974cec2693a5ff18673cad3d3cd0 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/README.md @@ -0,0 +1,269 @@ +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) +- [高级参考](#高级参考.md) +
<h2 id="基本信息.md">基本信息</h2>
+ +**发布者(Publisher):Huawei** + +**应用领域(Application Domain): Image Classification** + +**修改时间(Modified) :2022.5.9** + +**框架(Framework):TensorFlow 1.15.0** + +**模型格式(Model Format):ckpt** + +**精度(Precision):Mixed** + +**处理器(Processor):昇腾910** + +**应用级别(Categories):Official** + +**描述(Description):基于TensorFlow框架的AmoebaNet-D图像分类网络训练代码** + +
<h2 id="概述.md">概述</h2>
+ +AmoebaNet-D是由AmoebaNet演化神经架构搜索算法搜索出的一个图像分类神经网络。 + +- 参考论文: + + [Real, E., Aggarwal, A., Huang, Y., & Le, Q. V. (2019, July). Regularized evolution for image classifier architecture search. In Proceedings of the aaai conference on artificial intelligence (Vol. 33, No. 01, pp. 4780-4789).](https://arxiv.org/pdf/1802.01548.pdf) + + +- 参考实现: + + + +- 适配昇腾 AI 处理器的实现: + + [https://gitee.com/zero167/ModelZoo-TensorFlow/tree/master/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow](https://gitee.com/zero167/ModelZoo-TensorFlow/tree/master/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow) + + + +## 混合精度训练 + +昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。 + +## 开启混合精度 + +脚本已默认开启混合精度,设置precision_mode参数的脚本参考如下。 + + ``` + + npu_config = NPURunConfig( + model_dir=FLAGS.model_dir, + save_checkpoints_steps=save_checkpoints_steps, + save_summary_steps=0, + # dump_config=dump_config, + # fusion_switch_file="/home/test_user03/tpu-master/models/official/amoeba_net/fusion_switch.cfg", + session_config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False), + #precision_mode="allow_mix_precision") + precision_mode="allow_fp32_to_fp16") + #precision_mode="force_fp32") + ``` + + +
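作为补充,下面给出 NPURunConfig 与 NPUEstimator 搭配使用的最小示意(其中 my_model_fn 为假设的占位函数,模型目录为示例路径,NPUEstimator 的导入路径请以实际安装的 npu_bridge 版本为准):

```
from npu_bridge.estimator.npu.npu_config import NPURunConfig
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
import tensorflow.compat.v1 as tf

# 假设的 model_fn,仅用于演示 NPUEstimator 的创建方式
def my_model_fn(features, labels, mode, params):
    raise NotImplementedError

npu_config = NPURunConfig(
    model_dir="/home/test_user03/hh",        # checkpoint 保存目录(示例路径)
    save_checkpoints_steps=1251,
    save_summary_steps=0,
    session_config=tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False),
    precision_mode="allow_mix_precision")    # 也可改为 allow_fp32_to_fp16 / force_fp32

classifier = NPUEstimator(model_fn=my_model_fn, config=npu_config)
```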
<h2 id="训练环境准备.md">训练环境准备</h2>
+ +1. 硬件环境准备请参见各硬件产品文档"[驱动和固件安装升级指南]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)"。需要在硬件设备上安装与CANN版本配套的固件与驱动。 +2. 宿主机上需要安装Docker并登录[Ascend Hub中心](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm)获取镜像。 + + 当前模型支持的镜像列表如[表1](#zh-cn_topic_0000001074498056_table1519011227314)所示。 + + **表 1** 镜像列表 + + + + + + + + + + + + +
| 镜像名称 | 镜像版本 | 配套CANN版本 |
| ---- | ---- | ---- |
| ascend-tensorflow-arm | 20.2.0 | 20.2 |
<h2 id="快速上手.md">快速上手</h2>
+ +- 数据集准备 +1. 模型训练使用ImageNet2012数据集,数据集请用户自行获取。 + +2. 数据集训练前需要做预处理操作,请用户参考[Tensorflow-Slim](https://github.com/tensorflow/models/tree/master/research/slim),将数据集封装为tfrecord格式。 + +3. 数据集处理后,放入模型目录下,在训练脚本中指定数据集路径,可正常使用。 + + +## 模型训练 + +- 单击“立即下载”,并选择合适的下载方式下载源码包。 + +- 启动训练之前,首先要配置程序运行相关环境变量。 + + 环境变量配置信息参见: + + [Ascend 910训练平台环境变量设置](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819) + +- 单卡训练 + + 1. 配置训练参数。 + + 首先在脚本train_performance_1p.sh中,配置训练数据集路径和checkpoint保存路径,请用户根据实际路径配置,示例如下所示: + + ``` + DATA_DIR=/home/test_user03/tf_records/ + MODEL_DIR=/home/test_user03/hh + + ``` + + 2. 启动训练。(脚本为train_performance_1p.sh) + + ``` + bash train_performance_1p.sh + ``` + + +- 验证。 + + 1. 配置验证参数。 + + 首先在脚本train_full_1p.sh中,配置训练数据集路径和checkpoint保存路径,请用户根据实际路径配置,示例如下所示: + + ``` + DATA_DIR=/home/test_user03/tf_records/ + MODEL_DIR=/home/test_user03/hh + + ``` + + 2. 启动验证。(脚本为train_full_1p.sh) + + ``` + bash train_full_1p.sh + ``` + + + + + +
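在启动训练前,可以先用类似下面的脚本对生成的 tfrecord 做一次简单的可读性检查(分片文件名与特征字段名按 TF-Slim 转换脚本的常见约定假设,请以实际生成结果为准):

```
import tensorflow.compat.v1 as tf

def peek_tfrecord(tfrecord_path, max_records=3):
    """读取一个 tfrecord 分片,打印前几条记录包含的特征字段。"""
    for i, record in enumerate(tf.python_io.tf_record_iterator(tfrecord_path)):
        example = tf.train.Example.FromString(record)
        # 预期能看到 image/encoded、image/class/label 等字段
        print(sorted(example.features.feature.keys()))
        if i + 1 >= max_records:
            break

peek_tfrecord('/home/test_user03/tf_records/train-00000-of-01024')
```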
<h2 id="高级参考.md">高级参考</h2>
+ +## 脚本和示例代码 + +``` +├── amoeba_net.py //训练与测试入口 +├── README.md //代码说明文档 +├── amoeba_net_model.py //模型功能 +├── model_builder.py //根据用户传入的参数构建模型 +├── model_specs.py //Amoeba_net架构配置 +├── network_utils.py //Amoeba-net使用的常见操作的自定义模块 +├── network_utils_test.py //对network_utils自定义模块的测试 +├── tf_hub.py //模型导出和评估 +├── inception_preprocessing.py //图像预处理 +├── train_testcase.sh //训练测试用例 +├── online_inference_testcase.sh //在线推理测试用例 +├── train_performance_1p.sh //训练入口 +├── train_full_1p.sh //训练入口,包含准确率评估 +├── modelzoo_level.txt //网络状态描述文件 +├── requirements.txt +├── common +│ ├──imagenet.py //为ImageNet ILSVRC 2012数据集提供数据帮助程序 +│ ├──inference_warmup.py //inference warmup实现``` + +``` +## 脚本参数 +``` +--use_tpu 是否使用tpu,默认:False(由于该代码从tpu版本迁移过来,在晟腾910上只能是False) +--mode 运行模式,可选:train_and_eval,train,eval +--data_dir 数据集目录 +--mmodel_dir 保存checkpoint的目录 +--num_cells 网络结构中cell的数量,默认:6 +--image_size 图像尺寸,默认:224 +--num_epochs 训练迭代次数,默认:35 +--train_batch_size 训练的batch size,默认:64 +--eval_batch_size 验证的batch size, 默认:64 +--lr 初始学习率,默认:2.56 +--lr_decay_value 学习率指数衰减 默认:0.88 +--lr_warmup_epochs 初始学习率从0增长到指定学习率的迭代数,默认:0.35 +``` + + +## 训练过程 + +1. 通过“模型训练”中的训练指令启动单卡训练。 + +2. 训练脚本log中包括如下信息。 + +``` +I0420 23:35:31.718360 281472996956240 basic_session_run_hooks.py:692] global_step/sec: 4.13978 +INFO:tensorflow:global_step...700647 +I0420 23:35:31.722282 281472996956240 npu_hook.py:132] global_step...700647 +INFO:tensorflow:global_step...700648 +I0420 23:35:31.963601 281472996956240 npu_hook.py:132] global_step...700648 +... +INFO:tensorflow:Saving checkpoints for 700662 into /home/test_user03/ckpt5/model.ckpt. +I0420 23:35:35.366074 281472996956240 basic_session_run_hooks.py:606] Saving checkpoints for 700662 into /home/test_user03/ckpt5/model.ckpt. +INFO:tensorflow:global_step...700663 +I0420 23:36:39.784266 281472996956240 npu_hook.py:132] global_step...700663 +INFO:tensorflow:global_step...700664 +I0420 23:36:40.024840 281472996956240 npu_hook.py:132] global_step...700664 +INFO:tensorflow:global_step...700665 +I0420 23:36:40.267009 281472996956240 npu_hook.py:132] global_step...700665 +INFO:tensorflow:NPUCheckpointSaverHook end... +I0420 23:36:40.267664 281472996956240 npu_hook.py:137] NPUCheckpointSaverHook end... +INFO:tensorflow:Saving checkpoints for 700665 into /home/test_user03/ckpt5/model.ckpt. +I0420 23:36:40.269501 281472996956240 basic_session_run_hooks.py:606] Saving checkpoints for 700665 into /home/test_user03/ckpt5/model.ckpt. +INFO:tensorflow:Loss for final step: 4.1664658. +I0420 23:38:08.704852 281472996956240 estimator.py:371] Loss for final step: 4.1664658. +``` + +## 推理/验证过程 + +1. 通过“模型训练”中的验证指令启动验证。 + +2. 当前只能针对该工程训练出的checkpoint进行推理测试。 + +3. 
测试结束后会打印测试集的top1 accuracy和top5 accuracy,如下所示。 + +``` +ow:Evaluation [78/781] +I0420 23:40:00.258475 281472996956240 evaluation.py:167] Evaluation [78/781] +INFO:tensorflow:Evaluation [156/781] +I0420 23:40:07.798311 281472996956240 evaluation.py:167] Evaluation [156/781] +INFO:tensorflow:Evaluation [234/781] +I0420 23:40:15.336721 281472996956240 evaluation.py:167] Evaluation [234/781] +INFO:tensorflow:Evaluation [312/781] +I0420 23:40:22.876575 281472996956240 evaluation.py:167] Evaluation [312/781] +INFO:tensorflow:Evaluation [390/781] +I0420 23:40:30.432068 281472996956240 evaluation.py:167] Evaluation [390/781] +INFO:tensorflow:Evaluation [468/781] +I0420 23:40:38.020324 281472996956240 evaluation.py:167] Evaluation [468/781] +INFO:tensorflow:Evaluation [546/781] +I0420 23:40:45.564076 281472996956240 evaluation.py:167] Evaluation [546/781] +INFO:tensorflow:Evaluation [624/781] +I0420 23:40:53.106832 281472996956240 evaluation.py:167] Evaluation [624/781] +INFO:tensorflow:Evaluation [702/781] +I0420 23:41:00.634234 281472996956240 evaluation.py:167] Evaluation [702/781] +INFO:tensorflow:Evaluation [780/781] +I0420 23:41:08.236136 281472996956240 evaluation.py:167] Evaluation [780/781] +INFO:tensorflow:Evaluation [781/781] +I0420 23:41:08.331177 281472996956240 evaluation.py:167] Evaluation [781/781] +2022-04-20 23:41:08.749352: I /home/phisik3/jenkins/workspace/work_code/tmp/host-prefix/src/host-build/asl/tfadaptor/CMakeFiles/tf_adapter.dir/compiler_depend.ts:805] The model has been compiled on the Ascend AI processor, current graph id is: 71 +INFO:tensorflow:Finished evaluation at 2022-04-20-23:41:13 +I0420 23:41:13.806376 281472996956240 evaluation.py:275] Finished evaluation at 2022-04-20-23:41:13 +INFO:tensorflow:Saving dict for global step 700665: global_step = 700665, loss = 1.8883309, top_1_accuracy = 0.75600195, top_5_accuracy = 0.9269366 +I0420 23:41:13.807576 281472996956240 estimator.py:2049] Saving dict for global step 700665: global_step = 700665, loss = 1.8883309, top_1_accuracy = 0.75600195, top_5_accuracy = 0.9269366 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 700665: /home/test_user03/ckpt5/model.ckpt-700665 +I0420 23:41:13.810555 281472996956240 estimator.py:2109] Saving 'checkpoint_path' summary for global step 700665: /home/test_user03/ckpt5/model.ckpt-700665 +INFO:tensorflow:Evaluation results: {'loss': 1.8883309, 'top_1_accuracy': 0.75600195, 'top_5_accuracy': 0.9269366, 'global_step': 700665} +I0420 23:41:13.813197 281472996956240 amoeba_net.py:467] Evaluation results: {'loss': 1.8883309, 'top_1_accuracy': 0.75600195, 'top_5_accuracy': 0.9269366, 'global_step': 700665} +``` diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/amoeba_net_model.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/amoeba_net_model.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b4b828e812518b4c163eb609e5908216b5929dd Binary files /dev/null and b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/amoeba_net_model.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/inception_preprocessing.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/inception_preprocessing.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36531ab7aee0b5d38681c8bee572dc98fdb095a7 Binary files /dev/null and 
b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/inception_preprocessing.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/model_builder.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/model_builder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cd83cc1248f38b2d1ebe5787a9a447cf4600c64 Binary files /dev/null and b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/model_builder.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/model_specs.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/model_specs.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e393e37178dc5b14ef5fd9b128d81e555b8dffb6 Binary files /dev/null and b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/model_specs.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/network_utils.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/network_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd2219575d1671ef3804653ea879efe1cac77edf Binary files /dev/null and b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/__pycache__/network_utils.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net.py new file mode 100644 index 0000000000000000000000000000000000000000..fe0db18e282652c457d7e3e8815ff0c9eb73edc5 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net.py @@ -0,0 +1,525 @@ + + + +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""TensorFlow AmoebaNet Example. 
+ +GCP Run Example +python amoeba_net.py --data_dir=gs://cloud-tpu-datasets/imagenet-data --model_dir=gs://cloud-tpu-ckpts/models/ameoba_net_x/ \ +--drop_connect_keep_prob=1.0 --cell_name=evol_net_x --num_cells=12 --reduction_size=256 --image_size=299 --num_epochs=48 \ +--train_batch_size=256 --num_epochs_per_eval=4.0 --lr_decay_value=0.89 --lr_num_epochs_per_decay=1 --alsologtostderr \ +--tpu=huangyp-tpu-0 +""" +# pylint: enable=line-too-long + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * +from npu_bridge.estimator import npu_ops +from npu_bridge.estimator.npu.npu_config import NPURunConfig +from npu_bridge.estimator.npu.npu_config import ProfilingConfig +import sys +import itertools +import math +from absl import app +from absl import flags +import absl.logging as _logging # pylint: disable=unused-import +import tensorflow.compat.v1 as tf +import amoeba_net_model as model_lib +from common import inference_warmup +from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver +from tensorflow.contrib import tpu as contrib_tpu +from tensorflow.contrib.training.python.training import evaluation + + +import os +import argparse + + + + + +# Cloud TPU Cluster Resolvers +flags.DEFINE_string( + 'tpu', default=None, + help='The Cloud TPU to use for training. This should be either the name ' + 'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.') + +flags.DEFINE_string( + 'gcp_project', default=None, + help='Project name for the Cloud TPU-enabled project. If not specified, we ' + 'will attempt to automatically detect the GCE project from metadata.') + +flags.DEFINE_string( + 'tpu_zone', default=None, + help='GCE zone where the Cloud TPU is located in. If not specified, we ' + 'will attempt to automatically detect the GCE project from metadata.') + +# General Parameters +flags.DEFINE_integer( + 'num_shards', 1, + 'Number of shards (TPU cores).') + +flags.DEFINE_integer( + 'distributed_group_size', 1, + help='Size of the distributed batch norm. group.' + 'Default is normalization over local examples only.' + 'When set to a value greater than 1, it will enable' + 'a distribtued batch norm. To enable a global batch norm.' 
+ 'set distributed_group_size to FLAGS.num_shards') + +flags.DEFINE_bool( + 'use_tpu', False, + 'Use TPUs rather than CPU or GPU.') + +flags.DEFINE_string( + 'data_dir', '/home/test_user03/tf_records/', + 'Directory where input data is stored') + +flags.DEFINE_string( + 'model_dir', '/home/test_user03/hh', + 'Directory where model output is stored') + +flags.DEFINE_string( + 'export_dir', '/home/test_user03/result', + 'The directory where the exported SavedModel will be stored.') + +flags.DEFINE_bool( + 'export_to_tpu', False, + help='Whether to export additional metagraph with "serve, tpu" tags' + ' in addition to "serve" only metagraph.') + +flags.DEFINE_integer( + 'iterations_per_loop', 1251, + 'Number of iterations per TPU training loop.') + +flags.DEFINE_integer( + 'train_batch_size', 64, + 'Global (not per-shard) batch size for training') + +flags.DEFINE_integer( + 'eval_batch_size', 64, + 'Global (not per-shard) batch size for evaluation') + +flags.DEFINE_float( + 'num_epochs', 35., + 'Number of steps use for training.') + +flags.DEFINE_float( + 'num_epochs_per_eval', 1., + 'Number of training epochs to run between evaluations.') + +flags.DEFINE_string( + 'mode', 'train_and_eval', + 'Mode to run: train, eval, train_and_eval, or predict') + +flags.DEFINE_integer( + 'save_checkpoints_steps', 0, + 'Interval (in steps) at which the model data ' + 'should be checkpointed. Set to 0 to disable.') + +flags.DEFINE_bool( + 'enable_hostcall', True, + 'Skip the host_call which is executed every training step. This is' + ' generally used for generating training summaries (train loss,' + ' learning rate, etc...). When --enable_hostcall=True, there could' + ' be a performance drop if host_call function is slow and cannot' + ' keep up with the TPU-side computation.') + +# Model specific parameters +flags.DEFINE_bool('use_aux_head', True, 'Include aux head or not.') +flags.DEFINE_float( + 'aux_scaling', 0.4, 'Scaling factor of aux_head') +flags.DEFINE_float( + 'batch_norm_decay', 0.9, 'Batch norm decay.') +flags.DEFINE_float( + 'batch_norm_epsilon', 1e-5, 'Batch norm epsilon.') +flags.DEFINE_float( + 'dense_dropout_keep_prob', None, 'Dense dropout keep probability.') +flags.DEFINE_float( + 'drop_connect_keep_prob', 1.0, 'Drop connect keep probability.') +flags.DEFINE_string( + 'drop_connect_version', None, 'Drop connect version.') +flags.DEFINE_string( + 'cell_name', 'amoeba_net_d', 'Which network to run.') +flags.DEFINE_integer( + 'num_cells', 6, 'Total number of cells.') +flags.DEFINE_integer( + 'reduction_size', 256, 'Default cell reduction size.') +flags.DEFINE_integer( + 'stem_reduction_size', 32, 'Stem filter size.') +flags.DEFINE_float( + 'weight_decay', 4e-05, 'Weight decay for slim model.') +flags.DEFINE_integer( + 'num_label_classes', 1001, 'The number of classes that images fit into.') + +# Training hyper-parameters +flags.DEFINE_float( + 'lr', 2.56, 'Learning rate.') +flags.DEFINE_string( + 'optimizer', 'sgd', + 'Optimizer (one of sgd, rmsprop, momentum)') +flags.DEFINE_float( + 'moving_average_decay', 0.9999, + 'moving average decay rate') +flags.DEFINE_float( + 'lr_decay_value', 0.88, + 'Exponential decay rate used in learning rate adjustment') +flags.DEFINE_integer( + 'lr_num_epochs_per_decay', 1, + 'Exponential decay epochs used in learning rate adjustment') +flags.DEFINE_string( + 'lr_decay_method', 'exponential', + 'Method of decay: exponential, cosine, constant, stepwise') +flags.DEFINE_float( + 'lr_warmup_epochs', 0.35, + 'Learning rate increased from zero linearly to lr for the 
first ' + 'lr_warmup_epochs.') +flags.DEFINE_float('gradient_clipping_by_global_norm', 0, + 'gradient_clipping_by_global_norm') + +flags.DEFINE_integer( + 'image_size', 224, 'Size of image, assuming image height and width.') + +# flags.DEFINE_integer( +# 'num_train_images', 10000, 'The number of images in the training set.') +# flags.DEFINE_integer( +# 'num_eval_images', 1000, 'The number of images in the evaluation set.') +flags.DEFINE_integer( + 'num_train_images', 1281167, 'The number of images in the training set.') +flags.DEFINE_integer( + 'num_eval_images', 50000, 'The number of images in the evaluation set.') + +flags.DEFINE_bool( + 'use_bp16', True, 'If True, use bfloat16 for activations') + +flags.DEFINE_integer( + 'eval_timeout', 60*60*24, + 'Maximum seconds between checkpoints before evaluation terminates.') + +# Inference configuration. +flags.DEFINE_bool( + 'add_warmup_requests', False, + 'Whether to add warmup requests into the export saved model dir,' + 'especially for TPU inference.') +flags.DEFINE_string('model_name', 'amoeba_net', + 'Serving model name used for the model server.') +# flags.DEFINE_multi_integer( +# 'inference_batch_sizes', [8], +# 'Known inference batch sizes used to warm up for each core.') +flags.DEFINE_multi_integer( + 'inference_batch_sizes', [1], + 'Known inference batch sizes used to warm up for each core.') +FLAGS = flags.FLAGS + + +def build_run_config(): + """Return RunConfig for TPU estimator.""" + tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver( + FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) + + eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size + iterations_per_loop = (eval_steps if FLAGS.mode == 'eval' + else FLAGS.iterations_per_loop) + save_checkpoints_steps = FLAGS.save_checkpoints_steps or iterations_per_loop + run_config = contrib_tpu.RunConfig( + cluster=tpu_cluster_resolver, + model_dir=FLAGS.model_dir, + save_checkpoints_steps=save_checkpoints_steps, + keep_checkpoint_max=None, + tpu_config=contrib_tpu.TPUConfig( + iterations_per_loop=iterations_per_loop, + num_shards=FLAGS.num_shards, + per_host_input_for_training=contrib_tpu.InputPipelineConfig + .PER_HOST_V2)) + return run_config + + +def build_image_serving_input_receiver_fn(shape, + dtype=tf.float32): + """Returns a input_receiver_fn for raw images during serving.""" + + def _preprocess_image(encoded_image): + """Preprocess a single raw image.""" + image = tf.image.decode_image(encoded_image, channels=shape[-1]) + image.set_shape(shape) + return tf.cast(image, dtype) + + def serving_input_receiver_fn(): + image_bytes_list = tf.placeholder( + shape=[None], + dtype=tf.string, + ) + images = tf.map_fn( + _preprocess_image, image_bytes_list, back_prop=False, dtype=dtype) + return tf.estimator.export.TensorServingInputReceiver( + features=images, receiver_tensors=image_bytes_list) + + return serving_input_receiver_fn + + +# TODO(ereal): simplify this. 
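+# Note on override_with_flags() below: it copies the command-line flags listed
+# in override_flag_names into hparams. Flags whose value is None (i.e. defined
+# with default=None and not set on the command line) keep the defaults from
+# model_lib.build_hparams(); every other flag value, including flag defaults,
+# overwrites the corresponding hparam.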
+def override_with_flags(hparams): + """Overrides parameters with flag values.""" + override_flag_names = [ + 'aux_scaling', + 'train_batch_size', + 'batch_norm_decay', + 'batch_norm_epsilon', + 'dense_dropout_keep_prob', + 'drop_connect_keep_prob', + 'drop_connect_version', + 'eval_batch_size', + 'gradient_clipping_by_global_norm', + 'lr', + 'lr_decay_method', + 'lr_decay_value', + 'lr_num_epochs_per_decay', + 'moving_average_decay', + 'image_size', + 'num_cells', + 'reduction_size', + 'stem_reduction_size', + 'num_epochs', + 'num_epochs_per_eval', + 'optimizer', + 'enable_hostcall', + 'use_aux_head', + 'use_bp16', + 'use_tpu', + 'lr_warmup_epochs', + 'weight_decay', + 'num_shards', + 'distributed_group_size', + 'num_train_images', + 'num_eval_images', + 'num_label_classes', + ] + for flag_name in override_flag_names: + flag_value = getattr(FLAGS, flag_name, 'INVALID') + if flag_value == 'INVALID': + tf.logging.fatal('Unknown flag %s.' % str(flag_name)) + if flag_value is not None: + _set_or_add_hparam(hparams, flag_name, flag_value) + + +def build_hparams(): + """Build tf.Hparams for training Amoeba Net.""" + hparams = model_lib.build_hparams(FLAGS.cell_name) + override_with_flags(hparams) + return hparams + + +def _terminate_eval(): + tf.logging.info('Timeout passed with no new checkpoints ... terminating eval') + return True + + +def _get_next_checkpoint(): + return evaluation.checkpoints_iterator( + FLAGS.model_dir, timeout=FLAGS.eval_timeout, timeout_fn=_terminate_eval) + + +def _set_or_add_hparam(hparams, name, value): + if getattr(hparams, name, None) is None: + hparams.add_hparam(name, value) + else: + hparams.set_hparam(name, value) + + +def _load_global_step_from_checkpoint_dir(checkpoint_dir): + try: + checkpoint_reader = tf.train.NewCheckpointReader( + tf.train.latest_checkpoint(checkpoint_dir)) + return checkpoint_reader.get_tensor(tf.GraphKeys.GLOBAL_STEP) + except: # pylint: disable=bare-except + return 0 + + +def main(_): + mode = FLAGS.mode + data_dir = FLAGS.data_dir + model_dir = FLAGS.model_dir + hparams = build_hparams() + + estimator_parmas = {} + + train_steps_per_epoch = int( + math.ceil(hparams.num_train_images / float(hparams.train_batch_size))) + eval_steps = hparams.num_eval_images // hparams.eval_batch_size + eval_batch_size = (None if mode == 'train' else + hparams.eval_batch_size) + + model = model_lib.AmoebaNetEstimatorModel(hparams, model_dir) + + if hparams.use_tpu: + run_config = build_run_config() + # Temporary treatment until flags are released. 
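+    # On Ascend 910 (NPU) runs, FLAGS.use_tpu is expected to stay False (see
+    # README), so this TPU branch is kept only for compatibility with the
+    # original Cloud TPU implementation; the NPUEstimator branch below is used.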
+ image_classifier = contrib_tpu.TPUEstimator( + model_fn=model.model_fn, + use_tpu=True, + config=npu_run_config_init(run_config=run_config), + params=estimator_parmas, + predict_batch_size=eval_batch_size, + train_batch_size=hparams.train_batch_size, + eval_batch_size=eval_batch_size, + export_to_tpu=FLAGS.export_to_tpu) + else: + save_checkpoints_steps = (FLAGS.save_checkpoints_steps or + FLAGS.iterations_per_loop) + + # dump_config = npu_tf_config.estimator_dump_config(action='dump') + # dump_config = DumpConfig(enable_dump_debug=True, dump_path="/home/test_user03/tpu-master/models/official/amoeba_net/output/output",dump_step="0|5|10", dump_debug_mode="all") + + # dump_config = npu_tf_config.estimator_dump_config(action='dump') + # # dump_config = DumpConfig(enable_dump_debug=True, + # # dump_path="/home/test_user03/tpu-master/models/official/amoeba_net/output/output", + # # dump_step="0", dump_debug_mode="all") + npu_config = NPURunConfig( + model_dir=FLAGS.model_dir, + save_checkpoints_steps=save_checkpoints_steps, + save_summary_steps=0, + # dump_config=dump_config, + # fusion_switch_file="/home/test_user03/tpu-master/models/official/amoeba_net/fusion_switch.cfg", + session_config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False), + #precision_mode="allow_mix_precision") + precision_mode="allow_fp32_to_fp16") + #precision_mode="force_fp32") + + ''' + run_config = tf.estimator.RunConfig( + model_dir=FLAGS.model_dir, + save_checkpoints_steps=save_checkpoints_steps, save_summary_steps=0) + ''' + image_classifier = NPUEstimator( + model_fn=model.model_fn, + config=npu_config, + params=estimator_parmas) + + + # Input pipelines are slightly different (with regards to shuffling and + # preprocessing) between training and evaluation. + imagenet_train = model_lib.InputPipeline( + is_training=True, data_dir=data_dir, hparams=hparams) + imagenet_eval = model_lib.InputPipeline( + is_training=False, data_dir=data_dir, hparams=hparams) + + if hparams.moving_average_decay < 1: + eval_hooks = [model_lib.LoadEMAHook(model_dir, + hparams.moving_average_decay)] + else: + eval_hooks = [] + + if mode == 'eval': + for checkpoint in _get_next_checkpoint(): + tf.logging.info('Starting to evaluate.') + try: + eval_results = image_classifier.evaluate( + input_fn=imagenet_eval.input_fn, + steps=eval_steps, + hooks=eval_hooks, + checkpoint_path=checkpoint) + tf.logging.info('Evaluation results: %s' % eval_results) + except tf.errors.NotFoundError: + # skip checkpoint if it gets deleted prior to evaluation + tf.logging.info('Checkpoint %s no longer exists ... skipping') + elif mode == 'train_and_eval': + current_step = _load_global_step_from_checkpoint_dir(model_dir) + total_step = int(hparams.num_epochs * train_steps_per_epoch) + tf.logging.info('Starting training at step=%d.' % current_step) + train_steps_per_eval = int( + hparams.num_epochs_per_eval * train_steps_per_epoch) + # Final Evaluation if training is finished. + if current_step >= hparams.num_epochs * train_steps_per_epoch: + eval_results = image_classifier.evaluate( + input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks) + tf.logging.info('Evaluation results: %s' % eval_results) + while current_step < hparams.num_epochs * train_steps_per_epoch: + image_classifier.train( + input_fn=imagenet_train.input_fn, steps=train_steps_per_eval) + current_step += train_steps_per_eval + tf.logging.info('Starting evaluation at step=%d.' 
% current_step) + eval_results = image_classifier.evaluate( + input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks) + tf.logging.info('Evaluation results: %s' % eval_results) + + elif mode == 'predict': + for checkpoint in _get_next_checkpoint(): + tf.logging.info('Starting prediction ...') + time_hook = model_lib.SessionTimingHook() + eval_hooks.append(time_hook) + result_iter = image_classifier.predict( + input_fn=imagenet_eval.input_fn, + hooks=eval_hooks, + checkpoint_path=checkpoint, + yield_single_examples=False) + results = list(itertools.islice(result_iter, eval_steps)) + tf.logging.info('Inference speed = {} images per second.'.format( + time_hook.compute_speed(len(results) * eval_batch_size))) + elif mode == 'train': + current_step = _load_global_step_from_checkpoint_dir(model_dir) + total_step = int(hparams.num_epochs * train_steps_per_epoch) + if current_step < total_step: + tf.logging.info('Starting training ...') + image_classifier.train( + input_fn=imagenet_train.input_fn, + steps=total_step-current_step) + else: + tf.logging.info('Mode not found.') + + if FLAGS.export_dir is not None: + tf.logging.info('Starting exporting saved model ...') + serving_shape = [hparams.image_size, hparams.image_size, 3] + export_path = image_classifier.export_saved_model( + export_dir_base=FLAGS.export_dir, + serving_input_receiver_fn=build_image_serving_input_receiver_fn( + serving_shape), + as_text=True) + if FLAGS.add_warmup_requests: + inference_warmup.write_warmup_requests( + export_path, + FLAGS.model_name, + hparams.image_size, + batch_sizes=FLAGS.inference_batch_sizes) + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + app.run(main) + + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net_k8s.yaml b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net_k8s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4a31e00d872852cc63b2340857b47c99fa9e2a7 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net_k8s.yaml @@ -0,0 +1,53 @@ +# Train AmoebaNet-D with fake ImageNet dataset using Cloud TPU and Google +# Kubernetes Engine. +# +# [Training Data] +# In this example, we use the randomly generated fake ImageNet dataset at +# gs://cloud-tpu-test-datasets/fake_imagenet as the training data. +# +# [Instructions] +# 1. Follow the instructions on https://cloud.google.com/tpu/docs/kubernetes-engine-setup +# to create a Kubernetes Engine cluster. +# Note: Use a base machine type with more memory than the default n1-standard-1. +# 2. Change the environment variable MODEL_BUCKET in the Job spec to the +# Google Cloud Storage location where you want to store the output model. +# 3. Run `kubectl create -f amoeba_net_k8s.yaml`. + +apiVersion: batch/v1 +kind: Job +metadata: + name: amoeba-net-tpu +spec: + template: + metadata: + annotations: + # The Cloud TPUs that will be created for this Job must support + # TensorFlow 1.11. This version MUST match the TensorFlow version that + # your model is built on. + tf-version.cloud-tpus.google.com: "1.11" + spec: + restartPolicy: Never + containers: + - name: amoeba-net-tpu + # The official TensorFlow 1.11 TPU model image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile. 
+ image: gcr.io/tensorflow/tpu-models:r1.11 + command: + - python + - /tensorflow_tpu_models/models/official/amoeba_net/amoeba_net.py + - --data_dir=$(DATA_BUCKET) + - --model_dir=$(MODEL_BUCKET) + env: + # The Google Cloud Storage location where the fake ImageNet dataset is + # stored. + - name: DATA_BUCKET + value: "gs://cloud-tpu-test-datasets/fake_imagenet" + # [REQUIRED] Must specify the Google Cloud Storage location where your + # output model will be stored. + - name: MODEL_BUCKET + value: "gs:///amoeba_net" + resources: + limits: + # Request a single v2-8 Cloud TPU device to train the model. + # A single v2-8 Cloud TPU device consists of 4 chips, each of which + # has 2 cores, so there are 8 cores in total. + cloud-tpus.google.com/v2: 8 diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net_model.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net_model.py new file mode 100644 index 0000000000000000000000000000000000000000..bfea61e303576cc6256b08a03bad5313fa75a8ba --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/amoeba_net_model.py @@ -0,0 +1,680 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AmoebaNet ImageNet model functions.""" + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * +from npu_bridge.estimator import npu_ops + +import copy +import os +import time + +import numpy as np +import six +import tensorflow.compat.v1 as tf + +import inception_preprocessing +import model_builder +import model_specs +from tensorflow.contrib import framework as contrib_framework +from tensorflow.contrib import tpu as contrib_tpu +from tensorflow.contrib import training as contrib_training + + +# Random cropping constants +_RESIZE_SIDE_MIN = 300 +_RESIZE_SIDE_MAX = 600 + +# Constants dictating the learning rate schedule. +RMSPROP_DECAY = 0.9 # Decay term for RMSProp. +RMSPROP_MOMENTUM = 0.9 # Momentum in RMSProp. +RMSPROP_EPSILON = 1.0 # Epsilon term for RMSProp. + + +def imagenet_hparams(): + """Returns default ImageNet training params. + + These defaults are for full training. 
For search training, some should be + modified to increase the speed of the search. + """ + return contrib_training.HParams( + ########################################################################## + # Input pipeline params. ################################################# + ########################################################################## + image_size=299, + num_train_images=1281167, + num_eval_images=50000, + num_label_classes=1001, + ########################################################################## + # Architectural params. ################################################## + ########################################################################## + + # The total number of regular cells (summed across all stacks). Reduction + # cells are not included. + num_cells=18, + reduction_size=256, + stem_reduction_size=32, + + # How many reduction cells to use between the stacks of regular cells. + num_reduction_layers=2, + + # Stem. + stem_type='imagenet', # 'imagenet' or others + num_stem_cells=2, # 2 if stem_type == 'imagenet' else 0 + + # Implementation details. + data_format='NCHW', # 'NHWC' or 'NCHW'. + + ########################################################################## + # Training params. ####################################################### + ########################################################################## + + # Summed across all TPU cores training a model. + train_batch_size=32, + num_epochs=100., + + # Auxiliary head. + use_aux_head=True, + aux_scaling=0.4, + + # Regularization. + l1_decay_rate=0.0, + label_smoothing=0.1, + drop_connect_keep_prob=0.7, + # `drop_connect_version` determines how the drop_connect probabilites are + # set/increased over time: + # -v1: increase dropout probability over training, + # -v2: increase dropout probability as you increase the number of cells, + # so the top cell has the highest dropout and the lowest cell has the + # lowest dropout, + # -v3: Do both v1 and v2. + drop_connect_version='v1', + drop_path_burn_in_steps=0, + # `drop_connect_condition` determines under what conditions drop_connect + # is used: + # -identity: Dropout all paths except identity connections, + # -all: Dropout all paths, + # -separable: Dropout only paths containing a separable conv operation. + dense_dropout_keep_prob=0.5, + batch_norm_epsilon=0.001, + batch_norm_decay=0.9997, + shuffle_buffer=20000, + + # Any value <= 0 means it is unused + gradient_clipping_by_global_norm=10.0, + + # Learning rate schedule. + lr=0.015, + lr_decay_method='exponential', + lr_decay_value=0.97, + lr_num_epochs_per_decay=2.4, + lr_warmup_epochs=3.0, + weight_decay=4e-05, + + # Optimizer. + optimizer='sgd', # 'sgd', 'mom', 'adam' or 'rmsprop' + rmsprop_decay=0.9, + rmsprop_momentum_rate=0.9, + rmsprop_epsilon=1.0, + momentum_rate=0.9, + use_nesterov=1, + + ########################################################################## + # Eval and reporting params. ############################################# + ########################################################################## + + # This number should be a multiple of the number of TPU shards + # used for eval (e.g., 2 for a 1x1 or 8 for a 2x2). + eval_batch_size=40, + + # How many different crops are fed into one model. Also affects training. + num_input_images=1, + moving_average_decay=0.9999, + write_summaries=0, + + ########################################################################## + # Other params. 
########################################################## + ########################################################################## + num_shards=None, + distributed_group_size=1, + use_tpu=False) + + +def build_hparams(cell_name='amoeba_net_d'): + """Build tf.Hparams for training Amoeba Net. + + Args: + cell_name: Which of the cells in model_specs.py to use to build the + amoebanet neural network; the cell names defined in that + module correspond to architectures discovered by an + evolutionary search described in + https://arxiv.org/abs/1802.01548. + + Returns: + A set of tf.HParams suitable for Amoeba Net training. + """ + hparams = imagenet_hparams() + operations, hiddenstate_indices, used_hiddenstates = ( + model_specs.get_normal_cell(cell_name)) + hparams.add_hparam('normal_cell_operations', operations) + hparams.add_hparam('normal_cell_hiddenstate_indices', + hiddenstate_indices) + hparams.add_hparam('normal_cell_used_hiddenstates', + used_hiddenstates) + operations, hiddenstate_indices, used_hiddenstates = ( + model_specs.get_reduction_cell(cell_name)) + hparams.add_hparam('reduction_cell_operations', + operations) + hparams.add_hparam('reduction_cell_hiddenstate_indices', + hiddenstate_indices) + hparams.add_hparam('reduction_cell_used_hiddenstates', + used_hiddenstates) + hparams.set_hparam('data_format', 'NHWC') + return hparams + + +def formatted_hparams(hparams): + """Formatts the hparams into a readable string. + + Also looks for attributes that have not correctly been added to the hparams + and prints the keys as "bad keys". These bad keys may be left out of iterators + and cirumvent type checking. + + Args: + hparams: an HParams instance. + + Returns: + A string. + """ + # Look for bad keys (see docstring). + good_keys = set(hparams.values().keys()) + bad_keys = [] + for key in hparams.__dict__: + if key not in good_keys and not key.startswith('_'): + bad_keys.append(key) + bad_keys.sort() + + # Format hparams. 
+ readable_items = [ + '%s: %s' % (k, v) for k, v in sorted(six.iteritems(hparams.values())) + ] + readable_items.append('Bad keys: %s' % ','.join(bad_keys)) + readable_string = ('\n'.join(readable_items)) + return readable_string + + +class AmoebaNetEstimatorModel(object): + """Definition of AmoebaNet.""" + + def __init__(self, hparams, model_dir): + self.hparams = hparams + self.model_dir = model_dir + + def _calc_num_trainable_params(self): + self.num_trainable_params = np.sum([ + np.prod(var.get_shape().as_list()) for var in tf.trainable_variables() + ]) + tf.logging.info( + 'number of trainable params: {}'.format(self.num_trainable_params)) + + def _build_learning_rate_schedule(self, global_step): + """Build learning rate.""" + steps_per_epoch = ( + self.hparams.num_train_images // self.hparams.train_batch_size) + lr_warmup_epochs = 0 + if self.hparams.lr_decay_method == 'exponential': + lr_warmup_epochs = self.hparams.lr_warmup_epochs + learning_rate = model_builder.build_learning_rate( + self.hparams.lr, + self.hparams.lr_decay_method, + global_step, + total_steps=steps_per_epoch * self.hparams.num_epochs, + decay_steps=steps_per_epoch * self.hparams.lr_num_epochs_per_decay, + decay_factor=self.hparams.lr_decay_value, + add_summary=False, + warmup_steps=int(lr_warmup_epochs * steps_per_epoch)) + + learning_rate = tf.maximum( + learning_rate, 0.0001 * self.hparams.lr, name='learning_rate') + return learning_rate + + def _build_network(self, features, labels, mode): + """Build a network that returns loss and logits from features and labels.""" + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + is_predict = (mode == tf.estimator.ModeKeys.PREDICT) + steps_per_epoch = float( + self.hparams.num_train_images) / self.hparams.train_batch_size + num_total_steps = int(steps_per_epoch * self.hparams.num_epochs) + self.hparams.set_hparam('drop_path_burn_in_steps', num_total_steps) + + hparams = copy.deepcopy(self.hparams) + if not is_training: + hparams.set_hparam('use_aux_head', False) + hparams.set_hparam('weight_decay', 0) + hparams.set_hparam('use_bp16', False) + + tf.logging.info( + 'Amoeba net received hparams for {}:\n{}'.format( + 'training' if is_training else 'eval', + formatted_hparams(hparams))) + + logits, end_points = model_builder.build_network( + features, hparams.num_label_classes, is_training, hparams) + + if not is_predict: + labels = tf.one_hot(labels, hparams.num_label_classes) + loss = model_builder.build_softmax_loss( + logits, + end_points, + labels, + label_smoothing=hparams.label_smoothing, + add_summary=False) + + # Calculate and print the number of trainable parameters in the model + if is_training: + flops = model_builder.compute_flops_per_example(hparams.train_batch_size) + else: + flops = model_builder.compute_flops_per_example(hparams.eval_batch_size) + tf.logging.info('number of flops: {}'.format(flops)) + self._calc_num_trainable_params() + + if is_predict: + return None, logits + + return loss, logits + + def _build_optimizer(self, learning_rate): + """Build optimizer.""" + if self.hparams.optimizer == 'sgd': + tf.logging.info('Using SGD optimizer') + optimizer = tf.train.GradientDescentOptimizer( + learning_rate=learning_rate) + opt_tmp = optimizer + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, decr_ratio=0.5) + optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager) + + # elif self.hparams.optimizer == 'adam': + # tf.logging.info('Using adam optimizer') + # 
+    #     optimizer = tf.train.MomentumOptimizer(
+    #         learning_rate=learning_rate,
+    #         momentum=self.hparams.momentum_rate)
+    #     opt_tmp = optimizer
+    #     loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+    #                                                            decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+    #     optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager)
+
+    elif self.hparams.optimizer == 'momentum':
+      tf.logging.info('Using Momentum optimizer')
+      optimizer = tf.train.MomentumOptimizer(
+          learning_rate=learning_rate,
+          momentum=self.hparams.momentum_rate)
+      opt_tmp = optimizer
+      loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+                                                             decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+      optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager)
+
+    elif self.hparams.optimizer == 'rmsprop':
+      tf.logging.info('Using RMSProp optimizer')
+      optimizer = tf.train.RMSPropOptimizer(
+          learning_rate,
+          RMSPROP_DECAY,
+          momentum=RMSPROP_MOMENTUM,
+          epsilon=RMSPROP_EPSILON)
+      opt_tmp = optimizer
+      loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000,
+                                                             decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+      optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager)
+
+    else:
+      tf.logging.fatal('Unknown optimizer: %s', self.hparams.optimizer)
+
+    if self.hparams.use_tpu:
+      optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
+    return optimizer
+
+  def _build_train_op(self, optimizer, loss, global_step):
+    """Build train_op from optimizer and loss."""
+    grads_and_vars = optimizer.compute_gradients(loss)
+    if self.hparams.gradient_clipping_by_global_norm > 0.0:
+      g, v = zip(*grads_and_vars)
+      g, _ = tf.clip_by_global_norm(
+          g, self.hparams.gradient_clipping_by_global_norm)
+      grads_and_vars = zip(g, v)
+
+    return optimizer.apply_gradients(grads_and_vars, global_step=global_step)
+
+  def model_fn(self, features, labels, mode, params):
+    """Build the model based on features, labels, and mode.
+
+    Args:
+      features: The features dictionary containing the data Tensor
+        and the number of examples.
+      labels: The labels Tensor resulting from calling the model.
+      mode: A string indicating the training mode.
+      params: A dictionary of hyperparameters.
+
+    Returns:
+      A tf.estimator.EstimatorSpec.
+    """
+    del params
+    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+    eval_active = (mode == tf.estimator.ModeKeys.EVAL)
+    is_predict = (mode == tf.estimator.ModeKeys.PREDICT)
+
+    if is_training:
+      features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC
+    loss, logits = self._build_network(features, labels, mode)
+
+    if is_predict:
+      predictions = {'logits': logits}
+      if self.hparams.use_tpu:
+        return contrib_tpu.TPUEstimatorSpec(mode=mode, predictions=predictions)
+      else:
+        return tf.estimator.EstimatorSpec(mode=mode,
+                                          predictions=predictions)
+    host_call = None
+    train_op = None
+
+    if is_training:
+      global_step = tf.train.get_or_create_global_step()
+      gs_t = tf.reshape(tf.cast(global_step, tf.int32), [1])
+
+      # Setup learning rate schedule
+      learning_rate = self._build_learning_rate_schedule(global_step)
+
+      # Setup optimizer.
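+      # _build_optimizer already wraps the chosen base optimizer (SGD,
+      # Momentum or RMSProp) in NPULossScaleOptimizer with an
+      # ExponentialUpdateLossScaleManager, so no extra loss-scale wrapping is
+      # needed at this point.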
+ optimizer = self._build_optimizer(learning_rate) + ''' + opt_tmp = optimizer + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, decr_ratio=0.5) + optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager) + ''' + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(update_ops): + train_op = self._build_train_op(optimizer, loss, + global_step=global_step) + if self.hparams.moving_average_decay > 0: + ema = tf.train.ExponentialMovingAverage( + decay=self.hparams.moving_average_decay, num_updates=global_step) + variables_to_average = (tf.trainable_variables() + + tf.moving_average_variables()) + with tf.control_dependencies([train_op]): + with tf.name_scope('moving_average'): + train_op = ema.apply(variables_to_average) + + lr_t = tf.reshape(learning_rate, [1]) + host_call = None + if self.hparams.enable_hostcall: + def host_call_fn(gs, lr): + # Outfeed supports int32 but global_step is expected to be int64. + gs = tf.cast(tf.reduce_mean(gs), tf.int64) + with tf.summary.create_file_writer(self.model_dir).as_default(): + with tf.summary.always_record_summaries(): + tf.summary.scalar('learning_rate', tf.reduce_mean(lr), step=gs) + return tf.summary.all_summary_ops() + + host_call = (host_call_fn, [gs_t, lr_t]) + + eval_metrics = None + eval_metric_ops = None + if eval_active: + def metric_fn(labels, logits): + """Evaluation metric fn. Performed on CPU, do not reference TPU ops.""" + # Outfeed supports int32 but global_step is expected to be int64. + predictions = tf.argmax(logits, axis=1) + categorical_labels = labels + top_1_accuracy = tf.metrics.accuracy(categorical_labels, predictions) + in_top_5 = tf.cast(tf.nn.in_top_k(logits, categorical_labels, 5), + tf.float32) + top_5_accuracy = tf.metrics.mean(in_top_5) + + return { + 'top_1_accuracy': top_1_accuracy, + 'top_5_accuracy': top_5_accuracy, + } + + eval_metrics = (metric_fn, [labels, logits]) + eval_metric_ops = metric_fn(labels, logits) + + if self.hparams.use_tpu: + return contrib_tpu.tpu.TPUEstimatorSpec( + mode=mode, + loss=loss, + train_op=train_op, + host_call=host_call, + eval_metrics=eval_metrics) + return tf.estimator.EstimatorSpec( + mode=mode, loss=loss, train_op=train_op, + eval_metric_ops=eval_metric_ops) + + +class InputPipeline(object): + """Generates ImageNet input_fn for training or evaluation. + + The training data is assumed to be in TFRecord format with keys as specified + in the dataset_parser below, sharded across 1024 files, named sequentially: + train-00000-of-01024 + train-00001-of-01024 + ... + train-01023-of-01024 + + The validation data is in the same format but sharded in 128 files. 
+ + The format of the data required is created by the script at: + https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py + + Args: + is_training: `bool` for whether the input is for training + """ + + def __init__(self, is_training, data_dir, hparams, eval_from_hub=False): + self.is_training = is_training + self.data_dir = data_dir + self.hparams = hparams + self.num_classes = 1001 + self.eval_from_hub = eval_from_hub + + def _dataset_parser(self, serialized_proto): + """Parse an Imagenet record from value.""" + keys_to_features = { + 'image/encoded': + tf.FixedLenFeature((), tf.string, default_value=''), + 'image/format': + tf.FixedLenFeature((), tf.string, default_value='jpeg'), + 'image/class/label': + tf.FixedLenFeature([], dtype=tf.int64, default_value=-1), + 'image/class/text': + tf.FixedLenFeature([], dtype=tf.string, default_value=''), + 'image/object/bbox/xmin': + tf.VarLenFeature(dtype=tf.float32), + 'image/object/bbox/ymin': + tf.VarLenFeature(dtype=tf.float32), + 'image/object/bbox/xmax': + tf.VarLenFeature(dtype=tf.float32), + 'image/object/bbox/ymax': + tf.VarLenFeature(dtype=tf.float32), + 'image/object/class/label': + tf.VarLenFeature(dtype=tf.int64), + } + + features = tf.parse_single_example(serialized_proto, keys_to_features) + + bbox = None + + image = features['image/encoded'] + image = tf.image.decode_jpeg(image, channels=3) + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + image = inception_preprocessing.preprocess_image( + image=image, + output_height=self.hparams.image_size, + output_width=self.hparams.image_size, + is_training=self.is_training, + # If eval_from_hub, do not scale the images during preprocessing. + scaled_images=not self.eval_from_hub, + bbox=bbox) + + label = tf.cast( + tf.reshape(features['image/class/label'], shape=[]), dtype=tf.int32) + + return image, label + + def input_fn(self, params): + """Input function which provides a single batch for train or eval. + + Args: + params: `dict` of parameters passed from the `TPUEstimator`. + + Returns: + A callable dataset object. + """ + # Retrieves the batch size for the current shard. The # of shards is + # computed according to the input pipeline deployment. See + # tf.contrib.tpu.RunConfig for details. + if 'batch_size' in params: + batch_size = params['batch_size'] + else: + batch_size = (self.hparams.train_batch_size if self.is_training + else self.hparams.eval_batch_size) + file_pattern = os.path.join( + self.data_dir, 'train/train-*' if self.is_training else 'validation/validation-*') + dataset = tf.data.Dataset.list_files(file_pattern, shuffle=self.is_training) + if self.is_training: + dataset = dataset.repeat() + def fetch_dataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = tf.data.TFRecordDataset(filename, buffer_size=buffer_size) + return dataset + + dataset = dataset.apply( + tf.data.experimental.parallel_interleave( + fetch_dataset, cycle_length=64, sloppy=True)) + dataset = dataset.shuffle(1024) + + # Use the fused map-and-batch operation. + # + # For XLA, we must used fixed shapes. Because we repeat the source training + # dataset indefinitely, we can use `drop_remainder=True` to get fixed-size + # batches without dropping any training examples. + # + # When evaluating, `drop_remainder=True` prevents accidentally evaluating + # the same image twice by dropping the final batch if it is less than a full + # batch size. 
As long as this validation is done with consistent batch size, + # exactly the same images will be used. + + dataset = dataset.apply( + tf.data.experimental.map_and_batch( + self._dataset_parser, batch_size=batch_size, + num_parallel_batches=8, drop_remainder=True)) + ''' + dataset = dataset.apply( + tf.data.experimental.map_and_batch( + self._dataset_parser, batch_size=batch_size, + num_parallel_batches=1, drop_remainder=True)) + ''' + if self.is_training: + dataset = dataset.map( + lambda images, labels: (tf.transpose(images, [1, 2, 3, 0]), labels), + num_parallel_calls=8) + ''' + if self.is_training: + dataset = dataset.map( + lambda images, labels: (tf.transpose(images, [1, 2, 3, 0]), labels)) + ''' + dataset = dataset.prefetch(32) # Prefetch overlaps in-feed with training + return dataset # Must return the dataset and not tensors for high perf! + + +class LoadEMAHook(tf.train.SessionRunHook): + """Hook to load EMA into their corresponding variables.""" + + def __init__(self, model_dir, moving_average_decay): + super(LoadEMAHook, self).__init__() + self._model_dir = model_dir + self.moving_average_decay = moving_average_decay + + def begin(self): + ema = tf.train.ExponentialMovingAverage(self.moving_average_decay) + variables_to_restore = ema.variables_to_restore() + self._load_ema = contrib_framework.assign_from_checkpoint_fn( + tf.train.latest_checkpoint(self._model_dir), variables_to_restore) + + def after_create_session(self, sess, coord): + tf.logging.info('Reloading EMA...') + self._load_ema(sess) + + +class SessionTimingHook(tf.train.SessionRunHook): + """Hook that computes speed based on session run time.""" + + def __init__(self): + # Lists of walltime. + self._before_runs = [] + self._after_runs = [] + + def before_run(self, run_context): + self._before_runs.append(time.time()) + + def after_run(self, run_context, results): + self._after_runs.append(time.time()) + + def compute_speed(self, num_samples): + """Returns speed, in number of samples per second.""" + num_runs = len(self._before_runs) + if num_runs == 0: + raise ValueError('Session run time never recorded') + if len(self._after_runs) != num_runs: + raise ValueError( + 'Number of before_run events (%d) does not match ' + 'number of after_run events (%d)' % + (len(self._before_runs), len(self._after_runs))) + total_eval_time = sum(self._after_runs[i] - self._before_runs[i] + for i in range(num_runs)) + if num_runs <= 1: + tf.logging.warn( + 'Speed will be inaccurate with only one session run') + else: + # Exclude the first run, which tends to take much longer than other runs. + total_eval_time -= (self._after_runs[0] - self._before_runs[0]) + # We assume num_samples are evenly distributed across runs. + num_samples *= (float(num_runs - 1) / num_runs) + return num_samples / max(total_eval_time, 1e-6) + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__init__.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..50a56ea6a26f4605441a03dffbc94a5732669d3d --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__pycache__/__init__.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b7aacfc220b3db7784edd2f7ff6b2b5f340dc00 Binary files /dev/null and b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__pycache__/__init__.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__pycache__/inference_warmup.cpython-37.pyc b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__pycache__/inference_warmup.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a90dafac5a7a750aff6052c9d4cf4728befb3262 Binary files /dev/null and b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/__pycache__/inference_warmup.cpython-37.pyc differ diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/imagenet.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/imagenet.py new file mode 100644 index 0000000000000000000000000000000000000000..9432e168c6678e3cb2c10eeffbd98aedd3aa0c5f --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/imagenet.py @@ -0,0 +1,74 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides data helpers for the ImageNet ILSVRC 2012 Dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import urllib + + +def create_readable_names_for_imagenet_labels(): + """Create a dict mapping label id to human readable string. + + Returns: + labels_to_names: dictionary where keys are integers from to 1000 + and values are human-readable names. + + We retrieve a synset file, which contains a list of valid synset labels used + by ILSVRC competition. There is one synset one per line, eg. + # n01440764 + # n01443537 + We also retrieve a synset_to_human_file, which contains a mapping from synsets + to human-readable names for every synset in Imagenet. These are stored in a + tsv format, as follows: + # n02119247 black fox + # n02119359 silver fox + We assign each synset (in alphabetical order) an integer, starting from 1 + (since 0 is reserved for the background class). 
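+  The returned mapping therefore starts roughly as follows (entries shown
+  here only as an illustration):
+  #   0: background
+  #   1: tench, Tinca tinca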
+ """ + + # pylint: disable=g-line-too-long + base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/' + # pylint: enable=g-line-too-long + synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url) + synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url) + + filename, _ = urllib.request.urlretrieve(synset_url) + synset_list = [s.strip() for s in open(filename).readlines()] + num_synsets_in_ilsvrc = len(synset_list) + assert num_synsets_in_ilsvrc == 1000 + + filename, _ = urllib.request.urlretrieve(synset_to_human_url) + synset_to_human_list = open(filename).readlines() + num_synsets_in_all_imagenet = len(synset_to_human_list) + assert num_synsets_in_all_imagenet == 21842 + + synset_to_human = {} + for s in synset_to_human_list: + parts = s.strip().split('\t') + assert len(parts) == 2 + synset = parts[0] + human = parts[1] + synset_to_human[synset] = human + + label_index = 1 + labels_to_names = {0: 'background'} + for synset in synset_list: + name = synset_to_human[synset] + labels_to_names[label_index] = name + label_index += 1 + + return labels_to_names diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/inference_warmup.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/inference_warmup.py new file mode 100644 index 0000000000000000000000000000000000000000..53380b4b0ec7bd0781fefda999887ee96b5522a7 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/inference_warmup.py @@ -0,0 +1,97 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Writer for inference warmup requests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import io +import os +import numpy as np +import tensorflow.compat.v1 as tf + + +def _encode_image(image_array, fmt): + """encodes an (numpy) image array to string. + + Args: + image_array: (numpy) image array + fmt: image format to use + + Returns: + encoded image string + """ + from PIL import Image # pylint: disable=g-import-not-at-top + pil_image = Image.fromarray(image_array) + image_io = io.BytesIO() + pil_image.save(image_io, format=fmt) + return image_io.getvalue() + + +def write_warmup_requests(savedmodel_dir, + model_name, + image_size, + batch_sizes=None, + num_requests=8, + image_format='PNG', + input_signature='input'): + """Writes warmup requests for inference into a tfrecord file. + + Args: + savedmodel_dir: string, the file to the exported model folder. + model_name: string, a model name used inside the model server. + image_size: tuple/list or int, size of image. For list/tuple input, assuming + it contains image height and width. + batch_sizes: list, a list of batch sizes to create different input requests. + num_requests: int, number of requests per batch size. 
+ image_format: string, the format of the image to write (PNG, JPEG) + input_signature: string, input signature defined in exported saved model. + + Raises: + ValueError: if batch_sizes is not a valid integer list. + """ + from tensorflow_serving.apis import predict_pb2 # pylint: disable=g-import-not-at-top + from tensorflow_serving.apis import prediction_log_pb2 # pylint: disable=g-import-not-at-top + if not isinstance(batch_sizes, list) or not batch_sizes: + raise ValueError('batch sizes should be a valid non-empty list.') + extra_assets_dir = os.path.join(savedmodel_dir, 'assets.extra') + tf.gfile.MkDir(extra_assets_dir) + if isinstance(image_size, int): + height = image_size + width = image_size + elif isinstance(image_size, tuple) or isinstance(image_size, list): + height = image_size[0] + width = image_size[1] + else: + raise ValueError( + 'image_size is not a supported type: %s' % type(image_size)) + + with tf.python_io.TFRecordWriter( + os.path.join(extra_assets_dir, 'tf_serving_warmup_requests')) as writer: + for batch_size in batch_sizes: + for _ in range(num_requests): + request = predict_pb2.PredictRequest() + image = np.uint8(np.random.rand(height, width, 3) * 255) + request.inputs[input_signature].CopyFrom( + tf.make_tensor_proto( + [_encode_image(image, image_format)] * batch_size, + shape=[batch_size])) + request.model_spec.name = model_name + request.model_spec.signature_name = 'serving_default' + log = prediction_log_pb2.PredictionLog( + predict_log=prediction_log_pb2.PredictLog(request=request)) + writer.write(log.SerializeToString()) diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/tpu_profiler_hook.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/tpu_profiler_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..e8d6e262e2e47316dd29af523aa15a44909b52ab --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/common/tpu_profiler_hook.py @@ -0,0 +1,120 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""TPU Profiler Hook.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import subprocess + +import tensorflow.compat.v1 as tf + + +class TPUProfilerHook(tf.train.SessionRunHook): + """Captures TPU profiling information every N steps or seconds. + + Collects profiles using the cloud tpu profiler. The hook only works in + google cloud with cloud_tpu_profiler installed. + See https://cloud.google.com/tpu/docs/cloud-tpu-tools for detailed usage + for the capture_tpu_profile command. These profiles can be viewed in + Tensorboard. TPU profiling should not be invoked more frequently than every + 10 seconds. 
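+
+  A minimal usage sketch (the tpu name, bucket path and step count below are
+  illustrative only):
+
+    hook = TPUProfilerHook(tpu='my-tpu', output_dir='gs://my-bucket/profiles',
+                           save_steps=1000)
+    estimator.train(input_fn, hooks=[hook])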
+ """ + + def __init__(self, + tpu, + output_dir, + save_steps=None, + save_secs=None, + tpu_profiler_command=None): + """Initializes a hook that takes periodic profiling snapshots. + + Args: + tpu: Grpc address to the tpu master. + output_dir: `string`, the directory to save the profile traces to. + save_steps: `int`, save profile traces every N steps. Exactly one of + `save_secs` and `save_steps` should be set. + save_secs: `int` or `float`, save profile traces every N seconds. + tpu_profiler_command: Custom tpu profiler command (e.g. + $install_loc/capture_tpu_profile --duration_ms=20000 + --num_tracing_attempts=10). If not specified, profiling 2 secs with + 3 attempts by default. + + Raises: + ValueError: if `tpu` is not a string. + """ + if not isinstance(tpu, str): + raise ValueError("--tpu should be provided with a string.") + + self._timer = tf.train.SecondOrStepTimer( + every_secs=save_secs, every_steps=save_steps) + self._tpu_profiler_command = None + + if tpu_profiler_command is None: + tpu_profiler_command = ["/usr/local/bin/capture_tpu_profile"] + self._tpu_profiler_command = tpu_profiler_command + if tpu.startswith("grpc://"): + tf.logging.warn( + "Profiling single TPU pointed by %s. Use tpu name to profile a pod." % + tpu) + service_addr = tpu.split("://")[1] + worker = service_addr.split(":")[0] + self._tpu_profiler_command += [ + "--service_addr=" + service_addr, "--workers_list=" + worker + ] + else: + self._tpu_profiler_command += ["--tpu=" + tpu] + self._tpu_profiler_command += ["--logdir=" + output_dir] + self._running_process = None + self._ran_first_step = False + + def begin(self): + self._global_step_tensor = tf.train.get_or_create_global_step() # pylint: disable=protected-access + + def before_run(self, run_context): + return tf.train.SessionRunArgs({"global_step": self._global_step_tensor}) + + def after_run(self, run_context, run_values): + stale_global_step = run_values.results["global_step"] + if not self._ran_first_step: + # Update the timer so that it does not activate until N steps or seconds + # have passed. 
+ self._timer.update_last_triggered_step(stale_global_step) + self._ran_first_step = True + + global_step = stale_global_step + 1 + if (stale_global_step > 1 and + self._timer.should_trigger_for_step(stale_global_step)): + global_step = run_context.session.run(self._global_step_tensor) + self._timer.update_last_triggered_step(global_step) + self._collect_tpu_profile(global_step) + + def _collect_tpu_profile(self, step): + """Run capture_tpu_profile if not already running.""" + + if self._running_process is not None: + exit_code = self._running_process.poll() + if exit_code is not None: + tf.logging.info("Previous profile exited with status: %s", exit_code) + else: + tf.logging.info( + "Profiler is already running, skipping collection at step %d", step) + return + tf.logging.info( + "Saving profile at step %d with command %s", step, + self._tpu_profiler_command) + self._running_process = subprocess.Popen(self._tpu_profiler_command) diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/fusion_switch.cfg b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/fusion_switch.cfg new file mode 100644 index 0000000000000000000000000000000000000000..71a3d36cfea766441a1115151c3734bf5b15ccaf --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/fusion_switch.cfg @@ -0,0 +1,37 @@ +{ + "Switch":{ + "GraphFusion":{ + "AReduceMeanFusionPass":"off", + "AReduceSumFusionPass":"off", + "ConvToFullyConnectionFusionPass":"off", + "ConcatCToNOptimizeFusionPass":"off", + "Conv2DbpFilterMulFusionPass":"off", + "Conv2DbpInputDilationFusionPass":"off", + "ConvBatchnormFusionPass":"off", + "ConvConcatFusionPass":"off", + "ConvWeightCompressFusionPass":"off", + "FusedBatchNormBertFusionPass":"off", + "SoftmaxFusionPass":"off", + "V100RequantFusionPass":"off", + "V100NotRequantFusionPass":"off", + "SplitConvConcatFusionPass":"off", + "StridedSliceGradFusionPass":"off", + "MatMulBiasAddFusionPass":"off", + "MulAddFusionPass":"off", + "MulAddNL2LossFusionPass":"off", + "MulGradFusionPass":"off", + "MulAddNPass":"off", + "PoolingFusionPass":"off", + "Pow2SquareFusionPass":"off", + "Resnet50DbnDwFusionPass":"off", + "ZConcatv2dFusionPass":"off", + "ZConcatExt2FusionPass":"off", + "TransdataCastFusionPass":"off", + "TfMergeSubFusionPass":"off" + }, + "UBFusion":{ + "TbePool2dQuantFusionPass":"off", + "FusionVirtualOpSetSwitch":"off" + } + } +} diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/inception_preprocessing.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/inception_preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..7cac012db97d42805ca77c2f49dce14b75a02fca --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/inception_preprocessing.py @@ -0,0 +1,419 @@ + + + +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provides utilities to preprocess images for the Inception networks.""" + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * + +from absl import flags +import tensorflow.compat.v1 as tf + +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import random_ops + + +flags.DEFINE_float( + 'cb_distortion_range', 0.1, 'Cb distortion range +/-') + +flags.DEFINE_float( + 'cr_distortion_range', 0.1, 'Cr distortion range +/-') + +flags.DEFINE_boolean( + 'use_fast_color_distort', True, + 'apply fast color/chroma distortion if True, else apply' + 'brightness/saturation/hue/contrast distortion') + +FLAGS = flags.FLAGS + + +def apply_with_random_selector(x, func, num_cases): + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. + + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0] + + +def distort_color(image, color_ordering=0, fast_mode=True, scope=None): + """Distort the color of a Tensor image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: 3-D Tensor containing single image in [0, 1]. + color_ordering: Python int, a type of distortion (valid values: 0-3). + fast_mode: Avoids slower ops (random_hue and random_contrast) + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + Raises: + ValueError: if color_ordering not in [0, 3] + """ + with tf.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) 
+ image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. + return tf.minimum(tf.maximum(image, 0.0), 1.0) + + +def distort_color_fast(image, scope=None): + """Distort the color of a Tensor image. + + Distort brightness and chroma values of input image + + Args: + image: 3-D Tensor containing single image in [0, 1]. + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + """ + with tf.name_scope(scope, 'distort_color', [image]): + br_delta = random_ops.random_uniform([], -32./255., 32./255., seed=None) + cb_factor = random_ops.random_uniform( + [], -FLAGS.cb_distortion_range, FLAGS.cb_distortion_range, seed=None) + cr_factor = random_ops.random_uniform( + [], -FLAGS.cr_distortion_range, FLAGS.cr_distortion_range, seed=None) + + channels = tf.split(axis=2, num_or_size_splits=3, value=image) + red_offset = 1.402 * cr_factor + br_delta + green_offset = -0.344136 * cb_factor - 0.714136 * cr_factor + br_delta + blue_offset = 1.772 * cb_factor + br_delta + channels[0] += red_offset + channels[1] += green_offset + channels[2] += blue_offset + image = tf.concat(axis=2, values=channels) + image = tf.minimum(tf.maximum(image, 0.), 1.) + return image + + +def distorted_bounding_box_crop(image, + bbox, + min_object_covered=0.1, + aspect_ratio_range=(3./4., 4./3.), + area_range=(0.05, 1.0), + max_attempts=100, + scope=None): + """Generates cropped_image using a one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: 3-D Tensor of image (it will be converted to floats in [0, 1]). + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + aspect_ratio_range: An optional list of `floats`. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `floats`. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. 
+ scope: Optional scope for name_scope. + Returns: + A tuple, a 3-D Tensor cropped_image and the distorted bbox + """ + with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + cropped_image = tf.slice(image, bbox_begin, bbox_size) + return cropped_image, distort_bbox + + +def preprocess_for_train(image, height, width, bbox, + min_object_covered=0.1, + fast_mode=True, + scope=None, + add_image_summaries=True): + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Additionally it would create image_summaries to display the different + transformations applied to the image. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + fast_mode: Optional boolean, if True avoids slower transformations (i.e. + bi-cubic resizing, random_hue or random_contrast). + scope: Optional scope for name_scope. + add_image_summaries: Enable image summaries. + Returns: + 3-D float Tensor of distorted image used for training with range [-1, 1]. + """ + with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + if bbox is None: + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, + shape=[1, 1, 4]) + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + if add_image_summaries: + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. 
+ image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.summary.image('image_with_bounding_boxes', image_with_box) + + distorted_image, distorted_bbox = distorted_bounding_box_crop( + image, + bbox, + min_object_covered=min_object_covered, + area_range=(min_object_covered, 1.0)) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([None, None, 3]) + if add_image_summaries: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distorted_bbox) + tf.summary.image('images_with_distorted_bounding_box', + image_with_distorted_box) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + + # We select only 1 case for fast_mode bilinear. + num_resize_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, method: tf.image.resize_images(x, [height, width], method), + num_cases=num_resize_cases) + + if add_image_summaries: + tf.summary.image('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. There are 1 or 4 ways to do it. + if FLAGS.use_fast_color_distort: + distorted_image = distort_color_fast(distorted_image) + else: + num_distort_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, ordering: distort_color(x, ordering, fast_mode), + num_cases=num_distort_cases) + + if add_image_summaries: + tf.summary.image('final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image + + +def preprocess_for_eval(image, height, width, + central_fraction=0.875, scope=None): + """Prepare one image for evaluation. + + If height and width are specified it would output an image with that size by + applying resize_bilinear. + + If central_fraction is specified it would crop the central fraction of the + input image. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of prepared image. + """ + with tf.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. 
+ image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image.set_shape([height, width, 3]) + return image + + +def preprocess_image(image, + output_height, + output_width, + is_training=False, + scaled_images=True, + bbox=None, + min_object_covered=0.1, + fast_mode=True, + add_image_summaries=False): + """Pre-process one image for training or evaluation. + + Args: + image: 3-D Tensor [height, width, channels] with the image. If dtype is + tf.float32 then the range should be [0, 1], otherwise it would converted + to tf.float32 assuming that the range is [0, MAX], where MAX is largest + positive representable number for int(8/16/32) data type (see + `tf.image.convert_image_dtype` for details). + output_height: integer, image expected height. + output_width: integer, image expected width. + is_training: Boolean. If true it would transform an image for train, + otherwise it would transform it for evaluation. + scaled_images: Whether to scale pixel values to the range [-1, 1]. + If set to false, pixel values are in the range [0, 1]. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + fast_mode: Optional boolean, if True avoids slower transformations. + add_image_summaries: Enable image summaries. + + Returns: + 3-D float Tensor containing an appropriately scaled image + + Raises: + ValueError: if user does not provide bounding box + """ + if is_training: + image = preprocess_for_train( + image, + output_height, + output_width, + bbox, + min_object_covered, + fast_mode, + add_image_summaries=add_image_summaries) + else: + image = preprocess_for_eval(image, output_height, output_width) + if scaled_images: + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/model_builder.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/model_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..532d0d28819347103c6f5210d044e72b194d9747 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/model_builder.py @@ -0,0 +1,451 @@ + + +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Constructs a generic image model based on the hparams the user passes in. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * + +import functools + +import numpy as np +import tensorflow.compat.v1 as tf + +import network_utils +from tensorflow.contrib import layers as contrib_layers +from tensorflow.contrib import slim +from tensorflow.contrib.framework.python.ops import arg_scope + + +def _build_loss(loss_fn, loss_name, logits, end_points, labels, + add_summary=False): + """Compute total loss based on the specified loss function.""" + # Collect all losses explicitly to sum up the total_loss. + losses = [] + + # Whethere to add aux loss is controled in network_fn. Once an aux head is + # built, an aux loss would be added here automatically. + aux_head_endpoint = None + if 'AuxLogits' in end_points: + # For Inception/Genet aux head. + aux_head_endpoint = end_points['AuxLogits'] + elif 'aux_logits' in end_points: + # For AmoebaNet aux head. + aux_head_endpoint = end_points['aux_logits'], + if aux_head_endpoint: + aux_loss = loss_fn( + labels, + tf.squeeze(aux_head_endpoint, axis=[0]), + weights=0.4, + scope='aux_loss') + tf.logging.info('Adding to aux loss.') + if add_summary: + tf.summary.scalar('losses/aux_loss', aux_loss) + + losses.append(aux_loss) + + # Add the empirical loss. + primary_loss = loss_fn(labels, logits, weights=1.0, scope=loss_name) + losses.append(primary_loss) + + # Add regularization losses. + reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + if reg_losses: + fp32_reg_losses = [] + for reg_loss in reg_losses: + fp32_reg_losses.append(tf.cast(reg_loss, tf.float32)) + reg_loss = tf.add_n(fp32_reg_losses, name='regularization_loss') + losses.append(reg_loss) + + total_loss = tf.add_n(losses, name='total_loss') + if add_summary: + tf.summary.scalar('losses/' + loss_name, primary_loss) + tf.summary.scalar('losses/regularization_loss', reg_loss) + tf.summary.scalar('losses/total_loss', total_loss) + + return total_loss + + +def build_softmax_loss(logits, + end_points, + labels, + label_smoothing=0.1, + add_summary=True): + loss_fn = functools.partial( + tf.losses.softmax_cross_entropy, label_smoothing=label_smoothing) + return _build_loss( + loss_fn=loss_fn, + loss_name='softmax_loss', + logits=logits, + end_points=end_points, + labels=labels, + add_summary=add_summary) + + +def compute_flops_per_example(batch_size): + # TODO(ereal): remove this function and other unnecessary reporting. 
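+  # Profiles the default graph and returns its total float-op count divided
+  # by the batch size, i.e. an approximate FLOP count per example.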
+ options = tf.profiler.ProfileOptionBuilder.float_operation() + options['output'] = 'none' + flops = ( + tf.profiler.profile( + tf.get_default_graph(), + options=options + ).total_float_ops / batch_size) + return flops + + +def build_learning_rate(initial_lr, + lr_decay_type, + global_step, + decay_factor=None, + decay_steps=None, + stepwise_epoch=None, + total_steps=None, + add_summary=True, + warmup_steps=0): + """Build learning rate.""" + if lr_decay_type == 'exponential': + assert decay_steps is not None + assert decay_factor is not None + lr = tf.train.exponential_decay( + initial_lr, global_step, decay_steps, decay_factor, staircase=True) + elif lr_decay_type == 'cosine': + assert total_steps is not None + lr = 0.5 * initial_lr * ( + 1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps)) + elif lr_decay_type == 'constant': + lr = initial_lr + elif lr_decay_type == 'stepwise': + assert stepwise_epoch is not None + boundaries = [ + 10 * stepwise_epoch, + 20 * stepwise_epoch, + ] + values = [initial_lr, initial_lr * 0.1, initial_lr * 0.01] + lr = tf.train.piecewise_constant(global_step, boundaries, values) + else: + assert False, 'Unknown lr_decay_type : %s' % lr_decay_type + + # By default, warmup_steps_fraction = 0.0 which means no warmup steps. + tf.logging.info('Learning rate warmup_steps: %d' % warmup_steps) + warmup_lr = ( + initial_lr * tf.cast(global_step, tf.float32) / tf.cast( + warmup_steps, tf.float32)) + lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr) + + if add_summary: + tf.summary.scalar('learning_rate', lr) + + return lr + + +def _build_aux_head(net, end_points, num_classes, hparams, scope): + """Auxiliary head used for all models across all datasets.""" + aux_scaling = 1.0 + # TODO(huangyp): double check aux_scaling with vrv@. + if hasattr(hparams, 'aux_scaling'): + aux_scaling = hparams.aux_scaling + tf.logging.info('aux scaling: {}'.format(aux_scaling)) + with tf.variable_scope(scope, custom_getter=network_utils.bp16_getter): + aux_logits = tf.identity(net) + with tf.variable_scope('aux_logits'): + aux_logits = slim.avg_pool2d( + aux_logits, [5, 5], stride=3, padding='VALID') + aux_logits = slim.conv2d(aux_logits, int(128 * aux_scaling), + [1, 1], scope='proj') + aux_logits = network_utils.batch_norm(aux_logits, scope='aux_bn0') + aux_logits = tf.nn.relu(aux_logits) + # Shape of feature map before the final layer. 
+ shape = aux_logits.shape + if hparams.data_format == 'NHWC': + shape = shape[1:3] + else: + shape = shape[2:4] + aux_logits = slim.conv2d(aux_logits, int(768 * aux_scaling), + shape, padding='VALID') + aux_logits = network_utils.batch_norm(aux_logits, scope='aux_bn1') + aux_logits = tf.nn.relu(aux_logits) + aux_logits = contrib_layers.flatten(aux_logits) + aux_logits = slim.fully_connected(aux_logits, num_classes) + end_point_name = ( + 'aux_logits' if 'aux_logits' not in end_points else 'aux_logits_2') + end_points[end_point_name] = tf.cast(aux_logits, tf.float32) + + +def _imagenet_stem(inputs, hparams, stem_cell, filter_scaling_rate): + """Stem used for models trained on ImageNet.""" + # 149 x 149 x 32 + num_stem_filters = hparams.stem_reduction_size + with tf.variable_scope('stem', custom_getter=network_utils.bp16_getter): + net = slim.conv2d( + inputs, num_stem_filters, [3, 3], stride=2, scope='conv0', + padding='VALID') + net = network_utils.batch_norm(net, scope='conv0_bn') + tf.logging.info('imagenet_stem shape: {}'.format(net.shape)) + # Run the reduction cells + cell_outputs = [None, net] + filter_scaling = 1.0 / (filter_scaling_rate**hparams.num_stem_cells) + for cell_num in range(hparams.num_stem_cells): + net = stem_cell( + net, + scope='cell_stem_{}'.format(cell_num), + filter_scaling=filter_scaling, + stride=2, + prev_layer=cell_outputs[-2], + cell_num=cell_num) + cell_outputs.append(net) + filter_scaling *= filter_scaling_rate + tf.logging.info('imagenet_stem net shape at reduction layer {}: {}'.format( + cell_num, net.shape)) + # Only include cells in the cell_outputs. + return net, cell_outputs + + +def _basic_stem(inputs, hparams): + num_stem_filters = hparams.stem_reduction_size + with tf.variable_scope('stem', custom_getter=network_utils.bp16_getter): + net = slim.conv2d( + inputs, num_stem_filters, [3, 3], stride=1, scope='conv0', + padding='VALID') + net = network_utils.batch_norm(net, scope='conv0_bn') + tf.logging.info('basic_stem shape: {}'.format(net.shape)) + return net, [None, net] + + +def network_arg_scope(weight_decay=5e-5, + batch_norm_decay=0.9997, + batch_norm_epsilon=1e-3, + is_training=True, + data_format='NHWC', + num_shards=None, + distributed_group_size=1): + """Defines the default arg scope for the AmoebaNet ImageNet model. + + Args: + weight_decay: The weight decay to use for regularizing the model. + batch_norm_decay: Decay for batch norm moving average. + batch_norm_epsilon: Small float added to variance to avoid dividing by zero + in batch norm. + is_training: whether is training or not. + Useful for fine-tuning a model with different num_classes. + data_format: Input data format. + num_shards: Number of shards in the job + distributed_group_size: Size of the group to average for batch norm. + Returns: + An `arg_scope` to use for the AmoebaNet Model. + """ + batch_norm_params = { + # Decay for the moving averages. + 'decay': batch_norm_decay, + # epsilon to prevent 0s in variance. 
+ 'epsilon': batch_norm_epsilon, + 'scale': True, + 'moving_vars': 'moving_vars', + 'is_training': is_training, + 'data_format': data_format, + 'num_shards': num_shards, + 'distributed_group_size': distributed_group_size, + } + weights_regularizer = contrib_layers.l2_regularizer(weight_decay) + weights_initializer = contrib_layers.variance_scaling_initializer( + mode='FAN_OUT') + with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d], + weights_regularizer=weights_regularizer, + weights_initializer=weights_initializer): + with arg_scope([slim.fully_connected], + activation_fn=None, scope='FC'): + with arg_scope([slim.conv2d, slim.separable_conv2d], + activation_fn=None, biases_initializer=None): + with arg_scope([network_utils.batch_norm], **batch_norm_params): + with arg_scope( + [slim.dropout, network_utils.drop_path], is_training=is_training): + with arg_scope([slim.avg_pool2d, + slim.max_pool2d, + slim.conv2d, + slim.separable_conv2d, + network_utils.factorized_reduction, + network_utils.global_avg_pool, + network_utils.get_channel_index, + network_utils.get_channel_dim], + data_format=data_format) as sc: + return sc + + +def build_network(inputs, + num_classes, + is_training=True, + hparams=None): + """Builds an image model. + + Builds a model the takes inputs and return logits and endpoints. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of classes needed to be predicted by the model. If None, + only returns the feature vector endpoints after global_pool. + is_training: whether is training or not. + Useful for fine-tuning a model with different num_classes. + hparams: hparams used to construct the imagenet model. + + Returns: + a list containing 'logits', 'aux_logits' Tensors. + + Raises: + ValueError: upon invalid hparams. 
+ """ + total_num_cells = (hparams.num_cells + + hparams.num_reduction_layers + + hparams.num_stem_cells) + normal_cell = network_utils.BaseCell( + hparams.reduction_size, hparams.normal_cell_operations, + hparams.normal_cell_used_hiddenstates, + hparams.normal_cell_hiddenstate_indices, hparams.drop_connect_keep_prob, + total_num_cells, hparams.drop_path_burn_in_steps) + reduction_cell = network_utils.BaseCell( + hparams.reduction_size, hparams.reduction_cell_operations, + hparams.reduction_cell_used_hiddenstates, + hparams.reduction_cell_hiddenstate_indices, + hparams.drop_connect_keep_prob, total_num_cells, + hparams.drop_path_burn_in_steps) + num_shards = hparams.num_shards + distributed_group_size = hparams.distributed_group_size + assert distributed_group_size == 1 or hparams.use_tpu + sc = network_arg_scope(weight_decay=hparams.weight_decay, + batch_norm_decay=hparams.batch_norm_decay, + batch_norm_epsilon=hparams.batch_norm_epsilon, + is_training=is_training, + data_format=hparams.data_format, + num_shards=num_shards, + distributed_group_size=distributed_group_size) + with arg_scope(sc): + return _build_network_base(inputs, + normal_cell=normal_cell, + reduction_cell=reduction_cell, + num_classes=num_classes, + hparams=hparams, + is_training=is_training) + + +def _build_network_base(images, + normal_cell, + reduction_cell, + num_classes, + hparams, + is_training): + """Constructs a AmoebaNet image model.""" + if hparams.get('use_bp16', False) and hparams.get('use_tpu', False): + images = tf.cast(images, dtype=tf.bfloat16) + end_points = {} + filter_scaling_rate = 2 + # Find where to place the reduction cells or stride normal cells + reduction_indices = network_utils.calc_reduction_layers( + hparams.num_cells, hparams.num_reduction_layers) + stem_cell = reduction_cell + + if hparams.stem_type == 'imagenet': + net, cell_outputs = _imagenet_stem(images, hparams, stem_cell, + filter_scaling_rate) + else: + net, cell_outputs = _basic_stem(images, hparams) + + # Setup for building in the auxiliary head. 
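+  # When use_aux_head is enabled, the auxiliary classifier is attached to the
+  # output of the cell immediately preceding the second reduction layer.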
+ aux_head_cell_idxes = [] + if len(reduction_indices) >= 2: + aux_head_cell_idxes.append(reduction_indices[1] - 1) + # Run the cells + filter_scaling = 1.0 + # true_cell_num accounts for the stem cells + true_cell_num = hparams.num_stem_cells + for cell_num in range(hparams.num_cells): + tf.logging.info('Current cell num: {}'.format(true_cell_num)) + + if cell_num in reduction_indices: + filter_scaling *= filter_scaling_rate + net = reduction_cell( + net, + scope='reduction_cell_{}'.format(reduction_indices.index(cell_num)), + filter_scaling=filter_scaling, + stride=2, + prev_layer=cell_outputs[-2], + cell_num=true_cell_num) + cell_outputs.append(net) + tf.logging.info('Reduction cell shape at layer {}: {}'.format( + true_cell_num, net.shape)) + true_cell_num += 1 + net = normal_cell( + net, + scope='cell_{}'.format(cell_num), + filter_scaling=filter_scaling, + stride=1, + prev_layer=cell_outputs[-2], + cell_num=true_cell_num) + if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and + num_classes and is_training): + aux_net = tf.nn.relu(net) + _build_aux_head(aux_net, end_points, num_classes, hparams, + scope='aux_{}'.format(cell_num)) + cell_outputs.append(net) + tf.logging.info('Normal net shape at layer {}: {}'.format( + true_cell_num, net.shape)) + true_cell_num += 1 + + # Final softmax layer + with tf.variable_scope('final_layer', + custom_getter=network_utils.bp16_getter): + net = tf.nn.relu(net) + net = network_utils.global_avg_pool(net) + end_points['global_pool'] = net + if not num_classes: + return net, end_points + ''' + net = tf.to_float( + net, + name='ToFloat' + ) + net = tf.cast(net, tf.float16) + net = npu_ops.dropout(net, hparams.dense_dropout_keep_prob) + ''' + net = slim.dropout(net, hparams.dense_dropout_keep_prob, scope='dropout') + logits = slim.fully_connected(net, num_classes) + logits = tf.cast(logits, tf.float32) + predictions = tf.nn.softmax(logits, name='predictions') + end_points['logits'] = logits + end_points['predictions'] = predictions + end_points['cell_outputs'] = cell_outputs + return logits, end_points + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/model_specs.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/model_specs.py new file mode 100644 index 0000000000000000000000000000000000000000..787e4451160ebecfa9c684c9eceb0b339549eb45 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/model_specs.py @@ -0,0 +1,115 @@ + + + +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Configs for various AmoebaNet architectures.""" + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * +from npu_bridge.estimator import npu_ops + +def get_normal_cell(cell_name): + """Return normal cell spec.""" + operations = [] + hiddenstate_indices = [] + used_hiddenstates = [] + if cell_name == 'evol_net_g' or cell_name == 'amoeba_net_a': + operations = ['avg_pool_3x3', 'max_pool_3x3', 'separable_3x3_2', 'none', + 'none', 'avg_pool_3x3', 'separable_3x3_2', 'separable_5x5_2', + 'avg_pool_3x3', 'separable_3x3_2'] + hiddenstate_indices = [0, 0, 1, 1, 0, 1, 0, 2, 5, 0] + used_hiddenstates = [1, 0, 1, 0, 0, 1, 0] + elif cell_name == 'evol_net_h' or cell_name == 'amoeba_net_b': + operations = ['1x1', 'max_pool_3x3', 'none', 'separable_3x3_2', '1x1', + 'separable_3x3_2', '1x1', 'none', 'avg_pool_3x3', '1x1'] + hiddenstate_indices = [1, 1, 1, 0, 1, 0, 2, 2, 1, 5] + used_hiddenstates = [0, 1, 1, 0, 0, 1, 0] + elif cell_name == 'evol_net_a' or cell_name == 'amoeba_net_c': + operations = ['avg_pool_3x3', 'separable_3x3_2', 'none', 'separable_3x3_2', + 'avg_pool_3x3', 'separable_3x3_2', 'none', 'separable_3x3_2', + 'avg_pool_3x3', 'separable_3x3_2'] + hiddenstate_indices = [0, 0, 0, 0, 2, 1, 0, 1, 3, 0] + used_hiddenstates = [1, 0, 0, 1, 0, 0, 0] + elif cell_name == 'evol_net_x' or cell_name == 'amoeba_net_d': + operations = ['1x1', 'max_pool_3x3', 'none', '1x7_7x1', '1x1', '1x7_7x1', + 'max_pool_3x3', 'none', 'avg_pool_3x3', '1x1'] + hiddenstate_indices = [1, 1, 1, 0, 0, 0, 2, 2, 1, 5] + used_hiddenstates = [0, 1, 1, 0, 0, 1, 0] + else: + raise ValueError('Unsupported cell name: %s.' 
% cell_name) + return operations, hiddenstate_indices, used_hiddenstates + + +def get_reduction_cell(cell_name): + """Return reduction cell spec.""" + operations = [] + hiddenstate_indices = [] + used_hiddenstates = [] + if cell_name == 'evol_net_g' or cell_name == 'amoeba_net_a': + operations = ['separable_3x3_2', 'avg_pool_3x3', 'max_pool_3x3', + 'separable_7x7_2', 'max_pool_3x3', 'max_pool_3x3', + 'separable_3x3_2', '1x7_7x1', 'avg_pool_3x3', + 'separable_7x7_2'] + hiddenstate_indices = [1, 0, 0, 2, 1, 0, 4, 0, 1, 0] + used_hiddenstates = [1, 1, 0, 0, 0, 0, 0] + elif cell_name == 'evol_net_h' or cell_name == 'amoeba_net_b': + operations = ['max_pool_2x2', 'max_pool_3x3', 'none', '3x3', + 'dil_2_separable_5x5_2', 'max_pool_3x3', 'none', + 'separable_3x3_2', 'avg_pool_3x3', '1x1'] + hiddenstate_indices = [0, 0, 2, 1, 2, 2, 3, 1, 4, 3] + used_hiddenstates = [1, 1, 1, 1, 1, 0, 0] + elif cell_name == 'evol_net_a' or cell_name == 'amoeba_net_c': + operations = ['max_pool_3x3', 'max_pool_3x3', 'separable_7x7_2', + 'separable_3x3_2', 'separable_7x7_2', 'max_pool_3x3', + 'separable_5x5_2', 'separable_5x5_2', 'max_pool_3x3', + 'separable_3x3_2'] + hiddenstate_indices = [0, 0, 2, 0, 0, 1, 4, 4, 1, 1] + used_hiddenstates = [0, 1, 0, 0, 0, 0, 0] + elif cell_name == 'evol_net_x' or cell_name == 'amoeba_net_d': + operations = ['max_pool_2x2', 'max_pool_3x3', 'none', '3x3', '1x7_7x1', + 'max_pool_3x3', 'none', 'max_pool_2x2', 'avg_pool_3x3', + '1x1'] + hiddenstate_indices = [0, 0, 2, 1, 2, 2, 3, 1, 2, 3] + used_hiddenstates = [1, 1, 1, 1, 0, 0, 0] + else: + raise ValueError('Unsupported cell name: %s.' % cell_name) + return operations, hiddenstate_indices, used_hiddenstates + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/modelzoo_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..382bc227b3d2fdc0af914f620b00713f84048c85 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/modelzoo_level.txt @@ -0,0 +1,6 @@ +GPUStatus:OK +NPUMigrationStatus:POK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:PERFECT diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/network_utils.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/network_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ae5e85fac514e00f1c34d644f9e3172c52691c23 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/network_utils.py @@ -0,0 +1,791 @@ + + + + +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A custom module for some common operations used by AmoebaNet. + +Functions exposed in this file: +- bp16_getter +- calc_reduction_layers +- get_channel_index +- get_channel_dim +- global_avg_pool +- factorized_reduction +- drop_path + +Classes exposed in this file: +- BaseCell +""" + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * +from npu_bridge.estimator import npu_ops + +import tensorflow.compat.v1 as tf + +from tensorflow.contrib import framework as contrib_framework +from tensorflow.contrib import slim +from tensorflow.contrib.framework.python.ops import add_arg_scope +from tensorflow.contrib.tpu.python.ops import tpu_ops +from tensorflow.python.training import moving_averages # pylint: disable=g-direct-tensorflow-import + +DATA_FORMAT_NCHW = 'NCHW' +DATA_FORMAT_NHWC = 'NHWC' +INVALID = 'null' + + +def cross_replica_average(inputs, num_shards, distributed_group_size): + """Calculates the average value of inputs tensor across TPU replicas.""" + group_assignment = None + if num_shards is not None and distributed_group_size != num_shards: + group_size = distributed_group_size + group_assignment = [] + for g in range(num_shards // group_size): + replica_ids = [g * group_size + i for i in range(group_size)] + group_assignment.append(replica_ids) + + return tpu_ops.cross_replica_sum(inputs, group_assignment) / tf.cast( + distributed_group_size, inputs.dtype) + + +def bp16_getter(getter, *args, **kwargs): + """Returns a custom getter that this class's methods must be called.""" + cast_to_bfloat16 = False + requested_dtype = kwargs['dtype'] + if requested_dtype == tf.bfloat16: + # Keep a master copy of weights in fp32 and cast to bp16 when the weights + # are used. + kwargs['dtype'] = tf.float32 + cast_to_bfloat16 = True + var = getter(*args, **kwargs) + # This if statement is needed to guard the cast, because batch norm + # assigns directly to the return value of this custom getter. The cast + # makes the return value not a variable so it cannot be assigned. Batch + # norm variables are always in fp32 so this if statement is never + # triggered for them. 
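+  # Usage note (added comment): this getter is intended to be passed as
+  # tf.variable_scope(..., custom_getter=bp16_getter), as model_builder.py
+  # does for the cell and final-layer scopes, so variables requested in
+  # bfloat16 are stored as float32 masters and only cast when read.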
+ if cast_to_bfloat16: + var = tf.cast(var, tf.bfloat16) + return var + + +def calc_reduction_layers(num_cells, num_reduction_layers): + """Figure out what layers should have reductions.""" + reduction_layers = [] + for pool_num in range(1, num_reduction_layers + 1): + layer_num = (float(pool_num) / (num_reduction_layers + 1)) * num_cells + layer_num = int(layer_num) + reduction_layers.append(layer_num) + return reduction_layers + + +@add_arg_scope +def get_channel_index(data_format=INVALID): + assert data_format != INVALID + axis = 3 if data_format == 'NHWC' else 1 + return axis + + +@add_arg_scope +def get_channel_dim(shape, data_format=INVALID): + assert data_format != INVALID + assert len(shape) == 4 + if data_format == 'NHWC': + return int(shape[3]) + elif data_format == 'NCHW': + return int(shape[1]) + else: + raise ValueError('Not a valid data_format', data_format) + + +@add_arg_scope +def global_avg_pool(x, data_format=INVALID): + """Average pool away the height and width spatial dimensions of x.""" + assert data_format != INVALID + assert data_format in ['NHWC', 'NCHW'] + assert x.shape.ndims == 4 + if data_format == 'NHWC': + return tf.reduce_mean(x, [1, 2]) + else: + return tf.reduce_mean(x, [2, 3]) + + +@add_arg_scope +def factorized_reduction(net, output_filters, stride, data_format=INVALID): + """Reduces the shape of net without information loss due to striding.""" + assert output_filters % 2 == 0, ( + 'Need even number of filters when using this factorized reduction.') + assert data_format != INVALID + if stride == 1: + net = slim.conv2d(net, output_filters, 1, scope='path_conv') + net = batch_norm(net, scope='path_bn') + return net + if data_format == 'NHWC': + stride_spec = [1, stride, stride, 1] + else: + stride_spec = [1, 1, stride, stride] + + # Skip path 1 + path1 = tf.nn.avg_pool( + net, [1, 1, 1, 1], stride_spec, 'VALID', data_format=data_format) + path1 = slim.conv2d(path1, int(output_filters / 2), 1, scope='path1_conv') + + # Skip path 2 + # First pad with 0's on the right and bottom, then shift the filter to + # include those 0's that were added. + if data_format == 'NHWC': + pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]] + path2 = tf.pad(net, pad_arr)[:, 1:, 1:, :] + concat_axis = 3 + else: + pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]] + path2 = tf.pad(net, pad_arr)[:, :, 1:, 1:] + concat_axis = 1 + + path2 = tf.nn.avg_pool( + path2, [1, 1, 1, 1], stride_spec, 'VALID', data_format=data_format) + path2 = slim.conv2d(path2, int(output_filters / 2), 1, scope='path2_conv') + + # Concat and apply BN + final_path = tf.concat(values=[path1, path2], axis=concat_axis) + final_path = batch_norm(final_path, scope='final_path_bn') + return final_path + + +@add_arg_scope +def drop_path(net, keep_prob, is_training=True): + """Drops out a whole example hiddenstate with the specified probability.""" + if is_training: + batch_size = tf.shape(net)[0] + noise_shape = [batch_size, 1, 1, 1] + keep_prob = tf.cast(keep_prob, dtype=net.dtype) + random_tensor = keep_prob + random_tensor += tf.random_uniform(noise_shape, dtype=net.dtype) + binary_tensor = tf.floor(random_tensor) + net = tf.div(net, keep_prob) * binary_tensor + return net + + +def _operation_to_filter_shape(operation): + splitted_operation = operation.split('x') + filter_shape = int(splitted_operation[0][-1]) + assert filter_shape == int( + splitted_operation[1][0]), 'Rectangular filters not supported.' 
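+  # Worked example (comment only): 'separable_5x5_2'.split('x') yields
+  # ['separable_5', '5_2'], so filter_shape is 5; the assert above rejects
+  # non-square kernels.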
+ return filter_shape + + +def _operation_to_num_layers(operation): + splitted_operation = operation.split('_') + if 'x' in splitted_operation[-1]: + return 1 + return int(splitted_operation[-1]) + + +def _operation_to_info(operation): + """Takes in operation name and returns meta information. + + An example would be 'separable_3x3_4' -> (3, 4). + + Args: + operation: String that corresponds to convolution operation. + + Returns: + Tuple of (filter shape, num layers). + """ + num_layers = _operation_to_num_layers(operation) + filter_shape = _operation_to_filter_shape(operation) + return num_layers, filter_shape + + +def _stacked_separable_conv(net, stride, operation, filter_size): + """Takes in an operations and parses it to the correct sep operation.""" + num_layers, kernel_size = _operation_to_info(operation) + for layer_num in range(num_layers - 1): + net = tf.nn.relu(net) + net = slim.separable_conv2d( + net, + filter_size, + kernel_size, + depth_multiplier=1, + scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1), + stride=stride) + net = batch_norm( + net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1)) + stride = 1 + net = tf.nn.relu(net) + net = slim.separable_conv2d( + net, + filter_size, + kernel_size, + depth_multiplier=1, + scope='separable_{0}x{0}_{1}'.format(kernel_size, num_layers), + stride=stride) + net = batch_norm( + net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, num_layers)) + return net + + +def _operation_to_pooling_type(operation): + """Takes in the operation string and returns the pooling type.""" + splitted_operation = operation.split('_') + return splitted_operation[0] + + +def _operation_to_pooling_shape(operation): + """Takes in the operation string and returns the pooling kernel shape.""" + splitted_operation = operation.split('_') + shape = splitted_operation[-1] + assert 'x' in shape + filter_height, filter_width = shape.split('x') + assert filter_height == filter_width + return int(filter_height) + + +def _operation_to_pooling_info(operation): + """Parses the pooling operation string to return its type and shape.""" + pooling_type = _operation_to_pooling_type(operation) + pooling_shape = _operation_to_pooling_shape(operation) + return pooling_type, pooling_shape + + +def _pooling(net, stride, operation): + """Parses operation and performs the correct pooling operation on net.""" + padding = 'SAME' + pooling_type, pooling_shape = _operation_to_pooling_info(operation) + if pooling_type == 'avg': + net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding=padding) + elif pooling_type == 'max': + net = slim.max_pool2d(net, pooling_shape, stride=stride, padding=padding) + elif pooling_type == 'min': + net = slim.max_pool2d(-1 * net, pooling_shape, stride=stride, + padding=padding) + net = -1 * net + else: + raise NotImplementedError('Unimplemented pooling type: ', pooling_type) + return net + + +class BaseCell(object): + """Base Cell class that is used as a 'layer' in image architectures. + + Args: + num_conv_filters: The number of filters for each convolution operation. + operations: List of operations that are performed in the AmoebaNet Cell in + order. + used_hiddenstates: Binary array that signals if the hiddenstate was used + within the cell. This is used to determine what outputs of the cell + should be concatenated together. + hiddenstate_indices: Determines what hiddenstates should be combined + together with the specified operations to create the AmoebaNet cell. 
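+    drop_path_keep_prob: Keep probability used for drop_path regularization;
+      values below 1.0 enable dropping whole cell paths during training.
+    total_num_cells: Total number of cells in the network, used to scale the
+      drop_path keep probability per layer.
+    drop_path_burn_in_steps: Number of global steps over which the drop_path
+      keep probability is annealed; 0 disables the schedule.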
+ """ + + def __init__(self, num_conv_filters, operations, used_hiddenstates, + hiddenstate_indices, drop_path_keep_prob, total_num_cells, + drop_path_burn_in_steps): + self._num_conv_filters = num_conv_filters + self._operations = operations + self._used_hiddenstates = used_hiddenstates + self._hiddenstate_indices = hiddenstate_indices + self._drop_path_keep_prob = drop_path_keep_prob + self._total_num_cells = total_num_cells + self._drop_path_burn_in_steps = drop_path_burn_in_steps + + def _reduce_prev_layer(self, prev_layer, curr_layer): + """Matches dimension of prev_layer to the curr_layer.""" + # Set the prev layer to the current layer if it is none + if prev_layer is None: + return curr_layer + curr_num_filters = self._filter_size + prev_num_filters = get_channel_dim(prev_layer.shape) + curr_filter_shape = int(curr_layer.shape[2]) + prev_filter_shape = int(prev_layer.shape[2]) + if curr_filter_shape != prev_filter_shape: + prev_layer = tf.nn.relu(prev_layer) + prev_layer = factorized_reduction( + prev_layer, curr_num_filters, stride=2) + elif curr_num_filters != prev_num_filters: + prev_layer = tf.nn.relu(prev_layer) + prev_layer = slim.conv2d( + prev_layer, curr_num_filters, 1, scope='prev_1x1') + prev_layer = batch_norm(prev_layer, scope='prev_bn') + return prev_layer + + def _cell_base(self, net, prev_layer): + """Runs the beginning of the conv cell before the predicted ops are run.""" + num_filters = self._filter_size + + # Check to be sure prev layer stuff is setup correctly + prev_layer = self._reduce_prev_layer(prev_layer, net) + + net = tf.nn.relu(net) + net = slim.conv2d(net, num_filters, 1, scope='1x1') + net = batch_norm(net, scope='beginning_bn') + split_axis = get_channel_index() + net = tf.split(axis=split_axis, num_or_size_splits=1, value=net) + for split in net: + assert int(split.shape[split_axis] == int(self._num_conv_filters * + self._filter_scaling)) + net.append(prev_layer) + return net + + def __call__(self, net, scope=None, filter_scaling=1, stride=1, + prev_layer=None, cell_num=-1): + """Runs the conv cell.""" + self._cell_num = cell_num + self._filter_scaling = filter_scaling + self._filter_size = int(self._num_conv_filters * filter_scaling) + + i = 0 + with tf.variable_scope(scope, custom_getter=bp16_getter): + net = self._cell_base(net, prev_layer) + for iteration in range(5): + with tf.variable_scope('comb_iter_{}'.format(iteration)): + left_hiddenstate_idx, right_hiddenstate_idx = ( + self._hiddenstate_indices[i], + self._hiddenstate_indices[i + 1]) + original_input_left = left_hiddenstate_idx < 2 + original_input_right = right_hiddenstate_idx < 2 + h1 = net[left_hiddenstate_idx] + h2 = net[right_hiddenstate_idx] + + operation_left = self._operations[i] + operation_right = self._operations[i+1] + i += 2 + # Apply conv operations + with tf.variable_scope('left'): + h1 = self._apply_operation(h1, operation_left, + stride, original_input_left) + with tf.variable_scope('right'): + h2 = self._apply_operation(h2, operation_right, + stride, original_input_right) + + # Combine hidden states using 'add'. + with tf.variable_scope('combine'): + h = h1 + h2 + + # Add hiddenstate to the list of hiddenstates we can choose from + net.append(h) + + with tf.variable_scope('cell_output'): + net = self._combine_unused_states(net) + + return net + + def _apply_conv_operation(self, net, operation, stride, filter_size): + """Takes in a hiddenstate and applies an operation to it. 
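+
+    Handles the plain convolution variants ('1x1', '3x3', '1x7_7x1',
+    '1x3_3x1' and the dilated_3x3_rate_* operations); separable convolutions
+    and pooling are dispatched separately by _apply_operation.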
+ + Args: + net: This is a hiddenstate from the network that will have an operation + applied to it. + operation: This is a string that specifies what operations should be + applied to net. + stride: Stride for the operations being passed in. + filter_size: Number of filters output from this operation. + + Returns: + The hiddenstate net after it had the operation passed in applied to it. + """ + + if operation == '1x1': + net = slim.conv2d(net, filter_size, 1, scope='1x1') + elif operation == '3x3': + net = slim.conv2d(net, filter_size, 3, scope='3x3', stride=stride) + elif operation == '1x7_7x1': + net = slim.conv2d(net, filter_size, [1, 7], scope='1x7', + stride=[1, stride]) + net = batch_norm(net, scope='bn_1x7_7x1') + net = tf.nn.relu(net) + net = slim.conv2d(net, filter_size, [7, 1], scope='7x1', + stride=[stride, 1]) + elif operation == '1x3_3x1': + net = slim.conv2d(net, filter_size, [1, 3], scope='1x3', + stride=[1, stride]) + net = batch_norm(net, scope='bn_1x3_3x1') + net = tf.nn.relu(net) + net = slim.conv2d(net, filter_size, [3, 1], scope='3x1', + stride=[stride, 1]) + elif operation in ['dilated_3x3_rate_2', 'dilated_3x3_rate_4', + 'dilated_3x3_rate_6']: + dilation_rate = int(operation.split('_')[-1]) + net = slim.conv2d( + net, + filter_size, + 3, + rate=dilation_rate, + stride=stride, + scope='dilated_3x3') + else: + raise NotImplementedError('Unimplemented conv operation: ', operation) + return net + + def _apply_operation(self, net, operation, + stride, is_from_original_input): + """Applies the predicted conv operation to net.""" + # Dont stride if this is not one of the original hiddenstates + if stride > 1 and not is_from_original_input: + stride = 1 + input_filters = get_channel_dim(net.shape) + filter_size = self._filter_size + if 'separable' in operation: + net = _stacked_separable_conv(net, stride, operation, filter_size) + elif operation in ['dilated_3x3_rate_2', 'dilated_3x3_rate_4', + 'dilated_3x3_rate_6', '3x3', '1x7_7x1', '1x3_3x1']: + if operation == '1x3_3x1': + reduced_filter_size = int(3 * filter_size / 8) + else: + reduced_filter_size = int(filter_size / 4) + if reduced_filter_size < 1: + # If the intermediate number of channels would be too small, then don't + # use a bottleneck layer. + net = tf.nn.relu(net) + net = self._apply_conv_operation(net, operation, stride, filter_size) + net = batch_norm(net, scope='bn') + else: + # Use a bottleneck layer. 
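+        # Added explanation: channels are first squeezed to
+        # reduced_filter_size with a 1x1 convolution, the main operation runs
+        # on the narrower tensor, and a closing 1x1 restores filter_size;
+        # each convolution is preceded by a ReLU and followed by batch norm.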
+ net = tf.nn.relu(net) + net = slim.conv2d(net, reduced_filter_size, 1, scope='pre_1x1') + net = batch_norm(net, scope='pre_bn') + net = tf.nn.relu(net) + net = self._apply_conv_operation( + net, operation, stride, reduced_filter_size) + net = batch_norm(net, scope='bn') + net = tf.nn.relu(net) + net = slim.conv2d(net, filter_size, 1, scope='post_1x1') + net = batch_norm(net, scope='post_bn') + elif operation in ['none', '1x1']: + # Check if a stride is needed, then use a strided 1x1 here + if stride > 1 or operation == '1x1' or (input_filters != filter_size): + net = tf.nn.relu(net) + net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1') + net = batch_norm(net, scope='bn_1') + elif 'pool' in operation: + net = _pooling(net, stride, operation) + if input_filters != filter_size: + net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1') + net = batch_norm(net, scope='bn_1') + else: + raise ValueError('Unimplemented operation', operation) + + if operation != 'none': + net = self._apply_drop_path(net) + + tf.logging.info('Net shape after {}: {}'.format(operation, net.shape)) + return net + + def _combine_unused_states(self, net): + """Concatenate the unused hidden states of the cell.""" + used_hiddenstates = self._used_hiddenstates + + final_height = int(net[-1].shape[2]) + final_num_filters = get_channel_dim(net[-1].shape) + assert len(used_hiddenstates) == len(net) + for idx, used_h in enumerate(used_hiddenstates): + curr_height = int(net[idx].shape[2]) + curr_num_filters = get_channel_dim(net[idx].shape) + + # Determine if a reduction should be applied to make the number of + # filters match. + should_reduce = final_num_filters != curr_num_filters + should_reduce = (final_height != curr_height) or should_reduce + should_reduce = should_reduce and not used_h + if should_reduce: + stride = 2 if final_height != curr_height else 1 + with tf.variable_scope('reduction_{}'.format(idx)): + net[idx] = factorized_reduction( + net[idx], final_num_filters, stride) + + states_to_combine = ( + [h for h, is_used in zip(net, used_hiddenstates) if not is_used]) + + # Return the concat of all the states + concat_axis = get_channel_index() + net = tf.concat(values=states_to_combine, axis=concat_axis) + return net + + @add_arg_scope # No public API. For internal use only. + def _apply_drop_path(self, net, current_step=None, + drop_connect_version='v1'): + """Apply drop_path regularization. + + Args: + net: the Tensor that gets drop_path regularization applied. + current_step: a float32 Tensor with the current global_step value, + to be divided by hparams.drop_path_burn_in_steps. Usually None, which + defaults to tf.train.get_or_create_global_step() properly casted. + drop_connect_version: one of 'v1', 'v2', 'v3', controlling whether + the dropout rate is scaled by current_step (v1, the default), + layer (v2), or both (v3). + + Returns: + The dropped-out value of `net`. 
+ """ + drop_path_keep_prob = self._drop_path_keep_prob + if drop_path_keep_prob < 1.0: + assert drop_connect_version in ['v1', 'v2', 'v3'] + if drop_connect_version in ['v2', 'v3']: + # Scale keep prob by layer number + assert self._cell_num != -1 + # The added 2 is for the reduction cells + num_cells = self._total_num_cells + layer_ratio = (self._cell_num + 1)/float(num_cells) + drop_path_keep_prob = 1 - layer_ratio * (1 - drop_path_keep_prob) + if drop_connect_version in ['v1', 'v3']: + if self._drop_path_burn_in_steps: + # Decrease the keep probability over time + current_step = ( + current_step or + tf.cast(tf.train.get_or_create_global_step(), tf.float32)) + current_ratio = current_step / self._drop_path_burn_in_steps + current_ratio = tf.minimum(1.0, current_ratio) + drop_path_keep_prob = (1 - current_ratio * (1 - drop_path_keep_prob)) + net = drop_path(net, drop_path_keep_prob) + return net + + +# TODO(huangyp): find out the difference and use the layers batch_norm. +@add_arg_scope +def batch_norm(inputs, + decay=0.999, + center=True, + scale=False, + epsilon=0.001, + moving_vars='moving_vars', + activation_fn=None, + is_training=True, + data_format='NHWC', + reuse=None, + num_shards=None, + distributed_group_size=1, + scope=None): + """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167. + + "Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift" + + Sergey Ioffe, Christian Szegedy + + Can be used as a normalizer function for conv2d and fully_connected. + + Note: When is_training is True the moving_mean and moving_variance need to be + updated, by default the update_ops are placed in `tf.GraphKeys.UPDATE_OPS` so + they need to be added as a dependency to the `train_op`, example: + + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + if update_ops: + updates = tf.group(*update_ops) + total_loss = control_flow_ops.with_dependencies([updates], total_loss) + + One can set updates_collections=None to force the updates in place, but that + can have speed penalty, especially in distributed settings. + + Args: + inputs: A tensor with 2 or more dimensions, where the first dimension has + `batch_size`. The normalization is over all but the last dimension if + `data_format` is `NHWC` and the second dimension if `data_format` is + `NCHW`. + decay: Decay for the moving average. Reasonable values for `decay` are close + to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc. + Lower `decay` value (recommend trying `decay`=0.9) if model experiences + reasonably good training performance but poor validation and/or test + performance. + center: If True, add offset of `beta` to normalized tensor. If False, + `beta` is ignored. + scale: If True, multiply by `gamma`. If False, `gamma` is + not used. When the next layer is linear (also e.g. `nn.relu`), this can be + disabled since the scaling can be done by the next layer. + epsilon: Small float added to variance to avoid dividing by zero. + moving_vars: Name of collection created for moving variables. + activation_fn: Activation function, default set to None to skip it and + maintain a linear activation. + is_training: Whether or not the layer is in training mode. In training mode + it would accumulate the statistics of the moments into `moving_mean` and + `moving_variance` using an exponential moving average with the given + `decay`. When it is not in training mode then it would use the values of + the `moving_mean` and the `moving_variance`. 
+ data_format: input data format. NHWC or NCHW + reuse: Whether or not the layer and its variables should be reused. To be + able to reuse the layer scope must be given. + num_shards: Number of shards that participate in the global + reduction. Default is set to None, that will skip the cross replica sum in + and normalize across local examples only. + distributed_group_size: Number of replicas to normalize across in the + distributed batch normalization. + scope: Optional scope for `variable_scope`. + + Returns: + A `Tensor` representing the output of the operation. + + Raises: + ValueError: If the rank of `inputs` is undefined. + ValueError: If the rank of `inputs` is neither 2 or 4. + ValueError: If rank or `C` dimension of `inputs` is undefined. + """ + trainable = True + + with tf.variable_scope(scope, 'BatchNorm', [inputs], reuse=reuse): + inputs = tf.convert_to_tensor(inputs) + original_shape = inputs.get_shape() + original_rank = original_shape.ndims + if original_rank is None: + raise ValueError('Inputs %s has undefined rank' % inputs.name) + elif original_rank not in [2, 4]: + raise ValueError('Inputs %s has unsupported rank.' + ' Expected 2 or 4 but got %d' % (inputs.name, + original_rank)) + if original_rank == 2: + channels = inputs.get_shape()[-1].value + if channels is None: + raise ValueError('`C` dimension must be known but is None') + new_shape = [-1, 1, 1, channels] + + if data_format == 'NCHW': + new_shape = [-1, channels, 1, 1] + inputs = tf.reshape(inputs, new_shape) + inputs_shape = inputs.get_shape() + if data_format == 'NHWC': + params_shape = inputs_shape[-1:] + else: + params_shape = inputs_shape[1:2] + if not params_shape.is_fully_defined(): + raise ValueError('Inputs %s has undefined `C` dimension %s.' % + (inputs.name, params_shape)) + + # Allocate parameters for the beta and gamma of the normalization. + trainable_beta = trainable and center + collections = [tf.GraphKeys.MODEL_VARIABLES, tf.GraphKeys.GLOBAL_VARIABLES] + beta = contrib_framework.variable( + 'beta', + params_shape, + collections=collections, + initializer=tf.zeros_initializer(), + trainable=trainable_beta) + trainable_gamma = trainable and scale + gamma = contrib_framework.variable( + 'gamma', + params_shape, + collections=collections, + initializer=tf.ones_initializer(), + trainable=trainable_gamma) + + # Create moving_mean and moving_variance variables and add them to the + # appropiate collections. + moving_collections = [moving_vars, + tf.GraphKeys.MOVING_AVERAGE_VARIABLES, + tf.GraphKeys.MODEL_VARIABLES, + tf.GraphKeys.GLOBAL_VARIABLES] + # Disable partition setting for moving_mean and moving_variance + # as assign_moving_average op below doesn't support partitioned variable. + scope = tf.get_variable_scope() + partitioner = scope.partitioner + scope.set_partitioner(None) + moving_mean = contrib_framework.variable( + 'moving_mean', + params_shape, + initializer=tf.zeros_initializer(), + trainable=False, + collections=moving_collections) + moving_variance = contrib_framework.variable( + 'moving_variance', + params_shape, + initializer=tf.ones_initializer(), + trainable=False, + collections=moving_collections) + # Restore scope's partitioner setting. 
+ scope.set_partitioner(partitioner) + + # Add cross replica sum to do subset mean and variance calculation + # First compute mean and variance + if is_training: + if distributed_group_size > 1: + # Execute a distributed batch normalization + if data_format == 'NCHW': + axis = 1 + else: + axis = 3 + input_shape = inputs.get_shape() + inputs_dtype = inputs.dtype + inputs = tf.cast(inputs, tf.float32) + ndims = len(input_shape) + reduction_axes = [i for i in range(ndims) if i != axis] + counts, mean_ss, variance_ss, _ = tf.nn.sufficient_statistics( + inputs, reduction_axes, keep_dims=False) + mean_ss = cross_replica_average(mean_ss, num_shards, + distributed_group_size) + variance_ss = cross_replica_average(variance_ss, num_shards, + distributed_group_size) + mean, variance = tf.nn.normalize_moments( + counts, mean_ss, variance_ss, shift=None) + outputs = tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, + epsilon) + outputs = tf.cast(outputs, inputs_dtype) + else: + with tf.compat.forward_compatibility_horizon(2019, 5, 1): + outputs, mean, variance = tf.nn.fused_batch_norm(inputs, gamma, beta, epsilon=epsilon, data_format=data_format) + else: + with tf.compat.forward_compatibility_horizon(2019, 5, 1): + outputs, mean, variance = tf.nn.fused_batch_norm( + inputs, + gamma, + beta, + mean=moving_mean, + variance=moving_variance, + epsilon=epsilon, + is_training=False, + data_format=data_format) + + if is_training: + update_moving_mean = moving_averages.assign_moving_average( + moving_mean, + tf.cast(mean, moving_mean.dtype), + decay, + zero_debias=False) + update_moving_variance = moving_averages.assign_moving_average( + moving_variance, + tf.cast(variance, moving_variance.dtype), + decay, + zero_debias=False) + tf.add_to_collection('update_ops', update_moving_mean) + tf.add_to_collection('update_ops', update_moving_variance) + outputs.set_shape(inputs_shape) + if original_shape.ndims == 2: + outputs = tf.reshape(outputs, original_shape) + if activation_fn is not None: + outputs = activation_fn(outputs) + return outputs + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/network_utils_test.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/network_utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..62c24ad582a5cd8473bdf884be675a8309e5dfbe --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/network_utils_test.py @@ -0,0 +1,88 @@ + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for slim.nets.nasnet.nasnet_utils.""" + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * + +import tensorflow.compat.v1 as tf + +import network_utils + + +class NetworkUtilsTest(tf.test.TestCase): + + def testCalcReductionLayers(self): + num_cells = 18 + num_reduction_layers = 2 + reduction_layers = network_utils.calc_reduction_layers( + num_cells, num_reduction_layers) + self.assertEqual(len(reduction_layers), 2) + self.assertEqual(reduction_layers[0], 6) + self.assertEqual(reduction_layers[1], 12) + + def testGetChannelIndex(self): + data_formats = ['NHWC', 'NCHW'] + for data_format in data_formats: + index = network_utils.get_channel_index(data_format) + correct_index = 3 if data_format == 'NHWC' else 1 + self.assertEqual(index, correct_index) + + def testGetChannelDim(self): + data_formats = ['NHWC', 'NCHW'] + shape = [10, 20, 30, 40] + for data_format in data_formats: + dim = network_utils.get_channel_dim(shape, data_format) + correct_dim = shape[3] if data_format == 'NHWC' else shape[1] + self.assertEqual(dim, correct_dim) + + def testGlobalAvgPool(self): + data_formats = ['NHWC', 'NCHW'] + inputs = tf.placeholder(tf.float32, (5, 10, 20, 10)) + for data_format in data_formats: + output = network_utils.global_avg_pool( + inputs, data_format) + self.assertEqual(output.shape, [5, 10]) + + +if __name__ == '__main__': + tf.test.main() + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/online_inference_testcase.sh b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/online_inference_testcase.sh new file mode 100644 index 0000000000000000000000000000000000000000..fa4b6006f8e57b05da4bb5ab04f6241b22bc5725 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/online_inference_testcase.sh @@ -0,0 +1,40 @@ +#! /bin/bash + +#使用ImageNet2012数据集,配置DATA_DIR数据集路径和MODEL_DIR保存checkpoint路径 +DATA_DIR=/home/test_user03/tf_records/ +MODEL_DIR=/home/test_user03/hh + +#开发者个人独立预置的数据集、预训练模型、ATC-OM模型等,支持从OBS仓下载 +#obsutil cp obs://obsxxx/xxx/xxx.ckpt ./model/ -f -r + +#testcase主体,开发者根据不同模型写作 +python3 amoeba_net.py \ + --data_dir=${DATA_DIR} \ + --model_dir=${MODEL_DIR} \ + --num_cells=6 \ + --image_size=224 \ + --num_epochs=1 \ + --train_batch_size=64 \ + --eval_batch_size=64 \ + --lr=2.56 \ + --lr_decay_value=0.88 \ + --lr_warmup_epochs=0.35 \ + --mode=predict \ + --iterations_per_loop=1251 \ + --num_train_images=10000 \ + --num_eval_images=1000 \ + --eval_timeout=10 \ + > predict.log 2>&1 +#在线推理测试用例只推理1000张图片,并保存打印信息至predict.log + +#结果判断,功能检查输出ckpt/日志关键字、精度检查loss值/accucy关键字、性能检查耗时打点/ThroughOutput等关键字 +key1="Restoring parameters from" #功能检查字 +key2="Inference speed =" #性能检查字 + + + +if [ `grep -c "$key1" "predict.log"` -ne '0' ] && [ `grep -c "$key2" "predict.log"` -ne '0' ];then #可以根据需要调整检查逻辑 + echo "Run testcase success!" +else + echo "Run testcase failed!" 
+fi \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/requirements.txt b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/tf_hub.py b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/tf_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..7b7fa094de5b7e665201a778f9f2422d8de63c06 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/tf_hub.py @@ -0,0 +1,361 @@ + + + +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Provide functionalities to export and eval tf_hub module. 
+ +Example use of export_to_hub: + + tf_hub.py --tf_hub_mode='export_to_hub' --cell_name=amoeba_net_a \ + --reduction_size=448 --num_cells=18 --image_size=331 \ + --drop_connect_keep_prob=0.7 \ + --export_path=/tmp/module_export \ + --model_dir=/ADD_PATH_WITH_1001_CLASSES_HERE \ + --alsologtostderr + +Example use of eval_from_hub + tf_hub.py --tf_hub_mode='eval_from_hub' --cell_name=amoeba_net_a \ + --reduction_size=448 --num_cells=18 --image_size=331 \ + --export_path=/tmp/module_export \ + --data_dir=/ADD_DATA_PATH_HERE \ + --model_dir=/ADD_PATH_WITH_1001_CLASSES_HERE \ + --eval_batch_size=40 --alsologtostderr +""" + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import faulthandler +faulthandler.enable() + + +from npu_bridge.npu_init import * +from npu_bridge.estimator import npu_ops + +import re + +# Standard Imports +from absl import app +from absl import flags +import tensorflow.compat.v1 as tf +import tensorflow_hub as hub + +import amoeba_net +import amoeba_net_model as model_lib +import model_builder +from tensorflow.contrib import slim + +flags.DEFINE_string('tf_hub_mode', 'export_to_hub', + 'export_to_hub|eval_from_hub') + +flags.DEFINE_string('export_path', None, + 'Directory where export output is stored') + +flags.DEFINE_bool( + 'export_feature_vector', False, + 'If true, network builder only returns feature vector after global_pool ' + 'without the fully connected layer.') + +flags.DEFINE_bool( + 'dryrun_with_untrained_weights', None, + 'FOR TESTING USE ONLY. If set, export_to_hub is done without restoring ' + 'the model\'s trained weights. This helps test the Python code quickly but ' + 'makes the resulting module useless.') + + +FLAGS = flags.FLAGS + + +def _make_module_fn(hparams, num_classes): + """Returns a module_fn for use with hub.create_module_spec().""" + + def _module_fn(is_training): + """A module_fn for use with hub.create_module_spec(). + + Args: + is_training: a boolean, passed to the config.network_fn. + This is meant to control whether batch norm, dropout etc. are built + in training or inference mode for this graph version. + + Raises: + ValueError: if network_fn outputs are not as expected. + """ + # Set up the module input, and attach an ImageModuleInfo about it. + with tf.name_scope('hub_input'): + default_size = (hparams.image_size,) * 2 + image_module_info = hub.ImageModuleInfo() + size_info = image_module_info.default_image_size + size_info.height, size_info.width = default_size + # TODO(b/72731449): Support variable input size. + shape = (None,) + default_size + (3,) + images = tf.placeholder(dtype=tf.float32, shape=shape, name='images') + hub.attach_image_module_info(image_module_info) + # The input is expected to have RGB color values in the range [0,1] + # and gets converted for AmoebaNet to the Inception-style range [-1,+1]. + scaled_images = tf.multiply(images, 2.0) + scaled_images = tf.subtract(scaled_images, 1.0) + + # Build the net. + logits, end_points = model_builder.build_network(scaled_images, num_classes, + is_training, hparams) + + with tf.name_scope('hub_output'): + # Extract the feature_vectors output. 
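+      # Added note: 'global_pool' is filled in by model_builder right after
+      # network_utils.global_avg_pool, so feature_vectors is expected to be a
+      # rank-2 tensor of shape [batch_size, num_features].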
+ try: + feature_vectors = end_points['global_pool'] + except KeyError: + tf.logging.error('Valid keys of end_points are:', ', '.join(end_points)) + raise + with tf.name_scope('feature_vector'): + if feature_vectors.shape.ndims != 2: + raise ValueError( + 'Wrong rank (expected 2 after squeeze) ' + 'in feature_vectors:', feature_vectors) + # Extract the logits output (if applicable). + if num_classes: + with tf.name_scope('classification'): + if logits.shape.ndims != 2: + raise ValueError('Wrong rank (expected 2) in logits:', logits) + + # Add named signatures. + hub.add_signature('image_feature_vector', dict(images=images), + dict(end_points, default=feature_vectors)) + if num_classes: + hub.add_signature('image_classification', dict(images=images), + dict(end_points, default=logits)) + # Add the default signature. + if num_classes: + hub.add_signature('default', dict(images=images), dict(default=logits)) + else: + hub.add_signature('default', dict(images=images), + dict(default=feature_vectors)) + return _module_fn + + +def export_to_hub(checkpoint_path, export_path, num_classes, hparams): + """Exports the network as a TF-Hub Module. + + If a positive integer num_classes is given, a module for image classification + is exported. If num_classes is 0 or None, a module for feature vector + extraction is exported. In both cases, the default signature returns + a default output that matches the Python slim API net, _ = network_fn(...). + + Args: + checkpoint_path: a string with the file name of the checkpoint from which + the trained weights are copied into the Module. + FOR TESTING USE ONLY, this can be set to empty or None, to skip + restoring weights, which ignores the checkpoint and copies the random + initializer values of the weights instead. + export_path: a string with the directory to pass to hub.Module.export(). + num_classes: an integer with the number of classes for which the given + checkpoint has been trained. If 0 or None, the classification layer + is omitted. + hparams: hyper parameters. + """ + module_fn = _make_module_fn(hparams, num_classes) + tags_and_args = [ + # The default graph is built with batch_norm, dropout etc. in inference + # mode. This graph version is good for inference, not training. + ([], { + 'is_training': False + }), + # A separate 'train' graph builds batch_norm, dropout etc. in training + # mode. 
+ (['train'], { + 'is_training': True + }), + ] + drop_collections = [ + 'moving_vars', tf.GraphKeys.GLOBAL_STEP, + tf.GraphKeys.MOVING_AVERAGE_VARIABLES + ] + spec = hub.create_module_spec(module_fn, tags_and_args, drop_collections) + + with tf.Graph().as_default(): + module = hub.Module(spec) + init_fn = _get_init_fn( + module, + checkpoint_path, + hparams.moving_average_decay > 0, + moving_averages_blacklist_regex='global_step') + config = tf.ConfigProto() + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + + config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # 必须显式关闭 + config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF # 必须显式关闭 + with tf.Session(config=config) as session: + init_fn(session) + module.export(export_path, session=session) + + tf.logging.info('Export to {} succeeded.'.format(export_path)) + + +def _get_init_fn(module, + checkpoint_path, + export_moving_averages=False, + moving_averages_blacklist_regex=None): + """Returns init_fn for the session that calls hub.Module.export().""" + if not checkpoint_path: + tf.logging.warn('DRYRUN: using random weight initializers, no checkpoint') + return lambda session: session.run(tf.global_variables_initializer()) + + # Build `variables_to_restore` as a map from names in the checkpoint to the + # variable in the instantiated module. + if export_moving_averages: + variables_to_restore = {} + num_averaged = num_blacklisted = 0 + for variable_name, variable in module.variable_map.items(): + if (moving_averages_blacklist_regex and + re.match(moving_averages_blacklist_regex, variable_name)): + num_blacklisted += 1 + else: + variable_name += '/ExponentialMovingAverage' + num_averaged += 1 + variables_to_restore[variable_name] = variable + tf.logging.info('Export of moving averages is applied to %d variables ' + 'with %d exempted by matching the blacklist_regex' % + (num_averaged, num_blacklisted)) + else: + variables_to_restore = module.variable_map + tf.logging.info('Export of moving averages is disabled') + + unchecked_init_fn = slim.assign_from_checkpoint_fn(checkpoint_path, + variables_to_restore) + def init_fn(session): + unchecked_init_fn(session) + _check_shapes_of_restored_variables(session, variables_to_restore) + + return init_fn + + +def _check_shapes_of_restored_variables(session, variables_to_restore): + """Raises TypeError if restored variables have unexpected shapes.""" + num_errors = 0 + for variable_name, variable in variables_to_restore.items(): + graph_shape = variable.value().shape + # Values are big, but tf.shape(..) whould echo graph_shape if fully defined. + checkpoint_shape = session.run(variable.value()).shape + if not graph_shape.is_compatible_with(checkpoint_shape): + tf.logging.error('Shape mismatch for variable %s: ' + 'graph expects %s but checkpoint has %s' % + (variable_name, graph_shape, checkpoint_shape)) + num_errors += 1 + if num_errors: + raise TypeError( + 'Shape mismatch for %d variables, see error log for list.' 
% num_errors) + + +def _make_model_fn(hub_module_spec): + """Returns a model_fn for estimator using hub_module.""" + + def _model_fn(features, labels, mode, params): + """model_fn for estimator.""" + del params + features = tf.transpose(features, [3, 0, 1, 2]) # HWCN to NHWC + hub_module = hub.Module(spec=hub_module_spec, trainable=False) + logits = hub_module(features) + labels_onehot = tf.one_hot(labels, logits.shape[1]) + loss = tf.losses.softmax_cross_entropy(labels_onehot, logits) + + eval_metric_ops = None + + def metric_fn(labels, logits): + """Evaluation metric fn. Performed on CPU, do not reference TPU ops.""" + predictions = tf.argmax(logits, axis=1) + top_1_accuracy = tf.metrics.accuracy(labels, predictions) + in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32) + top_5_accuracy = tf.metrics.mean(in_top_5) + + return { + 'top_1_accuracy': top_1_accuracy, + 'top_5_accuracy': top_5_accuracy, + } + + eval_metric_ops = metric_fn(labels, logits) + return tf.estimator.EstimatorSpec( + mode=mode, loss=loss, train_op=None, eval_metric_ops=eval_metric_ops) + + return _model_fn + + +def eval_from_hub(model_dir, input_fn, eval_steps): + """Eval using hub module.""" + hub_module_spec = hub.load_module_spec(model_dir) + npu_config = NPURunConfig(model_dir=model_dir, save_summary_steps=0) + #run_config = tf.estimator.RunConfig(model_dir=model_dir, save_summary_steps=0) + image_classifier = NPUEstimator( + model_fn=_make_model_fn(hub_module_spec), config=npu_config, params={}) + eval_results = image_classifier.evaluate(input_fn=input_fn, steps=eval_steps) + tf.logging.info('Evaluation results: %s' % eval_results) + + +def main(_): + mode = FLAGS.tf_hub_mode + data_dir = amoeba_net.FLAGS.data_dir + model_dir = amoeba_net.FLAGS.model_dir + hparams = amoeba_net.build_hparams() + hparams.add_hparam('drop_path_burn_in_steps', 0) + hparams.set_hparam('use_tpu', False) + # So far, there is no standardized way of exposing aux heads for + # fine-tuning Hub image modules. Disable aux heads to avoid putting unused + # variables and ops into the module. + hparams.set_hparam('use_aux_head', False) + eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size + export_path = FLAGS.export_path or (model_dir + '/export') + + input_pipeline = model_lib.InputPipeline( + is_training=False, data_dir=data_dir, hparams=hparams, eval_from_hub=True) + + if mode == 'eval_from_hub': + eval_from_hub(export_path, input_pipeline.input_fn, eval_steps=eval_steps) + elif mode == 'export_to_hub': + num_classes = (None if FLAGS.export_feature_vector else + input_pipeline.num_classes) + + if FLAGS.dryrun_with_untrained_weights: + checkpoint_path = None + else: + checkpoint_path = tf.train.latest_checkpoint(model_dir) + if not checkpoint_path: + raise IOError('No checkpoint found.') + export_to_hub( + checkpoint_path, export_path, num_classes, hparams) + else: + raise ValueError('Unsupported tf_hub_mode = {}'.format(mode)) + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + app.run(main) + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_full_1p.sh b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_full_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..a7123af1512948f4f6fef3b04a7afeb714d31f11 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_full_1p.sh @@ -0,0 +1,21 @@ +#! 
/bin/bash + +DATA_DIR=/home/test_user03/tf_records/ +MODEL_DIR=/home/test_user03/xx + + +nohup python3 amoeba_net.py \ + --data_dir=${DATA_DIR} \ + --model_dir=${MODEL_DIR} \ + --num_cells=6 \ + --image_size=224 \ + --num_epochs=35 \ + --train_batch_size=64 \ + --eval_batch_size=64 \ + --lr=2.56 \ + --lr_decay_value=0.88 \ + --lr_warmup_epochs=0.35 \ + --mode=train_and_eval \ + --iterations_per_loop=1251 \ + > train_full_1p.log 2>&1 & + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_performance_1p.sh b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..2a0c81757f37b08615f8df3c9e2a4a6ee2202445 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_performance_1p.sh @@ -0,0 +1,21 @@ +#! /bin/bash + +DATA_DIR=/home/test_user03/tf_records/ +MODEL_DIR=/home/test_user03/xx + + +nohup python3 amoeba_net.py \ + --data_dir=${DATA_DIR} \ + --model_dir=${MODEL_DIR} \ + --num_cells=6 \ + --image_size=224 \ + --num_epochs=35 \ + --train_batch_size=64 \ + --eval_batch_size=64 \ + --lr=2.56 \ + --lr_decay_value=0.88 \ + --lr_warmup_epochs=0.35 \ + --mode=train \ + --iterations_per_loop=1251 \ + > train_performance_1p.log 2>&1 & + diff --git a/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_testcase.sh b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_testcase.sh new file mode 100644 index 0000000000000000000000000000000000000000..1dd6d0781710871ed6d00aaf50286b22c060aaf1 --- /dev/null +++ b/TensorFlow/contrib/cv/AmoebaNet-D_ID2073_for_TensorFlow/train_testcase.sh @@ -0,0 +1,39 @@ +#! /bin/bash + +#使用ImageNet2012数据集,配置DATA_DIR数据集路径和MODEL_DIR保存checkpoint路径 +DATA_DIR=./tf_records/ +MODEL_DIR=./ckpt/ + +#开发者个人独立预置的数据集、预训练模型、ATC-OM模型等,支持从OBS仓下载 +obsutil cp obs://amoebanet/tf_records_testcase/ ./tf_records/ -f -r + +#testcase主体,开发者根据不同模型写作 +python3 amoeba_net.py \ + --data_dir=${DATA_DIR} \ + --model_dir=${MODEL_DIR} \ + --num_cells=6 \ + --image_size=224 \ + --num_epochs=1 \ + --train_batch_size=64 \ + --eval_batch_size=64 \ + --lr=2.56 \ + --lr_decay_value=0.88 \ + --lr_warmup_epochs=0.35 \ + --mode=train \ + --iterations_per_loop=1251 \ + --num_train_images=10000 \ + --num_eval_images=1000 \ + > train.log 2>&1 +#训练测试用例只训练10000张图片,并保存打印信息至train.log + +#结果判断,功能检查输出ckpt/日志关键字、精度检查loss值/accucy关键字、性能检查耗时打点/ThroughOutput等关键字 +key1="Saving checkpoints" #功能检查字 +key2="global_step/sec:" #性能检查字 +key3="loss = " #精度检查字 + + +if [ `grep -c "$key1" "train.log"` -ne '0' ] && [ `grep -c "$key2" "train.log"` -ne '0' ] && [ `grep -c "$key3" "train.log"` -ne '0' ];then #可以根据需要调整检查逻辑 + echo "Run testcase success!" +else + echo "Run testcase failed!" +fi \ No newline at end of file