diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /.keep b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /LICENSE b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /README.md b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /README.md new file mode 100644 index 0000000000000000000000000000000000000000..01f40a87c0e15b18f9f11cf17e24f74dbcb6b10a --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /README.md @@ -0,0 +1,191 @@ +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) +- [训练结果](#训练结果.md) +- [高级参考](#高级参考.md) +

+<h2 id="基本信息.md">基本信息</h2>

+ +**发布者(Publisher):Huawei** + +**应用领域(Application Domain):CV** + +**版本(Version):** + +**修改时间(Modified) :2022.4.16** + +**大小(Size):** + +**框架(Framework):TensorFlow 1.15.0** + +**模型格式(Model Format):h5** + +**精度(Precision):Mixed** + +**处理器(Processor):昇腾910** + +**应用级别(Categories):Research** + +**描述(Description):基于TensorFlow框架的EfficientDet网络训练代码** + + +- 参考论文: + + https://arxiv.org/abs/1911.09070 + +- 参考实现: + + https://github.com/xuannianz/EfficientDet + +- 适配昇腾 AI 处理器的实现: + + + https://gitee.com/dw8023/ModelZoo-TensorFlow/tree/master/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow%20 + + +- 通过Git获取对应commit\_id的代码方法如下: + + ``` + git clone {repository_url} # 克隆仓库的代码 + cd {repository_name} # 切换到模型的代码仓目录 + git checkout {branch} # 切换到对应分支 + git reset --hard {commit_id} # 代码设置到对应的commit_id + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + ``` + +## 支持特性 + +| 特性列表 | 是否支持 | +|-------|------| +| 分布式训练 | 否 | +| 混合精度 | 是 | +| 并行数据 | 否 | + +## 混合精度训练 + +昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。 + +## 开启混合精度 + +脚本已默认开启混合精度,设置precision_mode参数的脚本参考如下。 + + ``` + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(str(args.precision_mode)) + ``` + +
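+
+下面给出一个把上述 precision_mode 设置嵌入完整 NPU session 配置的参考示意（仅作理解用途，并非本仓库的固定实现；use_off_line、remapping 等开关及 args.precision_mode 的实际取值请以 train_sess.py 为准）：
+
+```
+# 示意代码：组装带混合精度开关的 NPU session 配置（假设 precision_mode 取 "allow_mix_precision"）
+import tensorflow as tf
+from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+
+session_config = tf.ConfigProto()
+custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
+custom_op.name = 'NpuOptimizer'
+custom_op.parameter_map["use_off_line"].b = True  # 假设：在昇腾设备上执行
+custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
+session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # 常见做法：关闭默认 remapping
+
+sess = tf.Session(config=session_config)
+```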

+<h2 id="训练环境准备.md">训练环境准备</h2>

+
+1. 硬件环境准备请参见各硬件产品文档"[驱动和固件安装升级指南]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)"。需要在硬件设备上安装与CANN版本配套的固件与驱动。
+2. 宿主机上需要安装Docker并登录[Ascend Hub中心](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm)获取镜像。
+
+   当前模型支持的镜像列表如[表1](#zh-cn_topic_0000001074498056_table1519011227314)所示。
+
+   **表 1** 镜像列表
+
+   | 镜像名称 | 镜像版本 | 配套CANN版本 |
+   | --- | --- | --- |
+   | ascend-tensorflow-arm | 20.2.0 | 20.2 |
+

+<h2 id="快速上手.md">快速上手</h2>

+ +- 数据集准备 +1. 模型训练使用VOC2007数据集,数据集obs链接如下: + + obs://efficientdet-id0693/dataset/ + +- 源码obs链接如下: + + obs://efficientdet-id0693/npu + +- 单卡训练 + + 1. 配置训练参数。 + + 在脚本train_full_1p.sh中配置python train_sess.py所需参数。 + + ``` + --pretrained_model 预训练模型路径 + (预训练模型链接:链接:https://pan.baidu.com/s/1OGnFSCCr3eTI3jO-v4hxNQ 提取码:oy34) + + pascal /home/dataset/VOCdevkit/VOC2007 voc数据集位置 + + ``` + + 2. 配置测试参数。 + + 然后在脚本train_full_1p.sh中,配置python common.py所需参数。 + + ``` + --data_path='/home/dataset/VOCdevkit/VOC2007' voc数据集位置 + ``` + + 3. 启动训练和测试。 + + 启动单卡训练 (脚本为EfficientDet_ID0693_for_TensorFlow/train_full_1p.sh) + + ``` + bash train_full_1p.sh + ``` + +
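+
+在执行 train_full_1p.sh 之前，可先用类似下面的示意代码确认数据集路径能够被 generator 正常读取（PascalVocGenerator 的参数写法参考 augmentor/color.py 末尾的示例，可选参数以 generators/pascal.py 为准）：
+
+```
+# 示意代码：检查 VOC2007 数据集能否被正确加载（路径与 train_full_1p.sh 中配置保持一致）
+from generators.pascal import PascalVocGenerator
+
+train_generator = PascalVocGenerator(
+    '/home/dataset/VOCdevkit/VOC2007',  # voc数据集位置
+    'trainval',
+    skip_difficult=True,
+    batch_size=1
+)
+print('图片数量:', train_generator.size(), '类别数量:', train_generator.num_classes())
+image = train_generator.load_image(0)
+annotations = train_generator.load_annotations(0)
+print('图像尺寸:', image.shape, '标注框数量:', annotations['bboxes'].shape[0])
+```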

+<h2 id="训练结果.md">训练结果</h2>

+ +- 精度结果比对 + +|精度指标项|论文发布|GPU实测|NPU实测| +|---|---|---|---| +|mAP|xxx|72.6%|67.6%| + +- 性能结果比对 (暂无) + +|性能指标项|论文发布|GPU实测|NPU实测| +|---|---|---|---| +|FPS|XXX|YYY|ZZZ| + + +
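+
+表中 mAP 为各类别 AP 的平均值。common.py 的 evaluate() 返回形如 {类别: (AP, 标注框数)} 的字典，可按下面的示意代码汇总得到 mAP（字典中的数值仅为占位示例，汇总方式为常见做法，实际结果以评测脚本输出为准）：
+
+```
+# 示意代码：把 evaluate() 返回的逐类 AP 汇总成 mAP（数值仅为示例）
+average_precisions = {0: (0.713, 285), 1: (0.642, 337), 2: (0.598, 459)}
+
+precisions = []
+total_instances = []
+for average_precision, num_annotations in average_precisions.values():
+    precisions.append(average_precision)
+    total_instances.append(num_annotations)
+
+# 只对有标注框的类别取平均
+mean_ap = sum(precisions) / max(sum(x > 0 for x in total_instances), 1)
+print('mAP: {:.4f}'.format(mean_ap))
+```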

+<h2 id="高级参考.md">高级参考</h2>

+ +## 脚本和示例代码 + +``` +├── callback.py //回调函数 +├── README.md //代码说明文档 +├── common.py //精度测试代码 +├── losses.py //loss函数定义代码 +├── model.py //模型定义代码 +├── layers.py //层定义代码 +├── efficientdet.py //backbone网络结构代码 +├── requirements.txt //训练python依赖列表 +├── train_full_1p.sh //训练启动脚本 +├── utils //训练与测试流程工具代码文件夹 +├── generators //数据集generators生成代码文件夹 +├── augmentor //数据增强代码文件夹 +├── eval //测试精度工具代码文件夹 +│ 、、、 +``` + + +## 训练过程 + +1. 通过“模型训练”中的训练指令启动单卡卡训练。 + +2. 参考脚本的模型存储路径为checkpoints/*.h5。 + + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/.keep b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/__init__.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f45a2c79aa9e815ecbb58bc3e5e8cc1ef8f1dc4 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/color.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/color.py new file mode 100644 index 0000000000000000000000000000000000000000..ef99d6249eca4d0c1834a31b09a317c494fd8ea4 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/color.py @@ -0,0 +1,207 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +import numpy as np +from PIL import Image, ImageEnhance, ImageOps + + +def autocontrast(image, prob=0.5): + random_prob = np.random.uniform() + if random_prob > prob: + return image + image = Image.fromarray(image[..., ::-1]) + image = ImageOps.autocontrast(image) + image = np.array(image)[..., ::-1] + return image + + +def equalize(image, prob=0.5): + random_prob = np.random.uniform() + if random_prob > prob: + return image + image = Image.fromarray(image[..., ::-1]) + image = ImageOps.equalize(image) + image = np.array(image)[..., ::-1] + return image + + +def solarize(image, prob=0.5, threshold=128.): + random_prob = np.random.uniform() + if random_prob > prob: + return image + image = Image.fromarray(image[..., ::-1]) + image = ImageOps.solarize(image, threshold=threshold) + image = np.array(image)[..., ::-1] + return image + + +def sharpness(image, prob=0.5, min=0, max=2, factor=None): + random_prob = np.random.uniform() + if random_prob > prob: + return image + if factor is None: + # 0 模糊一点, 1 原图, 2 清晰一点 + factor = np.random.uniform(min, max) + image = Image.fromarray(image[..., ::-1]) + enhancer = ImageEnhance.Sharpness(image) + image = enhancer.enhance(factor=factor) + return np.array(image)[..., ::-1] + + +def color(image, prob=0.5, min=0., max=1., factor=None): + random_prob = np.random.uniform() + if random_prob > prob: + return image + if factor is None: + # factor=0 返回黑白色, factor=1 返回原图 + factor = np.random.uniform(min, max) + image = Image.fromarray(image[..., ::-1]) + enhancer = ImageEnhance.Color(image) + image = enhancer.enhance(factor=factor) + return np.array(image)[..., ::-1] + + +def contrast(image, prob=0.5, min=0.2, max=1., factor=None): + random_prob = np.random.uniform() + if random_prob > prob: + return image + if factor is None: + # factor=0 返回灰色, factor=1 返回原图 + factor = np.random.uniform(min, max) + image = Image.fromarray(image[..., ::-1]) + enhancer = ImageEnhance.Contrast(image) + image = enhancer.enhance(factor=factor) + return np.array(image)[..., ::-1] + + +def brightness(image, prob=0.5, min=0.8, max=1., factor=None): + random_prob = np.random.uniform() + if random_prob > prob: + return image + if factor is None: + # factor=0 返回全黑色, factor=1 返回原图 + factor = np.random.uniform(min, max) + image = Image.fromarray(image[..., ::-1]) + enhancer = ImageEnhance.Brightness(image) + image = enhancer.enhance(factor=factor) + return np.array(image)[..., ::-1] + + +class VisualEffect: + """ + Struct holding parameters and applying image color transformation. + + Args + solarize_threshold: + color_factor: A factor for adjusting color. + contrast_factor: A factor for adjusting contrast. + brightness_factor: A factor for adjusting brightness. + sharpness_factor: A factor for adjusting sharpness. 
+ """ + + def __init__( + self, + color_factor=None, + contrast_factor=None, + brightness_factor=None, + sharpness_factor=None, + color_prob=0.5, + contrast_prob=0.5, + brightness_prob=0.5, + sharpness_prob=0.5, + autocontrast_prob=0.5, + equalize_prob=0.5, + solarize_prob=0.1, + solarize_threshold=128., + + ): + self.color_factor = color_factor + self.contrast_factor = contrast_factor + self.brightness_factor = brightness_factor + self.sharpness_factor = sharpness_factor + self.color_prob = color_prob + self.contrast_prob = contrast_prob + self.brightness_prob = brightness_prob + self.sharpness_prob = sharpness_prob + self.autocontrast_prob = autocontrast_prob + self.equalize_prob = equalize_prob + self.solarize_prob = solarize_prob + self.solarize_threshold = solarize_threshold + + def __call__(self, image): + """ + Apply a visual effect on the image. + + Args + image: Image to adjust + """ + random_enhance_id = np.random.randint(0, 4) + if random_enhance_id == 0: + image = color(image, prob=self.color_prob, factor=self.color_factor) + elif random_enhance_id == 1: + image = contrast(image, prob=self.contrast_prob, factor=self.contrast_factor) + elif random_enhance_id == 2: + image = brightness(image, prob=self.brightness_prob, factor=self.brightness_factor) + else: + image = sharpness(image, prob=self.sharpness_prob, factor=self.sharpness_factor) + + random_ops_id = np.random.randint(0, 3) + if random_ops_id == 0: + image = autocontrast(image, prob=self.autocontrast_prob) + elif random_ops_id == 1: + image = equalize(image, prob=self.equalize_prob) + else: + image = solarize(image, prob=self.solarize_prob, threshold=self.solarize_threshold) + return image + + +if __name__ == '__main__': + from generators.pascal import PascalVocGenerator + import cv2 + + train_generator = PascalVocGenerator( + 'datasets/VOC0712', + 'trainval', + skip_difficult=True, + anchors_path='voc_anchors_416.txt', + batch_size=1 + ) + visual_effect = VisualEffect() + for i in range(train_generator.size()): + image = train_generator.load_image(i) + image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + annotations = train_generator.load_annotations(i) + boxes = annotations['bboxes'] + for box in boxes.astype(np.int32): + cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2) + src_image = image.copy() + image = visual_effect(image) + cv2.namedWindow('image', cv2.WINDOW_NORMAL) + cv2.imshow('image', np.concatenate([src_image, image], axis=1)) + cv2.waitKey(0) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/misc.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..a4ae004a7df327f7b098f4b3959db131992d0f6e --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/misc.py @@ -0,0 +1,292 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +import cv2 +import numpy as np +from augmentor.transform import translation_xy, change_transform_origin, scaling_xy +from utils import reorder_vertexes + + +def rotate(image, annotations, prob=0.5, border_value=(128, 128, 128)): + assert 'bboxes' in annotations, 'annotations should contain bboxes even if it is empty' + + random_prob = np.random.uniform() + if random_prob < (1 - prob): + return image, annotations + + rotate_degree = np.random.uniform(low=-10, high=10) + h, w = image.shape[:2] + # Compute the rotation matrix. + M = cv2.getRotationMatrix2D(center=(w / 2, h / 2), + angle=rotate_degree, + scale=1) + + # Get the sine and cosine from the rotation matrix. + abs_cos_angle = np.abs(M[0, 0]) + abs_sin_angle = np.abs(M[0, 1]) + + # Compute the new bounding dimensions of the image. + new_w = int(h * abs_sin_angle + w * abs_cos_angle) + new_h = int(h * abs_cos_angle + w * abs_sin_angle) + + # Adjust the rotation matrix to take into account the translation. + M[0, 2] += new_w // 2 - w // 2 + M[1, 2] += new_h // 2 - h // 2 + + # Rotate the image. + image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC, + borderMode=cv2.BORDER_CONSTANT, + borderValue=border_value) + + bboxes = annotations['bboxes'] + if bboxes.shape[0] != 0: + new_bboxes = [] + for bbox in bboxes: + x1, y1, x2, y2 = bbox + points = M.dot([ + [x1, x2, x1, x2], + [y1, y2, y2, y1], + [1, 1, 1, 1], + ]) + # Extract the min and max corners again. 
+ min_xy = np.sort(points, axis=1)[:, :2] + min_x = np.mean(min_xy[0]) + min_y = np.mean(min_xy[1]) + max_xy = np.sort(points, axis=1)[:, 2:] + max_x = np.mean(max_xy[0]) + max_y = np.mean(max_xy[1]) + new_bboxes.append([min_x, min_y, max_x, max_y]) + annotations['bboxes'] = np.array(new_bboxes, dtype=np.float32) + + if 'quadrangles' in annotations and annotations['quadrangles'].shape[0] != 0: + quadrangles = annotations['quadrangles'] + rotated_quadrangles = [] + for quadrangle in quadrangles: + quadrangle = np.concatenate([quadrangle, np.ones((4, 1))], axis=-1) + rotated_quadrangle = M.dot(quadrangle.T).T[:, :2] + quadrangle = reorder_vertexes(rotated_quadrangle) + rotated_quadrangles.append(quadrangle) + quadrangles = np.stack(rotated_quadrangles) + annotations['quadrangles'] = quadrangles + xmin = np.min(quadrangles, axis=1)[:, 0] + ymin = np.min(quadrangles, axis=1)[:, 1] + xmax = np.max(quadrangles, axis=1)[:, 0] + ymax = np.max(quadrangles, axis=1)[:, 1] + bboxes = np.stack([xmin, ymin, xmax, ymax], axis=1) + annotations['bboxes'] = bboxes + return image, annotations + + +def crop(image, annotations, prob=0.5): + assert 'bboxes' in annotations, 'annotations should contain bboxes even if it is empty' + + random_prob = np.random.uniform() + if random_prob < (1 - prob): + return image, annotations + h, w = image.shape[:2] + bboxes = annotations['bboxes'] + if bboxes.shape[0] != 0: + min_x1, min_y1 = np.min(bboxes, axis=0)[:2] + max_x2, max_y2 = np.max(bboxes, axis=0)[2:] + random_x1 = np.random.randint(0, max(min_x1 // 2, 1)) + random_y1 = np.random.randint(0, max(min_y1 // 2, 1)) + random_x2 = np.random.randint(max_x2 + 1, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 2)) + random_y2 = np.random.randint(max_y2 + 1, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 2)) + image = image[random_y1:random_y2, random_x1:random_x2] + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - random_x1 + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - random_y1 + if 'quadrangles' in annotations and annotations['quadrangles'].shape[0] != 0: + quadrangles = annotations['quadrangles'] + quadrangles[:, :, 0] = quadrangles[:, :, 0] - random_x1 + quadrangles[:, :, 1] = quadrangles[:, :, 1] - random_y1 + else: + random_x1 = np.random.randint(0, max(w // 8, 1)) + random_y1 = np.random.randint(0, max(h // 8, 1)) + random_x2 = np.random.randint(7 * w // 8, w - 1) + random_y2 = np.random.randint(7 * h // 8, h - 1) + image = image[random_y1:random_y2, random_x1:random_x2] + return image, annotations + + +def flipx(image, annotations, prob=0.5): + assert 'bboxes' in annotations, 'annotations should contain bboxes even if it is empty' + + random_prob = np.random.uniform() + if random_prob < (1 - prob): + return image, annotations + bboxes = annotations['bboxes'] + h, w = image.shape[:2] + image = image[:, ::-1] + if bboxes.shape[0] != 0: + tmp = bboxes.copy() + bboxes[:, 0] = w - 1 - bboxes[:, 2] + bboxes[:, 2] = w - 1 - tmp[:, 0] + if 'quadrangles' in annotations and annotations['quadrangles'].shape[0] != 0: + quadrangles = annotations['quadrangles'] + tmp = quadrangles.copy() + quadrangles[:, 0, 0] = w - 1 - quadrangles[:, 0, 0] + quadrangles[:, 1, 0] = w - 1 - tmp[:, 3, 0] + quadrangles[:, 1, 1] = tmp[:, 3, 1] + quadrangles[:, 2, 0] = w - 1 - quadrangles[:, 2, 0] + quadrangles[:, 3, 0] = w - 1 - tmp[:, 1, 0] + quadrangles[:, 3, 1] = tmp[:, 1, 1] + return image, annotations + + +def multi_scale(image, annotations, prob=1.): + assert 'bboxes' in annotations, 'annotations should contain bboxes even if it is empty' + + random_prob 
= np.random.uniform() + if random_prob < (1 - prob): + return image, annotations + h, w = image.shape[:2] + scale = np.random.choice(np.arange(0.7, 1.4, 0.1)) + nh, nw = int(round(h * scale)), int(round(w * scale)) + image = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_LINEAR) + bboxes = annotations['bboxes'] + if bboxes.shape[0] != 0: + annotations['bboxes'] = np.round(bboxes * scale) + if 'quadrangles' in annotations and annotations['quadrangles'].shape[0] != 0: + quadrangles = annotations['quadrangles'] + annotations['quadrangles'] = np.round(quadrangles * scale) + return image, annotations + + +def translate(image, annotations, prob=0.5, border_value=(128, 128, 128)): + assert 'bboxes' in annotations, 'annotations should contain bboxes even if it is empty' + + random_prob = np.random.uniform() + if random_prob < (1 - prob): + return image, annotations + h, w = image.shape[:2] + bboxes = annotations['bboxes'] + if bboxes.shape[0] != 0: + min_x1, min_y1 = np.min(bboxes, axis=0)[:2].astype(np.int32) + max_x2, max_y2 = np.max(bboxes, axis=0)[2:].astype(np.int32) + translation_matrix = translation_xy(min=(min(-(min_x1 // 2), 0), min(-(min_y1 // 2), 0)), + max=(max((w - 1 - max_x2) // 2, 1), max((h - 1 - max_y2) // 2, 1)), + prob=1.) + else: + translation_matrix = translation_xy(min=(min(-w // 8, 0), min(-h // 8, 0)), + max=(max(w // 8, 1), max(h // 8, 1))) + translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2)) + image = cv2.warpAffine( + image, + translation_matrix[:2, :], + dsize=(w, h), + flags=cv2.INTER_CUBIC, + borderMode=cv2.BORDER_CONSTANT, + borderValue=border_value, + ) + if bboxes.shape[0] != 0: + new_bboxes = [] + for bbox in bboxes: + x1, y1, x2, y2 = bbox + points = translation_matrix.dot([ + [x1, x2, x1, x2], + [y1, y2, y2, y1], + [1, 1, 1, 1], + ]) + min_x, min_y = np.min(points, axis=1)[:2] + max_x, max_y = np.max(points, axis=1)[:2] + new_bboxes.append([min_x, min_y, max_x, max_y]) + annotations['bboxes'] = np.array(new_bboxes).astype(np.float32) + + if 'quadrangles' in annotations and annotations['quadrangles'].shape[0] != 0: + quadrangles = annotations['quadrangles'] + translated_quadrangles = [] + for quadrangle in quadrangles: + quadrangle = np.concatenate([quadrangle, np.ones((4, 1))], axis=-1) + translated_quadrangle = translation_matrix.dot(quadrangle.T).T[:, :2] + quadrangle = reorder_vertexes(translated_quadrangle) + translated_quadrangles.append(quadrangle) + quadrangles = np.stack(translated_quadrangles) + annotations['quadrangles'] = quadrangles + xmin = np.min(quadrangles, axis=1)[:, 0] + ymin = np.min(quadrangles, axis=1)[:, 1] + xmax = np.max(quadrangles, axis=1)[:, 0] + ymax = np.max(quadrangles, axis=1)[:, 1] + bboxes = np.stack([xmin, ymin, xmax, ymax], axis=1) + annotations['bboxes'] = bboxes + + return image, annotations + + +class MiscEffect: + def __init__(self, multi_scale_prob=0.5, rotate_prob=0.05, flip_prob=0.5, crop_prob=0.5, translate_prob=0.5, + border_value=(128, 128, 128)): + self.multi_scale_prob = multi_scale_prob + self.rotate_prob = rotate_prob + self.flip_prob = flip_prob + self.crop_prob = crop_prob + self.translate_prob = translate_prob + self.border_value = border_value + + def __call__(self, image, annotations): + image, annotations = multi_scale(image, annotations, prob=self.multi_scale_prob) + image, annotations = rotate(image, annotations, prob=self.rotate_prob, border_value=self.border_value) + image, annotations = flipx(image, annotations, prob=self.flip_prob) + image, annotations = crop(image, 
annotations, prob=self.crop_prob) + image, annotations = translate(image, annotations, prob=self.translate_prob, border_value=self.border_value) + return image, annotations + + +if __name__ == '__main__': + from generators.csv_ import CSVGenerator + + train_generator = CSVGenerator('datasets/ic15/train.csv', + 'datasets/ic15/classes.csv', + detect_text=True, + batch_size=1, + phi=5, + shuffle_groups=False) + misc_effect = MiscEffect() + for i in range(train_generator.size()): + image = train_generator.load_image(i) + image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + annotations = train_generator.load_annotations(i) + boxes = annotations['bboxes'].astype(np.int32) + quadrangles = annotations['quadrangles'].astype(np.int32) + for box in boxes: + cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 1) + cv2.drawContours(image, quadrangles, -1, (0, 255, 255), 1) + src_image = image.copy() + # cv2.namedWindow('src_image', cv2.WINDOW_NORMAL) + cv2.imshow('src_image', src_image) + # image, annotations = misc_effect(image, annotations) + image, annotations = multi_scale(image, annotations, prob=1.) + image = image.copy() + boxes = annotations['bboxes'].astype(np.int32) + quadrangles = annotations['quadrangles'].astype(np.int32) + for box in boxes: + cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1) + cv2.drawContours(image, quadrangles, -1, (255, 255, 0), 1) + cv2.namedWindow('image', cv2.WINDOW_NORMAL) + cv2.imshow('image', image) + cv2.waitKey(0) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/transform.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..f8cce349c97efaa707ab0b79d110b137daf1a202 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /augmentor/transform.py @@ -0,0 +1,537 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +import cv2 +import numpy as np + +identity_matrix = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + + +def colvec(*args): + """ + Create a numpy array representing a column vector. 
+ """ + return np.array([args]).T + + +def transform_aabb(transform_matrix, aabb): + """ + Apply a transformation to an axis aligned bounding box. + + The result is a new AABB in the same coordinate system as the original AABB. + The new AABB contains all corner points of the original AABB after applying the given transformation. + + Args + transform: The transformation to apply. + x1: The minimum x value of the AABB. + y1: The minimum y value of the AABB. + x2: The maximum x value of the AABB. + y2: The maximum y value of the AABB. + Returns + The new AABB as tuple (x1, y1, x2, y2) + """ + x1, y1, x2, y2 = aabb + # Transform all 4 corners of the AABB. + points = transform_matrix.dot([ + [x1, x2, x1, x2], + [y1, y2, y2, y1], + [1, 1, 1, 1], + ]) + + # Extract the min and max corners again. + # (3, ) (min_x, min_y, 1) + min_corner = points.min(axis=1) + # (3, ) (max_x, max_y, 1) + max_corner = points.max(axis=1) + + return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]] + + +def random_value(min, max): + return np.random.uniform(min, max) + + +def random_vector(min, max): + """ + Construct a random vector between min and max. + + Args + min: the minimum value for each component, (n, ) + max: the maximum value for each component, (n, ) + """ + min = np.array(min) + max = np.array(max) + assert min.shape == max.shape + assert len(min.shape) == 1 + return np.random.uniform(min, max) + + +def rotation(min=0, max=0, prob=0.5): + """ + Construct a homogeneous 2D rotation matrix. + + Args + min: a scalar for the minimum absolute angle in radians + max: a scalar for the maximum absolute angle in radians + Returns + the rotation matrix as 3 by 3 numpy array + """ + random_prob = np.random.uniform() + if random_prob > prob: + # angle: the angle in radians + angle = random_value(min=min, max=max) + return np.array([ + [np.cos(angle), -np.sin(angle), 0], + [np.sin(angle), np.cos(angle), 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def translation_x(min=0, max=0, prob=0.5): + """ + Construct a homogeneous 2D translation matrix. + + Args: + min: a scalar for the minimum translation for x axis + max: a scalar for the maximum translation for x axis + + Returns: + the translation matrix as 3 by 3 numpy array + + """ + random_prob = np.random.uniform() + if random_prob > prob: + # translation: the translation 2D vector + translation = random_value(min=min, max=max) + return np.array([ + [1, 0, translation], + [0, 1, ], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def translation_y(min=0, max=0, prob=0.5): + """ + Construct a homogeneous 2D translation matrix. + + Args: + min: a scalar for the minimum translation for y axis + max: a scalar for the maximum translation for y axis + + Returns: + the translation matrix as 3 by 3 numpy array + + """ + random_prob = np.random.uniform() + if random_prob > prob: + # translation: the translation 2D vector + translation = random_value(min=min, max=max) + return np.array([ + [1, 0], + [0, 1, translation], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def translation_xy(min=(0, 0), max=(0, 0), prob=0.5): + """ + Construct a homogeneous 2D translation matrix. 
+ + Args: + min: a scalar for the minimum translation for y axis + max: a scalar for the maximum translation for y axis + + Returns: + the translation matrix as 3 by 3 numpy array + + """ + random_prob = np.random.uniform() + if random_prob < prob: + # translation: the translation 2D vector + dx = np.random.randint(min[0], max[0]) + dy = np.random.randint(min[1], max[1]) + return np.array([ + [1, 0, dx], + [0, 1, dy], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def shear_x(min=0, max=0, prob=0.5): + """ + Construct a homogeneous 2D shear matrix. + + Args + min: the minimum shear angle in radians. + max: the maximum shear angle in radians. + Returns + the shear matrix as 3 by 3 numpy array + """ + random_prob = np.random.uniform() + if random_prob > prob: + # angle: the shear angle in radians + angle = random_value(min=min, max=max) + return np.array([ + [1, np.tan(angle), 0], + [0, 1, 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def shear_y(min, max, prob=0.5): + """ + Construct a homogeneous 2D shear matrix. + + Args + min: the minimum shear angle in radians. + max: the maximum shear angle in radians. + Returns + the shear matrix as 3 by 3 numpy array + """ + random_prob = np.random.uniform() + if random_prob > prob: + # angle: the shear angle in radians + angle = random_value(min=min, max=max) + return np.array([ + [1, 0, 0], + [np.tan(angle), 1, 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def scaling_x(min=0.9, max=1.1, prob=0.5): + """ + Construct a homogeneous 2D scaling matrix. + + Args + factor: a 2D vector for X and Y scaling + Returns + the zoom matrix as 3 by 3 numpy array + """ + + random_prob = np.random.uniform() + if random_prob > prob: + # angle: the shear angle in radians + factor = random_value(min=min, max=max) + return np.array([ + [factor, 0, 0], + [0, 1, 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def scaling_y(min=0.9, max=1.1, prob=0.5): + """ + Construct a homogeneous 2D scaling matrix. + + Args + factor: a 2D vector for X and Y scaling + Returns + the zoom matrix as 3 by 3 numpy array + """ + + random_prob = np.random.uniform() + if random_prob > prob: + # angle: the shear angle in radians + factor = random_value(min=min, max=max) + return np.array([ + [1, 0, 0], + [0, factor, 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def scaling_xy(min=(0.9, 0.9), max=(1.1, 1.1), prob=0.5): + """ + Construct a homogeneous 2D scaling matrix. + + Args + min: a 2D vector containing the minimum scaling factor for X and Y. + min: a 2D vector containing The maximum scaling factor for X and Y. + Returns + the zoom matrix as 3 by 3 numpy array + """ + + random_prob = np.random.uniform() + if random_prob > prob: + # factor: a 2D vector for X and Y scaling + factor = random_vector(min=min, max=max) + return np.array([ + [factor[0], 0, 0], + [0, factor[1], 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def flip_x(prob=0.8): + """ + Construct a transformation randomly containing X/Y flips (or not). + + Args + flip_x_chance: The chance that the result will contain a flip along the X axis. + flip_y_chance: The chance that the result will contain a flip along the Y axis. + Returns + a homogeneous 3 by 3 transformation matrix + """ + random_prob = np.random.uniform() + if random_prob > prob: + # 1 - 2 * bool gives 1 for False and -1 for True. 
+ return np.array([ + [-1, 0, 0], + [0, 1, 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def flip_y(prob=0.8): + """ + Construct a transformation randomly containing X/Y flips (or not). + + Args + flip_x_chance: The chance that the result will contain a flip along the X axis. + flip_y_chance: The chance that the result will contain a flip along the Y axis. + Returns + a homogeneous 3 by 3 transformation matrix + """ + random_prob = np.random.uniform() + if random_prob > prob: + # 1 - 2 * bool gives 1 for False and -1 for True. + return np.array([ + [1, 0, 0], + [0, -1, 0], + [0, 0, 1] + ]) + else: + return identity_matrix + + +def change_transform_origin(transform, center): + """ + Create a new transform representing the same transformation, only with the origin of the linear part changed. + + Args + transform: the transformation matrix + center: the new origin of the transformation + Returns + translate(center) * transform * translate(-center) + """ + center = np.array(center) + return np.linalg.multi_dot([np.array([[1, 0, center[0]], [0, 1, center[1]], [0, 0, 1]]), + transform, + np.array([[1, 0, -center[0]], [0, 1, -center[1]], [0, 0, 1]])]) + + +def random_transform( + min_rotation=0, + max_rotation=0, + min_translation=(0, 0), + max_translation=(0, 0), + min_shear=0, + max_shear=0, + min_scaling=(1, 1), + max_scaling=(1, 1), +): + """ + Create a random transformation. + + The transformation consists of the following operations in this order (from left to right): + * rotation + * translation + * shear + * scaling + * flip x (if applied) + * flip y (if applied) + + Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation + as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. + Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret + the translation directly as pixel distances instead. + + Args + min_rotation: The minimum rotation in radians for the transform as scalar. + max_rotation: The maximum rotation in radians for the transform as scalar. + min_translation: The minimum translation for the transform as 2D column vector. + max_translation: The maximum translation for the transform as 2D column vector. + min_shear: The minimum shear angle for the transform in radians. + max_shear: The maximum shear angle for the transform in radians. + min_scaling: The minimum scaling for the transform as 2D column vector. + max_scaling: The maximum scaling for the transform as 2D column vector. + """ + return np.linalg.multi_dot([ + rotation(min_rotation, max_rotation), + translation_xy(min_translation, max_translation), + shear_x(min_shear, max_shear) if np.random.uniform() > 0.5 else shear_y(min_shear, max_shear), + scaling_xy(min_scaling, max_scaling), + flip_x() if np.random.uniform() > 0.5 else flip_y(), + ]) + + +def random_transform_generator(**kwargs): + """ + Create a random transform generator. + + The transformation consists of the following operations in this order (from left to right): + * rotation + * translation + * shear + * scaling + * flip x (if applied) + * flip y (if applied) + + Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation + as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. 
+ Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret + the translation directly as pixel distances instead. + + Args + min_rotation: The minimum rotation in radians for the transform as scalar. + max_rotation: The maximum rotation in radians for the transform as scalar. + min_translation: The minimum translation for the transform as 2D column vector. + max_translation: The maximum translation for the transform as 2D column vector. + min_shear: The minimum shear angle for the transform in radians. + max_shear: The maximum shear angle for the transform in radians. + min_scaling: The minimum scaling for the transform as 2D column vector. + max_scaling: The maximum scaling for the transform as 2D column vector. + """ + + while True: + yield random_transform(**kwargs) + + +def adjust_transform_for_image(transform, image, relative_translation): + """ + Adjust a transformation for a specific image. + + The translation of the matrix will be scaled with the size of the image. + The linear part of the transformation will adjusted so that the origin of the transformation will be at the center of the image. + """ + height, width, channels = image.shape + + result = transform + + # Scale the translation with the image size if specified. + if relative_translation: + result[0:2, 2] *= [width, height] + + # Move the origin of transformation. + result = change_transform_origin(transform, (0.5 * width, 0.5 * height)) + + return result + + +class TransformParameters: + """ + Struct holding parameters determining how to apply a transformation to an image. + + Args + fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap' + interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4' + cval: Fill value to use with fill_mode='constant' + relative_translation: If true (the default), interpret translation as a factor of the image size. + If false, interpret it as absolute pixels. + """ + + def __init__( + self, + fill_mode='nearest', + interpolation='linear', + cval=0, + relative_translation=True, + ): + self.fill_mode = fill_mode + self.cval = cval + self.interpolation = interpolation + self.relative_translation = relative_translation + + def cv_border_mode(self): + if self.fill_mode == 'constant': + return cv2.BORDER_CONSTANT + if self.fill_mode == 'nearest': + return cv2.BORDER_REPLICATE + if self.fill_mode == 'reflect': + return cv2.BORDER_REFLECT_101 + if self.fill_mode == 'wrap': + return cv2.BORDER_WRAP + + def cv_interpolation(self): + if self.interpolation == 'nearest': + return cv2.INTER_NEAREST + if self.interpolation == 'linear': + return cv2.INTER_LINEAR + if self.interpolation == 'cubic': + return cv2.INTER_CUBIC + if self.interpolation == 'area': + return cv2.INTER_AREA + if self.interpolation == 'lanczos4': + return cv2.INTER_LANCZOS4 + + +def apply_transform(matrix, image, params): + """ + Apply a transformation to an image. + + The origin of transformation is at the top left corner of the image. + + The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image. + Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space. + + Args + matrix: A homogeneous 3 by 3 matrix holding representing the transformation to apply. + image: The image to transform. 
+        params: The transform parameters (see TransformParameters)
+    """
+    output = cv2.warpAffine(
+        image,
+        matrix[:2, :],
+        dsize=(image.shape[1], image.shape[0]),
+        flags=params.cv_interpolation(),
+        borderMode=params.cv_border_mode(),
+        borderValue=params.cval,
+    )
+    return output
+
diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /callbacks.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..de58b1279bf3edeb5e26c1abb8a9ff1ecba01e63
--- /dev/null
+++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /callbacks.py
@@ -0,0 +1,103 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from npu_bridge.npu_init import * +from tensorflow.keras.callbacks import Callback +import tensorflow.keras.backend as K +import numpy as np + + +class CosineAnnealingScheduler(Callback): + def __init__(self, cycle_iterations, min_lr, t_mu=2, start_iteration=0): + self.iteration_id = 0 + self.start_iteration = start_iteration + self.cycle_iteration_id = 0 + self.lrs = [] + self.min_lr = min_lr + self.cycle_iterations = cycle_iterations + self.t_mu = t_mu + super(CosineAnnealingScheduler, self).__init__() + + def on_batch_end(self, batch, logs): + if self.iteration_id > self.start_iteration: + # (1, 0) + cosine_decay = 0.5 * (1 + np.cos(np.pi * (self.cycle_iteration_id / self.cycle_iterations))) + decayed_lr = (self.max_lr - self.min_lr) * cosine_decay + self.min_lr + K.set_value(self.model.optimizer.lr, decayed_lr) + if self.cycle_iteration_id == self.cycle_iterations: + self.cycle_iteration_id = 0 + self.cycle_iterations = int(self.cycle_iterations * self.t_mu) + else: + self.cycle_iteration_id = self.cycle_iteration_id + 1 + self.lrs.append(decayed_lr) + elif self.iteration_id == self.start_iteration: + self.max_lr = K.get_value(self.model.optimizer.lr) + self.iteration_id += 1 + + def on_train_begin(self, logs={}): + self.max_lr = K.get_value(self.model.optimizer.lr) + + +class ExponentialScheduler(Callback): + def __init__(self, min_lr, max_lr, iterations): + self.factor = np.exp(np.log(max_lr / min_lr) / iterations) + self.min_lr = min_lr + self.max_lr = max_lr + # debug + self.lrs = [] + self.losses = [] + + def on_batch_end(self, batch, logs): + lr = K.get_value(self.model.optimizer.lr) + self.lrs.append(lr) + self.losses.append(logs["loss"]) + K.set_value(self.model.optimizer.lr, lr * self.factor) + + def on_train_begin(self, logs={}): + K.set_value(self.model.optimizer.lr, self.min_lr) + + +class LinearWarmUpScheduler(Callback): + def __init__(self, iterations, min_lr): + self.iterations = iterations + self.min_lr = min_lr + self.iteration_id = 0 + # debug + self.lrs = [] + + def on_batch_begin(self, batch, logs): + if self.iteration_id < self.iterations: + lr = (self.max_lr - self.min_lr) / self.iterations * (self.iteration_id + 1) + self.min_lr + K.set_value(self.model.optimizer.lr, lr) + self.iteration_id += 1 + self.lrs.append(K.get_value(self.model.optimizer.lr)) + + def on_train_begin(self, logs={}): + self.max_lr = K.get_value(self.model.optimizer.lr) + K.set_value(self.model.optimizer.lr, self.min_lr) + self.lrs.append(K.get_value(self.model.optimizer.lr)) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /common.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /common.py new file mode 100644 index 0000000000000000000000000000000000000000..1524c639bb5a4db9c01443e36ada63564e3040da --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /common.py @@ -0,0 +1,328 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +from generators.pascal import PascalVocGenerator +from model import efficientdet +import os +from utils.compute_overlap import compute_overlap +from utils.visualization import draw_detections, draw_annotations +import numpy as np +import cv2 +import progressbar + +assert (callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead." + + +def _compute_ap(recall, precision): + """ + Compute the average precision, given the recall and precision curves. + + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + + Args: + recall: The recall curve (list). + precision: The precision curve (list). + + Returns: + The average precision as computed in py-faster-rcnn. + + """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], recall, [1.])) + mpre = np.concatenate(([0.], precision, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def _get_detections(generator, model, score_threshold=0.05, max_detections=100, visualize=False): + """ + Get the detections from the model using the generator. + + The result is a list of lists such that the size is: + all_detections[num_images][num_classes] = detections[num_class_detections, 5] + + Args: + generator: The generator used to run images through the model. + model: The model to run on the images. + score_threshold: The score confidence threshold to use. + max_detections: The maximum number of detections to use per image. + save_path: The path to save the images with visualized detections to. + + Returns: + A list of lists containing the detections for each image in the generator. 
+ + """ + all_detections = [[None for i in range(generator.num_classes()) if generator.has_label(i)] for j in + range(generator.size())] + + for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '): + image = generator.load_image(i) + src_image = image.copy() + h, w = image.shape[:2] + + anchors = generator.anchors + image, scale = generator.preprocess_image(image) + + # run network + boxes, scores, *_, labels = model.predict_on_batch([np.expand_dims(image, axis=0)]) + boxes /= scale + boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w - 1) + boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h - 1) + boxes[:, :, 2] = np.clip(boxes[:, :, 2], 0, w - 1) + boxes[:, :, 3] = np.clip(boxes[:, :, 3], 0, h - 1) + + # select indices which have a score above the threshold + indices = np.where(scores[0, :] > score_threshold)[0] + + # select those scores + scores = scores[0][indices] + + # find the order with which to sort the scores + scores_sort = np.argsort(-scores)[:max_detections] + + # select detections + # (n, 4) + image_boxes = boxes[0, indices[scores_sort], :] + # (n, ) + image_scores = scores[scores_sort] + # (n, ) + image_labels = labels[0, indices[scores_sort]] + # (n, 6) + detections = np.concatenate( + [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) + + if visualize: + draw_annotations(src_image, generator.load_annotations(i), label_to_name=generator.label_to_name) + draw_detections(src_image, detections[:5, :4], detections[:5, 4], detections[:5, 5].astype(np.int32), + label_to_name=generator.label_to_name, + score_threshold=score_threshold) + + # cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image) + cv2.namedWindow('{}'.format(i), cv2.WINDOW_NORMAL) + cv2.imshow('{}'.format(i), src_image) + cv2.waitKey(0) + + # copy detections to all_detections + for class_id in range(generator.num_classes()): + all_detections[i][class_id] = detections[detections[:, -1] == class_id, :-1] + + return all_detections + + +def _get_annotations(generator): + """ + Get the ground truth annotations from the generator. + + The result is a list of lists such that the size is: + all_annotations[num_images][num_classes] = annotations[num_class_annotations, 5] + + Args: + generator: The generator used to retrieve ground truth annotations. + + Returns: + A list of lists containing the annotations for each image in the generator. + + """ + all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())] + + for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '): + # load the annotations + annotations = generator.load_annotations(i) + + # copy detections to all_annotations + for label in range(generator.num_classes()): + if not generator.has_label(label): + continue + + all_annotations[i][label] = annotations['bboxes'][annotations['labels'] == label, :].copy() + + return all_annotations + + +def evaluate( + generator, + model, + iou_threshold=0.5, + score_threshold=0.01, + max_detections=100, + visualize=False, + epoch=0 +): + """ + Evaluate a given dataset using a given model. + + Args: + generator: The generator that represents the dataset to evaluate. + model: The model to evaluate. + iou_threshold: The threshold used to consider when a detection is positive or negative. + score_threshold: The score confidence threshold to use for detections. + max_detections: The maximum number of detections to use per image. + visualize: Show the visualized detections or not. 
+ + Returns: + A dict mapping class names to mAP scores. + + """ + # gather all detections and annotations + all_detections = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, + visualize=visualize) + all_annotations = _get_annotations(generator) + average_precisions = {} + num_tp = 0 + num_fp = 0 + + # process detections and annotations + for label in range(generator.num_classes()): + if not generator.has_label(label): + continue + + false_positives = np.zeros((0,)) + true_positives = np.zeros((0,)) + scores = np.zeros((0,)) + num_annotations = 0.0 + + for i in range(generator.size()): + detections = all_detections[i][label] + annotations = all_annotations[i][label] + num_annotations += annotations.shape[0] + detected_annotations = [] + + for d in detections: + scores = np.append(scores, d[4]) + + if annotations.shape[0] == 0: + false_positives = np.append(false_positives, 1) + true_positives = np.append(true_positives, 0) + continue + overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) + assigned_annotation = np.argmax(overlaps, axis=1) + max_overlap = overlaps[0, assigned_annotation] + + if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: + false_positives = np.append(false_positives, 0) + true_positives = np.append(true_positives, 1) + detected_annotations.append(assigned_annotation) + else: + false_positives = np.append(false_positives, 1) + true_positives = np.append(true_positives, 0) + + # no annotations -> AP for this class is 0 (is this correct?) + if num_annotations == 0: + average_precisions[label] = 0, 0 + continue + + # sort by score + indices = np.argsort(-scores) + false_positives = false_positives[indices] + true_positives = true_positives[indices] + + # compute false positives and true positives + false_positives = np.cumsum(false_positives) + true_positives = np.cumsum(true_positives) + + if false_positives.shape[0] == 0: + num_fp += 0 + else: + num_fp += false_positives[-1] + if true_positives.shape[0] == 0: + num_tp += 0 + else: + num_tp += true_positives[-1] + + # compute recall and precision + recall = true_positives / num_annotations + precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) + + # compute average precision + average_precision = _compute_ap(recall, precision) + average_precisions[label] = average_precision, num_annotations + print('num_fp={}, num_tp={}'.format(num_fp, num_tp)) + + return average_precisions + + +def parse_args(args): + """ + Parse the arguments. 
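+
+    Args:
+        args: List of command line argument strings, e.g. sys.argv[1:].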
+ """ + parser.add_argument('--model_path', help='path to tfrecord', default='/home/dingwei/efficientdet/checkpoints/pascal_13_0.3418_0.5143.h5') + parser.add_argument('--data_path', help='path to dataset', default='/home/dataset/VOCdevkit/VOC2007') + print(vars(parser.parse_args(args))) + return parser.parse_args(args) + + +def main(args=None): + if args is None: + args = sys.argv[1:] + args = parse_args(args) + + phi = 0 + weighted_bifpn = False + common_args = { + 'batch_size': 4, + 'phi': phi, + } + test_generator = PascalVocGenerator( + args.data_path, + 'test', + shuffle_groups=False, + skip_truncated=False, + skip_difficult=True, + **common_args + ) + input_shape = (test_generator.image_size, test_generator.image_size) + anchors = test_generator.anchors + num_classes = test_generator.num_classes() + model, prediction_model = efficientdet(phi=phi, num_classes=num_classes, weighted_bifpn=weighted_bifpn) + prediction_model.load_weights(args.model_path, by_name=True) + average_precisions = evaluate(test_generator, prediction_model, visualize=False) + # compute per class average precision + total_instances = [] + precisions = [] + for label, (average_precision, num_annotations) in average_precisions.items(): + print('{:.0f} instances of class'.format(num_annotations), test_generator.label_to_name(label), + 'with average precision: {:.4f}'.format(average_precision)) + total_instances.append(num_annotations) + precisions.append(average_precision) + mean_ap = sum(precisions) / sum(x > 0 for x in total_instances) + print('mAP: {:.4f}'.format(mean_ap)) + +if __name__ == '__main__': + main() + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /efficientnet.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..7793514242161923299e0d931a1227ef22139dfc --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /efficientnet.py @@ -0,0 +1,577 @@ +# Copyright 2019 The TensorFlow Authors, Pavel Yakubovskiy, Björn Barz. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains definitions for EfficientNet model. + +[1] Mingxing Tan, Quoc V. Le + EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. 
+ ICML'19, https://arxiv.org/abs/1905.11946 +""" + +# Code of this model implementation is mostly written by +# Björn Barz ([@Callidior](https://github.com/Callidior)) + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from npu_bridge.npu_init import * + +import os +import json +import math +import string +import collections +import numpy as np + +from six.moves import xrange +from keras_applications.imagenet_utils import _obtain_input_shape +from keras_applications.imagenet_utils import decode_predictions +from keras_applications.imagenet_utils import preprocess_input as _preprocess_input + +from utils import get_submodules_from_kwargs +from layers import BatchNormalization + +backend = None +layers = None +models = None +keras_utils = None + + +BASE_WEIGHTS_PATH = ( + 'https://github.com/Callidior/keras-applications/' + 'releases/download/efficientnet/') + +WEIGHTS_HASHES = { + 'efficientnet-b0': ('163292582f1c6eaca8e7dc7b51b01c61' + '5b0dbc0039699b4dcd0b975cc21533dc', + 'c1421ad80a9fc67c2cc4000f666aa507' + '89ce39eedb4e06d531b0c593890ccff3'), + 'efficientnet-b1': ('d0a71ddf51ef7a0ca425bab32b7fa7f1' + '6043ee598ecee73fc674d9560c8f09b0', + '75de265d03ac52fa74f2f510455ba64f' + '9c7c5fd96dc923cd4bfefa3d680c4b68'), + 'efficientnet-b2': ('bb5451507a6418a574534aa76a91b106' + 'f6b605f3b5dde0b21055694319853086', + '433b60584fafba1ea3de07443b74cfd3' + '2ce004a012020b07ef69e22ba8669333'), + 'efficientnet-b3': ('03f1fba367f070bd2545f081cfa7f3e7' + '6f5e1aa3b6f4db700f00552901e75ab9', + 'c5d42eb6cfae8567b418ad3845cfd63a' + 'a48b87f1bd5df8658a49375a9f3135c7'), + 'efficientnet-b4': ('98852de93f74d9833c8640474b2c698d' + 'b45ec60690c75b3bacb1845e907bf94f', + '7942c1407ff1feb34113995864970cd4' + 'd9d91ea64877e8d9c38b6c1e0767c411'), + 'efficientnet-b5': ('30172f1d45f9b8a41352d4219bf930ee' + '3339025fd26ab314a817ba8918fefc7d', + '9d197bc2bfe29165c10a2af8c2ebc675' + '07f5d70456f09e584c71b822941b1952'), + 'efficientnet-b6': ('f5270466747753485a082092ac9939ca' + 'a546eb3f09edca6d6fff842cad938720', + '1d0923bb038f2f8060faaf0a0449db4b' + '96549a881747b7c7678724ac79f427ed'), + 'efficientnet-b7': ('876a41319980638fa597acbbf956a82d' + '10819531ff2dcb1a52277f10c7aefa1a', + '60b56ff3a8daccc8d96edfd40b204c11' + '3e51748da657afd58034d54d3cec2bac') +} + +BlockArgs = collections.namedtuple('BlockArgs', [ + 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', + 'expand_ratio', 'id_skip', 'strides', 'se_ratio' +]) +# defaults will be a public argument for namedtuple in Python 3.7 +# https://docs.python.org/3/library/collections.html#collections.namedtuple +BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) + +DEFAULT_BLOCKS_ARGS = [ + BlockArgs(kernel_size=3, num_repeat=1, input_filters=32, output_filters=16, + expand_ratio=1, id_skip=True, strides=[1, 1], se_ratio=0.25), + BlockArgs(kernel_size=3, num_repeat=2, input_filters=16, output_filters=24, + expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25), + BlockArgs(kernel_size=5, num_repeat=2, input_filters=24, output_filters=40, + expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25), + BlockArgs(kernel_size=3, num_repeat=3, input_filters=40, output_filters=80, + expand_ratio=6, id_skip=True, strides=[2, 2], se_ratio=0.25), + BlockArgs(kernel_size=5, num_repeat=3, input_filters=80, output_filters=112, + expand_ratio=6, id_skip=True, strides=[1, 1], se_ratio=0.25), + BlockArgs(kernel_size=5, num_repeat=4, input_filters=112, output_filters=192, + expand_ratio=6, 
id_skip=True, strides=[2, 2], se_ratio=0.25), + BlockArgs(kernel_size=3, num_repeat=1, input_filters=192, output_filters=320, + expand_ratio=6, id_skip=True, strides=[1, 1], se_ratio=0.25) +] + +CONV_KERNEL_INITIALIZER = { + 'class_name': 'VarianceScaling', + 'config': { + 'scale': 2.0, + 'mode': 'fan_out', + # EfficientNet actually uses an untruncated normal distribution for + # initializing conv layers, but keras.initializers.VarianceScaling use + # a truncated distribution. + # We decided against a custom initializer for better serializability. + 'distribution': 'normal' + } +} + +DENSE_KERNEL_INITIALIZER = { + 'class_name': 'VarianceScaling', + 'config': { + 'scale': 1. / 3., + 'mode': 'fan_out', + 'distribution': 'uniform' + } +} + + +def preprocess_input(x, **kwargs): + kwargs = {k: v for k, v in kwargs.items() if k in ['backend', 'layers', 'models', 'utils']} + return _preprocess_input(x, mode='torch', **kwargs) + + +def get_swish(**kwargs): + backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) + + def swish(x): + """Swish activation function: x * sigmoid(x). + Reference: [Searching for Activation Functions](https://arxiv.org/abs/1710.05941) + """ + + if backend.backend() == 'tensorflow': + try: + # The native TF implementation has a more + # memory-efficient gradient implementation + return backend.tf.nn.swish(x) + except AttributeError: + pass + + return x * backend.sigmoid(x) + + return swish + + +def get_dropout(**kwargs): + """Wrapper over custom dropout. Fix problem of ``None`` shape for tf.keras. + It is not possible to define FixedDropout class as global object, + because we do not have modules for inheritance at first time. + + Issue: + https://github.com/tensorflow/tensorflow/issues/30946 + """ + backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) + + class FixedDropout(layers.Dropout): + def _get_noise_shape(self, inputs): + if self.noise_shape is None: + return self.noise_shape + + symbolic_shape = backend.shape(inputs) + noise_shape = [symbolic_shape[axis] if shape is None else shape + for axis, shape in enumerate(self.noise_shape)] + return tuple(noise_shape) + + return FixedDropout + + +def round_filters(filters, width_coefficient, depth_divisor): + """Round number of filters based on width multiplier.""" + + filters *= width_coefficient + new_filters = int(filters + depth_divisor / 2) // depth_divisor * depth_divisor + new_filters = max(depth_divisor, new_filters) + # Make sure that round down does not go down by more than 10%. 
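+    # For example: round_filters(32, 1.12, 8) scales 32 to 35.84, snaps it to the nearest
+    # multiple of 8 (32), and because 32 < 0.9 * 35.84 the result is bumped up to 40;
+    # with width_coefficient=1.0 the filter count is returned unchanged.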
+ if new_filters < 0.9 * filters: + new_filters += depth_divisor + return int(new_filters) + + +def round_repeats(repeats, depth_coefficient): + """Round number of repeats based on depth multiplier.""" + + return int(math.ceil(depth_coefficient * repeats)) + + +def mb_conv_block(inputs, block_args, activation, drop_rate=None, prefix='', freeze_bn=False): + """Mobile Inverted Residual Bottleneck.""" + + has_se = (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 + + # workaround over non working dropout with None in noise_shape in tf.keras + Dropout = get_dropout( + backend=backend, + layers=layers, + models=models, + utils=keras_utils + ) + + # Expansion phase + filters = block_args.input_filters * block_args.expand_ratio + if block_args.expand_ratio != 1: + x = layers.Conv2D(filters, 1, + padding='same', + use_bias=False, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=prefix + 'expand_conv')(inputs) + # x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name=prefix + 'expand_bn')(x) + x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'expand_bn')(x) + x = layers.Activation(activation, name=prefix + 'expand_activation')(x) + else: + x = inputs + + # Depthwise Convolution + x = layers.DepthwiseConv2D(block_args.kernel_size, + strides=block_args.strides, + padding='same', + use_bias=False, + depthwise_initializer=CONV_KERNEL_INITIALIZER, + name=prefix + 'dwconv')(x) + # x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name=prefix + 'bn')(x) + x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'bn')(x) + x = layers.Activation(activation, name=prefix + 'activation')(x) + + # Squeeze and Excitation phase + if has_se: + num_reduced_filters = max(1, int( + block_args.input_filters * block_args.se_ratio + )) + se_tensor = layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x) + + target_shape = (1, 1, filters) if backend.image_data_format() == 'channels_last' else (filters, 1, 1) + se_tensor = layers.Reshape(target_shape, name=prefix + 'se_reshape')(se_tensor) + se_tensor = layers.Conv2D(num_reduced_filters, 1, + activation=activation, + padding='same', + use_bias=True, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=prefix + 'se_reduce')(se_tensor) + se_tensor = layers.Conv2D(filters, 1, + activation='sigmoid', + padding='same', + use_bias=True, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=prefix + 'se_expand')(se_tensor) + if backend.backend() == 'theano': + # For the Theano backend, we have to explicitly make + # the excitation weights broadcastable. 
+ pattern = ([True, True, True, False] if backend.image_data_format() == 'channels_last' + else [True, False, True, True]) + se_tensor = layers.Lambda( + lambda x: backend.pattern_broadcast(x, pattern), + name=prefix + 'se_broadcast')(se_tensor) + x = layers.multiply([x, se_tensor], name=prefix + 'se_excite') + + # Output phase + x = layers.Conv2D(block_args.output_filters, 1, + padding='same', + use_bias=False, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name=prefix + 'project_conv')(x) + # x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name=prefix + 'project_bn')(x) + x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'project_bn')(x) + if block_args.id_skip and all( + s == 1 for s in block_args.strides + ) and block_args.input_filters == block_args.output_filters: + if drop_rate and (drop_rate > 0): + x = Dropout(drop_rate, + noise_shape=(None, 1, 1, 1), + name=prefix + 'drop')(x) + x = layers.add([x, inputs], name=prefix + 'add') + + return x + + +def EfficientNet(width_coefficient, + depth_coefficient, + default_resolution, + dropout_rate=0.2, + drop_connect_rate=0.2, + depth_divisor=8, + blocks_args=DEFAULT_BLOCKS_ARGS, + model_name='efficientnet', + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + freeze_bn=False, + **kwargs): + """Instantiates the EfficientNet architecture using given scaling coefficients. + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + # Arguments + width_coefficient: float, scaling coefficient for network width. + depth_coefficient: float, scaling coefficient for network depth. + default_resolution: int, default input image size. + dropout_rate: float, dropout rate before final classifier layer. + drop_connect_rate: float, dropout rate at skip connections. + depth_divisor: int. + blocks_args: A list of BlockArgs to construct block modules. + model_name: string, model name. + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + # Returns + A Keras model instance. + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. 
+ """ + global backend, layers, models, keras_utils + backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) + features = [] + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as `"imagenet"` with `include_top`' + ' as true, `classes` should be 1000') + + # Determine proper input shape + input_shape = _obtain_input_shape(input_shape, + default_size=default_resolution, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if backend.backend() == 'tensorflow': + from tensorflow.python.keras.backend import is_keras_tensor + else: + is_keras_tensor = backend.is_keras_tensor + if not is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 + activation = get_swish(**kwargs) + + # Build stem + x = img_input + x = layers.Conv2D(round_filters(32, width_coefficient, depth_divisor), 3, + strides=(2, 2), + padding='same', + use_bias=False, + kernel_initializer=CONV_KERNEL_INITIALIZER, + name='stem_conv')(x) + # x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name='stem_bn')(x) + x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x) + x = layers.Activation(activation, name='stem_activation')(x) + # Build blocks + num_blocks_total = sum(block_args.num_repeat for block_args in blocks_args) + block_num = 0 + for idx, block_args in enumerate(blocks_args): + assert block_args.num_repeat > 0 + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, + width_coefficient, depth_divisor), + output_filters=round_filters(block_args.output_filters, + width_coefficient, depth_divisor), + num_repeat=round_repeats(block_args.num_repeat, depth_coefficient)) + + # The first block needs to take care of stride and filter size increase. 
+ drop_rate = drop_connect_rate * float(block_num) / num_blocks_total + x = mb_conv_block(x, block_args, + activation=activation, + drop_rate=drop_rate, + prefix='block{}a_'.format(idx + 1), + freeze_bn=freeze_bn + ) + block_num += 1 + if block_args.num_repeat > 1: + # pylint: disable=protected-access + block_args = block_args._replace( + input_filters=block_args.output_filters, strides=[1, 1]) + # pylint: enable=protected-access + for bidx in xrange(block_args.num_repeat - 1): + drop_rate = drop_connect_rate * float(block_num) / num_blocks_total + block_prefix = 'block{}{}_'.format( + idx + 1, + string.ascii_lowercase[bidx + 1] + ) + x = mb_conv_block(x, block_args, + activation=activation, + drop_rate=drop_rate, + prefix=block_prefix, + freeze_bn=freeze_bn + ) + block_num += 1 + if idx < len(blocks_args) - 1 and blocks_args[idx + 1].strides[0] == 2: + features.append(x) + elif idx == len(blocks_args) - 1: + features.append(x) + return features + + +def EfficientNetB0(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.0, 1.0, 224, 0.2, + model_name='efficientnet-b0', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +def EfficientNetB1(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.0, 1.1, 240, 0.2, + model_name='efficientnet-b1', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +def EfficientNetB2(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.1, 1.2, 260, 0.3, + model_name='efficientnet-b2', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +def EfficientNetB3(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.2, 1.4, 300, 0.3, + model_name='efficientnet-b3', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +def EfficientNetB4(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.4, 1.8, 380, 0.4, + model_name='efficientnet-b4', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +def EfficientNetB5(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.6, 2.2, 456, 0.4, + model_name='efficientnet-b5', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +def EfficientNetB6(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(1.8, 2.6, 528, 0.5, + model_name='efficientnet-b6', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + 
+def EfficientNetB7(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs): + return EfficientNet(2.0, 3.1, 600, 0.5, + model_name='efficientnet-b7', + include_top=include_top, weights=weights, + input_tensor=input_tensor, input_shape=input_shape, + pooling=pooling, classes=classes, + **kwargs) + + +setattr(EfficientNetB0, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB1, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB2, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB3, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB4, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB5, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB6, '__doc__', EfficientNet.__doc__) +setattr(EfficientNetB7, '__doc__', EfficientNet.__doc__) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/.keep b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/__init__.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f45a2c79aa9e815ecbb58bc3e5e8cc1ef8f1dc4 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/coco.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0a9113e38a83c27594b374f7ba832b4f821cee60 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/coco.py @@ -0,0 +1,204 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +# import keras +from tensorflow import keras +import tensorflow as tf + +from pycocotools.cocoeval import COCOeval +import numpy as np +import json +from tqdm import trange +import cv2 + +from generators.coco import CocoGenerator + + +def evaluate(generator, model, threshold=0.01): + """ + Use the pycocotools to evaluate a COCO model on a dataset. + + Args + generator: The generator for generating the evaluation data. + model: The model to evaluate. + threshold: The score threshold to use. + """ + # start collecting results + results = [] + image_ids = [] + for index in trange(generator.size(), desc='COCO evaluation: '): + image = generator.load_image(index) + src_image = image.copy() + h, w = image.shape[:2] + + image, scale = generator.preprocess_image(image) + + # run network + boxes, scores, labels = model.predict_on_batch([np.expand_dims(image, axis=0)]) + boxes /= scale + boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w - 1) + boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h - 1) + boxes[:, :, 2] = np.clip(boxes[:, :, 2], 0, w - 1) + boxes[:, :, 3] = np.clip(boxes[:, :, 3], 0, h - 1) + + # change to (x, y, w, h) (MS COCO standard) + boxes[:, :, 2] -= boxes[:, :, 0] + boxes[:, :, 3] -= boxes[:, :, 1] + + # select indices which have a score above the threshold + indices = np.where(scores[0, :] > threshold)[0] + boxes = boxes[0, indices] + scores = scores[0, indices] + class_ids = labels[0, indices] + + # compute predicted labels and scores + for box, score, class_id in zip(boxes, scores, class_ids): + # append detection for each positively labeled class + image_result = { + 'image_id': generator.image_ids[index], + 'category_id': int(class_id) + 1, + 'score': float(score), + 'bbox': box.tolist(), + } + # append detection to results + results.append(image_result) + + # box = np.round(box).astype(np.int32) + # class_name = generator.label_to_name(generator.coco_label_to_label(class_id + 1)) + # ret, baseline = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + # cv2.rectangle(src_image, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 1) + # cv2.putText(src_image, class_name, (box[0], box[1] + box[3] - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, + # (0, 0, 0), 1) + # cv2.namedWindow('image', cv2.WINDOW_NORMAL) + # cv2.imshow('image', src_image) + # cv2.waitKey(0) + + # append image to list of processed images + 
image_ids.append(generator.image_ids[index]) + + if not len(results): + return + + # write output + json.dump(results, open('{}_bbox_results.json'.format(generator.set_name), 'w'), indent=4) + json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4) + + # # load results in COCO evaluation tool + # coco_true = generator.coco + # coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(generator.set_name)) + # + # # run COCO evaluation + # coco_eval = COCOeval(coco_true, coco_pred, 'bbox') + # coco_eval.params.imgIds = image_ids + # coco_eval.evaluate() + # coco_eval.accumulate() + # coco_eval.summarize() + # return coco_eval.stats + + +class Evaluate(keras.callbacks.Callback): + """ Performs COCO evaluation on each epoch. + """ + + def __init__(self, generator, model, tensorboard=None, threshold=0.01): + """ Evaluate callback initializer. + + Args + generator : The generator used for creating validation data. + model: prediction model + tensorboard : If given, the results will be written to tensorboard. + threshold : The score threshold to use. + """ + self.generator = generator + self.active_model = model + self.threshold = threshold + self.tensorboard = tensorboard + + super(Evaluate, self).__init__() + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + + coco_tag = ['AP @[ IoU=0.50:0.95 | area= all | maxDets=100 ]', + 'AP @[ IoU=0.50 | area= all | maxDets=100 ]', + 'AP @[ IoU=0.75 | area= all | maxDets=100 ]', + 'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]', + 'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]', + 'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]', + 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 1 ]', + 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 10 ]', + 'AR @[ IoU=0.50:0.95 | area= all | maxDets=100 ]', + 'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]', + 'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]', + 'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]'] + coco_eval_stats = evaluate(self.generator, self.active_model, self.threshold) + if coco_eval_stats is not None and self.tensorboard is not None: + if tf.version.VERSION < '2.0.0' and self.tensorboard.writer is not None: + summary = tf.Summary() + for index, result in enumerate(coco_eval_stats): + summary_value = summary.value.add() + summary_value.simple_value = result + summary_value.tag = '{}. {}'.format(index + 1, coco_tag[index]) + self.tensorboard.writer.add_summary(summary, epoch) + logs[coco_tag[index]] = result + else: + for index, result in enumerate(coco_eval_stats): + tag = '{}. 
{}'.format(index + 1, coco_tag[index]) + tf.summary.scalar(tag, result, epoch) + + +if __name__ == '__main__': + npu_keras_sess = set_keras_session_npu_config() + from model import efficientdet + import os + from generators.coco import CocoGenerator + + os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + phi = 2 + weighted_bifpn = True + model_path = 'efficientdet-d2.h5' + common_args = { + 'batch_size': 1, + 'phi': phi, + } + + test_generator = CocoGenerator( + '/cache/MScoco', + 'test-dev2017', + shuffle_groups=False, + **common_args + ) + num_classes = test_generator.num_classes() + model, prediction_model = efficientdet(phi=phi, num_classes=num_classes, weighted_bifpn=weighted_bifpn, + score_threshold=0.01) + prediction_model.load_weights(model_path, by_name=True) + evaluate(test_generator, prediction_model, threshold=0.01) + close_session(npu_keras_sess) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/pascal.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/pascal.py new file mode 100644 index 0000000000000000000000000000000000000000..767ac263ca75a2a48da0edbb1ec050e9ac4ff00f --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /eval/pascal.py @@ -0,0 +1,119 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +# import keras +from tensorflow import keras +import tensorflow as tf +from eval.common import evaluate + + +class Evaluate(keras.callbacks.Callback): + """ + Evaluation callback for arbitrary datasets. + """ + + def __init__( + self, + generator, + model, + iou_threshold=0.5, + score_threshold=0.01, + max_detections=100, + save_path=None, + tensorboard=None, + weighted_average=False, + verbose=1 + ): + """ + Evaluate a given dataset using a given model at the end of every epoch during training. + + Args: + generator: The generator that represents the dataset to evaluate. + iou_threshold: The threshold used to consider when a detection is positive or negative. + score_threshold: The score confidence threshold to use for detections. + max_detections: The maximum number of detections to use per image. + save_path: The path to save images with visualized detections to. + tensorboard: Instance of keras.callbacks.TensorBoard used to log the mAP value. 
+ weighted_average: Compute the mAP using the weighted average of precisions among classes. + verbose: Set the verbosity level, by default this is set to 1. + """ + self.generator = generator + self.iou_threshold = iou_threshold + self.score_threshold = score_threshold + self.max_detections = max_detections + self.save_path = save_path + self.tensorboard = tensorboard + self.weighted_average = weighted_average + self.verbose = verbose + self.active_model = model + + super(Evaluate, self).__init__() + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + + # run evaluation + average_precisions = evaluate( + self.generator, + self.active_model, + iou_threshold=self.iou_threshold, + score_threshold=self.score_threshold, + max_detections=self.max_detections, + visualize=False + ) + + # compute per class average precision + total_instances = [] + precisions = [] + for label, (average_precision, num_annotations) in average_precisions.items(): + if self.verbose == 1: + print('{:.0f} instances of class'.format(num_annotations), + self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) + total_instances.append(num_annotations) + precisions.append(average_precision) + if self.weighted_average: + self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances) + else: + self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances) + + if self.tensorboard is not None: + if tf.version.VERSION < '2.0.0' and self.tensorboard.writer is not None: + summary = tf.Summary() + summary_value = summary.value.add() + summary_value.simple_value = self.mean_ap + summary_value.tag = "mAP" + self.tensorboard.writer.add_summary(summary, epoch) + else: + tf.summary.scalar('mAP', self.mean_ap, epoch) + + logs['mAP'] = self.mean_ap + + if self.verbose == 1: + print('mAP: {:.4f}'.format(self.mean_ap)) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/.keep b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/__init__.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f45a2c79aa9e815ecbb58bc3e5e8cc1ef8f1dc4 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/coco.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..48f4d42b44a05310be292d439e72b80d1ac15c2f --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/coco.py @@ -0,0 +1,175 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +from generators.common import Generator +import os +import numpy as np +from pycocotools.coco import COCO +import cv2 + + +class CocoGenerator(Generator): + """ + Generate data from the COCO dataset. + See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information. + """ + + def __init__(self, data_dir, set_name, **kwargs): + """ + Initialize a COCO data generator. + + Args + data_dir: Path to where the COCO dataset is stored. + set_name: Name of the set to parse. + """ + self.data_dir = data_dir + self.set_name = set_name + if set_name in ['train2017', 'val2017']: + self.coco = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json')) + else: + self.coco = COCO(os.path.join(data_dir, 'annotations', 'image_info_' + set_name + '.json')) + self.image_ids = self.coco.getImgIds() + + self.load_classes() + + super(CocoGenerator, self).__init__(**kwargs) + + def load_classes(self): + """ + Loads the class to label mapping (and inverse) for COCO. 
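+
+        COCO category ids are not contiguous (they reach 90 for 80 classes), so this also
+        builds the coco_labels / coco_labels_inverse lookup tables; for example, COCO
+        category id 1 ('person') is mapped to contiguous label 0.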
+ """ + # load class names (name -> label) + categories = self.coco.loadCats(self.coco.getCatIds()) + categories.sort(key=lambda x: x['id']) + + self.classes = {} + self.coco_labels = {} + self.coco_labels_inverse = {} + for c in categories: + self.coco_labels[len(self.classes)] = c['id'] + self.coco_labels_inverse[c['id']] = len(self.classes) + self.classes[c['name']] = len(self.classes) + + # also load the reverse (label -> name) + self.labels = {} + for key, value in self.classes.items(): + self.labels[value] = key + + def size(self): + """ Size of the COCO dataset. + """ + return len(self.image_ids) + + def num_classes(self): + """ Number of classes in the dataset. For COCO this is 80. + """ + return 90 + + def has_label(self, label): + """ Return True if label is a known label. + """ + return label in self.labels + + def has_name(self, name): + """ Returns True if name is a known class. + """ + return name in self.classes + + def name_to_label(self, name): + """ Map name to label. + """ + return self.classes[name] + + def label_to_name(self, label): + """ Map label to name. + """ + return self.labels[label] + + def coco_label_to_label(self, coco_label): + """ Map COCO label to the label as used in the network. + COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes. + """ + return self.coco_labels_inverse[coco_label] + + def coco_label_to_name(self, coco_label): + """ Map COCO label to name. + """ + return self.label_to_name(self.coco_label_to_label(coco_label)) + + def label_to_coco_label(self, label): + """ Map label as used by the network to labels as used by COCO. + """ + return self.coco_labels[label] + + def image_aspect_ratio(self, image_index): + """ Compute the aspect ratio for an image with image_index. + """ + image = self.coco.loadImgs(self.image_ids[image_index])[0] + return float(image['width']) / float(image['height']) + + def load_image(self, image_index): + """ + Load an image at the image_index. + """ + # {'license': 2, 'file_name': '000000259765.jpg', 'coco_url': 'http://images.cocodataset.org/test2017/000000259765.jpg', 'height': 480, 'width': 640, 'date_captured': '2013-11-21 04:02:31', 'id': 259765} + image_info = self.coco.loadImgs(self.image_ids[image_index])[0] + path = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name']) + image = cv2.imread(path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + def load_annotations(self, image_index): + """ Load annotations for an image_index. 
+ """ + # get ground truth annotations + annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False) + annotations = {'labels': np.empty((0,), dtype=np.float32), 'bboxes': np.empty((0, 4), dtype=np.float32)} + + # some images appear to miss annotations (like image with id 257034) + if len(annotations_ids) == 0: + return annotations + + # parse annotations + coco_annotations = self.coco.loadAnns(annotations_ids) + for idx, a in enumerate(coco_annotations): + # some annotations have basically no width / height, skip them + if a['bbox'][2] < 1 or a['bbox'][3] < 1: + continue + + annotations['labels'] = np.concatenate( + [annotations['labels'], [a['category_id'] - 1]], axis=0) + annotations['bboxes'] = np.concatenate([annotations['bboxes'], [[ + a['bbox'][0], + a['bbox'][1], + a['bbox'][0] + a['bbox'][2], + a['bbox'][1] + a['bbox'][3], + ]]], axis=0) + + return annotations + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/common.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ce23e0591a21f874cc6b7bc27ff5c11b982dbdaf --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/common.py @@ -0,0 +1,514 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +import numpy as np +import random +import warnings +import cv2 +from tensorflow import keras + +from utils.anchors import anchors_for_shape, anchor_targets_bbox, AnchorParameters + + +class Generator(keras.utils.Sequence): + """ + Abstract generator class. + """ + + def __init__( + self, + phi=0, + image_sizes=(512, 640, 768, 896, 1024, 1280, 1408), + misc_effect=None, + visual_effect=None, + batch_size=1, + group_method='random', # one of 'none', 'random', 'ratio' + shuffle_groups=True, + detect_text=False, + detect_quadrangle=False, + ): + """ + Initialize Generator object. + + Args: + batch_size: The size of the batches to generate. + group_method: Determines how images are grouped together (defaults to 'ratio', one of ('none', 'random', 'ratio')). + shuffle_groups: If True, shuffles the groups each epoch. 
+ image_sizes: + """ + self.misc_effect = misc_effect + self.visual_effect = visual_effect + self.batch_size = int(batch_size) + self.group_method = group_method + self.shuffle_groups = shuffle_groups + self.detect_text = detect_text + self.detect_quadrangle = detect_quadrangle + self.image_size = image_sizes[phi] + self.groups = None + self.anchor_parameters = AnchorParameters.default if not self.detect_text else AnchorParameters( + ratios=(0.25, 0.5, 1., 2.), + sizes=(16, 32, 64, 128, 256)) + self.anchors = anchors_for_shape((self.image_size, self.image_size), anchor_params=self.anchor_parameters) + self.num_anchors = self.anchor_parameters.num_anchors() + + # Define groups + self.group_images() + + # Shuffle when initializing + if self.shuffle_groups: + random.shuffle(self.groups) + + def on_epoch_end(self): + if self.shuffle_groups: + random.shuffle(self.groups) + + def size(self): + """ + Size of the dataset. + """ + raise NotImplementedError('size method not implemented') + + def get_anchors(self): + """ + loads the anchors from a txt file + """ + with open(self.anchors_path) as f: + anchors = f.readline() + anchors = [float(x) for x in anchors.split(',')] + # (N, 2), wh + return np.array(anchors).reshape(-1, 2) + + def num_classes(self): + """ + Number of classes in the dataset. + """ + raise NotImplementedError('num_classes method not implemented') + + def has_label(self, label): + """ + Returns True if label is a known label. + """ + raise NotImplementedError('has_label method not implemented') + + def has_name(self, name): + """ + Returns True if name is a known class. + """ + raise NotImplementedError('has_name method not implemented') + + def name_to_label(self, name): + """ + Map name to label. + """ + raise NotImplementedError('name_to_label method not implemented') + + def label_to_name(self, label): + """ + Map label to name. + """ + raise NotImplementedError('label_to_name method not implemented') + + def image_aspect_ratio(self, image_index): + """ + Compute the aspect ratio for an image with image_index. + """ + raise NotImplementedError('image_aspect_ratio method not implemented') + + def load_image(self, image_index): + """ + Load an image at the image_index. + """ + raise NotImplementedError('load_image method not implemented') + + def load_annotations(self, image_index): + """ + Load annotations for an image_index. + """ + raise NotImplementedError('load_annotations method not implemented') + + def load_annotations_group(self, group): + """ + Load annotations for all images in group. + """ + annotations_group = [self.load_annotations(image_index) for image_index in group] + for annotations in annotations_group: + assert (isinstance(annotations, + dict)), '\'load_annotations\' should return a list of dictionaries, received: {}'.format( + type(annotations)) + assert ( + 'labels' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.' + assert ( + 'bboxes' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.' + + return annotations_group + + def filter_annotations(self, image_group, annotations_group, group): + """ + Filter annotations by removing those that are outside of the image bounds or whose width/height < 0. 
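+
+        Boxes with non-positive width or height, negative coordinates, or coordinates outside the
+        image are removed, and a warning listing the offending boxes is issued for that image.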
+ """ + # test all annotations + for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): + # test x2 < x1 | y2 < y1 | x1 < 0 | y1 < 0 | x2 <= 0 | y2 <= 0 | x2 >= image.shape[1] | y2 >= image.shape[0] + invalid_indices = np.where( + (annotations['bboxes'][:, 2] <= annotations['bboxes'][:, 0]) | + (annotations['bboxes'][:, 3] <= annotations['bboxes'][:, 1]) | + (annotations['bboxes'][:, 0] < 0) | + (annotations['bboxes'][:, 1] < 0) | + (annotations['bboxes'][:, 2] <= 0) | + (annotations['bboxes'][:, 3] <= 0) | + (annotations['bboxes'][:, 2] > image.shape[1]) | + (annotations['bboxes'][:, 3] > image.shape[0]) + )[0] + + # delete invalid indices + if len(invalid_indices): + warnings.warn('Image with id {} (shape {}) contains the following invalid boxes: {}.'.format( + group[index], + image.shape, + annotations['bboxes'][invalid_indices, :] + )) + for k in annotations_group[index].keys(): + annotations_group[index][k] = np.delete(annotations[k], invalid_indices, axis=0) + # if annotations['bboxes'].shape[0] == 0: + # warnings.warn('Image with id {} (shape {}) contains no valid boxes before transform'.format( + # group[index], + # image.shape, + # )) + return image_group, annotations_group + + def clip_transformed_annotations(self, image_group, annotations_group, group): + """ + Filter annotations by removing those that are outside of the image bounds or whose width/height < 0. + """ + # test all annotations + filtered_image_group = [] + filtered_annotations_group = [] + for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): + image_height = image.shape[0] + image_width = image.shape[1] + # x1 + annotations['bboxes'][:, 0] = np.clip(annotations['bboxes'][:, 0], 0, image_width - 2) + # y1 + annotations['bboxes'][:, 1] = np.clip(annotations['bboxes'][:, 1], 0, image_height - 2) + # x2 + annotations['bboxes'][:, 2] = np.clip(annotations['bboxes'][:, 2], 1, image_width - 1) + # y2 + annotations['bboxes'][:, 3] = np.clip(annotations['bboxes'][:, 3], 1, image_height - 1) + # test x2 < x1 | y2 < y1 | x1 < 0 | y1 < 0 | x2 <= 0 | y2 <= 0 | x2 >= image.shape[1] | y2 >= image.shape[0] + small_indices = np.where( + (annotations['bboxes'][:, 2] - annotations['bboxes'][:, 0] < 3) | + (annotations['bboxes'][:, 3] - annotations['bboxes'][:, 1] < 3) + )[0] + + # delete invalid indices + if len(small_indices): + for k in annotations_group[index].keys(): + annotations_group[index][k] = np.delete(annotations[k], small_indices, axis=0) + # import cv2 + # for invalid_index in small_indices: + # x1, y1, x2, y2 = annotations['bboxes'][invalid_index] + # label = annotations['labels'][invalid_index] + # class_name = self.labels[label] + # print('width: {}'.format(x2 - x1)) + # print('height: {}'.format(y2 - y1)) + # cv2.rectangle(image, (int(round(x1)), int(round(y1))), (int(round(x2)), int(round(y2))), (0, 255, 0), 2) + # cv2.putText(image, class_name, (int(round(x1)), int(round(y1))), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 1) + # cv2.namedWindow('image', cv2.WINDOW_NORMAL) + # cv2.imshow('image', image) + # cv2.waitKey(0) + filtered_image_group.append(image) + filtered_annotations_group.append(annotations_group[index]) + + return filtered_image_group, filtered_annotations_group + + def load_image_group(self, group): + """ + Load images for all images in a group. + """ + return [self.load_image(image_index) for image_index in group] + + def random_visual_effect_group_entry(self, image, annotations): + """ + Randomly transforms image and annotation. 
+ """ + # apply visual effect + image = self.visual_effect(image) + return image, annotations + + def random_visual_effect_group(self, image_group, annotations_group): + """ + Randomly apply visual effect on each image. + """ + assert (len(image_group) == len(annotations_group)) + + if self.visual_effect is None: + # do nothing + return image_group, annotations_group + + for index in range(len(image_group)): + # apply effect on a single group entry + image_group[index], annotations_group[index] = self.random_visual_effect_group_entry( + image_group[index], annotations_group[index] + ) + + return image_group, annotations_group + + def random_misc_group_entry(self, image, annotations): + """ + Randomly transforms image and annotation. + """ + # randomly transform both image and annotations + image, annotations = self.misc_effect(image, annotations) + return image, annotations + + def random_misc_group(self, image_group, annotations_group): + """ + Randomly transforms each image and its annotations. + """ + + assert (len(image_group) == len(annotations_group)) + + if self.misc_effect is None: + return image_group, annotations_group + + for index in range(len(image_group)): + # transform a single group entry + image_group[index], annotations_group[index] = self.random_misc_group_entry(image_group[index], + annotations_group[index]) + + return image_group, annotations_group + + def preprocess_group_entry(self, image, annotations): + """ + Preprocess image and its annotations. + """ + + # preprocess the image + image, scale = self.preprocess_image(image) + + # apply resizing to annotations too + annotations['bboxes'] *= scale + if self.detect_quadrangle: + annotations['quadrangles'] *= scale + return image, annotations + + def preprocess_group(self, image_group, annotations_group): + """ + Preprocess each image and its annotations in its group. + """ + assert (len(image_group) == len(annotations_group)) + + for index in range(len(image_group)): + # preprocess a single group entry + image_group[index], annotations_group[index] = self.preprocess_group_entry(image_group[index], + annotations_group[index]) + + return image_group, annotations_group + + def group_images(self): + """ + Order the images according to self.order and makes groups of self.batch_size. + """ + # determine the order of the images + + order = list(range(self.size())) + if self.group_method == 'random': + random.shuffle(order) + elif self.group_method == 'ratio': + order.sort(key=lambda x: self.image_aspect_ratio(x)) + + # divide into groups, one group = one batch + self.groups = [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in + range(0, len(order), self.batch_size)] + + def compute_inputs(self, image_group, annotations_group): + """ + Compute inputs for the network using an image_group. 
+ """ + batch_images = np.array(image_group).astype(np.float32) + return [batch_images] + + def compute_alphas_and_ratios(self, annotations_group): + for i, annotations in enumerate(annotations_group): + quadrangles = annotations['quadrangles'] + alphas = np.zeros((quadrangles.shape[0], 4), dtype=np.float32) + xmin = np.min(quadrangles, axis=1)[:, 0] + ymin = np.min(quadrangles, axis=1)[:, 1] + xmax = np.max(quadrangles, axis=1)[:, 0] + ymax = np.max(quadrangles, axis=1)[:, 1] + # alpha1, alpha2, alpha3, alpha4 + alphas[:, 0] = (quadrangles[:, 0, 0] - xmin) / (xmax - xmin) + alphas[:, 1] = (quadrangles[:, 1, 1] - ymin) / (ymax - ymin) + alphas[:, 2] = (xmax - quadrangles[:, 2, 0]) / (xmax - xmin) + alphas[:, 3] = (ymax - quadrangles[:, 3, 1]) / (ymax - ymin) + annotations['alphas'] = alphas + # ratio + area1 = 0.5 * alphas[:, 0] * (1 - alphas[:, 3]) + area2 = 0.5 * alphas[:, 1] * (1 - alphas[:, 0]) + area3 = 0.5 * alphas[:, 2] * (1 - alphas[:, 1]) + area4 = 0.5 * alphas[:, 3] * (1 - alphas[:, 2]) + annotations['ratios'] = 1 - area1 - area2 - area3 - area4 + + def compute_targets(self, image_group, annotations_group): + """ + Compute target outputs for the network using images and their annotations. + """ + """ + Compute target outputs for the network using images and their annotations. + """ + + batches_targets = anchor_targets_bbox( + self.anchors, + image_group, + annotations_group, + num_classes=self.num_classes(), + detect_quadrangle=self.detect_quadrangle + ) + return list(batches_targets) + + def compute_inputs_targets(self, group, debug=False): + """ + Compute inputs and target outputs for the network. + """ + + # load images and annotations + # list + image_group = self.load_image_group(group) + annotations_group = self.load_annotations_group(group) + + # check validity of annotations + image_group, annotations_group = self.filter_annotations(image_group, annotations_group, group) + + # randomly apply visual effect + image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group) + + # randomly transform data + # image_group, annotations_group = self.random_transform_group(image_group, annotations_group) + + # randomly apply misc effect + image_group, annotations_group = self.random_misc_group(image_group, annotations_group) + + # perform preprocessing steps + image_group, annotations_group = self.preprocess_group(image_group, annotations_group) + + # check validity of annotations + image_group, annotations_group = self.clip_transformed_annotations(image_group, annotations_group, group) + + assert len(image_group) != 0 + assert len(image_group) == len(annotations_group) + + if self.detect_quadrangle: + # compute alphas and ratio for targets + self.compute_alphas_and_ratios(annotations_group) + + # compute network inputs + inputs = self.compute_inputs(image_group, annotations_group) + + # compute network targets + targets = self.compute_targets(image_group, annotations_group) + + if debug: + return inputs, targets, annotations_group + + return inputs, targets + + def __len__(self): + """ + Number of batches for generator. + """ + + return len(self.groups) + + def __getitem__(self, index): + """ + Keras sequence method for generating batches. 
+ """ + group = self.groups[index] + inputs, targets = self.compute_inputs_targets(group) + return inputs, targets + + def preprocess_image(self, image): + # image, RGB + image_height, image_width = image.shape[:2] + if image_height > image_width: + scale = self.image_size / image_height + resized_height = self.image_size + resized_width = int(image_width * scale) + else: + scale = self.image_size / image_width + resized_height = int(image_height * scale) + resized_width = self.image_size + + image = cv2.resize(image, (resized_width, resized_height)) + image = image.astype(np.float32) + image /= 255. + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + image -= mean + image /= std + pad_h = self.image_size - resized_height + pad_w = self.image_size - resized_width + image = np.pad(image, [(0, pad_h), (0, pad_w), (0, 0)], mode='constant') + return image, scale + + def get_augmented_data(self, group): + """ + Compute inputs and target outputs for the network. + """ + + # load images and annotations + # list + image_group = self.load_image_group(group) + annotations_group = self.load_annotations_group(group) + + # check validity of annotations + image_group, annotations_group = self.filter_annotations(image_group, annotations_group, group) + + # randomly apply visual effect + # image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group) + + # randomly transform data + # image_group, annotations_group = self.random_transform_group(image_group, annotations_group) + + # randomly apply misc effect + # image_group, annotations_group = self.random_misc_group(image_group, annotations_group) + + # perform preprocessing steps + image_group, annotations_group = self.preprocess_group(image_group, annotations_group) + + # check validity of annotations + image_group, annotations_group = self.clip_transformed_annotations(image_group, annotations_group, group) + + assert len(image_group) != 0 + assert len(image_group) == len(annotations_group) + + # compute alphas for targets + self.compute_alphas_and_ratios(annotations_group) + + return image_group, annotations_group + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/csv_.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/csv_.py new file mode 100644 index 0000000000000000000000000000000000000000..65eb2cd67e668a7d43d2e87901c6685607d1bff9 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/csv_.py @@ -0,0 +1,369 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from npu_bridge.npu_init import *
+
+from generators.common import Generator
+import cv2
+import numpy as np
+from PIL import Image
+from six import raise_from
+import csv
+import sys
+import os.path as osp
+from collections import OrderedDict
+
+
+def _parse(value, function, fmt):
+ """
+ Parse a string into a value, and format a nice ValueError if it fails.
+
+ Returns `function(value)`.
+ Any `ValueError` raised is caught and a new `ValueError` is raised
+ with message `fmt.format(e)`, where `e` is the original `ValueError`.
+ """
+ try:
+ return function(value)
+ except ValueError as e:
+ raise_from(ValueError(fmt.format(e)), None)
+
+
+def _read_classes(csv_reader):
+ """
+ Parse the classes file given by csv_reader.
+ """
+ result = OrderedDict()
+ for line, row in enumerate(csv_reader):
+ line += 1
+
+ try:
+ class_name, class_id = row
+ except ValueError:
+ raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None)
+ class_id = _parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
+
+ if class_name in result:
+ raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
+ result[class_name] = class_id
+ return result
+
+
+def _read_quadrangle_annotations(csv_reader, classes, detect_text=False):
+ """
+ Read annotations from the csv_reader.
+ Args:
+ csv_reader: csv reader of args.annotations_path
+ classes: dict mapping the class names read from args.classes_path to their ids
+ detect_text: if True, rows labelled '###' are skipped and every other row is mapped to the 'text' class
+
+ Returns:
+ result: dict of the form {image_path: [{'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
+ 'x3': x3, 'y3': y3, 'x4': x4, 'y4': y4, 'class': class_name}]}
+
+ """
+ result = OrderedDict()
+ for line, row in enumerate(csv_reader, 1):
+ try:
+ img_file, x1, y1, x2, y2, x3, y3, x4, y4, class_name = row[:10]
+ if img_file not in result:
+ result[img_file] = []
+
+ # If a row contains only an image path, it's an image without annotations.
+ if (x1, y1, x2, y2, x3, y3, x4, y4, class_name) == ('', '', '', '', '', '', '', '', ''): + continue + + x1 = _parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) + y1 = _parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) + x2 = _parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) + y2 = _parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) + x3 = _parse(x3, int, 'line {}: malformed x3: {{}}'.format(line)) + y3 = _parse(y3, int, 'line {}: malformed y3: {{}}'.format(line)) + x4 = _parse(x4, int, 'line {}: malformed x4: {{}}'.format(line)) + y4 = _parse(y4, int, 'line {}: malformed y4: {{}}'.format(line)) + + # check if the current class name is correctly present + if detect_text: + if class_name == '###': + continue + else: + class_name = 'text' + + if class_name not in classes: + raise ValueError(f'line {line}: unknown class name: \'{class_name}\' (classes: {classes})') + + result[img_file].append({'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2, + 'x3': x3, 'y3': y3, 'x4': x4, 'y4': y4, 'class': class_name}) + except ValueError: + raise_from(ValueError( + f'line {line}: format should be \'img_file,x1,y1,x2,y2,x3,y3,x4,y4,class_name\' or \'img_file,,,,,\''), + None) + + return result + + +def _read_annotations(csv_reader, classes): + """ + Read annotations from the csv_reader. + Args: + csv_reader: csv reader of args.annotations_path + classes: list[str] all the class names read from args.classes_path + + Returns: + result: dict, dict is like {image_path: [{'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2, 'class': class_name}]} + + """ + result = OrderedDict() + for line, row in enumerate(csv_reader, 1): + try: + img_file, x1, y1, x2, y2, class_name = row[:10] + if img_file not in result: + result[img_file] = [] + + # If a row contains only an image path, it's an image without annotations. + if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''): + continue + + x1 = _parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) + y1 = _parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) + x2 = _parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) + y2 = _parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) + + if class_name not in classes: + raise ValueError(f'line {line}: unknown class name: \'{class_name}\' (classes: {classes})') + + result[img_file].append({'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2, 'class': class_name}) + except ValueError: + raise_from(ValueError( + f'line {line}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''), + None) + + return result + + +def _open_for_csv(path): + """ + Open a file with flags suitable for csv.reader. + + This is different for python2 it means with mode 'rb', for python3 this means 'r' with "universal newlines". + """ + if sys.version_info[0] < 3: + return open(path, 'rb') + else: + return open(path, 'r', newline='') + + +class CSVGenerator(Generator): + """ + Generate data for a custom CSV dataset. + + See https://github.com/fizyr/keras-retinanet#csv-datasets for more information. + """ + + def __init__( + self, + csv_data_file, + csv_class_file, + base_dir=None, + detect_quadrangle=False, + detect_text=False, + **kwargs + ): + """ + Initialize a CSV data generator. + + Args + csv_data_file: Path to the CSV annotations file. + csv_class_file: Path to the CSV classes file. + detect_text: if do text detection + base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file). 
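+ detect_quadrangle: if True, annotations are read as four-point quadrangles rather than axis-aligned boxes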
+ """ + self.image_names = [] + self.image_data = {} + self.base_dir = base_dir + self.detect_quadrangle = detect_quadrangle + self.detect_text = detect_text + + # Take base_dir from annotations file if not explicitly specified. + if self.base_dir is None: + if osp.exists(csv_data_file): + self.base_dir = '' + else: + self.base_dir = osp.dirname(csv_data_file) + + # parse the provided class file + try: + with _open_for_csv(csv_class_file) as file: + # class_name --> class_id + self.classes = _read_classes(csv.reader(file, delimiter=',')) + except ValueError as e: + raise_from(ValueError('invalid CSV class file: {}: {}'.format(csv_class_file, e)), None) + + self.labels = {} + # class_id --> class_name + for key, value in self.classes.items(): + self.labels[value] = key + + # csv with img_path, x1, y1, x2, y2, x3, y3, x4, y4, class_name + try: + with _open_for_csv(csv_data_file) as file: + # {'img_path1':[{'x1':xx,'y1':xx,'x2':xx,'y2':xx,'x3':xx,'y3':xx,'x4':xx,'y4':xx, 'class':xx}...],...} + if self.detect_quadrangle: + self.image_data = _read_quadrangle_annotations(csv.reader(file, delimiter=','), self.classes, + self.detect_text) + else: + self.image_data = _read_annotations(csv.reader(file, delimiter=','), self.classes) + except ValueError as e: + raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(csv_data_file, e)), None) + self.image_names = list(self.image_data.keys()) + + super(CSVGenerator, self).__init__(detect_text=detect_text, detect_quadrangle=detect_quadrangle, **kwargs) + + def size(self): + """ + Size of the dataset. + """ + return len(self.image_names) + + def num_classes(self): + """ + Number of classes in the dataset. + """ + return max(self.classes.values()) + 1 + + def has_label(self, label): + """ + Return True if label is a known label. + """ + return label in self.labels + + def has_name(self, name): + """ + Returns True if name is a known class. + """ + return name in self.classes + + def name_to_label(self, name): + """ + Map name to label. + """ + return self.classes[name] + + def label_to_name(self, label): + """ + Map label to name. + """ + return self.labels[label] + + def image_path(self, image_index): + """ + Returns the image path for image_index. + """ + return osp.join(self.base_dir, self.image_names[image_index]) + + def image_aspect_ratio(self, image_index): + """ + Compute the aspect ratio for an image with image_index. + """ + # PIL is fast for metadata + image = Image.open(self.image_path(image_index)) + return float(image.width) / float(image.height) + + def load_image(self, image_index): + """ + Load an image at the image_index. + """ + image = cv2.imread(self.image_path(image_index)) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + def load_annotations(self, image_index): + """ + Load annotations for an image_index. 
+ """ + path = self.image_names[image_index] + annotations = {'labels': np.empty((0,), dtype=np.int32), + 'bboxes': np.empty((0, 4), dtype=np.float32), + 'quadrangles': np.empty((0, 4, 2), dtype=np.float32), + } + + for idx, annot in enumerate(self.image_data[path]): + annotations['labels'] = np.concatenate((annotations['labels'], [self.name_to_label(annot['class'])])) + if self.detect_quadrangle: + quadrangle = np.array([[float(annot['x1']), float(annot['y1'])], + [float(annot['x2']), float(annot['y2'])], + [float(annot['x3']), float(annot['y3'])], + [float(annot['x4']), float(annot['y4'])]]) + ordered_quadrangle = self.reorder_vertexes(quadrangle) + annotations['quadrangles'] = np.concatenate((annotations['quadrangles'], ordered_quadrangle[None])) + annotations['bboxes'] = np.concatenate((annotations['bboxes'], [[ + float(min(annot['x1'], annot['x2'], annot['x3'], annot['x4'])), + float(min(annot['y1'], annot['y2'], annot['y3'], annot['y4'])), + float(max(annot['x1'], annot['x2'], annot['x3'], annot['x4'])), + float(max(annot['y1'], annot['y2'], annot['y3'], annot['y4'])), + ]])) + else: + annotations['bboxes'] = np.concatenate((annotations['bboxes'], [[ + float(annot['x1']), + float(annot['y1']), + float(annot['x2']), + float(annot['y2']), + ]])) + return annotations + + def reorder_vertexes(self, vertexes): + """ + reorder vertexes as the paper shows, (top, right, bottom, left) + Args: + vertexes: + + Returns: + + """ + assert vertexes.shape == (4, 2) + xmin, ymin = np.min(vertexes, axis=0) + xmax, ymax = np.max(vertexes, axis=0) + + # determine the first point with the smallest y, + # if two vertexes has same y, choose that with smaller x, + ordered_idxes = np.argsort(vertexes, axis=0) + ymin1_idx = ordered_idxes[0, 1] + ymin2_idx = ordered_idxes[1, 1] + if vertexes[ymin1_idx, 1] == vertexes[ymin2_idx, 1]: + if vertexes[ymin1_idx, 0] <= vertexes[ymin2_idx, 0]: + first_vertex_idx = ymin1_idx + else: + first_vertex_idx = ymin2_idx + else: + first_vertex_idx = ymin1_idx + ordered_idxes = [(first_vertex_idx + i) % 4 for i in range(4)] + ordered_vertexes = vertexes[ordered_idxes] + # drag the point to the corresponding edge + ordered_vertexes[0, 1] = ymin + ordered_vertexes[1, 0] = xmax + ordered_vertexes[2, 1] = ymax + ordered_vertexes[3, 0] = xmin + return ordered_vertexes + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/pascal.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/pascal.py new file mode 100644 index 0000000000000000000000000000000000000000..ab94140b9c968ae8c85314b80bea7f194477aba6 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /generators/pascal.py @@ -0,0 +1,297 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +from generators.common import Generator +import os +os.system("pwd") +os.system("ls") +import numpy as np +from six import raise_from +import cv2 +import xml.etree.ElementTree as ET + +voc_classes = { + 'aeroplane': 0, + 'bicycle': 1, + 'bird': 2, + 'boat': 3, + 'bottle': 4, + 'bus': 5, + 'car': 6, + 'cat': 7, + 'chair': 8, + 'cow': 9, + 'diningtable': 10, + 'dog': 11, + 'horse': 12, + 'motorbike': 13, + 'person': 14, + 'pottedplant': 15, + 'sheep': 16, + 'sofa': 17, + 'train': 18, + 'tvmonitor': 19 +} + + +def _findNode(parent, name, debug_name=None, parse=None): + if debug_name is None: + debug_name = name + + result = parent.find(name) + if result is None: + raise ValueError('missing element \'{}\''.format(debug_name)) + if parse is not None: + try: + return parse(result.text) + except ValueError as e: + raise_from(ValueError('illegal value for \'{}\': {}'.format(debug_name, e)), None) + return result + + +class PascalVocGenerator(Generator): + """ + Generate data for a Pascal VOC dataset. + + See http://host.robots.ox.ac.uk/pascal/VOC/ for more information. + """ + + def __init__( + self, + data_dir, + set_name, + classes=voc_classes, + image_extension='.jpg', + skip_truncated=False, + skip_difficult=False, + **kwargs + ): + """ + Initialize a Pascal VOC data generator. + + Args: + data_dir: the path of directory which contains ImageSets directory + set_name: test|trainval|train|val + classes: class names tos id mapping + image_extension: image filename ext + skip_truncated: + skip_difficult: + **kwargs: + """ + self.data_dir = data_dir + self.set_name = set_name + self.classes = classes + + self.image_names = [l.strip().split(None, 1)[0] for l in + open(os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')).readlines()] + self.image_extension = image_extension + self.skip_truncated = skip_truncated + self.skip_difficult = skip_difficult + # class ids to names mapping + self.labels = {} + for key, value in self.classes.items(): + self.labels[value] = key + + super(PascalVocGenerator, self).__init__(**kwargs) + + def size(self): + """ + Size of the dataset. + """ + return len(self.image_names) + + def num_classes(self): + """ + Number of classes in the dataset. + """ + return len(self.classes) + + def has_label(self, label): + """ + Return True if label is a known label. + """ + return label in self.labels + + def has_name(self, name): + """ + Returns True if name is a known class. + """ + return name in self.classes + + def name_to_label(self, name): + """ + Map name to label. + """ + return self.classes[name] + + def label_to_name(self, label): + """ + Map label to name. + """ + return self.labels[label] + + def image_aspect_ratio(self, image_index): + """ + Compute the aspect ratio for an image with image_index. 
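+ The image is read from the JPEGImages directory with OpenCV and the ratio is width / height.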
+ """ + path = os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension) + image = cv2.imread(path) + h, w = image.shape[:2] + return float(w) / float(h) + + def load_image(self, image_index): + """ + Load an image at the image_index. + """ + path = os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension) + image = cv2.imread(path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + def __parse_annotation(self, element): + """ + Parse an annotation given an XML element. + """ + truncated = _findNode(element, 'truncated', parse=int) + difficult = _findNode(element, 'difficult', parse=int) + + class_name = _findNode(element, 'name').text + if class_name not in self.classes: + raise ValueError('class name \'{}\' not found in classes: {}'.format(class_name, list(self.classes.keys()))) + + box = np.zeros((4,)) + label = self.name_to_label(class_name) + + bndbox = _findNode(element, 'bndbox') + box[0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1 + box[1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1 + box[2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1 + box[3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1 + + return truncated, difficult, box, label + + def __parse_annotations(self, xml_root): + """ + Parse all annotations under the xml_root. + """ + annotations = {'labels': np.empty((0,), dtype=np.int32), + 'bboxes': np.empty((0, 4))} + for i, element in enumerate(xml_root.iter('object')): + try: + truncated, difficult, box, label = self.__parse_annotation(element) + except ValueError as e: + raise_from(ValueError('could not parse object #{}: {}'.format(i, e)), None) + + if truncated and self.skip_truncated: + continue + if difficult and self.skip_difficult: + continue + + annotations['bboxes'] = np.concatenate([annotations['bboxes'], [box]]) + annotations['labels'] = np.concatenate([annotations['labels'], [label]]) + + return annotations + + def load_annotations(self, image_index): + """ + Load annotations for an image_index. + """ + filename = self.image_names[image_index] + '.xml' + try: + tree = ET.parse(os.path.join(self.data_dir, 'Annotations', filename)) + return self.__parse_annotations(tree.getroot()) + except ET.ParseError as e: + raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None) + except ValueError as e: + raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None) + + +if __name__ == '__main__': + train_generator = PascalVocGenerator( + '/cache/VOCdevkit/VOC2012', + 'train', + phi=0, + skip_difficult=True, + batch_size=1, + misc_effect=None, + visual_effect=None, + ) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + anchors = train_generator.anchors + for batch_inputs, batch_targets in train_generator: + image = batch_inputs[0][0] + image[..., 0] *= std[0] + image[..., 1] *= std[1] + image[..., 2] *= std[2] + image[..., 0] += mean[0] + image[..., 1] += mean[1] + image[..., 2] += mean[2] + image *= 255. 
+ + regression = batch_targets[0][0] + valid_ids = np.where(regression[:, -1] == 1)[0] + boxes = anchors[valid_ids] + deltas = regression[valid_ids] + class_ids = np.argmax(batch_targets[1][0][valid_ids], axis=-1) + mean_ = [0, 0, 0, 0] + std_ = [0.2, 0.2, 0.2, 0.2] + + width = boxes[:, 2] - boxes[:, 0] + height = boxes[:, 3] - boxes[:, 1] + + x1 = boxes[:, 0] + (deltas[:, 0] * std_[0] + mean_[0]) * width + y1 = boxes[:, 1] + (deltas[:, 1] * std_[1] + mean_[1]) * height + x2 = boxes[:, 2] + (deltas[:, 2] * std_[2] + mean_[2]) * width + y2 = boxes[:, 3] + (deltas[:, 3] * std_[3] + mean_[3]) * height + for x1_, y1_, x2_, y2_, class_id in zip(x1, y1, x2, y2, class_ids): + x1_, y1_, x2_, y2_ = int(x1_), int(y1_), int(x2_), int(y2_) + cv2.rectangle(image, (x1_, y1_), (x2_, y2_), (0, 255, 0), 2) + class_name = train_generator.labels[class_id] + label = class_name + ret, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.3, 1) + cv2.rectangle(image, (x1_, y2_ - ret[1] - baseline), (x1_ + ret[0], y2_), (255, 255, 255), -1) + cv2.putText(image, label, (x1_, y2_ - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + cv2.imshow('image', image.astype(np.uint8)[..., ::-1]) + cv2.waitKey(0) + # 36864, 46080, 48384, 48960, 49104 + # if first_valid_id < 36864: + # stride = 8 + # elif 36864 <= first_valid_id < 46080: + # stride = 16 + # elif 46080 <= first_valid_id < 48384: + # stride = 32 + # elif 48384 <= first_valid_id < 48960: + # stride = 64 + # else: + # stride = 128 + pass + + + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /initializers.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /initializers.py new file mode 100644 index 0000000000000000000000000000000000000000..87b54577a31457c906eea922615f4ff2fa19ef13 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /initializers.py @@ -0,0 +1,54 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +# import keras +from tensorflow import keras + +import numpy as np +import math + + +class PriorProbability(keras.initializers.Initializer): + """ Apply a prior probability to the weights. 
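+
+ The weights are filled with -log((1 - probability) / probability), so a classification
+ head whose bias uses this initializer starts out predicting the prior probability
+ (about -4.6 for the default of 0.01), as in the RetinaNet focal-loss setup.
+ A typical, illustrative use is `bias_initializer=PriorProbability(probability=0.01)`
+ on the final class-score convolution.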
+ """ + + def __init__(self, probability=0.01): + self.probability = probability + + def get_config(self): + return { + 'probability': self.probability + } + + def __call__(self, shape, dtype=None): + # set bias to -log((1 - p)/p) for foreground + result = np.ones(shape, dtype=np.float32) * -math.log((1 - self.probability) / self.probability) + + return result + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /layers.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /layers.py new file mode 100644 index 0000000000000000000000000000000000000000..02b400db22e451f86a83959a36ba299fe5408110 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /layers.py @@ -0,0 +1,407 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from npu_bridge.npu_init import * +from tensorflow import keras +import tensorflow as tf + + +class BatchNormalization(keras.layers.BatchNormalization): + """ + Identical to keras.layers.BatchNormalization, but adds the option to freeze parameters. 
+ """ + + def __init__(self, freeze, *args, **kwargs): + self.freeze = freeze + super(BatchNormalization, self).__init__(*args, **kwargs) + + # set to non-trainable if freeze is true + self.trainable = not self.freeze + + def call(self, inputs, training=None, **kwargs): + # return super.call, but set training + if not training: + return super(BatchNormalization, self).call(inputs, training=False) + else: + return super(BatchNormalization, self).call(inputs, training=(not self.freeze)) + + def get_config(self): + config = super(BatchNormalization, self).get_config() + config.update({'freeze': self.freeze}) + return config + + +class wBiFPNAdd(keras.layers.Layer): + def __init__(self, epsilon=1e-4, **kwargs): + super(wBiFPNAdd, self).__init__(**kwargs) + self.epsilon = epsilon + + def build(self, input_shape): + num_in = len(input_shape) + self.w = self.add_weight(name=self.name, + shape=(num_in,), + initializer=keras.initializers.constant(1 / num_in), + trainable=True, + dtype=tf.float32) + + def call(self, inputs, **kwargs): + w = keras.activations.relu(self.w) + x = tf.reduce_sum([w[i] * inputs[i] for i in range(len(inputs))], axis=0) + x = x / (tf.reduce_sum(w) + self.epsilon) + return x + + def compute_output_shape(self, input_shape): + return input_shape[0] + + def get_config(self): + config = super(wBiFPNAdd, self).get_config() + config.update({ + 'epsilon': self.epsilon + }) + return config + + +def bbox_transform_inv(boxes, deltas, scale_factors=None): + cxa = (boxes[..., 0] + boxes[..., 2]) / 2 + cya = (boxes[..., 1] + boxes[..., 3]) / 2 + wa = boxes[..., 2] - boxes[..., 0] + ha = boxes[..., 3] - boxes[..., 1] + ty, tx, th, tw = deltas[..., 0], deltas[..., 1], deltas[..., 2], deltas[..., 3] + if scale_factors: + ty *= scale_factors[0] + tx *= scale_factors[1] + th *= scale_factors[2] + tw *= scale_factors[3] + w = tf.exp(tw) * wa + h = tf.exp(th) * ha + cy = ty * ha + cya + cx = tx * wa + cxa + ymin = cy - h / 2. + xmin = cx - w / 2. + ymax = cy + h / 2. + xmax = cx + w / 2. + return tf.stack([xmin, ymin, xmax, ymax], axis=-1) + + +class ClipBoxes(keras.layers.Layer): + def call(self, inputs, **kwargs): + image, boxes = inputs + shape = keras.backend.cast(keras.backend.shape(image), keras.backend.floatx()) + height = shape[1] + width = shape[2] + x1 = tf.clip_by_value(boxes[:, :, 0], 0, width - 1) + y1 = tf.clip_by_value(boxes[:, :, 1], 0, height - 1) + x2 = tf.clip_by_value(boxes[:, :, 2], 0, width - 1) + y2 = tf.clip_by_value(boxes[:, :, 3], 0, height - 1) + + return keras.backend.stack([x1, y1, x2, y2], axis=2) + + def compute_output_shape(self, input_shape): + return input_shape[1] + + +class RegressBoxes(keras.layers.Layer): + def __init__(self, *args, **kwargs): + super(RegressBoxes, self).__init__(*args, **kwargs) + + def call(self, inputs, **kwargs): + anchors, regression = inputs + return bbox_transform_inv(anchors, regression) + + def compute_output_shape(self, input_shape): + return input_shape[0] + + def get_config(self): + config = super(RegressBoxes, self).get_config() + return config + + +def filter_detections( + boxes, + classification, + alphas=None, + ratios=None, + class_specific_filter=True, + nms=True, + score_threshold=0.01, + max_detections=100, + nms_threshold=0.5, + detect_quadrangle=False, +): + """ + Filter detections using the boxes and classification values. + + Args + boxes: Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format. + classification: Tensor of shape (num_boxes, num_classes) containing the classification scores. 
+ other: List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores. + class_specific_filter: Whether to perform filtering per class, or take the best scoring class and filter those. + nms: Flag to enable/disable non maximum suppression. + score_threshold: Threshold used to prefilter the boxes with. + max_detections: Maximum number of detections to keep. + nms_threshold: Threshold for the IoU value to determine when a box should be suppressed. + + Returns + A list of [boxes, scores, labels, other[0], other[1], ...]. + boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes. + scores is shaped (max_detections,) and contains the scores of the predicted class. + labels is shaped (max_detections,) and contains the predicted label. + other[i] is shaped (max_detections, ...) and contains the filtered other[i] data. + In case there are less than max_detections detections, the tensors are padded with -1's. + """ + + def _filter_detections(scores_, labels_): + # threshold based on score + # (num_score_keeps, 1) + indices_ = tf.where(keras.backend.greater(scores_, score_threshold)) + + if nms: + # (num_score_keeps, 4) + filtered_boxes = tf.gather_nd(boxes, indices_) + # In [4]: scores = np.array([0.1, 0.5, 0.4, 0.2, 0.7, 0.2]) + # In [5]: tf.greater(scores, 0.4) + # Out[5]: + # In [6]: tf.where(tf.greater(scores, 0.4)) + # Out[6]: + # + # + # In [7]: tf.gather(scores, tf.where(tf.greater(scores, 0.4))) + # Out[7]: + # + filtered_scores = keras.backend.gather(scores_, indices_)[:, 0] + + # perform NMS + # filtered_boxes = tf.concat([filtered_boxes[..., 1:2], filtered_boxes[..., 0:1], + # filtered_boxes[..., 3:4], filtered_boxes[..., 2:3]], axis=-1) + nms_indices = tf.image.non_max_suppression(filtered_boxes, filtered_scores, max_output_size=max_detections, + iou_threshold=nms_threshold) + + # filter indices based on NMS + # (num_score_nms_keeps, 1) + indices_ = keras.backend.gather(indices_, nms_indices) + + # add indices to list of all indices + # (num_score_nms_keeps, ) + labels_ = tf.gather_nd(labels_, indices_) + # (num_score_nms_keeps, 2) + indices_ = keras.backend.stack([indices_[:, 0], labels_], axis=1) + + return indices_ + + if class_specific_filter: + all_indices = [] + # perform per class filtering + for c in range(int(classification.shape[1])): + scores = classification[:, c] + labels = c * tf.ones((keras.backend.shape(scores)[0],), dtype='int64') + all_indices.append(_filter_detections(scores, labels)) + + # concatenate indices to single tensor + # (concatenated_num_score_nms_keeps, 2) + indices = keras.backend.concatenate(all_indices, axis=0) + else: + scores = keras.backend.max(classification, axis=1) + labels = keras.backend.argmax(classification, axis=1) + indices = _filter_detections(scores, labels) + + # select top k + scores = tf.gather_nd(classification, indices) + labels = indices[:, 1] + scores, top_indices = tf.nn.top_k(scores, k=keras.backend.minimum(max_detections, keras.backend.shape(scores)[0])) + + # filter input using the final set of indices + indices = keras.backend.gather(indices[:, 0], top_indices) + boxes = keras.backend.gather(boxes, indices) + labels = keras.backend.gather(labels, top_indices) + + # zero pad the outputs + pad_size = keras.backend.maximum(0, max_detections - keras.backend.shape(scores)[0]) + boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1) + scores = tf.pad(scores, [[0, pad_size]], constant_values=-1) + labels = tf.pad(labels, [[0, pad_size]], 
constant_values=-1) + labels = keras.backend.cast(labels, 'int32') + + # set shapes, since we know what they are + boxes.set_shape([max_detections, 4]) + scores.set_shape([max_detections]) + labels.set_shape([max_detections]) + + if detect_quadrangle: + alphas = keras.backend.gather(alphas, indices) + ratios = keras.backend.gather(ratios, indices) + alphas = tf.pad(alphas, [[0, pad_size], [0, 0]], constant_values=-1) + ratios = tf.pad(ratios, [[0, pad_size]], constant_values=-1) + alphas.set_shape([max_detections, 4]) + ratios.set_shape([max_detections]) + return [boxes, scores, alphas, ratios, labels] + else: + return [boxes, scores, labels] + + +class FilterDetections(keras.layers.Layer): + """ + Keras layer for filtering detections using score threshold and NMS. + """ + + def __init__( + self, + nms=True, + class_specific_filter=True, + nms_threshold=0.5, + score_threshold=0.01, + max_detections=100, + parallel_iterations=32, + detect_quadrangle=False, + **kwargs + ): + """ + Filters detections using score threshold, NMS and selecting the top-k detections. + + Args + nms: Flag to enable/disable NMS. + class_specific_filter: Whether to perform filtering per class, or take the best scoring class and filter those. + nms_threshold: Threshold for the IoU value to determine when a box should be suppressed. + score_threshold: Threshold used to prefilter the boxes with. + max_detections: Maximum number of detections to keep. + parallel_iterations: Number of batch items to process in parallel. + """ + self.nms = nms + self.class_specific_filter = class_specific_filter + self.nms_threshold = nms_threshold + self.score_threshold = score_threshold + self.max_detections = max_detections + self.parallel_iterations = parallel_iterations + self.detect_quadrangle = detect_quadrangle + super(FilterDetections, self).__init__(**kwargs) + + def call(self, inputs, **kwargs): + """ + Constructs the NMS graph. + + Args + inputs : List of [boxes, classification, other[0], other[1], ...] tensors. + """ + boxes = inputs[0] + classification = inputs[1] + if self.detect_quadrangle: + alphas = inputs[2] + ratios = inputs[3] + + # wrap nms with our parameters + def _filter_detections(args): + boxes_ = args[0] + classification_ = args[1] + alphas_ = args[2] if self.detect_quadrangle else None + ratios_ = args[3] if self.detect_quadrangle else None + + return filter_detections( + boxes_, + classification_, + alphas_, + ratios_, + nms=self.nms, + class_specific_filter=self.class_specific_filter, + score_threshold=self.score_threshold, + max_detections=self.max_detections, + nms_threshold=self.nms_threshold, + detect_quadrangle=self.detect_quadrangle, + ) + + # call filter_detections on each batch item + if self.detect_quadrangle: + outputs = tf.map_fn( + _filter_detections, + elems=[boxes, classification, alphas, ratios], + dtype=['float32', 'float32', 'float32', 'float32', 'int32'], + parallel_iterations=self.parallel_iterations + ) + else: + outputs = tf.map_fn( + _filter_detections, + elems=[boxes, classification], + dtype=['float32', 'float32', 'int32'], + parallel_iterations=self.parallel_iterations + ) + + return outputs + + def compute_output_shape(self, input_shape): + """ + Computes the output shapes given the input shapes. + + Args + input_shape : List of input shapes [boxes, classification]. + + Returns + List of tuples representing the output shapes: + [filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape, filtered_other[0].shape, filtered_other[1].shape, ...] 
+ """ + if self.detect_quadrangle: + return [ + (input_shape[0][0], self.max_detections, 4), + (input_shape[1][0], self.max_detections), + (input_shape[1][0], self.max_detections, 4), + (input_shape[1][0], self.max_detections), + (input_shape[1][0], self.max_detections), + ] + else: + return [ + (input_shape[0][0], self.max_detections, 4), + (input_shape[1][0], self.max_detections), + (input_shape[1][0], self.max_detections), + ] + + def compute_mask(self, inputs, mask=None): + """ + This is required in Keras when there is more than 1 output. + """ + return (len(inputs) + 1) * [None] + + def get_config(self): + """ + Gets the configuration of this layer. + + Returns + Dictionary containing the parameters of this layer. + """ + config = super(FilterDetections, self).get_config() + config.update({ + 'nms': self.nms, + 'class_specific_filter': self.class_specific_filter, + 'nms_threshold': self.nms_threshold, + 'score_threshold': self.score_threshold, + 'max_detections': self.max_detections, + 'parallel_iterations': self.parallel_iterations, + }) + + return config + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /losses.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /losses.py new file mode 100644 index 0000000000000000000000000000000000000000..70a6a9e34183139ece67157a5793101e8cef8dc3 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /losses.py @@ -0,0 +1,202 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.""" + +from npu_bridge.npu_init import * + +# import keras +from tensorflow import keras +import tensorflow as tf + + +def focal(alpha=0.25, gamma=1.5): + """ + Create a functor for computing the focal loss. + + Args + alpha: Scale the focal weight with alpha. + gamma: Take the power of the focal weight with gamma. + + Returns + A functor that computes the focal loss using the alpha and gamma. + """ + + def _focal(y_true, y_pred): + """ + Compute the focal loss given the target tensor and the predicted tensor. + + As defined in https://arxiv.org/abs/1708.02002 + + Args + y_true: Tensor of target data from the generator with shape (B, N, num_classes). + y_pred: Tensor of predicted data from the network with shape (B, N, num_classes). + + Returns + The focal loss of y_pred w.r.t. y_true. 
+ """ + labels = y_true[:, :, :-1] + # -1 for ignore, 0 for background, 1 for object + anchor_state = y_true[:, :, -1] + classification = y_pred + + # filter out "ignore" anchors + indices = tf.where(keras.backend.not_equal(anchor_state, -1)) + labels = tf.gather_nd(labels, indices) + classification = tf.gather_nd(classification, indices) + + # compute the focal loss + alpha_factor = keras.backend.ones_like(labels) * alpha + alpha_factor = tf.where(keras.backend.equal(labels, 1), alpha_factor, 1 - alpha_factor) + # (1 - 0.99) ** 2 = 1e-4, (1 - 0.9) ** 2 = 1e-2 + focal_weight = tf.where(keras.backend.equal(labels, 1), 1 - classification, classification) + focal_weight = alpha_factor * focal_weight ** gamma + cls_loss = focal_weight * keras.backend.binary_crossentropy(labels, classification) + + # compute the normalizer: the number of positive anchors + normalizer = tf.where(keras.backend.equal(anchor_state, 1)) + normalizer = keras.backend.cast(keras.backend.shape(normalizer)[0], keras.backend.floatx()) + normalizer = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), normalizer) + + return keras.backend.sum(cls_loss) / normalizer + + return _focal + + +def smooth_l1(sigma=3.0): + """ + Create a smooth L1 loss functor. + Args + sigma: This argument defines the point where the loss changes from L2 to L1. + Returns + A functor for computing the smooth L1 loss given target data and predicted data. + """ + sigma_squared = sigma ** 2 + + def _smooth_l1(y_true, y_pred): + """ Compute the smooth L1 loss of y_pred w.r.t. y_true. + Args + y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive). + y_pred: Tensor from the network of shape (B, N, 4). + Returns + The smooth L1 loss of y_pred w.r.t. y_true. + """ + # separate target and state + regression = y_pred + regression_target = y_true[:, :, :-1] + anchor_state = y_true[:, :, -1] + + # filter out "ignore" anchors + indices = tf.where(keras.backend.equal(anchor_state, 1)) + regression = tf.gather_nd(regression, indices) + regression_target = tf.gather_nd(regression_target, indices) + + # compute smooth L1 loss + # f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma + # |x| - 0.5 / sigma / sigma otherwise + regression_diff = regression - regression_target + regression_diff = keras.backend.abs(regression_diff) + regression_loss = tf.where( + keras.backend.less(regression_diff, 1.0 / sigma_squared), + 0.5 * sigma_squared * keras.backend.pow(regression_diff, 2), + regression_diff - 0.5 / sigma_squared + ) + + # compute the normalizer: the number of positive anchors + normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0]) + normalizer = keras.backend.cast(normalizer, dtype=keras.backend.floatx()) + return keras.backend.sum(regression_loss) / normalizer + + return _smooth_l1 + + +def smooth_l1_quad(sigma=3.0): + """ + Create a smooth L1 loss functor. + + Args + sigma: This argument defines the point where the loss changes from L2 to L1. + + Returns + A functor for computing the smooth L1 loss given target data and predicted data. + """ + sigma_squared = sigma ** 2 + + def _smooth_l1(y_true, y_pred): + """ Compute the smooth L1 loss of y_pred w.r.t. y_true. + + Args + y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive). + y_pred: Tensor from the network of shape (B, N, 4). + + Returns + The smooth L1 loss of y_pred w.r.t. y_true. 
+ """ + # separate target and state + regression = y_pred + regression = tf.concat([regression[..., :4], tf.sigmoid(regression[..., 4:9])], axis=-1) + regression_target = y_true[:, :, :-1] + anchor_state = y_true[:, :, -1] + + # filter out "ignore" anchors + indices = tf.where(keras.backend.equal(anchor_state, 1)) + regression = tf.gather_nd(regression, indices) + regression_target = tf.gather_nd(regression_target, indices) + + # compute smooth L1 loss + # f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma + # |x| - 0.5 / sigma / sigma otherwise + regression_diff = regression - regression_target + regression_diff = keras.backend.abs(regression_diff) + box_regression_loss = tf.where( + keras.backend.less(regression_diff[..., :4], 1.0 / sigma_squared), + 0.5 * sigma_squared * keras.backend.pow(regression_diff[..., :4], 2), + regression_diff[..., :4] - 0.5 / sigma_squared + ) + + alpha_regression_loss = tf.where( + keras.backend.less(regression_diff[..., 4:8], 1.0 / sigma_squared), + 0.5 * sigma_squared * keras.backend.pow(regression_diff[..., 4:8], 2), + regression_diff[..., 4:8] - 0.5 / sigma_squared + ) + + ratio_regression_loss = tf.where( + keras.backend.less(regression_diff[..., 8], 1.0 / sigma_squared), + 0.5 * sigma_squared * keras.backend.pow(regression_diff[..., 8], 2), + regression_diff[..., 8] - 0.5 / sigma_squared + ) + # compute the normalizer: the number of positive anchors + normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0]) + normalizer = keras.backend.cast(normalizer, dtype=keras.backend.floatx()) + + box_regression_loss = tf.reduce_sum(box_regression_loss) / normalizer + alpha_regression_loss = tf.reduce_sum(alpha_regression_loss) / normalizer + ratio_regression_loss = tf.reduce_sum(ratio_regression_loss) / normalizer + + return box_regression_loss + alpha_regression_loss + 16 * ratio_regression_loss + + return _smooth_l1 + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /model.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /model.py new file mode 100644 index 0000000000000000000000000000000000000000..a8e69cc84333112337636cbdb1775a8692c1755e --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /model.py @@ -0,0 +1,505 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +from functools import reduce + +# from keras import layers +# from keras import initializers +# from keras import models +# from keras_ import EfficientNetB0, EfficientNetB1, EfficientNetB2 +# from keras_ import EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6 + +import tensorflow as tf +from tensorflow.keras import layers +from tensorflow.keras import initializers +from tensorflow.keras import models +from tfkeras import EfficientNetB0, EfficientNetB1, EfficientNetB2 +from tfkeras import EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6 + +from layers import ClipBoxes, RegressBoxes, FilterDetections, wBiFPNAdd, BatchNormalization +from initializers import PriorProbability +from utils.anchors import anchors_for_shape +import numpy as np + +w_bifpns = [64, 88, 112, 160, 224, 288, 384] +d_bifpns = [3, 4, 5, 6, 7, 7, 8] +d_heads = [3, 3, 3, 4, 4, 4, 5] +image_sizes = [512, 640, 768, 896, 1024, 1280, 1408] +backbones = [EfficientNetB0, EfficientNetB1, EfficientNetB2, + EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6] + +MOMENTUM = 0.997 +EPSILON = 1e-4 + + +def SeparableConvBlock(num_channels, kernel_size, strides, name, freeze_bn=False): + f1 = layers.SeparableConv2D(num_channels, kernel_size=kernel_size, strides=strides, padding='same', + use_bias=True, name=f'{name}/conv') + f2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name=f'{name}/bn') + # f2 = BatchNormalization(freeze=freeze_bn, name=f'{name}/bn') + return reduce(lambda f, g: lambda *args, **kwargs: g(f(*args, **kwargs)), (f1, f2)) + + +def ConvBlock(num_channels, kernel_size, strides, name, freeze_bn=False): + f1 = layers.Conv2D(num_channels, kernel_size=kernel_size, strides=strides, padding='same', + use_bias=True, name='{}_conv'.format(name)) + f2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name='{}_bn'.format(name)) + # f2 = BatchNormalization(freeze=freeze_bn, name='{}_bn'.format(name)) + f3 = layers.ReLU(name='{}_relu'.format(name)) + return reduce(lambda f, g: lambda *args, **kwargs: g(f(*args, **kwargs)), (f1, f2, f3)) + + +def build_wBiFPN(features, num_channels, id, freeze_bn=False): + if id == 0: + _, _, C3, C4, C5 = features + P3_in = C3 + P4_in = C4 + P5_in = C5 + P6_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', name='resample_p6/conv2d')(C5) + P6_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name='resample_p6/bn')(P6_in) + # P6_in = BatchNormalization(freeze=freeze_bn, name='resample_p6/bn')(P6_in) + P6_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p6/maxpool')(P6_in) + P7_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p7/maxpool')(P6_in) + P7_U = layers.UpSampling2D()(P7_in) + P6_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U]) + P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td) + P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td) + P5_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/conv2d')(P5_in) + P5_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/bn')(P5_in_1) + # P5_in_1 = BatchNormalization(freeze=freeze_bn, 
name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/bn')(P5_in_1) + P6_U = layers.UpSampling2D()(P6_td) + P5_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in_1, P6_U]) + P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td) + P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td) + P4_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/conv2d')(P4_in) + P4_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/bn')(P4_in_1) + # P4_in_1 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/bn')(P4_in_1) + P5_U = layers.UpSampling2D()(P5_td) + P4_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in_1, P5_U]) + P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td) + P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td) + P3_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/conv2d')(P3_in) + P3_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/bn')(P3_in) + # P3_in = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/bn')(P3_in) + P4_U = layers.UpSampling2D()(P4_td) + P3_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U]) + P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out) + P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out) + P4_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/conv2d')(P4_in) + P4_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/bn')(P4_in_2) + # P4_in_2 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/bn')(P4_in_2) + P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out) + P4_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in_2, P4_td, P3_D]) + P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out) + P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out) + + P5_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/conv2d')(P5_in) + P5_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/bn')(P5_in_2) + # P5_in_2 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/bn')(P5_in_2) + P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out) + P5_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in_2, P5_td, P4_D]) + P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out) + P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out) + + P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out) + P6_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D]) + P6_out = layers.Activation(lambda x: 
tf.nn.swish(x))(P6_out) + P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out) + + P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out) + P7_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D]) + P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out) + P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out) + + else: + P3_in, P4_in, P5_in, P6_in, P7_in = features + P7_U = layers.UpSampling2D()(P7_in) + P6_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U]) + P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td) + P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td) + P6_U = layers.UpSampling2D()(P6_td) + P5_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in, P6_U]) + P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td) + P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td) + P5_U = layers.UpSampling2D()(P5_td) + P4_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in, P5_U]) + P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td) + P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td) + P4_U = layers.UpSampling2D()(P4_td) + P3_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U]) + P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out) + P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out) + P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out) + P4_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in, P4_td, P3_D]) + P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out) + P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out) + + P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out) + P5_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in, P5_td, P4_D]) + P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out) + P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out) + + P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out) + P6_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D]) + P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out) + P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out) + + P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out) + P7_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D]) + P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out) + P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out) + return P3_out, P4_td, P5_td, P6_td, P7_out + + +def build_BiFPN(features, num_channels, id, freeze_bn=False): + if id == 0: + _, _, C3, C4, C5 = features + P3_in = C3 + P4_in = C4 + P5_in = C5 + 
P6_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', name='resample_p6/conv2d')(C5) + P6_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name='resample_p6/bn')(P6_in) + # P6_in = BatchNormalization(freeze=freeze_bn, name='resample_p6/bn')(P6_in) + P6_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p6/maxpool')(P6_in) + P7_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p7/maxpool')(P6_in) + P7_U = layers.UpSampling2D()(P7_in) + P6_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U]) + P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td) + P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td) + P5_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/conv2d')(P5_in) + P5_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/bn')(P5_in_1) + # P5_in_1 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/bn')(P5_in_1) + P6_U = layers.UpSampling2D()(P6_td) + P5_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in_1, P6_U]) + P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td) + P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td) + P4_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/conv2d')(P4_in) + P4_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/bn')(P4_in_1) + # P4_in_1 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/bn')(P4_in_1) + P5_U = layers.UpSampling2D()(P5_td) + P4_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in_1, P5_U]) + P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td) + P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td) + P3_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/conv2d')(P3_in) + P3_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/bn')(P3_in) + # P3_in = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/bn')(P3_in) + P4_U = layers.UpSampling2D()(P4_td) + P3_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U]) + P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out) + P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out) + P4_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/conv2d')(P4_in) + P4_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/bn')(P4_in_2) + # P4_in_2 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/bn')(P4_in_2) + P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out) + P4_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in_2, P4_td, P3_D]) + P4_out = layers.Activation(lambda x: 
tf.nn.swish(x))(P4_out) + P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out) + + P5_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same', + name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/conv2d')(P5_in) + P5_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, + name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/bn')(P5_in_2) + # P5_in_2 = BatchNormalization(freeze=freeze_bn, name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/bn')(P5_in_2) + P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out) + P5_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in_2, P5_td, P4_D]) + P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out) + P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out) + + P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out) + P6_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D]) + P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out) + P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out) + + P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out) + P7_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D]) + P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out) + P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out) + + else: + P3_in, P4_in, P5_in, P6_in, P7_in = features + P7_U = layers.UpSampling2D()(P7_in) + P6_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U]) + P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td) + P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td) + P6_U = layers.UpSampling2D()(P6_td) + P5_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in, P6_U]) + P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td) + P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td) + P5_U = layers.UpSampling2D()(P5_td) + P4_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in, P5_U]) + P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td) + P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td) + P4_U = layers.UpSampling2D()(P4_td) + P3_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U]) + P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out) + P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out) + P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out) + P4_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in, P4_td, P3_D]) + P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out) + P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out) + + P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out) + P5_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in, P5_td, 
P4_D]) + P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out) + P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out) + + P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out) + P6_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D]) + P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out) + P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out) + + P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out) + P7_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D]) + P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out) + P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1, + name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out) + return P3_out, P4_td, P5_td, P6_td, P7_out + + +class BoxNet(models.Model): + def __init__(self, width, depth, num_anchors=9, separable_conv=True, freeze_bn=False, detect_quadrangle=False, **kwargs): + super(BoxNet, self).__init__(**kwargs) + self.width = width + self.depth = depth + self.num_anchors = num_anchors + self.separable_conv = separable_conv + self.detect_quadrangle = detect_quadrangle + num_values = 9 if detect_quadrangle else 4 + options = { + 'kernel_size': 3, + 'strides': 1, + 'padding': 'same', + 'bias_initializer': 'zeros', + } + if separable_conv: + kernel_initializer = { + 'depthwise_initializer': initializers.VarianceScaling(), + 'pointwise_initializer': initializers.VarianceScaling(), + } + options.update(kernel_initializer) + self.convs = [layers.SeparableConv2D(filters=width, name=f'{self.name}/box-{i}', **options) for i in + range(depth)] + self.head = layers.SeparableConv2D(filters=num_anchors * num_values, + name=f'{self.name}/box-predict', **options) + else: + kernel_initializer = { + 'kernel_initializer': initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None) + } + options.update(kernel_initializer) + self.convs = [layers.Conv2D(filters=width, name=f'{self.name}/box-{i}', **options) for i in range(depth)] + self.head = layers.Conv2D(filters=num_anchors * num_values, name=f'{self.name}/box-predict', **options) + self.bns = [ + [layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name=f'{self.name}/box-{i}-bn-{j}') for j in + range(3, 8)] + for i in range(depth)] + # self.bns = [[BatchNormalization(freeze=freeze_bn, name=f'{self.name}/box-{i}-bn-{j}') for j in range(3, 8)] + # for i in range(depth)] + self.relu = layers.Lambda(lambda x: tf.nn.swish(x)) + self.reshape = layers.Reshape((-1, num_values)) + self.level = 0 + + def call(self, inputs, **kwargs): + feature, level = inputs + for i in range(self.depth): + feature = self.convs[i](feature) + feature = self.bns[i][self.level](feature) + feature = self.relu(feature) + outputs = self.head(feature) + outputs = self.reshape(outputs) + self.level += 1 + return outputs + + +class ClassNet(models.Model): + def __init__(self, width, depth, num_classes=20, num_anchors=9, separable_conv=True, freeze_bn=False, **kwargs): + super(ClassNet, self).__init__(**kwargs) + self.width = width + self.depth = depth + self.num_classes = num_classes + self.num_anchors = num_anchors + self.separable_conv = separable_conv + options = { + 'kernel_size': 3, + 'strides': 1, + 'padding': 'same', + } + if self.separable_conv: + kernel_initializer = { + 'depthwise_initializer': 
initializers.VarianceScaling(), + 'pointwise_initializer': initializers.VarianceScaling(), + } + options.update(kernel_initializer) + self.convs = [layers.SeparableConv2D(filters=width, bias_initializer='zeros', name=f'{self.name}/class-{i}', + **options) + for i in range(depth)] + self.head = layers.SeparableConv2D(filters=num_classes * num_anchors, + bias_initializer=PriorProbability(probability=0.01), + name=f'{self.name}/class-predict', **options) + else: + kernel_initializer = { + 'kernel_initializer': initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None) + } + options.update(kernel_initializer) + self.convs = [layers.Conv2D(filters=width, bias_initializer='zeros', name=f'{self.name}/class-{i}', + **options) + for i in range(depth)] + self.head = layers.Conv2D(filters=num_classes * num_anchors, + bias_initializer=PriorProbability(probability=0.01), + name='class-predict', **options) + self.bns = [ + [layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name=f'{self.name}/class-{i}-bn-{j}') for j + in range(3, 8)] + for i in range(depth)] + # self.bns = [[BatchNormalization(freeze=freeze_bn, name=f'{self.name}/class-{i}-bn-{j}') for j in range(3, 8)] + # for i in range(depth)] + self.relu = layers.Lambda(lambda x: tf.nn.swish(x)) + self.reshape = layers.Reshape((-1, num_classes)) + self.activation = layers.Activation('sigmoid') + self.level = 0 + + def call(self, inputs, **kwargs): + feature, level = inputs + for i in range(self.depth): + feature = self.convs[i](feature) + feature = self.bns[i][self.level](feature) + feature = self.relu(feature) + outputs = self.head(feature) + outputs = self.reshape(outputs) + outputs = self.activation(outputs) + self.level += 1 + return outputs + + +def efficientdet(phi, num_classes=20, num_anchors=9, weighted_bifpn=False, freeze_bn=False, + score_threshold=0.01, detect_quadrangle=False, anchor_parameters=None, separable_conv=True): + assert phi in range(7) + input_size = image_sizes[phi] + input_shape = (input_size, input_size, 3) + image_input = layers.Input(input_shape) + w_bifpn = w_bifpns[phi] + d_bifpn = d_bifpns[phi] + w_head = w_bifpn + d_head = d_heads[phi] + backbone_cls = backbones[phi] + features = backbone_cls(input_tensor=image_input, freeze_bn=freeze_bn) + if weighted_bifpn: + fpn_features = features + for i in range(d_bifpn): + fpn_features = build_wBiFPN(fpn_features, w_bifpn, i, freeze_bn=freeze_bn) + else: + fpn_features = features + for i in range(d_bifpn): + fpn_features = build_BiFPN(fpn_features, w_bifpn, i, freeze_bn=freeze_bn) + box_net = BoxNet(w_head, d_head, num_anchors=num_anchors, separable_conv=separable_conv, freeze_bn=freeze_bn, + detect_quadrangle=detect_quadrangle, name='box_net') + class_net = ClassNet(w_head, d_head, num_classes=num_classes, num_anchors=num_anchors, + separable_conv=separable_conv, freeze_bn=freeze_bn, name='class_net') + classification = [class_net([feature, i]) for i, feature in enumerate(fpn_features)] + classification = layers.Concatenate(axis=1, name='classification')(classification) + regression = [box_net([feature, i]) for i, feature in enumerate(fpn_features)] + regression = layers.Concatenate(axis=1, name='regression')(regression) + + model = models.Model(inputs=[image_input], outputs=[classification, regression], name='efficientdet') + + # apply predicted regression to anchors + anchors = anchors_for_shape((input_size, input_size), anchor_params=anchor_parameters) + anchors_input = np.expand_dims(anchors, axis=0) + boxes = RegressBoxes(name='boxes')([anchors_input, 
regression[..., :4]]) + boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes]) + + # filter detections (apply NMS / score threshold / select top-k) + if detect_quadrangle: + detections = FilterDetections( + name='filtered_detections', + score_threshold=score_threshold, + detect_quadrangle=True + )([boxes, classification, regression[..., 4:8], regression[..., 8]]) + else: + detections = FilterDetections( + name='filtered_detections', + score_threshold=score_threshold + )([boxes, classification]) + + prediction_model = models.Model(inputs=[image_input], outputs=detections, name='efficientdet_p') + return model, prediction_model + + +if __name__ == '__main__': + npu_keras_sess = set_keras_session_npu_config() + x, y = efficientdet(1) + close_session(npu_keras_sess) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /modelzoo_level.txt b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /modelzoo_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..357d87784ead1792c24762291a4801d2b2018b08 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /modelzoo_level.txt @@ -0,0 +1,3 @@ +FuncStatus:OK +PerfStatus:NOK +PrecisionStatus:NOK \ No newline at end of file diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /requirements.txt b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..05d268dbbb610fe6f529dc0f44a63ed7fdcea363 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /requirements.txt @@ -0,0 +1,130 @@ +absl-py==0.13.0 +addict==2.4.0 +albumentations @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/albumentations-0.4.5-cp37-none-any.whl +asgiref==3.4.1 +astor==0.8.1 +attrs==19.3.0 +auto-tune @ file:///tmp/selfgz1419329419/fwkacllib/lib64/auto_tune-0.1.0-py3-none-any.whl +backcall==0.2.0 +boto3==1.12.22 +botocore==1.15.49 +certifi==2020.6.20 +cffi @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/cffi-1.14.0-cp37-cp37m-linux_aarch64.whl +chardet==3.0.4 +charset-normalizer==2.0.4 +click==8.0.1 +cloudpickle==1.3.0 +cycler==0.10.0 +Cython @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/Cython-0.29.14-cp37-cp37m-linux_aarch64.whl +dask==2.18.1 +decorator==4.4.1 +deep-moxing @ http://100.95.151.167:6868/aarch64/euler/dls-release/ubuntu-16.04/deep-moxing/latest/deep_moxing-1.0.2.e45a4759-py3-none-any.whl +Django==3.2.6 +docutils==0.15.2 +esdk-obs-python==3.20.1 +et-xmlfile==1.1.0 +Flask==1.1.1 +gast==0.2.2 +google-pasta==0.2.0 +grpcio @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/grpcio-1.26.0-cp37-cp37m-linux_aarch64.whl +grpcio-tools @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/grpcio_tools-1.26.0-cp37-cp37m-linux_aarch64.whl +gunicorn==20.0.4 +h5py @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/h5py-2.10.0-cp37-cp37m-linux_aarch64.whl +hccl @ file:///tmp/selfgz1419329419/fwkacllib/lib64/hccl-0.1.0-py3-none-any.whl +huaweicloud-sdk-python-modelarts-dataset @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/huaweicloud_sdk_python_modelarts_dataset-0.1.5-py2.py3-none-any.whl +idna==2.10 +image==1.5.28 +imageio==2.9.0 +imgaug==0.2.6 +importlib-metadata==4.8.1 +ipykernel==5.3.4 +ipython==7.25.0 +ipython-genutils==0.2.0 +itsdangerous==2.0.1 +jdcal==1.4.1 
+jedi==0.18.0 +Jinja2==3.0.1 +jmespath==0.10.0 +jupyter-client==6.1.12 +jupyter-core==4.7.1 +Keras==2.3.1 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.2 +kfac==0.2.0 +kiwisolver @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/kiwisolver-1.1.0-cp37-cp37m-linux_aarch64.whl +lazy-import==0.2.2 +llvmlite @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/llvmlite-0.31.0-cp37-cp37m-linux_aarch64.whl +lxml @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/lxml-4.4.2-cp37-cp37m-linux_aarch64.whl +Markdown==3.3.4 +MarkupSafe==2.0.1 +matplotlib @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/matplotlib-3.1.2-cp37-cp37m-linux_aarch64.whl +matplotlib-inline==0.1.2 +mmcv @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/mmcv-0.2.14-cp37-cp37m-linux_aarch64.whl +modelarts-service @ http://100.95.151.167:6868/aarch64/euler/dls-release/euler-2.8/modelarts_service/modelarts_service-1.0.2-py3-none-any.whl +moxing-framework @ http://100.95.151.167:6868/aarch64/euler/dls-release/ubuntu-16.04/moxing_framework/moxing_framework-2.0.0.rc2.4b57a67b-py2.py3-none-any.whl +moxing-tensorflow @ http://100.95.151.167:6868/aarch64/euler/dls-release/ubuntu-16.04/moxing_tensorflow/moxing_tensorflow-2.0.0.rc2.65f98f7d-py2.py3-none-any.whl +mpmath==1.2.1 +networkx==2.6.2 +npu-bridge @ file:///tmp/selfgz1881668/tfplugin/bin/npu_bridge-1.15.0-py3-none-any.whl +numba==0.49.1 +numexpr @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/numexpr-2.7.1-cp37-cp37m-linux_aarch64.whl +numpy==1.19.3 +opencv-contrib-python==4.5.4.60 +opencv-python-headless==4.5.4.60 +openpyxl==3.0.3 +opt-einsum==3.3.0 +pandas @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/pandas-0.24.2-cp37-cp37m-linux_aarch64.whl +parso==0.8.2 +pathlib2==2.3.6 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/Pillow-7.0.0-cp37-cp37m-linux_aarch64.whl +prometheus-client==0.8.0 +prompt-toolkit==3.0.19 +protobuf @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/protobuf-3.11.3-cp37-cp37m-linux_aarch64.whl +psutil @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/psutil-5.7.0-cp37-cp37m-linux_aarch64.whl +ptyprocess==0.7.0 +pycocotools @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/pycocotools-2.0.0-cp37-cp37m-linux_aarch64.whl +pycparser==2.20 +Pygments==2.9.0 +pyparsing==2.4.7 +python-dateutil==2.8.2 +pytz==2021.1 +PyWavelets @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/PyWavelets-1.1.1-cp37-cp37m-linux_aarch64.whl +PyYAML @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/PyYAML-5.3.1-cp37-cp37m-linux_aarch64.whl +pyzmq==22.1.0 +requests==2.26.0 +s3transfer==0.3.7 +schedule-search @ file:///tmp/selfgz1419329419/fwkacllib/lib64/schedule_search-0.1.0-py3-none-any.whl +scikit-image @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/scikit_image-0.17.2-cp37-cp37m-linux_aarch64.whl +scikit-learn @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/scikit_learn-0.20.0-cp37-cp37m-linux_aarch64.whl +scipy @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/scipy-1.3.3-cp37-cp37m-linux_aarch64.whl +Shapely==1.7.1 
+six==1.16.0 +sqlparse==0.4.1 +sympy==1.4 +tables @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/tables-3.6.1-cp37-cp37m-linux_aarch64.whl +te @ file:///tmp/selfgz1419329419/fwkacllib/lib64/te-0.4.0-py3-none-any.whl +tensorboard==1.15.0 +tensorflow @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/tensorflow-1.15.0-cp37-cp37m-linux_aarch64.whl +tensorflow-estimator==1.15.1 +tensorflow-probability==0.10.1 +termcolor==1.1.0 +terminaltables==3.1.0 +tf-slim==1.1.0 +tflearn==0.5.0 +tifffile==2021.8.30 +toml==0.10.1 +topi @ file:///tmp/selfgz1419329419/fwkacllib/lib64/topi-0.4.0-py3-none-any.whl +tornado==6.1 +tqdm==4.46.1 +traitlets==5.0.5 +typing-extensions==3.10.0.2 +umap-learn==0.4.6 +umap-learn-modified @ http://100.95.151.167:6868/aarch64/euler/dls-release/euleros-arm/compiled-wheel/umap_learn_modified-0.3.8-py3-none-any.whl +urllib3==1.26.6 +wcwidth==0.2.5 +Werkzeug==2.0.1 +wrapt==1.12.1 +xmltodict==0.12.0 +zipp==3.5.0 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /test/.keep b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /test/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /test/train_full_1p.sh b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /test/train_full_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..32f3ab8f4f522283edad41a2529062170b6f7f68 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /test/train_full_1p.sh @@ -0,0 +1,213 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path='' +output_path='' + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --train_steps # max_step for training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --train_steps* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running without etp..." 
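+    # MODELARTS_MODEL_PATH is only populated when the job runs on the ModelArts
+    # platform, which already redirects the training output to a per-rank log file
+    # under /home/ma-user/modelarts/log/; the next lines point ${print_log} at that
+    # file instead of the local ./test/output path used in the standalone case.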
+ print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 + + +if [ x"${modelarts_flag}" != x ]; +then + python ./train_sess.py --snapshot imagenet --phi 0 \ + --no-evaluation --random-transform \ + --compute-val-loss --freeze-backbone --step1\ + --batch-size 32 --steps 1000 --epochs=40\ + --pretrained_model='/home/dingwei/efficientdet/efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'\ + pascal /home/dataset/VOCdevkit/VOC2007 + + python ./train_sess.py --snapshot checkpoints/pascal_ft_10.h5 --phi 0 \ + --no-evaluation --random-transform \ + --compute-val-loss --freeze-bn \ + --batch-size 4 --steps 10000 --epochs=10\ + pascal /home/dataset/VOCdevkit/VOC2007 + + python ./common.py --model_path='checkpoints/pascal_ft_10.h5' \ + --data_path='/home/dataset/VOCdevkit/VOC2007' + +else + python ./train_sess.py --snapshot imagenet --phi 0 \ + --no-evaluation --random-transform \ + --compute-val-loss --freeze-backbone --step1\ + --batch-size 32 --steps 1000 --epochs=40\ + --pretrained_model='/home/dingwei/efficientdet/efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'\ + pascal /home/dataset/VOCdevkit/VOC2007 + + python ./train_sess.py --snapshot checkpoints/pascal_ft_10.h5 --phi 0 \ + --no-evaluation --random-transform \ + --compute-val-loss --freeze-bn \ + --batch-size 4 --steps 10000 --epochs=10\ + pascal /home/dataset/VOCdevkit/VOC2007 + + python ./common.py --model_path='checkpoints/pascal_ft_10.h5' \ + --data_path='/home/dataset/VOCdevkit/VOC2007' +fi + +# 性能相关数据计算 +StepTime=`grep "s/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# 精度相关数据计算 +train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'` +# 提取所有loss打印信息 +grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ 
x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance ms/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /tfkeras.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /tfkeras.py new file mode 100644 index 0000000000000000000000000000000000000000..911f2bb1317e90dc2f2acd38aebeb87bf31db38e --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /tfkeras.py @@ -0,0 +1,44 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +from utils import inject_tfkeras_modules, init_tfkeras_custom_objects +import efficientnet as model + +EfficientNetB0 = inject_tfkeras_modules(model.EfficientNetB0) +EfficientNetB1 = inject_tfkeras_modules(model.EfficientNetB1) +EfficientNetB2 = inject_tfkeras_modules(model.EfficientNetB2) +EfficientNetB3 = inject_tfkeras_modules(model.EfficientNetB3) +EfficientNetB4 = inject_tfkeras_modules(model.EfficientNetB4) +EfficientNetB5 = inject_tfkeras_modules(model.EfficientNetB5) +EfficientNetB6 = inject_tfkeras_modules(model.EfficientNetB6) +EfficientNetB7 = inject_tfkeras_modules(model.EfficientNetB7) + +preprocess_input = inject_tfkeras_modules(model.preprocess_input) + +init_tfkeras_custom_objects() + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /train_sess.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /train_sess.py new file mode 100644 index 0000000000000000000000000000000000000000..3d4275266b576c511ad5298f629b281f6cf9c653 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /train_sess.py @@ -0,0 +1,356 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
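The wrappers defined in tfkeras.py above inject the tf.keras backend, layers, models and utils into the local efficientnet builders, so each EfficientNetB* callable can be used directly as a backbone. A minimal sketch of the call pattern, mirroring how model.py consumes these constructors (the builder is assumed, as in efficientdet(), to return a list of feature maps whose last three entries are C3, C4, C5):

    import tensorflow as tf
    from tfkeras import EfficientNetB0

    # Hypothetical standalone check of the injected backbone; efficientdet() in
    # model.py passes input_tensor and freeze_bn in exactly this way.
    image_input = tf.keras.layers.Input((512, 512, 3))  # 512 is image_sizes[0] for phi=0
    features = EfficientNetB0(input_tensor=image_input, freeze_bn=False)
    _, _, C3, C4, C5 = features  # multi-scale features fed to build_BiFPN / build_wBiFPN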
+import os +import argparse +from datetime import date +import sys +import tensorflow as tf +from npu_bridge.npu_init import * + + +# import keras +# import keras.preprocessing.image +# import keras.backend as K +# from keras.optimizers import Adam, SGD + +import tensorflow.python.keras as keras +from tensorflow.python.keras import backend as K +from tensorflow.keras.optimizers import Adam, SGD +from augmentor.color import VisualEffect +from augmentor.misc import MiscEffect +from model import efficientdet +from losses import smooth_l1, focal, smooth_l1_quad +from efficientnet import BASE_WEIGHTS_PATH, WEIGHTS_HASHES +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig + +def makedirs(path): + # Intended behavior: try to create the directory, + # pass if the directory exists already, fails otherwise. + # Meant for Python 2.7/3.n compatibility. + try: + os.makedirs(path) + except OSError: + if not os.path.isdir(path): + raise + + +def create_callbacks(training_model, prediction_model, validation_generator, args): + """ + Creates the callbacks to use during training. + + Args + training_model: The model that is used for training. + prediction_model: The model that should be used for validation. + validation_generator: The generator for creating validation data. + args: parseargs args object. + + Returns: + A list of callbacks used for training. + """ + callbacks = [] + + tensorboard_callback = None + + if args.tensorboard_dir: + if tf.version.VERSION > '2.0.0': + file_writer = tf.summary.create_file_writer(args.tensorboard_dir) + file_writer.set_as_default() + tensorboard_callback = keras.callbacks.TensorBoard( + log_dir=args.tensorboard_dir, + histogram_freq=0, + batch_size=args.batch_size, + write_graph=True, + write_grads=False, + write_images=False, + embeddings_freq=0, + embeddings_layer_names=None, + embeddings_metadata=None + ) + callbacks.append(tensorboard_callback) + + if args.evaluation and validation_generator: + if args.dataset_type == 'coco': + from eval.coco import Evaluate + # use prediction model for evaluation + evaluation = Evaluate(validation_generator, prediction_model, tensorboard=tensorboard_callback) + else: + from eval.pascal import Evaluate + evaluation = Evaluate(validation_generator, prediction_model, tensorboard=tensorboard_callback) + callbacks.append(evaluation) + + # save the model + if args.snapshots: + # ensure directory created first; otherwise h5py will error after epoch. + makedirs(args.snapshot_path) + checkpoint = keras.callbacks.ModelCheckpoint( + os.path.join( + args.snapshot_path, + f'{args.dataset_type}_{{epoch:02d}}.h5' if args.step1 + else f'{args.dataset_type}_ft_{{epoch:02d}}.h5' + ), + verbose=1, + save_weights_only=True, + # save_best_only=True, + # monitor="mAP", + # mode='max' + ) + callbacks.append(checkpoint) + return callbacks + + +def create_generators(args): + """ + Create generators for training and validation. + + Args + args: parseargs object containing configuration for generators. + preprocess_image: Function that preprocesses an image for the network. 
+ """ + common_args = { + 'batch_size': args.batch_size, + 'phi': args.phi, + 'detect_text': args.detect_text, + 'detect_quadrangle': args.detect_quadrangle + } + + # create random transform generator for augmenting training data + if args.random_transform: + misc_effect = MiscEffect() + visual_effect = VisualEffect() + else: + misc_effect = None + visual_effect = None + + if args.dataset_type == 'pascal': + from generators.pascal import PascalVocGenerator + train_generator = PascalVocGenerator( + args.pascal_path, + 'trainval', + skip_difficult=True, + misc_effect=misc_effect, + visual_effect=visual_effect, + **common_args + ) + + validation_generator = PascalVocGenerator( + args.pascal_path, + 'val', + skip_difficult=True, + shuffle_groups=False, + **common_args + ) + elif args.dataset_type == 'csv': + from generators.csv_ import CSVGenerator + train_generator = CSVGenerator( + args.annotations_path, + args.classes_path, + misc_effect=misc_effect, + visual_effect=visual_effect, + **common_args + ) + + if args.val_annotations_path: + validation_generator = CSVGenerator( + args.val_annotations_path, + args.classes_path, + shuffle_groups=False, + **common_args + ) + else: + validation_generator = None + + elif args.dataset_type == 'coco': + # import here to prevent unnecessary dependency on cocoapi + from generators.coco import CocoGenerator + train_generator = CocoGenerator( + args.coco_path, + 'train2017', + misc_effect=misc_effect, + visual_effect=visual_effect, + group_method='random', + **common_args + ) + + validation_generator = CocoGenerator( + args.coco_path, + 'val2017', + shuffle_groups=False, + **common_args + ) + else: + raise ValueError('Invalid data type received: {}'.format(args.dataset_type)) + + return train_generator, validation_generator + + +def parse_args(args): + """ + Parse the arguments. + """ + today = str(date.today()) + parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') + subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type') + subparsers.required = True + + coco_parser = subparsers.add_parser('coco') + coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).') + + pascal_parser = subparsers.add_parser('pascal') + pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. 
/tmp/VOCdevkit).') + + csv_parser = subparsers.add_parser('csv') + csv_parser.add_argument('annotations_path', help='Path to CSV file containing annotations for training.') + csv_parser.add_argument('classes_path', help='Path to a CSV file containing class label mapping.') + csv_parser.add_argument('--val-annotations-path', + help='Path to CSV file containing annotations for validation (optional).') + parser.add_argument('--detect-quadrangle', help='If to detect quadrangle.', action='store_true', default=False) + parser.add_argument('--detect-text', help='If is text detection task.', action='store_true', default=False) + + parser.add_argument('--snapshot', help='Resume training from a snapshot.') + parser.add_argument('--freeze-backbone', help='Freeze training of backbone layers.', action='store_true') + parser.add_argument('--freeze-bn', help='Freeze training of BatchNormalization layers.', action='store_true') + parser.add_argument('--weighted-bifpn', help='Use weighted BiFPN', action='store_true') + + parser.add_argument('--batch-size', help='Size of the batches.', default=1, type=int) + parser.add_argument('--phi', help='Hyper parameter phi', default=0, type=int, choices=(0, 1, 2, 3, 4, 5, 6)) + # parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') + parser.add_argument('--epochs', help='Number of epochs to train.', type=int, default=50) + parser.add_argument('--steps', help='Number of steps per epoch.', type=int, default=10000) + parser.add_argument('--snapshot_path', + help='Path to store snapshots of models during training', + default='checkpoints/') + parser.add_argument('--tensorboard-dir', help='Log directory for Tensorboard output', + default='logs/{}'.format(today)) + parser.add_argument('--no-snapshots', help='Disable saving snapshots.', dest='snapshots', action='store_false') + parser.add_argument('--no-evaluation', help='Disable per epoch evaluation.', dest='evaluation', + action='store_false') + parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true') + parser.add_argument('--compute-val-loss', help='Compute validation loss during training', dest='compute_val_loss', + action='store_true') + parser.add_argument('--step1', help='train step1', dest='step1', + action='store_true') + # Fit generator arguments + parser.add_argument('--multiprocessing', help='Use multiprocessing in fit_generator.', action='store_true') + parser.add_argument('--workers', help='Number of generator workers.', type=int, default=1) + parser.add_argument('--max-queue-size', help='Queue length for multiprocessing workers in fit_generator.', type=int, + default=10) + parser.add_argument('--train_file_pattern', help='path to tfrecord', default='') + parser.add_argument('--pretrained_model', help='path to tfrecord', default='/home/dingwei/efficientdet/efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5') + print(vars(parser.parse_args(args))) + return parser.parse_args(args) + # return check_args(parser.parse_args(args)) + + +def main(args=None): + print(1) + # parse arguments + if args is None: + args = sys.argv[1:] + args = parse_args(args) + + # create the generators + train_generator, validation_generator = create_generators(args) + num_classes = train_generator.num_classes() + num_anchors = train_generator.num_anchors + model, prediction_model = efficientdet(args.phi, + num_classes=num_classes, + num_anchors=num_anchors, + weighted_bifpn=args.weighted_bifpn, + freeze_bn=args.freeze_bn, + 
detect_quadrangle=args.detect_quadrangle + ) + + # load pretrained weights + if args.snapshot: + if args.snapshot == 'imagenet': + model.load_weights(args.pretrained_model, by_name=True) + else: + print('Loading model, this may take a second...') + model.load_weights(args.snapshot, by_name=True) + + # freeze backbone layers + if args.freeze_backbone: + # 227, 329, 329, 374, 464, 566, 656 + for i in range(1, [227, 329, 329, 374, 464, 566, 656][args.phi]): + model.layers[i].trainable = False + + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2**32, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5) + opt_tmp = npu_tf_optimizer(tf.train.AdamOptimizer(learning_rate=1e-3)) + optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager) + + # compile model + model.compile(optimizer=optimizer, loss={ + 'regression': smooth_l1_quad() if args.detect_quadrangle else smooth_l1(), + 'classification': focal() + }, ) + # create the callbacks + callbacks = create_callbacks( + model, + prediction_model, + validation_generator, + args, + ) + if not args.compute_val_loss: + validation_generator = None + elif args.compute_val_loss and validation_generator is None: + raise ValueError('When you have no validation data, you should not specify --compute-val-loss.') + + # start training + return model.fit_generator( + generator=train_generator, + steps_per_epoch=args.steps, + initial_epoch=0, + epochs=args.epochs, + verbose=1, + callbacks=callbacks, + workers=args.workers, + use_multiprocessing=args.multiprocessing, + max_queue_size=args.max_queue_size, + validation_data=validation_generator + ) + + + + +if __name__ == '__main__': + # NPU setting + K.clear_session() + sess_config = tf.ConfigProto() + custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() + #custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("force_fp32") + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["use_off_line"].b = True + sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + sess_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + sess = tf.Session(config=sess_config) + K.set_session(sess) + + main() + + sess.close() diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/.keep b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/__init__.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..739f04a2067ea4018fda16f1a78344ba7f8dc742 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/__init__.py @@ -0,0 +1,197 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from npu_bridge.npu_init import * +import functools +import cv2 +import numpy as np + +_KERAS_BACKEND = None +_KERAS_LAYERS = None +_KERAS_MODELS = None +_KERAS_UTILS = None + + +def get_submodules_from_kwargs(kwargs): + backend = kwargs.get('backend', _KERAS_BACKEND) + layers = kwargs.get('layers', _KERAS_LAYERS) + models = kwargs.get('models', _KERAS_MODELS) + utils = kwargs.get('utils', _KERAS_UTILS) + for key in kwargs.keys(): + if key not in ['backend', 'layers', 'models', 'utils']: + raise TypeError('Invalid keyword argument: %s', key) + return backend, layers, models, utils + + +def inject_keras_modules(func): + import keras + @functools.wraps(func) + def wrapper(*args, **kwargs): + kwargs['backend'] = keras.backend + kwargs['layers'] = keras.layers + kwargs['models'] = keras.models + kwargs['utils'] = keras.utils + return func(*args, **kwargs) + + return wrapper + + +def inject_tfkeras_modules(func): + import tensorflow.keras as tfkeras + @functools.wraps(func) + def wrapper(*args, **kwargs): + kwargs['backend'] = tfkeras.backend + kwargs['layers'] = tfkeras.layers + kwargs['models'] = tfkeras.models + kwargs['utils'] = tfkeras.utils + return func(*args, **kwargs) + + return wrapper + + +def init_keras_custom_objects(): + import keras + import efficientnet as model + + custom_objects = { + 'swish': inject_keras_modules(model.get_swish)(), + 'FixedDropout': inject_keras_modules(model.get_dropout)() + } + + keras.utils.generic_utils.get_custom_objects().update(custom_objects) + + +def init_tfkeras_custom_objects(): + import tensorflow.keras as tfkeras + import efficientnet as model + + custom_objects = { + 'swish': inject_tfkeras_modules(model.get_swish)(), + 'FixedDropout': inject_tfkeras_modules(model.get_dropout)() + } + + tfkeras.utils.get_custom_objects().update(custom_objects) + + +def preprocess_image(image, image_size): + # image, RGB + image_height, image_width = image.shape[:2] + if image_height > image_width: + scale = image_size / image_height + resized_height = image_size + resized_width = int(image_width * scale) + else: + scale = image_size / image_width + resized_height = int(image_height * scale) + resized_width = image_size + + image = cv2.resize(image, (resized_width, resized_height)) + image = image.astype(np.float32) + image /= 255. + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + image -= mean + image /= std + pad_h = image_size - resized_height + pad_w = image_size - resized_width + image = np.pad(image, [(0, pad_h), (0, pad_w), (0, 0)], mode='constant') + + return image, scale + + +def rotate_image(image): + rotate_degree = np.random.uniform(low=-45, high=45) + h, w = image.shape[:2] + # Compute the rotation matrix. 
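+    # (cv2.getRotationMatrix2D returns a 2x3 affine matrix about the image centre;
+    # the enlarged canvas computed below uses new_w = h*|sin| + w*|cos| and
+    # new_h = h*|cos| + w*|sin| so no corner of the rotated image is clipped, and
+    # the translation column is then shifted to keep the result centred.)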
+ M = cv2.getRotationMatrix2D(center=(w / 2, h / 2), + angle=rotate_degree, + scale=1) + + # Get the sine and cosine from the rotation matrix. + abs_cos_angle = np.abs(M[0, 0]) + abs_sin_angle = np.abs(M[0, 1]) + + # Compute the new bounding dimensions of the image. + new_w = int(h * abs_sin_angle + w * abs_cos_angle) + new_h = int(h * abs_cos_angle + w * abs_sin_angle) + + # Adjust the rotation matrix to take into account the translation. + M[0, 2] += new_w // 2 - w // 2 + M[1, 2] += new_h // 2 - h // 2 + + # Rotate the image. + image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC, + borderMode=cv2.BORDER_CONSTANT, + borderValue=(128, 128, 128)) + + return image + + +def reorder_vertexes(vertexes): + """ + reorder vertexes as the paper shows, (top, right, bottom, left) + Args: + vertexes: np.array (4, 2), should be in clockwise + + Returns: + + """ + assert vertexes.shape == (4, 2) + xmin, ymin = np.min(vertexes, axis=0) + xmax, ymax = np.max(vertexes, axis=0) + + # determine the first point with the smallest y, + # if two vertexes has same y, choose that with smaller x, + ordered_idxes = np.argsort(vertexes, axis=0) + ymin1_idx = ordered_idxes[0, 1] + ymin2_idx = ordered_idxes[1, 1] + if vertexes[ymin1_idx, 1] == vertexes[ymin2_idx, 1]: + if vertexes[ymin1_idx, 0] <= vertexes[ymin2_idx, 0]: + first_vertex_idx = ymin1_idx + else: + first_vertex_idx = ymin2_idx + else: + first_vertex_idx = ymin1_idx + ordered_idxes = [(first_vertex_idx + i) % 4 for i in range(4)] + ordered_vertexes = vertexes[ordered_idxes] + # drag the point to the corresponding edge + ordered_vertexes[0, 1] = ymin + ordered_vertexes[1, 0] = xmax + ordered_vertexes[2, 1] = ymax + ordered_vertexes[3, 0] = xmin + return ordered_vertexes + + +def postprocess_boxes(boxes, scale, height, width): + boxes /= scale + boxes[:, 0] = np.clip(boxes[:, 0], 0, width - 1) + boxes[:, 1] = np.clip(boxes[:, 1], 0, height - 1) + boxes[:, 2] = np.clip(boxes[:, 2], 0, width - 1) + boxes[:, 3] = np.clip(boxes[:, 3], 0, height - 1) + return boxes + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/anchors.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/anchors.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3ef982a9e734a811639751ea0e0338d4e7ed37 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/anchors.py @@ -0,0 +1,404 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +import os +import numpy as np +from tensorflow import keras +# import compute_overlap as compute_overlap + + +def compute_overlap(a, b): + #a [N,4] + #b [M,4] + area = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1) + iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], axis=1), b[:, 0]) + 1 + ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], axis=1), b[:, 1]) + 1 + # assume a holds N boxes and b holds M boxes + # np.expand_dims((N,), axis=1) turns (N,) into (N, 1) + # np.minimum((N, 1), (M,)) broadcasts to an (N, M) matrix comparing every box in a against every box in b + # take the smaller x/y extents to compute the intersection + # iw and ih are the width and height of the intersection; both have shape (N, M), one entry per anchor / ground-truth pair + iw = np.maximum(iw, 0) + ih = np.maximum(ih, 0) # do not allow iw or ih to be negative + + ua = np.expand_dims((a[:, 2] - a[:, 0] + 1) *(a[:, 3] - a[:, 1] + 1), axis=1) + area - iw * ih + # union: S_a + S_b - intersection_ab + ua = np.maximum(ua, np.finfo(float).eps) + + intersection = iw * ih + return intersection / ua # (N,M) + +class AnchorParameters: + """ + The parameters that define how anchors are generated. + + Args + sizes : List of sizes to use. Each size corresponds to one feature level. + strides : List of strides to use. Each stride corresponds to one feature level. + ratios : List of ratios to use per location in a feature map. + scales : List of scales to use per location in a feature map. + """ + + def __init__(self, sizes=(32, 64, 128, 256, 512), + strides=(8, 16, 32, 64, 128), + ratios=(1, 0.5, 2), + scales=(2 ** 0, 2 ** (1. / 3.), 2 ** (2. / 3.))): + self.sizes = sizes + self.strides = strides + self.ratios = np.array(ratios, dtype=keras.backend.floatx()) + self.scales = np.array(scales, dtype=keras.backend.floatx()) + + def num_anchors(self): + return len(self.ratios) * len(self.scales) + + +""" +The default anchor parameters. +""" +AnchorParameters.default = AnchorParameters( + sizes=[32, 64, 128, 256, 512], + strides=[8, 16, 32, 64, 128], + # ratio=h/w + ratios=np.array([1, 0.5, 2], keras.backend.floatx()), + scales=np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()), +) + + +def anchor_targets_bbox( + anchors, + image_group, + annotations_group, + num_classes, + negative_overlap=0.4, + positive_overlap=0.5, + detect_quadrangle=False +): + """ + Generate anchor targets for bbox detection. + + Args + anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2). + image_group: List of BGR images. + annotations_group: List of annotations (np.array of shape (N, 5) for (x1, y1, x2, y2, label)). + num_classes: Number of classes to predict. + mask_shape: If the image is padded with zeros, mask_shape can be used to mark the relevant part of the image. + negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative). + positive_overlap: IoU overlap for positive anchors (all anchors with overlap > positive_overlap are positive).
+ + Returns + labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1), + where N is the number of anchors for an image and the last column defines the anchor state + (-1 for ignore, 0 for bg, 1 for fg). + regression_batch: batch that contains bounding-box regression targets for an image & anchor states + (np.array of shape (batch_size, N, 4 + 1), where N is the number of anchors for an image, + the first 4 columns define regression targets for (x1, y1, x2, y2) and the last column defines + anchor states (-1 for ignore, 0 for bg, 1 for fg). + """ + + assert (len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal." + assert (len(annotations_group) > 0), "No data received to compute anchor targets for." + for annotations in annotations_group: + assert ('bboxes' in annotations), "Annotations should contain bboxes." + assert ('labels' in annotations), "Annotations should contain labels." + + batch_size = len(image_group) + + if detect_quadrangle: + regression_batch = np.zeros((batch_size, anchors.shape[0], 9 + 1), dtype=np.float32) + else: + regression_batch = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=np.float32) + labels_batch = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=np.float32) + + # compute labels and regression targets + for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): + if annotations['bboxes'].shape[0]: + # obtain indices of gt annotations with the greatest overlap + # argmax_overlaps_inds: id of ground truth box has greatest overlap with anchor + # (N, ), (N, ), (N, ) N is num_anchors + positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(anchors, + annotations['bboxes'], + negative_overlap, + positive_overlap) + labels_batch[index, ignore_indices, -1] = -1 + labels_batch[index, positive_indices, -1] = 1 + + regression_batch[index, ignore_indices, -1] = -1 + regression_batch[index, positive_indices, -1] = 1 + + # compute target class labels + labels_batch[ + index, positive_indices, annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)] = 1 + + regression_batch[index, :, :4] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :]) + if detect_quadrangle: + regression_batch[index, :, 4:8] = annotations['alphas'][argmax_overlaps_inds, :] + regression_batch[index, :, 8] = annotations['ratios'][argmax_overlaps_inds] + + # ignore anchors outside of image + if image.shape: + anchors_centers = np.vstack([(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T + indices = np.logical_or(anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0]) + + labels_batch[index, indices, -1] = -1 + regression_batch[index, indices, -1] = -1 + + return labels_batch, regression_batch + + +def compute_gt_annotations( + anchors, + annotations, + negative_overlap=0.4, + positive_overlap=0.5 +): + """ + Obtain indices of gt annotations with the greatest overlap. + + Args + anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2). + annotations: np.array of shape (K, 5) for (x1, y1, x2, y2, label). + negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative). + positive_overlap: IoU overlap or positive anchors (all anchors with overlap > positive_overlap are positive). 
+ + Returns + positive_indices: indices of positive anchors, (N, ) + ignore_indices: indices of ignored anchors, (N, ) + argmax_overlaps_inds: ordered overlaps indices, (N, ) + """ + # (N, K) + overlaps = compute_overlap(anchors.astype(np.float64), annotations.astype(np.float64)) + # (N, ) + argmax_overlaps_inds = np.argmax(overlaps, axis=1) + # (N, ) + max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] + + # assign "dont care" labels + # (N, ) + positive_indices = max_overlaps >= positive_overlap + + # adam: in case of there are gt boxes has no matched positive anchors + # nonzero_inds = np.nonzero(overlaps == np.max(overlaps, axis=0)) + # positive_indices[nonzero_inds[0]] = 1 + + # (N, ) + ignore_indices = (max_overlaps > negative_overlap) & ~positive_indices + + return positive_indices, ignore_indices, argmax_overlaps_inds + + +def layer_shapes(image_shape, model): + """ + Compute layer shapes given input image shape and the model. + + Args + image_shape: The shape of the image. + model: The model to use for computing how the image shape is transformed in the pyramid. + + Returns + A dictionary mapping layer names to image shapes. + """ + shape = { + model.layers[0].name: (None,) + image_shape, + } + + for layer in model.layers[1:]: + nodes = layer._inbound_nodes + for node in nodes: + input_shapes = [shape[inbound_layer.name] for inbound_layer in node.inbound_layers] + if not input_shapes: + continue + shape[layer.name] = layer.compute_output_shape(input_shapes[0] if len(input_shapes) == 1 else input_shapes) + + return shape + + +def make_shapes_callback(model): + """ + Make a function for getting the shape of the pyramid levels. + """ + + def get_shapes(image_shape, pyramid_levels): + shape = layer_shapes(image_shape, model) + image_shapes = [shape["P{}".format(level)][1:3] for level in pyramid_levels] + return image_shapes + + return get_shapes + + +def guess_shapes(image_shape, pyramid_levels): + """ + Guess shapes based on pyramid levels. + + Args + image_shape: The shape of the image. + pyramid_levels: A list of what pyramid levels are used. + + Returns + A list of image shapes at each pyramid level. + """ + image_shape = np.array(image_shape[:2]) + image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] + return image_shapes + + +def anchors_for_shape( + image_shape, + pyramid_levels=None, + anchor_params=None, + shapes_callback=None, +): + """ + Generators anchors for a given shape. + + Args + image_shape: The shape of the image. + pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]). + anchor_params: Struct containing anchor parameters. If None, default values are used. + shapes_callback: Function to call for getting the shape of the image at different pyramid levels. + + Returns + np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors. 
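+
+ Example (illustrative)
+ anchors = anchors_for_shape((512, 512, 3))
+ # With the default 3 ratios x 3 scales over pyramid levels 3-7 this gives
+ # (64**2 + 32**2 + 16**2 + 8**2 + 4**2) * 9 = 49104 anchors, i.e. shape (49104, 4).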
+ """ + + if pyramid_levels is None: + pyramid_levels = [3, 4, 5, 6, 7] + + if anchor_params is None: + anchor_params = AnchorParameters.default + + if shapes_callback is None: + shapes_callback = guess_shapes + feature_map_shapes = shapes_callback(image_shape, pyramid_levels) + + # compute anchors over all pyramid levels + all_anchors = np.zeros((0, 4), dtype=np.float32) + for idx, p in enumerate(pyramid_levels): + anchors = generate_anchors( + base_size=anchor_params.sizes[idx], + ratios=anchor_params.ratios, + scales=anchor_params.scales + ) + shifted_anchors = shift(feature_map_shapes[idx], anchor_params.strides[idx], anchors) + all_anchors = np.append(all_anchors, shifted_anchors, axis=0) + + return all_anchors.astype(np.float32) + + +def shift(feature_map_shape, stride, anchors): + """ + Produce shifted anchors based on shape of the map and stride size. + + Args + feature_map_shape : Shape to shift the anchors over. + stride : Stride to shift the anchors with over the shape. + anchors: The anchors to apply at each location. + """ + + # create a grid starting from half stride from the top left corner + shift_x = (np.arange(0, feature_map_shape[1]) + 0.5) * stride + shift_y = (np.arange(0, feature_map_shape[0]) + 0.5) * stride + + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + + shifts = np.vstack(( + shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel() + )).transpose() + + A = anchors.shape[0] + K = shifts.shape[0] + all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) + all_anchors = all_anchors.reshape((K * A, 4)) + + return all_anchors + + +def generate_anchors(base_size=16, ratios=None, scales=None): + """ + Generate anchor (reference) windows by enumerating aspect ratios X scales w.r.t. a reference window. + + Args: + base_size: + ratios: + scales: + + Returns: + + """ + if ratios is None: + ratios = AnchorParameters.default.ratios + + if scales is None: + scales = AnchorParameters.default.scales + + num_anchors = len(ratios) * len(scales) + + # initialize output anchors + anchors = np.zeros((num_anchors, 4)) + + anchors[:, 2:] = base_size * np.tile(np.repeat(scales, len(ratios))[None], (2, 1)).T + + areas = anchors[:, 2] * anchors[:, 3] + + # correct for ratios + anchors[:, 2] = np.sqrt(areas / np.tile(ratios, len(scales))) + anchors[:, 3] = anchors[:, 2] * np.tile(ratios, len(scales)) + + anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T + anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T + + return anchors + + +def bbox_transform(anchors, gt_boxes, scale_factors=None): + wa = anchors[:, 2] - anchors[:, 0] + ha = anchors[:, 3] - anchors[:, 1] + cxa = anchors[:, 0] + wa / 2. + cya = anchors[:, 1] + ha / 2. + + w = gt_boxes[:, 2] - gt_boxes[:, 0] + h = gt_boxes[:, 3] - gt_boxes[:, 1] + cx = gt_boxes[:, 0] + w / 2. + cy = gt_boxes[:, 1] + h / 2. + # Avoid NaN in division and log below. 
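+ # The targets computed below follow the standard RetinaNet / Faster R-CNN box
+ # encoding: ty = (cy - cya) / ha, tx = (cx - cxa) / wa, th = log(h / ha),
+ # tw = log(w / wa), stacked in (ty, tx, th, tw) order; the small epsilons added
+ # next keep the divisions and logarithms finite for degenerate boxes.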
+ ha += 1e-7 + wa += 1e-7 + h += 1e-7 + w += 1e-7 + tx = (cx - cxa) / wa + ty = (cy - cya) / ha + tw = np.log(w / wa) + th = np.log(h / ha) + if scale_factors: + ty /= scale_factors[0] + tx /= scale_factors[1] + th /= scale_factors[2] + tw /= scale_factors[3] + targets = np.stack([ty, tx, th, tw], axis=1) + return targets + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/colors.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/colors.py new file mode 100644 index 0000000000000000000000000000000000000000..8a92e2df01e9a4d9ee5f46c2d4196615081dd031 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/colors.py @@ -0,0 +1,141 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * +import warnings + + +def label_color(label): + """ Return a color from a set of predefined colors. Contains 80 colors in total. + + Args + label: The label to get the color for. + + Returns + A list of three values representing a RGB color. + + If no color is defined for a certain label, the color green is returned and a warning is printed. 
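+
+ Example (illustrative)
+ label_color(0) # -> [31, 0, 255], the first entry of the `colors` table below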
+ """ + if label < len(colors): + return colors[label] + else: + warnings.warn('Label {} has no color, returning default.'.format(label)) + return (0, 255, 0) + + +""" +Generated using: + +``` +colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)] +shuffle(colors) +pprint(colors) +``` +""" +colors = [ + [31 , 0 , 255] , + [0 , 159 , 255] , + [255 , 95 , 0] , + [255 , 19 , 0] , + [255 , 0 , 0] , + [255 , 38 , 0] , + [0 , 255 , 25] , + [255 , 0 , 133] , + [255 , 172 , 0] , + [108 , 0 , 255] , + [0 , 82 , 255] , + [0 , 255 , 6] , + [255 , 0 , 152] , + [223 , 0 , 255] , + [12 , 0 , 255] , + [0 , 255 , 178] , + [108 , 255 , 0] , + [184 , 0 , 255] , + [255 , 0 , 76] , + [146 , 255 , 0] , + [51 , 0 , 255] , + [0 , 197 , 255] , + [255 , 248 , 0] , + [255 , 0 , 19] , + [255 , 0 , 38] , + [89 , 255 , 0] , + [127 , 255 , 0] , + [255 , 153 , 0] , + [0 , 255 , 255] , + [0 , 255 , 216] , + [0 , 255 , 121] , + [255 , 0 , 248] , + [70 , 0 , 255] , + [0 , 255 , 159] , + [0 , 216 , 255] , + [0 , 6 , 255] , + [0 , 63 , 255] , + [31 , 255 , 0] , + [255 , 57 , 0] , + [255 , 0 , 210] , + [0 , 255 , 102] , + [242 , 255 , 0] , + [255 , 191 , 0] , + [0 , 255 , 63] , + [255 , 0 , 95] , + [146 , 0 , 255] , + [184 , 255 , 0] , + [255 , 114 , 0] , + [0 , 255 , 235] , + [255 , 229 , 0] , + [0 , 178 , 255] , + [255 , 0 , 114] , + [255 , 0 , 57] , + [0 , 140 , 255] , + [0 , 121 , 255] , + [12 , 255 , 0] , + [255 , 210 , 0] , + [0 , 255 , 44] , + [165 , 255 , 0] , + [0 , 25 , 255] , + [0 , 255 , 140] , + [0 , 101 , 255] , + [0 , 255 , 82] , + [223 , 255 , 0] , + [242 , 0 , 255] , + [89 , 0 , 255] , + [165 , 0 , 255] , + [70 , 255 , 0] , + [255 , 0 , 172] , + [255 , 76 , 0] , + [203 , 255 , 0] , + [204 , 0 , 255] , + [255 , 0 , 229] , + [255 , 133 , 0] , + [127 , 0 , 255] , + [0 , 235 , 255] , + [0 , 255 , 197] , + [255 , 0 , 191] , + [0 , 44 , 255] , + [50 , 255 , 0] +] + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/compute_overlap.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/compute_overlap.py new file mode 100644 index 0000000000000000000000000000000000000000..d5193be1c0961644997fda8713b9d62d5fcf2b00 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/compute_overlap.py @@ -0,0 +1,49 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + +def compute_overlap(a, b): + #a [N,4] + #b [M,4] + area = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1) + iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], axis=1), b[:, 0]) + 1 + ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], axis=1), b[:, 1]) + 1 + # assume a holds N boxes and b holds M boxes + # np.expand_dims((N,), axis=1) turns (N,) into (N, 1) + # np.minimum((N, 1), (M,)) broadcasts to an (N, M) matrix comparing every box in a against every box in b + # take the smaller x/y extents to compute the intersection + # iw and ih are the width and height of the intersection; both have shape (N, M), one entry per anchor / ground-truth pair + iw = np.maximum(iw, 0) + ih = np.maximum(ih, 0) # do not allow iw or ih to be negative + + ua = np.expand_dims((a[:, 2] - a[:, 0] + 1) *(a[:, 3] - a[:, 1] + 1), axis=1) + area - iw * ih + # union: S_a + S_b - intersection_ab + ua = np.maximum(ua, np.finfo(float).eps) + + intersection = iw * ih + return intersection / ua # (N,M) \ No newline at end of file diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/draw_boxes.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/draw_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..d1fe66a7234c8b09e17a7d1c12f908ae5dc805ed --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/draw_boxes.py @@ -0,0 +1,46 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+from npu_bridge.npu_init import * +import cv2 + + +def draw_boxes(image, boxes, scores, labels, colors, classes): + for b, l, s in zip(boxes, labels, scores): + class_id = int(l) + class_name = classes[class_id] + + xmin, ymin, xmax, ymax = list(map(int, b)) + score = '{:.4f}'.format(s) + color = colors[class_id] + label = '-'.join([class_name, score]) + + ret, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 1) + cv2.rectangle(image, (xmin, ymax - ret[1] - baseline), (xmin + ret[0], ymax), color, -1) + cv2.putText(image, label, (xmin, ymax - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/image.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/image.py new file mode 100644 index 0000000000000000000000000000000000000000..0b92454ecff0f111a05a634756b3d5dea29a48c5 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/image.py @@ -0,0 +1,386 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from npu_bridge.npu_init import * +import numpy as np +import cv2 +from PIL import Image + +from .transform import change_transform_origin + + +def read_image_bgr(path): + """ + Read an image in BGR format. + + Args + path: Path to the image. + """ + # We deliberately don't use cv2.imread here, since it gives no feedback on errors while reading the image. + image = np.asarray(Image.open(path).convert('RGB')) + return image[:, :, ::-1].copy() + + +def preprocess_image(x, mode='caffe'): + """ + Preprocess an image by subtracting the ImageNet mean. + + Args + x: np.array of shape (None, None, 3) or (3, None, None). + mode: One of "caffe" or "tf". + - caffe: will zero-center each color channel with + respect to the ImageNet dataset, without scaling. + - tf: will scale pixels between -1 and 1, sample-wise. + + Returns + The input with the ImageNet mean subtracted. 
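+
+ Example (illustrative)
+ x = preprocess_image(image, mode='caffe') # subtracts the BGR ImageNet means (103.939, 116.779, 123.68)
+ x = preprocess_image(image, mode='tf') # rescales pixel values from [0, 255] to [-1, 1]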
+ """ + # mostly identical to "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py" + # except for converting RGB -> BGR since we assume BGR already + + # covert always to float32 to keep compatibility with opencv + x = x.astype(np.float32) + + if mode == 'tf': + x /= 127.5 + x -= 1. + elif mode == 'caffe': + x[..., 0] -= 103.939 + x[..., 1] -= 116.779 + x[..., 2] -= 123.68 + + return x + + +def adjust_transform_for_image(transform, image, relative_translation): + """ + Adjust a transformation for a specific image. + + The translation of the matrix will be scaled with the size of the image. + The linear part of the transformation will adjusted so that the origin of the transformation will be at the center of the image. + """ + height, width, channels = image.shape + + result = transform + + # Scale the translation with the image size if specified. + if relative_translation: + result[0:2, 2] *= [width, height] + + # Move the origin of transformation. + result = change_transform_origin(transform, (0.5 * width, 0.5 * height)) + + return result + + +class TransformParameters: + """ + Struct holding parameters determining how to apply a transformation to an image. + + Args + fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap' + interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4' + cval: Fill value to use with fill_mode='constant' + relative_translation: If true (the default), interpret translation as a factor of the image size. + If false, interpret it as absolute pixels. + """ + + def __init__( + self, + fill_mode='nearest', + interpolation='linear', + cval=0, + relative_translation=True, + ): + self.fill_mode = fill_mode + self.cval = cval + self.interpolation = interpolation + self.relative_translation = relative_translation + + def cvBorderMode(self): + if self.fill_mode == 'constant': + return cv2.BORDER_CONSTANT + if self.fill_mode == 'nearest': + return cv2.BORDER_REPLICATE + if self.fill_mode == 'reflect': + return cv2.BORDER_REFLECT_101 + if self.fill_mode == 'wrap': + return cv2.BORDER_WRAP + + def cvInterpolation(self): + if self.interpolation == 'nearest': + return cv2.INTER_NEAREST + if self.interpolation == 'linear': + return cv2.INTER_LINEAR + if self.interpolation == 'cubic': + return cv2.INTER_CUBIC + if self.interpolation == 'area': + return cv2.INTER_AREA + if self.interpolation == 'lanczos4': + return cv2.INTER_LANCZOS4 + + +def apply_transform(matrix, image, params): + """ + Apply a transformation to an image. + + The origin of transformation is at the top left corner of the image. + + The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image. + Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space. + + Args + matrix: A homogeneous 3 by 3 matrix holding representing the transformation to apply. + image: The image to transform. + params: The transform parameters (see TransformParameters) + """ + output = cv2.warpAffine( + image, + matrix[:2, :], + dsize=(image.shape[1], image.shape[0]), + flags=params.cvInterpolation(), + borderMode=params.cvBorderMode(), + borderValue=params.cval, + ) + return output + + +def compute_resize_scale(image_shape, min_side=800, max_side=1333): + """ + Compute an image scale such that the image size is constrained to min_side and max_side. + + Args + min_side: The image's min side will be equal to min_side after resizing. 
+ max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side. + + Returns + A resizing scale. + """ + (rows, cols, _) = image_shape + + smallest_side = min(rows, cols) + + # rescale the image so the smallest side is min_side + scale = min_side / smallest_side + + # check if the largest side is now greater than max_side, which can happen + # when images have a large aspect ratio + largest_side = max(rows, cols) + if largest_side * scale > max_side: + scale = max_side / largest_side + + return scale + + +def resize_image(img, min_side=800, max_side=1333): + """ + Resize an image such that the size is constrained to min_side and max_side. + + Args + min_side: The image's min side will be equal to min_side after resizing. + max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side. + + Returns + A resized image. + """ + # compute scale to resize the image + scale = compute_resize_scale(img.shape, min_side=min_side, max_side=max_side) + + # resize the image with the computed scale + img = cv2.resize(img, None, fx=scale, fy=scale) + + return img, scale + + +def _uniform(val_range): + """ + Uniformly sample from the given range. + + Args + val_range: A pair of lower and upper bound. + """ + return np.random.uniform(val_range[0], val_range[1]) + + +def _check_range(val_range, min_val=None, max_val=None): + """ + Check whether the range is a valid range. + + Args + val_range: A pair of lower and upper bound. + min_val: Minimal value for the lower bound. + max_val: Maximal value for the upper bound. + """ + if val_range[0] > val_range[1]: + raise ValueError('interval lower bound > upper bound') + if min_val is not None and val_range[0] < min_val: + raise ValueError('invalid interval lower bound') + if max_val is not None and val_range[1] > max_val: + raise ValueError('invalid interval upper bound') + + +def _clip(image): + """ + Clip and convert an image to np.uint8. + + Args + image: Image to clip. + """ + return np.clip(image, 0, 255).astype(np.uint8) + + +class VisualEffect: + """ + Struct holding parameters and applying image color transformation. + + Args + contrast_factor: A factor for adjusting contrast. Should be between 0 and 3. + brightness_delta: Brightness offset between -1 and 1 added to the pixel values. + hue_delta: Hue offset between -1 and 1 added to the hue channel. + saturation_factor: A factor multiplying the saturation values of each pixel. + """ + + def __init__( + self, + contrast_factor, + brightness_delta, + hue_delta, + saturation_factor, + ): + self.contrast_factor = contrast_factor + self.brightness_delta = brightness_delta + self.hue_delta = hue_delta + self.saturation_factor = saturation_factor + + def __call__(self, image): + """ + Apply a visual effect on the image. 
+ + Args + image: Image to adjust + """ + + if self.contrast_factor: + image = adjust_contrast(image, self.contrast_factor) + if self.brightness_delta: + image = adjust_brightness(image, self.brightness_delta) + + if self.hue_delta or self.saturation_factor: + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + if self.hue_delta: + image = adjust_hue(image, self.hue_delta) + if self.saturation_factor: + image = adjust_saturation(image, self.saturation_factor) + + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + + return image + + +def random_visual_effect_generator( + contrast_range=(0.9, 1.1), + brightness_range=(-.1, .1), + hue_range=(-0.05, 0.05), + saturation_range=(0.95, 1.05) +): + """ + Generate visual effect parameters uniformly sampled from the given intervals. + + Args + contrast_factor: A factor interval for adjusting contrast. Should be between 0 and 3. + brightness_delta: An interval between -1 and 1 for the amount added to the pixels. + hue_delta: An interval between -1 and 1 for the amount added to the hue channel. + The values are rotated if they exceed 180. + saturation_factor: An interval for the factor multiplying the saturation values of each + pixel. + """ + _check_range(contrast_range, 0) + _check_range(brightness_range, -1, 1) + _check_range(hue_range, -1, 1) + _check_range(saturation_range, 0) + + def _generate(): + while True: + yield VisualEffect( + contrast_factor=_uniform(contrast_range), + brightness_delta=_uniform(brightness_range), + hue_delta=_uniform(hue_range), + saturation_factor=_uniform(saturation_range), + ) + + return _generate() + + +def adjust_contrast(image, factor): + """ + Adjust contrast of an image. + + Args + image: Image to adjust. + factor: A factor for adjusting contrast. + """ + mean = image.mean(axis=0).mean(axis=0) + return _clip((image - mean) * factor + mean) + + +def adjust_brightness(image, delta): + """ + Adjust brightness of an image + + Args + image: Image to adjust. + delta: Brightness offset between -1 and 1 added to the pixel values. + """ + return _clip(image + delta * 255) + + +def adjust_hue(image, delta): + """ + Adjust hue of an image. + + Args + image: Image to adjust. + delta: An interval between -1 and 1 for the amount added to the hue channel. + The values are rotated if they exceed 180. + """ + image[..., 0] = np.mod(image[..., 0] + delta * 180, 180) + return image + + +def adjust_saturation(image, factor): + """ + Adjust saturation of an image. + + Args + image: Image to adjust. + factor: An interval for the factor multiplying the saturation values of each pixel. + """ + image[..., 1] = np.clip(image[..., 1] * factor, 0, 255) + return image + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/transform.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..e34b3c1babe68794432fdca422a9c0c857f1db12 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/transform.py @@ -0,0 +1,333 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +import numpy as np + +DEFAULT_PRNG = np.random + + +def colvec(*args): + """ + Create a numpy array representing a column vector. + """ + return np.array([args]).T + + +def transform_aabb(transform, aabb): + """ + Apply a transformation to an axis aligned bounding box. + + The result is a new AABB in the same coordinate system as the original AABB. + The new AABB contains all corner points of the original AABB after applying the given transformation. + + Args + transform: The transformation to apply. + x1: The minimum x value of the AABB. + y1: The minimum y value of the AABB. + x2: The maximum x value of the AABB. + y2: The maximum y value of the AABB. + Returns + The new AABB as tuple (x1, y1, x2, y2) + """ + x1, y1, x2, y2 = aabb + # Transform all 4 corners of the AABB. + points = transform.dot([ + [x1, x2, x1, x2], + [y1, y2, y2, y1], + [1, 1, 1, 1], + ]) + + # Extract the min and max corners again. + # (3, ) (min_x, min_y, 1) + min_corner = points.min(axis=1) + # (3, ) (max_x, max_y, 1) + max_corner = points.max(axis=1) + + return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]] + + +def _random_vector(min, max, prng=DEFAULT_PRNG): + """ + Construct a random vector between min and max. + + Args + min: the minimum value for each component, (n, ) + max: the maximum value for each component, (n, ) + """ + min = np.array(min) + max = np.array(max) + assert min.shape == max.shape + assert len(min.shape) == 1 + return prng.uniform(min, max) + + +def rotation(angle): + """ + Construct a homogeneous 2D rotation matrix. + + Args + angle: the angle in radians + Returns + the rotation matrix as 3 by 3 numpy array + """ + return np.array([ + [np.cos(angle), -np.sin(angle), 0], + [np.sin(angle), np.cos(angle), 0], + [0, 0, 1] + ]) + + +def random_rotation(min, max, prng=DEFAULT_PRNG): + """ + Construct a random rotation between -max and max. + + Args + min: a scalar for the minimum absolute angle in radians + max: a scalar for the maximum absolute angle in radians + prng: the pseudo-random number generator to use. + Returns + a homogeneous 3 by 3 rotation matrix + """ + return rotation(prng.uniform(min, max)) + + +def translation(translation): + """ + Construct a homogeneous 2D translation matrix. 
+ + Args: + translation: the translation 2D vector + + Returns: + the translation matrix as 3 by 3 numpy array + + """ + return np.array([ + [1, 0, translation[0]], + [0, 1, translation[1]], + [0, 0, 1] + ]) + + +def random_translation(min, max, prng=DEFAULT_PRNG): + """ + Construct a random 2D translation between min and max. + + Args + min: a 2D vector with the minimum translation for each dimension + max: a 2D vector with the maximum translation for each dimension + prng: the pseudo-random number generator to use. + Returns + a homogeneous 3 by 3 translation matrix + """ + return translation(_random_vector(min, max, prng)) + + +def shear(angle): + """ + Construct a homogeneous 2D shear matrix. + + Args + angle: the shear angle in radians + Returns + the shear matrix as 3 by 3 numpy array + """ + return np.array([ + [1, -np.sin(angle), 0], + [0, np.cos(angle), 0], + [0, 0, 1] + ]) + + +def random_shear(min, max, prng=DEFAULT_PRNG): + """ + Construct a random 2D shear matrix with shear angle between -max and max. + + Args + min: the minimum shear angle in radians. + max: the maximum shear angle in radians. + prng: the pseudo-random number generator to use. + Returns + a homogeneous 3 by 3 shear matrix + """ + return shear(prng.uniform(min, max)) + + +def scaling(factor): + """ + Construct a homogeneous 2D scaling matrix. + + Args + factor: a 2D vector for X and Y scaling + Returns + the zoom matrix as 3 by 3 numpy array + """ + + return np.array([ + [factor[0], 0, 0], + [0, factor[1], 0], + [0, 0, 1] + ]) + + +def random_scaling(min, max, prng=DEFAULT_PRNG): + """ + Construct a random 2D scale matrix between -max and max. + + Args + min: a 2D vector containing the minimum scaling factor for X and Y. + min: a 2D vector containing The maximum scaling factor for X and Y. + prng: the pseudo-random number generator to use. + Returns + a homogeneous 3 by 3 scaling matrix + """ + return scaling(_random_vector(min, max, prng)) + + +def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG): + """ + Construct a transformation randomly containing X/Y flips (or not). + + Args + flip_x_chance: The chance that the result will contain a flip along the X axis. + flip_y_chance: The chance that the result will contain a flip along the Y axis. + prng: The pseudo-random number generator to use. + Returns + a homogeneous 3 by 3 transformation matrix + """ + flip_x = prng.uniform(0, 1) < flip_x_chance + flip_y = prng.uniform(0, 1) < flip_y_chance + # 1 - 2 * bool gives 1 for False and -1 for True. + return scaling((1 - 2 * flip_x, 1 - 2 * flip_y)) + + +def change_transform_origin(transform, center): + """ + Create a new transform representing the same transformation, only with the origin of the linear part changed. + + Args + transform: the transformation matrix + center: the new origin of the transformation + Returns + translate(center) * transform * translate(-center) + """ + center = np.array(center) + return np.linalg.multi_dot([translation(center), transform, translation(-center)]) + + +def random_transform( + min_rotation=0, + max_rotation=0, + min_translation=(0, 0), + max_translation=(0, 0), + min_shear=0, + max_shear=0, + min_scaling=(1, 1), + max_scaling=(1, 1), + flip_x_chance=0, + flip_y_chance=0, + prng=DEFAULT_PRNG +): + """ + Create a random transformation. 
+ + The transformation consists of the following operations in this order (from left to right): + * rotation + * translation + * shear + * scaling + * flip x (if applied) + * flip y (if applied) + + Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation + as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. + Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret + the translation directly as pixel distances instead. + + Args + min_rotation: The minimum rotation in radians for the transform as scalar. + max_rotation: The maximum rotation in radians for the transform as scalar. + min_translation: The minimum translation for the transform as 2D column vector. + max_translation: The maximum translation for the transform as 2D column vector. + min_shear: The minimum shear angle for the transform in radians. + max_shear: The maximum shear angle for the transform in radians. + min_scaling: The minimum scaling for the transform as 2D column vector. + max_scaling: The maximum scaling for the transform as 2D column vector. + flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction. + flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction. + prng: The pseudo-random number generator to use. + """ + return np.linalg.multi_dot([ + random_rotation(min_rotation, max_rotation, prng), + random_translation(min_translation, max_translation, prng), + random_shear(min_shear, max_shear, prng), + random_scaling(min_scaling, max_scaling, prng), + random_flip(flip_x_chance, flip_y_chance, prng) + ]) + + +def random_transform_generator(prng=None, **kwargs): + """ + Create a random transform generator. + Uses a dedicated, newly created, properly seeded PRNG by default instead of the global DEFAULT_PRNG. + + The transformation consists of the following operations in this order (from left to right): + * rotation + * translation + * shear + * scaling + * flip x (if applied) + * flip y (if applied) + + Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation + as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. + Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret + the translation directly as pixel distances instead. + + Args + min_rotation: The minimum rotation in radians for the transform as scalar. + max_rotation: The maximum rotation in radians for the transform as scalar. + min_translation: The minimum translation for the transform as 2D column vector. + max_translation: The maximum translation for the transform as 2D column vector. + min_shear: The minimum shear angle for the transform in radians. + max_shear: The maximum shear angle for the transform in radians. + min_scaling: The minimum scaling for the transform as 2D column vector. + max_scaling: The maximum scaling for the transform as 2D column vector. + flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction. + flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction. + prng: The pseudo-random number generator to use. + """ + + if prng is None: + # RandomState automatically seeds using the best available method. 
+ prng = np.random.RandomState() + + while True: + yield random_transform(prng=prng, **kwargs) + diff --git a/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/visualization.py b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..4743641e822579b32af7488452b5f757c4bda215 --- /dev/null +++ b/TensorFlow/contrib/cv/EfficientDet_ID0693_for_TensorFlow /utils/visualization.py @@ -0,0 +1,120 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +import cv2 +import numpy as np + +from .colors import label_color + + +def draw_box(image, box, color, thickness=2): + """ Draws a box on an image with a given color. + + # Arguments + image : The image to draw on. + box : A list of 4 elements (x1, y1, x2, y2). + color : The color of the box. + thickness : The thickness of the lines to draw a box with. + """ + b = np.array(box).astype(np.int32) + cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA) + + +def draw_caption(image, box, caption): + """ Draws a caption above the box in an image. + + # Arguments + image : The image to draw on. + box : A list of 4 elements (x1, y1, x2, y2). + caption : String containing the text to draw. + """ + b = np.array(box).astype(int) + cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) + cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) + + +def draw_boxes(image, boxes, color, thickness=2): + """ Draws boxes on an image with a given color. + + # Arguments + image : The image to draw on. + boxes : A [N, 4] matrix (x1, y1, x2, y2). + color : The color of the boxes. + thickness : The thickness of the lines to draw boxes with. + """ + for b in boxes: + draw_box(image, b, color, thickness=thickness) + + +def draw_detections(image, boxes, scores, labels, colors, label_to_name=None, score_threshold=0.5): + """ Draws detections in an image. + + # Arguments + image : The image to draw on. + boxes : A [N, 4] matrix (x1, y1, x2, y2). + scores : A list of N classification scores. + labels : A list of N labels. + colors : The colors of the boxes. 
+ label_to_name : (optional) Functor for mapping a label to a name. + score_threshold : Threshold used for determining what detections to draw. + """ + selection = np.where(scores > score_threshold)[0] + + for i in selection: + c = colors[int(labels[i])] + draw_box(image, boxes[i, :], color=c) + + # draw labels + caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i]) + draw_caption(image, boxes[i, :], caption) + + +def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None): + """ Draws annotations in an image. + + # Arguments + image : The image to draw on. + annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]). + color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. + label_to_name : (optional) Functor for mapping a label to a name. + """ + if isinstance(annotations, np.ndarray): + annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]} + + assert('bboxes' in annotations) + assert('labels' in annotations) + assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0]) + + for i in range(annotations['bboxes'].shape[0]): + label = annotations['labels'][i] + c = color if color is not None else label_color(label) + caption = '{}'.format(label_to_name(label) if label_to_name else label) + draw_caption(image, annotations['bboxes'][i], caption) + draw_box(image, annotations['bboxes'][i], color=c) +
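+
+
+ # Illustrative usage sketch (assumes a BGR image, detection outputs from the model,
+ # and hypothetical `gt_boxes` / `gt_labels` arrays taken from a data generator):
+ # colors = [label_color(i) for i in range(80)]
+ # draw_detections(image, boxes, scores, labels, colors, score_threshold=0.5)
+ # draw_annotations(image, {'bboxes': gt_boxes, 'labels': gt_labels}, color=(0, 255, 0))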