From d4a16a3a110c4bf51be62d68a6fca0c71480ca57 Mon Sep 17 00:00:00 2001 From: yixuanyuan Date: Tue, 13 Dec 2022 23:31:50 +0800 Subject: [PATCH] BiSeNet first commit --- TensorFlow/contrib/cv/BiSeNet/.gitignore | 0 .../contrib/cv/BiSeNet/Dataset/__init__.py | 14 + .../contrib/cv/BiSeNet/Dataset/dataset.py | 253 +++++++++ TensorFlow/contrib/cv/BiSeNet/LICENSE | 202 ++++++++ TensorFlow/contrib/cv/BiSeNet/README.md | 227 +++++++++ .../contrib/cv/BiSeNet/builders/__init__.py | 14 + .../cv/BiSeNet/builders/frontend_builder.py | 92 ++++ .../contrib/cv/BiSeNet/configuration.py | 119 +++++ .../contrib/cv/BiSeNet/frontends/__init__.py | 14 + .../cv/BiSeNet/frontends/conv_blocks.py | 358 +++++++++++++ .../contrib/cv/BiSeNet/frontends/densenet.py | 246 +++++++++ .../cv/BiSeNet/frontends/inception_utils.py | 78 +++ .../cv/BiSeNet/frontends/inception_v4.py | 354 +++++++++++++ .../cv/BiSeNet/frontends/mobilenet_base.py | 479 ++++++++++++++++++ .../cv/BiSeNet/frontends/mobilenet_v2.py | 205 ++++++++ .../cv/BiSeNet/frontends/resnet_utils.py | 254 ++++++++++ .../contrib/cv/BiSeNet/frontends/resnet_v1.py | 271 ++++++++++ .../contrib/cv/BiSeNet/frontends/resnet_v2.py | 329 ++++++++++++ .../cv/BiSeNet/frontends/se_resnext.py | 202 ++++++++ .../contrib/cv/BiSeNet/frontends/xception.py | 131 +++++ .../contrib/cv/BiSeNet/models/__init__.py | 14 + .../contrib/cv/BiSeNet/models/bisenet.py | 305 +++++++++++ .../contrib/cv/BiSeNet/test/train_full_1p.sh | 176 +++++++ .../cv/BiSeNet/test/train_performance_1p.sh | 176 +++++++ TensorFlow/contrib/cv/BiSeNet/test_npu.py | 65 +++ TensorFlow/contrib/cv/BiSeNet/train_npu.py | 238 +++++++++ .../contrib/cv/BiSeNet/utils/__init__.py | 14 + .../utils/get_pretrained_checkpoints.py | 67 +++ .../contrib/cv/BiSeNet/utils/misc_utils.py | 200 ++++++++ 29 files changed, 5097 insertions(+) create mode 100644 TensorFlow/contrib/cv/BiSeNet/.gitignore create mode 100644 TensorFlow/contrib/cv/BiSeNet/Dataset/__init__.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/Dataset/dataset.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/LICENSE create mode 100644 TensorFlow/contrib/cv/BiSeNet/README.md create mode 100644 TensorFlow/contrib/cv/BiSeNet/builders/__init__.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/builders/frontend_builder.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/configuration.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/__init__.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/conv_blocks.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/densenet.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/inception_utils.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/inception_v4.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_base.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_v2.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/resnet_utils.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v1.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v2.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/se_resnext.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/frontends/xception.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/models/__init__.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/models/bisenet.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/test/train_full_1p.sh create mode 100644 TensorFlow/contrib/cv/BiSeNet/test/train_performance_1p.sh create mode 100644 
TensorFlow/contrib/cv/BiSeNet/test_npu.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/train_npu.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/utils/__init__.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/utils/get_pretrained_checkpoints.py create mode 100644 TensorFlow/contrib/cv/BiSeNet/utils/misc_utils.py diff --git a/TensorFlow/contrib/cv/BiSeNet/.gitignore b/TensorFlow/contrib/cv/BiSeNet/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/TensorFlow/contrib/cv/BiSeNet/Dataset/__init__.py b/TensorFlow/contrib/cv/BiSeNet/Dataset/__init__.py new file mode 100644 index 000000000..6a1eaa12e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/Dataset/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/Dataset/dataset.py b/TensorFlow/contrib/cv/BiSeNet/Dataset/dataset.py new file mode 100644 index 000000000..c805dd0fd --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/Dataset/dataset.py @@ -0,0 +1,253 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import os.path as osp +from utils.misc_utils import get, get_label_info +import logging +import tensorflow as tf +from tensorflow.python.ops import control_flow_ops +import random + + +def one_hot_it(label, label_values): + """ + Convert a segmentation image label array to one-hot format + by replacing each pixel value with a vector of length num_classes + + # Arguments + label: The 2D array segmentation image label + label_values + + # Returns + A 2D array with the same width and hieght as the input, but + with a depth size of num_classes + """ + semantic_map = [] + for colour in label_values: + # colour_map = np.full((label.shape[0], label.shape[1], label.shape[2]), colour, dtype=int) + equality = tf.equal(label, colour) + class_map = tf.reduce_all(equality, axis=-1) + semantic_map.append(class_map) + semantic_map = tf.stack(semantic_map, axis=-1) + + return semantic_map + + +def _apply_with_random_selector(x, func, num_cases, label): + sel = tf.random.uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. 
+ return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0], label + + +def _distort_color(image, color_ordering=0, fast_mode=True, scope=None): + with tf.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. 
+ return tf.clip_by_value(image, 0.0, 1.0) + + +def _parse_function(image_filename, label_filename, img_mean, class_dict): + img_contents = tf.read_file(image_filename) + label_contents = tf.read_file(label_filename) + + # Decode image & label + img = tf.image.decode_png(img_contents, channels=3) + img = tf.image.convert_image_dtype(img, dtype=tf.float32) + if img_mean is not None: + img -= img_mean/255 + + label = tf.image.decode_png(label_contents, channels=3) + _, label_values = get_label_info(class_dict) + label = one_hot_it(label, label_values) + label = tf.cast(label, dtype=tf.int32) + label = tf.argmax(label, axis=-1, output_type=tf.int32) + label = tf.expand_dims(label, axis=-1) + + return img, label + + +def _image_mirroring(img, label): + distort_left_right_random = tf.random.uniform([1], 0, 1.0, dtype=tf.float32)[0] + mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) + mirror = tf.boolean_mask([0, 1, 2], mirror) + img = tf.reverse(img, mirror) + label = tf.reverse(label, mirror) + + return img, label + + +def _image_scaling(img, label): + scale = tf.random.uniform([1], minval=0.5, maxval=2.0, dtype=tf.float32) + h_new = tf.cast(tf.multiply(tf.cast(tf.shape(img)[0], dtype=tf.float32), scale), dtype=tf.int32) + w_new = tf.cast(tf.multiply(tf.cast(tf.shape(img)[1], dtype=tf.float32), scale), dtype=tf.int32) + new_shape = tf.squeeze(tf.stack([h_new, w_new]), axis=[1]) + img = tf.image.resize_images(img, new_shape) + label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) + label = tf.squeeze(label, axis=[0]) + + return img, label + + +def _random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w): + label = tf.cast(label, dtype=tf.float32) + image_shape = tf.shape(image) + # TODO: only useful in camvid, to fix + pad_h = tf.maximum(crop_h, image_shape[0])-image_shape[0] + pad_w = tf.maximum(crop_w, image_shape[1])-image_shape[1] + image = tf.pad(image, [[0, pad_h], [0, pad_w], [0, 0]], constant_values=0) + label = tf.pad(label, [[0, pad_h], [0, pad_w], [0, 0]], constant_values=30) + + combined = tf.concat(axis=2, values=[image, label]) + # combined = tf.image.pad_to_bounding_box( + # combined, + # 0, + # 0, + # tf.maximum(crop_h, image_shape[0]), + # tf.maximum(crop_w, image_shape[1])) + + last_image_dim = tf.shape(image)[-1] + last_label_dim = tf.shape(label)[-1] + combined_crop = tf.random_crop(combined, [crop_h, crop_w, last_image_dim+last_label_dim]) + img_crop = combined_crop[:, :, :last_image_dim] + label_crop = combined_crop[:, :, last_image_dim:] + label_crop = tf.cast(label_crop, dtype=tf.int32) + + # Set static shape so that tensorflow knows shape at compile time. + img_crop.set_shape((crop_h, crop_w, 3)) + label_crop.set_shape((crop_h, crop_w, 1)) + # label_crop = tf.image.resize_nearest_neighbor(tf.expand_dims(label_crop, 0), [crop_h//8, crop_w//8]) + # label_crop = tf.squeeze(label_crop, axis=0) + + return img_crop, label_crop + + +def _check_size(image, label, crop_h, crop_w): + new_shape = tf.squeeze(tf.stack([[crop_h], [crop_w]]), axis=[1]) + image = tf.image.resize_images(image, new_shape) + label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) + label = tf.squeeze(label, axis=[0]) + # Set static shape so that tensorflow knows shape at compile time. 
+ image.set_shape((crop_h, crop_w, 3)) + label.set_shape((crop_h, crop_w, 1)) + label = tf.squeeze(label, axis=2) + return image, label + + +class DataLoader(object): + def __init__(self, config, Dataset='CamVid', class_dict='./CamVid/class_dict.csv'): + self.config = config + self.dataSet_dir = Dataset + self.class_dict = class_dict + self.dataset = None + self.iterator = None + self.build() + + def build(self): + self.prepare_data() + self.build_iterator() + + def prepare_data(self): + # Parameter prepare + dataset_dir = self.dataSet_dir + input_dir = self.config['input_dir'] + output_dir = self.config['output_dir'] + crop_h = self.config['crop_h'] + crop_w = self.config['crop_w'] + threads = self.config['prefetch_threads'] + img_mean = get(self.config, 'img_mean', None) + preprocess_name = get(self.config, 'preprocessing_name', None) + random_scale = get(self.config, 'random_scale', False) + random_mirror = get(self.config, 'random_mirror', True) + batch_size = get(self.config, 'batch_size', 8) + + input_names = [] + output_names = [] + for file in os.listdir(osp.join(dataset_dir, input_dir)): + input_names.append(osp.join(dataset_dir, input_dir) + "/" + file) + for file in os.listdir(osp.join(dataset_dir, output_dir)): + output_names.append(osp.join(dataset_dir, output_dir) + "/" + file) + + input_names.sort(), output_names.sort() + + dataset = tf.data.Dataset.from_tensor_slices((input_names, output_names)) + dataset = dataset.map(lambda x, y: _parse_function(x, y, img_mean, self.class_dict), num_parallel_calls=threads) + + logging.info('preproces -- {}'.format(preprocess_name)) + if preprocess_name == 'augment': + if random_mirror: + dataset = dataset.map(_image_mirroring, num_parallel_calls=threads) + if random_scale: + dataset = dataset.map(_image_scaling, num_parallel_calls=threads) + + dataset = dataset.map(lambda x, y: _random_crop_and_pad_image_and_labels(x, y, crop_h, crop_w), + num_parallel_calls=threads) + dataset = dataset.map(lambda image, label: _apply_with_random_selector(image, lambda x, ordering: _distort_color + (x, ordering, fast_mode=True), + num_cases=4, label=label)) + + dataset = dataset.map(lambda image, label: _check_size(image, label, crop_h, crop_w)) + dataset = dataset.shuffle(buffer_size=100) + dataset = dataset.batch(batch_size, drop_remainder=True) + # dataset = dataset.batch(batch_size) + dataset = dataset.repeat() + self.dataset = dataset + + def build_iterator(self): + # self.iterator = self.dataset.make_one_shot_iterator() + self.iterator = tf.compat.v1.data.make_one_shot_iterator(self.dataset) + + def get_one_batch(self): + return self.iterator.get_next() + + + + + diff --git a/TensorFlow/contrib/cv/BiSeNet/LICENSE b/TensorFlow/contrib/cv/BiSeNet/LICENSE new file mode 100644 index 000000000..57bc88a15 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ diff --git a/TensorFlow/contrib/cv/BiSeNet/README.md b/TensorFlow/contrib/cv/BiSeNet/README.md new file mode 100644 index 000000000..74c6d3f9d --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/README.md @@ -0,0 +1,227 @@ +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) +- [迁移学习指导](#迁移学习指导.md) +- [高级参考](#高级参考.md) +

+<h2 id="基本信息.md">基本信息</h2>

+ +**发布者(Publisher):Huawei** + +**应用领域(Application Domain):Semantic Segmentation** + +**版本(Version):1.1** + +**修改时间(Modified) :2022.12.13** + +**大小(Size):256KB** + +**框架(Framework):TensorFlow_1.15.0** + +**模型格式(Model Format):ckpt** + +**精度(Precision):Mixed** + +**处理器(Processor):昇腾910** + +**应用级别(Categories):Official** + +**描述(Description):基于TensorFlow框架的BiSeNet训练代码** + +

+<h2 id="概述.md">概述</h2>

+ +## 简述 + + BiSeNet是一种新的双向分割网络的Tensorflow 实现。用于实时性语义分割 + +- 参考论文: + + [BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897) + +- 参考实现: + + https://github.com/pdoublerainbow/bisenet-tensorflow + +- 适配昇腾 AI 处理器的实现: + + https://gitee.com/ascend/ModelZoo-TensorFlow/tree/master/TensorFlow/contrib/cv/BiSeNet + +- 通过Git获取对应commit\_id的代码方法如下: + + git clone {repository_url} # 克隆仓库的代码 + cd {repository_name} # 切换到模型的代码仓目录 + git checkout {branch} # 切换到对应分支 + git reset --hard {commit_id} # 代码设置到对应的commit_id + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + + +## 默认配置 + +- 训练超参(单卡): + + - BATCH_SIZE = 8 + - LEARNING_RATE = 1.e-6 + - MOMENTUM = 0.05 + - RANDOM_SEED = 123 + - WEIGHT_DECAY = 0.0005 + - MAX_EPOCH = 2000 + + +## 支持特性 + +| 特性列表 | 是否支持 | +| ---------- | -------- | +| 混合精度 | 是 | + + +## 混合精度训练 + +昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。 + +## 开启混合精度 + +脚本默认开启混合精度,代码如下: + +``` + config = tf.ConfigProto() + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + + config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # 必须显式关闭remap + config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + +``` + +
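上述配置需要在创建会话时传入。下面给出一个最小示意(RewriterConfig 取自 TF1 标准的 rewriter_config_pb2 模块,npu_bridge 的导入与本补丁 frontends/densenet.py 中的用法一致;该片段仅作说明,并非 train_npu.py 原文,实际实现请以 train_npu.py 为准):

```
# Illustrative sketch only: passing the mixed-precision NPU config into a TF1 session.
import tensorflow as tf
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from npu_bridge.npu_init import *   # NPU adapter, as imported elsewhere in this patch

config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["use_off_line"].b = True
custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF           # 必须显式关闭remap
config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # sess.run(train_op)  # train_op 由 models/bisenet.py 构建的网络与优化器产生
```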

+<h2 id="训练环境准备.md">训练环境准备</h2>

+ +- 硬件环境和运行环境准备请参见《[CANN软件安装指南](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=installation-update)》 +- 运行以下命令安装依赖。 +``` +pip3 install -r requirements.txt +``` +说明:依赖配置文件requirements.txt文件位于模型的根目录 +

+<h2 id="快速上手.md">快速上手</h2>

+ +## 数据集准备 + +1、模型训练使用CamVid数据集,数据集请用户自行获取(下载链接http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) + +2、将数据集分为421张train、112张val、168张test + +3、数据集处理后,放入模型目录下,在训练脚本中指定数据集路径,可正常使用 + +数据集目录示例 +``` +├── CamVid +│ ├── train +│ │ ├── xxx.png +│ │ ├── xxx.png +│ ├── train_labels +│ │ ├── xxx.png +│ │ ├── xxx.png +│ ├── val +│ │ ├── xxx.png +│ │ ├── xxx.png +│ ├── val_labels +│ │ ├── xxx.png +│ │ ├── xxx.png +│ ├── test +│ │ ├── xxx.png +│ │ ├── xxx.png +│ ├── test_labels +│ │ ├── xxx.png +│ │ ├── xxx.png +│ ├── class_dict.csv +``` + +4、BiSeNet训练的模型及数据集可以参考"简述 -> 参考实现" + + +## 模型训练 + +- 单击“立即下载”,并选择合适的下载方式下载源码包。 +- 开始训练。 + + - 启动训练之前,首先要配置程序运行相关环境变量。 + + 环境变量配置信息参见: + + [Ascend 910训练平台环境变量设置](https://gitee.com/ascend/ModelZoo-TensorFlow/wikis/01.%E8%AE%AD%E7%BB%83%E8%84%9A%E6%9C%AC%E8%BF%81%E7%A7%BB%E6%A1%88%E4%BE%8B/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE) + + - 单卡训练 + + 1. 配置训练参数 + + 首先在脚本train_full_1p.sh中,配置训练数据集路径,请用户根据实际路径配置data_path,output_path,示例如下所示: + + ``` + # 路径参数初始化 + --data_path=${data_path} + --output_path=${output_path} + ``` + + 2. 启动训练(脚本为./test/train_full_1p.sh) + + ``` + bash train_full_1p.sh --data_path + ``` + + + +
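补充说明:下面给出数据管道读取上述 CamVid 目录的最小示意(依据本补丁中 Dataset/dataset.py 的 DataLoader 接口与 configuration.py 的 TRAIN_CONFIG 写出,仅作说明,并非 train_npu.py 原文):

```
# Illustrative sketch only: how the CamVid layout above is consumed by the data pipeline.
import tensorflow as tf

from configuration import TRAIN_CONFIG
from Dataset.dataset import DataLoader

loader = DataLoader(TRAIN_CONFIG['train_data_config'],     # crop size, batch size, augmentation switches
                    Dataset=TRAIN_CONFIG['DataSet'],        # CamVid root directory (see layout above)
                    class_dict=TRAIN_CONFIG['class_dict'])  # per-class RGB values used for one-hot labels
images, labels = loader.get_one_batch()  # graph-mode tensors: [N, crop_h, crop_w, 3] and [N, crop_h, crop_w]

with tf.Session() as sess:
    img_batch, label_batch = sess.run([images, labels])
```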

+<h2 id="高级参考.md">高级参考</h2>

+ +## 脚本和示例代码 + +``` +├── builders // 前端预训练权重获取 +├── configuration.py // 模型超参数配置 +├── Dataset // 模型数据集处理 +├── frontends // 模型前端部分代码 +├── LICENSE +├── Logs // 权重文件默认生成目录 +├── models // 模型整体代码 +├── README.md +├── test +│   ├── train_full_1p.sh // 训练性能入口 +│   └── train_performance_1p.sh // 训练精度入口,包含准确率评估 +├── test_npu.py // 测试启动文件 +├── train_npu.py // 训练启动文件 +└── utils // 调用模块 +``` + +## 脚本参数 + +``` +--data_path +--output_path +--train_epochs +--batch_size +``` + +## 训练过程 + +通过“模型训练”中的训练指令启动单卡或者多卡训练。单卡和多卡通过运行不同脚本,支持单卡训练。模型存储路径为${cur_path}/output/$ASCEND_DEVICE_ID,包括训练的log以及checkpoints文件。以单卡训练为例,loss信息在文件${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log中。 + +## NPU/GPU 网络测试精度 +| | NPU | GPU | +| ----------- | ------------ | ------------ | +| mean IOU | 0.48 | 0.48 | +``` +测试时使用测试集,运行test_npu.py脚本 +``` + +## NPU/GPU 网络训练性能 + +| | NPU | GPU | +| ----------- | ------------ | ------------ | +| step time | 0.75s | 1.50s | +``` +其中GPU为v100 +``` +## 综合评价 +NPU上训练后的精度与GPU基本一致,但是达不到论文上的结果。 +NPU在训练性能上高于GPU。 \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/builders/__init__.py b/TensorFlow/contrib/cv/BiSeNet/builders/__init__.py new file mode 100644 index 000000000..6a1eaa12e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/builders/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/builders/frontend_builder.py b/TensorFlow/contrib/cv/BiSeNet/builders/frontend_builder.py new file mode 100644 index 000000000..e7b8639dd --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/builders/frontend_builder.py @@ -0,0 +1,92 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from tensorflow.contrib import slim +from frontends import resnet_v2 +from frontends import mobilenet_v2 +from frontends import inception_v4 +from frontends import densenet +from frontends import xception +import os +import subprocess + + +def download_checkpoints(model_name): + subprocess.check_output(["python", "utils/get_pretrained_checkpoints.py", "--model=" + model_name]) + + +def build_frontend(inputs, frontend_config, is_training=True, reuse=False): + frontend = frontend_config['frontend'] + pretrained_dir = frontend_config['pretrained_dir'] + + if "ResNet50" == frontend and not os.path.isfile("pretrain/resnet_v2_50.ckpt"): + download_checkpoints("ResNet50") + if "ResNet101" == frontend and not os.path.isfile("pretrain/resnet_v2_101.ckpt"): + download_checkpoints("ResNet101") + if "ResNet152" == frontend and not os.path.isfile("pretrain/resnet_v2_152.ckpt"): + download_checkpoints("ResNet152") + if "MobileNetV2" == frontend and not os.path.isfile("pretrain/mobilenet_v2.ckpt.data-00000-of-00001"): + download_checkpoints("MobileNetV2") + if "InceptionV4" == frontend and not os.path.isfile("pretrain/inception_v4.ckpt"): + download_checkpoints("InceptionV4") + + if frontend == 'ResNet50': + with slim.arg_scope(resnet_v2.resnet_arg_scope()): + logits, end_points = resnet_v2.resnet_v2_50(inputs, is_training=is_training, scope='resnet_v2_50', reuse=reuse) + frontend_scope='resnet_v2_50' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'resnet_v2_50.ckpt'), var_list=slim.get_model_variables('resnet_v2_50'), ignore_missing_vars=True) + elif frontend == 'ResNet101': + with slim.arg_scope(resnet_v2.resnet_arg_scope()): + logits, end_points = resnet_v2.resnet_v2_101(inputs, is_training=is_training, scope='resnet_v2_101', reuse=reuse) + frontend_scope='resnet_v2_101' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'resnet_v2_101.ckpt'), var_list=slim.get_model_variables('resnet_v2_101'), ignore_missing_vars=True) + elif frontend == 'ResNet152': + with slim.arg_scope(resnet_v2.resnet_arg_scope()): + logits, end_points = resnet_v2.resnet_v2_152(inputs, is_training=is_training, scope='resnet_v2_152', reuse=reuse) + frontend_scope='resnet_v2_152' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'resnet_v2_152.ckpt'), var_list=slim.get_model_variables('resnet_v2_152'), ignore_missing_vars=True) + elif frontend == 'MobileNetV2': + with slim.arg_scope(mobilenet_v2.training_scope()): + logits, end_points = mobilenet_v2.mobilenet(inputs, is_training=is_training, scope='mobilenet_v2', base_only=True, reuse=reuse) + frontend_scope='mobilenet_v2' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'mobilenet_v2.ckpt'), var_list=slim.get_model_variables('mobilenet_v2'), ignore_missing_vars=True) + elif frontend == 'InceptionV4': + with slim.arg_scope(inception_v4.inception_v4_arg_scope()): + logits, end_points = inception_v4.inception_v4(inputs, is_training=is_training, scope='inception_v4', reuse=reuse) + frontend_scope='inception_v4' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'inception_v4.ckpt'), var_list=slim.get_model_variables('inception_v4'), ignore_missing_vars=True) + elif frontend == 'DenseNet121': + with slim.arg_scope(densenet.densenet_arg_scope()): + logits, end_points = densenet.densenet121(inputs, is_training=is_training, 
scope='densenet121', reuse=reuse) + frontend_scope ='densenet121' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'tf-densenet121/tf-densenet121.ckpt'), var_list=slim.get_model_variables('densenet121'), ignore_missing_vars=True) + elif frontend == 'DenseNet161': + with slim.arg_scope(densenet.densenet_arg_scope()): + logits, end_points = densenet.densenet121(inputs, is_training=is_training, scope='densenet161', reuse=reuse) + frontend_scope='densenet161' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'tf-densenet161.ckpt'), var_list=slim.get_model_variables('densenet161'), ignore_missing_vars=True) + elif frontend == 'DenseNet169': + with slim.arg_scope(densenet.densenet_arg_scope()): + logits, end_points= densenet.densenet121(inputs, is_training=is_training, scope='densenet169', reuse=reuse) + frontend_scope='densenet169' + init_fn = slim.assign_from_checkpoint_fn(model_path=os.path.join(pretrained_dir, 'tf-densenet169.ckpt'), var_list=slim.get_model_variables('densenet169'), ignore_missing_vars=True) + elif frontend == 'Xception39': + with slim.arg_scope(xception.xception_arg_scope()): + logits, end_points = xception.xception39(inputs, is_training=is_training, scope='xception39', reuse=reuse) + frontend_scope='Xception39' + init_fn = None + else: + raise ValueError("Unsupported fronetnd model '%s'. This function only supports ResNet50, ResNet101, ResNet152, and MobileNetV2" % (frontend)) + + return logits, end_points, frontend_scope, init_fn \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/configuration.py b/TensorFlow/contrib/cv/BiSeNet/configuration.py new file mode 100644 index 000000000..bc51e80f8 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/configuration.py @@ -0,0 +1,119 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Default configurations of model specification, training and tracking + +For most of the time, DO NOT modify the configurations within this file. +Use the configurations here as the default configurations and only update +them following the examples in the `experiments` directory. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path as osp +import tensorflow as tf + +LOG_DIR = 'Logs/bisenet' # where checkpoints, logs are saved +RUN_NAME = 'bisenet-v2' # identifier of the experiment + +MODEL_CONFIG = { + + 'frontend_config': {'frontend': 'Xception39', + 'pretrained_dir': 'pretrain', # path of the pretrained frontend model. + 'train_frontend': True, + 'use_bn': True, + 'bn_scale': True, + 'bn_momentum': 0.05, + 'bn_epsilon': 1e-6, + 'weight_decay': 5e-4, + 'stride': 8, }, + 'conv_config': {"init_method": "kaiming_normal", + }, + 'batch_norm_params': {"scale": True, + # Decay for the moving averages. + "decay": 0.9, + # Epsilon to prevent 0s in variance. 
+ "epsilon": 1e-5, + 'updates_collections': tf.compat.v1.GraphKeys.UPDATE_OPS, # Ensure that updates are done within a frame + }, + +} + +TRAIN_CONFIG = { + 'DataSet': 'CamVid', + 'class_dict': './CamVid/class_dict.csv', + 'train_dir': osp.join(LOG_DIR, 'checkpoints', RUN_NAME), + + 'seed': 123, # fix seed for reproducing experiments + + 'train_data_config': {'preprocessing_name': 'augment', + 'input_dir': 'train', + 'output_dir': 'train_labels', + 'crop_h': 800, + 'crop_w': 800, + 'random_scale': True, + 'random_mirror': True, + 'num_examples_per_epoch': 421, + 'epoch': 2000, + 'batch_size': 8, + 'prefetch_threads': 8, }, + + 'validation_data_config': {'preprocessing_name': 'None', + 'input_dir': 'val', + 'output_dir': 'val_labels', + 'crop_h': 736, + 'crop_w': 960, + 'batch_size': 2, + 'prefetch_threads': 4, }, + + 'test_data_config': {'preprocessing_name': 'None', + 'input_dir': 'test', + 'output_dir': 'test_labels', + 'crop_h': 736, + 'crop_w': 960, + 'num_examples_per_epoch': 421, + 'batch_size': 8, + 'prefetch_threads': 4, + 'test_dir': osp.join(LOG_DIR, 'checkpoints', RUN_NAME+'test')}, + + + # Optimizer for training the model. + 'optimizer_config': {'optimizer': 'MOMENTUM', # SGD, RMSProp and MOMENTUM are supported + 'momentum': 0.9, + 'use_nesterov': False, + 'decay': 0.9, }, # Discounting factor for history gradient(useful in RMSProp Mode) + + # Learning rate configs + 'lr_config': {'policy': 'polynomial', # piecewise_constant, exponential, polynomial and cosine + 'initial_lr': 0.01, + 'power': 0.9, # Only useful in polynomial + 'num_epochs_per_decay': 1, + 'lr_decay_factor': 0.8685113737513527, + 'staircase': True, }, + + # If not None, clip gradients to this value. + 'clip_gradients': None, + + # Frequency at which loss and global step are logged + 'log_every_n_steps': 10, + + # Frequency to save model + 'save_model_every_n_step': 421, # save model every epoch + + # How many model checkpoints to keep. No limit if None. + 'max_checkpoints_to_keep': 20, +} + diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/__init__.py b/TensorFlow/contrib/cv/BiSeNet/frontends/__init__.py new file mode 100644 index 000000000..6a1eaa12e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/conv_blocks.py b/TensorFlow/contrib/cv/BiSeNet/frontends/conv_blocks.py new file mode 100644 index 000000000..ac4be85f8 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/conv_blocks.py @@ -0,0 +1,358 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Convolution blocks for mobilenet.""" +import contextlib +import functools + +import tensorflow as tf + +slim = tf.contrib.slim + + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def _split_divisible(num, num_ways, divisible_by=8): + """Evenly splits num, num_ways so each piece is a multiple of divisible_by.""" + assert num % divisible_by == 0 + assert num / num_ways >= divisible_by + # Note: want to round down, we adjust each split to match the total. + base = num // num_ways // divisible_by * divisible_by + result = [] + accumulated = 0 + for i in range(num_ways): + r = base + while accumulated + r < num * (i + 1) / num_ways: + r += divisible_by + result.append(r) + accumulated += r + assert accumulated == num + return result + + +@contextlib.contextmanager +def _v1_compatible_scope_naming(scope): + if scope is None: # Create uniqified separable blocks. + with tf.compat.v1.variable_scope(None, default_name='separable') as s, \ + tf.name_scope(s.original_name_scope): + yield '' + else: + # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts. + # which provide numbered scopes. + scope += '_' + yield scope + + +@slim.add_arg_scope +def split_separable_conv2d(input_tensor, + num_outputs, + scope=None, + normalizer_fn=None, + stride=1, + rate=1, + endpoints=None, + use_explicit_padding=False): + """Separable mobilenet V1 style convolution. + + Depthwise convolution, with default non-linearity, + followed by 1x1 depthwise convolution. This is similar to + slim.separable_conv2d, but differs in tha it applies batch + normalization and non-linearity to depthwise. 
This matches + the basic building of Mobilenet Paper + (https://arxiv.org/abs/1704.04861) + + Args: + input_tensor: input + num_outputs: number of outputs + scope: optional name of the scope. Note if provided it will use + scope_depthwise for deptwhise, and scope_pointwise for pointwise. + normalizer_fn: which normalizer function to use for depthwise/pointwise + stride: stride + rate: output rate (also known as dilation rate) + endpoints: optional, if provided, will export additional tensors to it. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + + Returns: + output tesnor + """ + + with _v1_compatible_scope_naming(scope) as scope: + dw_scope = scope + 'depthwise' + endpoints = endpoints if endpoints is not None else {} + kernel_size = [3, 3] + padding = 'SAME' + if use_explicit_padding: + padding = 'VALID' + input_tensor = _fixed_padding(input_tensor, kernel_size, rate) + net = slim.separable_conv2d( + input_tensor, + None, + kernel_size, + depth_multiplier=1, + stride=stride, + rate=rate, + normalizer_fn=normalizer_fn, + padding=padding, + scope=dw_scope) + + endpoints[dw_scope] = net + + pw_scope = scope + 'pointwise' + net = slim.conv2d( + net, + num_outputs, [1, 1], + stride=1, + normalizer_fn=normalizer_fn, + scope=pw_scope) + endpoints[pw_scope] = net + return net + + +def expand_input_by_factor(n, divisible_by=8): + return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by) + + +@slim.add_arg_scope +def expanded_conv(input_tensor, + num_outputs, + expansion_size=expand_input_by_factor(6), + stride=1, + rate=1, + kernel_size=(3, 3), + residual=True, + normalizer_fn=None, + project_activation_fn=tf.identity, + split_projection=1, + split_expansion=1, + expansion_transform=None, + depthwise_location='expansion', + depthwise_channel_multiplier=1, + endpoints=None, + use_explicit_padding=False, + padding='SAME', + scope=None): + """Depthwise Convolution Block with expansion. + + Builds a composite convolution that has the following structure + expansion (1x1) -> depthwise (kernel_size) -> projection (1x1) + + Args: + input_tensor: input + num_outputs: number of outputs in the final layer. + expansion_size: the size of expansion, could be a constant or a callable. + If latter it will be provided 'num_inputs' as an input. For forward + compatibility it should accept arbitrary keyword arguments. + Default will expand the input by factor of 6. + stride: depthwise stride + rate: depthwise rate + kernel_size: depthwise kernel + residual: whether to include residual connection between input + and output. + normalizer_fn: batchnorm or otherwise + project_activation_fn: activation function for the project layer + split_projection: how many ways to split projection operator + (that is conv expansion->bottleneck) + split_expansion: how many ways to split expansion op + (that is conv bottleneck->expansion) ops will keep depth divisible + by this value. + expansion_transform: Optional function that takes expansion + as a single input and returns output. + depthwise_location: where to put depthwise covnvolutions supported + values None, 'input', 'output', 'expansion' + depthwise_channel_multiplier: depthwise channel multiplier: + each input will replicated (with different filters) + that many times. So if input had c channels, + output will have c x depthwise_channel_multpilier. + endpoints: An optional dictionary into which intermediate endpoints are + placed. 
The keys "expansion_output", "depthwise_output", + "projection_output" and "expansion_transform" are always populated, even + if the corresponding functions are not invoked. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + padding: Padding type to use if `use_explicit_padding` is not set. + scope: optional scope. + + Returns: + Tensor of depth num_outputs + + Raises: + TypeError: on inval + """ + with tf.compat.v1.variable_scope(scope, default_name='expanded_conv') as s, \ + tf.name_scope(s.original_name_scope): + prev_depth = input_tensor.get_shape().as_list()[3] + if depthwise_location not in [None, 'input', 'output', 'expansion']: + raise TypeError('%r is unknown value for depthwise_location' % + depthwise_location) + if use_explicit_padding: + if padding != 'SAME': + raise TypeError('`use_explicit_padding` should only be used with ' + '"SAME" padding.') + padding = 'VALID' + depthwise_func = functools.partial( + slim.separable_conv2d, + num_outputs=None, + kernel_size=kernel_size, + depth_multiplier=depthwise_channel_multiplier, + stride=stride, + rate=rate, + normalizer_fn=normalizer_fn, + padding=padding, + scope='depthwise') + # b1 -> b2 * r -> b2 + # i -> (o * r) (bottleneck) -> o + input_tensor = tf.identity(input_tensor, 'input') + net = input_tensor + + if depthwise_location == 'input': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net, activation_fn=None) + + if callable(expansion_size): + inner_size = expansion_size(num_inputs=prev_depth) + else: + inner_size = expansion_size + + if inner_size > net.shape[3]: + net = split_conv( + net, + inner_size, + num_ways=split_expansion, + scope='expand', + stride=1, + normalizer_fn=normalizer_fn) + net = tf.identity(net, 'expansion_output') + if endpoints is not None: + endpoints['expansion_output'] = net + + if depthwise_location == 'expansion': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net) + + net = tf.identity(net, name='depthwise_output') + if endpoints is not None: + endpoints['depthwise_output'] = net + if expansion_transform: + net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor) + # Note in contrast with expansion, we always have + # projection to produce the desired output size. + net = split_conv( + net, + num_outputs, + num_ways=split_projection, + stride=1, + scope='project', + normalizer_fn=normalizer_fn, + activation_fn=project_activation_fn) + if endpoints is not None: + endpoints['projection_output'] = net + if depthwise_location == 'output': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net, activation_fn=None) + + if callable(residual): # custom residual + net = residual(input_tensor=input_tensor, output_tensor=net) + elif (residual and + # stride check enforces that we don't add residuals when spatial + # dimensions are None + stride == 1 and + # Depth matches + net.get_shape().as_list()[3] == + input_tensor.get_shape().as_list()[3]): + net += input_tensor + return tf.identity(net, name='output') + + +def split_conv(input_tensor, + num_outputs, + num_ways, + scope, + divisible_by=8, + **kwargs): + """Creates a split convolution. + + Split convolution splits the input and output into + 'num_blocks' blocks of approximately the same size each, + and only connects $i$-th input to $i$ output. 
+ + Args: + input_tensor: input tensor + num_outputs: number of output filters + num_ways: num blocks to split by. + scope: scope for all the operators. + divisible_by: make sure that every part is divisiable by this. + **kwargs: will be passed directly into conv2d operator + Returns: + tensor + """ + b = input_tensor.get_shape().as_list()[3] + + if num_ways == 1 or min(b // num_ways, + num_outputs // num_ways) < divisible_by: + # Don't do any splitting if we end up with less than 8 filters + # on either side. + return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs) + + outs = [] + input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by) + output_splits = _split_divisible( + num_outputs, num_ways, divisible_by=divisible_by) + inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope) + base = scope + for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)): + scope = base + '_part_%d' % (i,) + n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs) + n = tf.identity(n, scope + '_output') + outs.append(n) + return tf.concat(outs, 3, name=scope + '_concat') \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/densenet.py b/TensorFlow/contrib/cv/BiSeNet/frontends/densenet.py new file mode 100644 index 000000000..bc9d97d47 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/densenet.py @@ -0,0 +1,246 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the definition of the DenseNet architecture. + +As described in https://arxiv.org/abs/1608.06993. + + Densely Connected Convolutional Networks + Gao Huang, Zhuang Liu, Kilian Q. 
Weinberger, Laurens van der Maaten +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + +from npu_bridge.npu_init import * + +@slim.add_arg_scope +def _global_avg_pool2d(inputs, data_format='NHWC', scope=None, outputs_collections=None): + with tf.variable_scope(scope, 'xx', [inputs]) as sc: + axis = [1, 2] if data_format == 'NHWC' else [2, 3] + net = tf.reduce_mean(inputs, axis=axis, keep_dims=True) + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + return net + + +@slim.add_arg_scope +def _conv(inputs, num_filters, kernel_size, stride=1, dropout_rate=None, + scope=None, outputs_collections=None, dilation_rate=1): + with tf.variable_scope(scope, 'xx', [inputs]) as sc: + net = slim.batch_norm(inputs) + net = tf.nn.relu(net) + net = slim.conv2d(net, num_filters, kernel_size, rate=dilation_rate) + + if dropout_rate: + net = npu_ops.dropout(net) + + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + return net + + +@slim.add_arg_scope +def _conv_block(inputs, num_filters, data_format='NHWC', scope=None, outputs_collections=None, dilation_rate=1): + with tf.variable_scope(scope, 'conv_blockx', [inputs]) as sc: + net = inputs + net = _conv(net, num_filters*4, 1, scope='x1') + net = _conv(net, num_filters, 3, scope='x2', dilation_rate=dilation_rate) + if data_format == 'NHWC': + net = tf.concat([inputs, net], axis=3) + else: # "NCHW" + net = tf.concat([inputs, net], axis=1) + + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + return net + + +@slim.add_arg_scope +def _dense_block(inputs, num_layers, num_filters, growth_rate, + grow_num_filters=True, scope=None, outputs_collections=None, dilation_rate=1): + + with tf.variable_scope(scope, 'dense_blockx', [inputs]) as sc: + net = inputs + for i in range(num_layers): + branch = i + 1 + net = _conv_block(net, growth_rate, scope='conv_block'+str(branch), dilation_rate=dilation_rate) + + if grow_num_filters: + num_filters += growth_rate + + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + return net, num_filters + + +@slim.add_arg_scope +def _transition_block(inputs, num_filters, compression=1.0, + scope=None, outputs_collections=None, stride=2): + + num_filters = int(num_filters * compression) + with tf.variable_scope(scope, 'transition_blockx', [inputs]) as sc: + net = inputs + net = _conv(net, num_filters, 1, scope='blk') + if stride == 2: + net = slim.avg_pool2d(net, 2) + + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + return net, num_filters + + +def densenet(inputs, + num_classes=None, + reduction=None, + growth_rate=None, + num_filters=None, + num_layers=None, + dropout_rate=None, + data_format='NHWC', + is_training=True, + reuse=None, + scope=None): + assert reduction is not None + assert growth_rate is not None + assert num_filters is not None + assert num_layers is not None + + compression = 1.0 - reduction + num_dense_blocks = len(num_layers) + + + if data_format == 'NCHW': + inputs = tf.transpose(inputs, [0, 3, 1, 2]) + + with tf.variable_scope(scope, 'densenetxxx', [inputs], + reuse=reuse) as sc: + end_points_collection = sc.name + '_end_points' + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training), \ + slim.arg_scope([slim.conv2d, _conv, _conv_block, + _dense_block, _transition_block], + outputs_collections=end_points_collection), \ + 
slim.arg_scope([_conv], dropout_rate=dropout_rate): + net = inputs + + # initial convolution + net = slim.conv2d(net, num_filters, 7, stride=2, scope='conv1') + net = slim.batch_norm(net) + net = tf.nn.relu(net) + net = slim.max_pool2d(net, 3, stride=2, padding='SAME') + + dilation = [1, 1, 2, 4] + transition_stride = [2, 2, 1, 1] + # blocks + for i in range(num_dense_blocks): + # dense blocks + net, num_filters = _dense_block(net, num_layers[i], num_filters, + growth_rate, + scope='dense_block' + str(i+1), dilation_rate=dilation[i]) + + # Add transition_block + net, num_filters = _transition_block(net, num_filters, + compression=compression, + scope='transition_block' + str(i+1), stride=transition_stride[i]) + + # net, num_filters = _dense_block( + # net, num_layers[-1], num_filters, + # growth_rate, + # scope='dense_block' + str(num_dense_blocks)) + + # final blocks + # with tf.variable_scope('final_block', [inputs]): + net = slim.batch_norm(net, scope='postnorm') + # net = tf.nn.relu(net) + # net = _global_avg_pool2d(net, scope='global_avg_pool') + + # net = slim.conv2d(net, num_classes, 1, + # biases_initializer=tf.zeros_initializer(), + # scope='logits') + + end_points = slim.utils.convert_collection_to_dict( + end_points_collection) + + # if num_classes is not None: + # end_points['pool'] = end_points + + return net, end_points + + +def densenet121(inputs, num_classes=None, data_format='NHWC', is_training=True, reuse=None, scope='densenet121'): + return densenet(inputs, + num_classes=num_classes, + reduction=0.5, + growth_rate=32, + num_filters=64, + num_layers=[6,12,24,16], + data_format=data_format, + is_training=is_training, + reuse=reuse, + scope=scope) +densenet121.default_image_size = 224 + + +def densenet161(inputs, num_classes=None, data_format='NHWC', is_training=True, reuse=None, scope='densenet161'): + return densenet(inputs, + num_classes=num_classes, + reduction=0.5, + growth_rate=48, + num_filters=96, + num_layers=[6,12,36,24], + data_format=data_format, + is_training=is_training, + reuse=reuse, + scope=scope) +densenet161.default_image_size = 224 + + +def densenet169(inputs, num_classes=None, data_format='NHWC', is_training=True, reuse=None, scope='densenet169'): + return densenet(inputs, + num_classes=num_classes, + reduction=0.5, + growth_rate=32, + num_filters=64, + num_layers=[6,12,32,32], + data_format=data_format, + is_training=is_training, + reuse=reuse, + scope=scope) +densenet169.default_image_size = 224 + + +def densenet_arg_scope(weight_decay=1e-4, + batch_norm_decay=0.99, + batch_norm_epsilon=1.1e-5, + data_format='NHWC'): + with slim.arg_scope([slim.conv2d, slim.batch_norm, slim.avg_pool2d, slim.max_pool2d, + _conv_block, _global_avg_pool2d], + data_format=data_format): + with slim.arg_scope([slim.conv2d], + weights_regularizer=slim.l2_regularizer(weight_decay), + activation_fn=None, + biases_initializer=None): + with slim.arg_scope([slim.batch_norm], + scale=True, + decay=batch_norm_decay, + epsilon=batch_norm_epsilon, + updates_collections=None, + fused=True) as scope: + return scope + + diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/inception_utils.py b/TensorFlow/contrib/cv/BiSeNet/frontends/inception_utils.py new file mode 100644 index 000000000..81020e95e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/inception_utils.py @@ -0,0 +1,78 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains common code shared by all inception models. + +Usage of arg scope: + with slim.arg_scope(inception_arg_scope()): + logits, end_points = inception.inception_v3(images, num_classes, + is_training=is_training) + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + + +def inception_arg_scope(weight_decay=0.00004, + use_batch_norm=True, + batch_norm_decay=0.9997, + batch_norm_epsilon=0.001, + activation_fn=tf.nn.relu, + batch_norm_updates_collections=tf.compat.v1.GraphKeys.UPDATE_OPS): + """Defines the default arg scope for inception models. + + Args: + weight_decay: The weight decay to use for regularizing the model. + use_batch_norm: "If `True`, batch_norm is applied after each convolution. + batch_norm_decay: Decay for batch norm moving average. + batch_norm_epsilon: Small float added to variance to avoid dividing by zero + in batch norm. + activation_fn: Activation function for conv2d. + batch_norm_updates_collections: Collection for the update ops for + batch norm. + + Returns: + An `arg_scope` to use for the inception models. + """ + batch_norm_params = { + # Decay for the moving averages. + 'decay': batch_norm_decay, + # epsilon to prevent 0s in variance. + 'epsilon': batch_norm_epsilon, + # collection containing update_ops. + 'updates_collections': batch_norm_updates_collections, + # use fused batch norm if possible. + 'fused': None, + } + if use_batch_norm: + normalizer_fn = slim.batch_norm + normalizer_params = batch_norm_params + else: + normalizer_fn = None + normalizer_params = {} + # Set weight_decay for weights in Conv and FC layers. + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_regularizer=slim.l2_regularizer(weight_decay)): + with slim.arg_scope( + [slim.conv2d], + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=activation_fn, + normalizer_fn=normalizer_fn, + normalizer_params=normalizer_params) as sc: + return sc \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/inception_v4.py b/TensorFlow/contrib/cv/BiSeNet/frontends/inception_v4.py new file mode 100644 index 000000000..b3beb0340 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/inception_v4.py @@ -0,0 +1,354 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Contains the definition of the Inception V4 architecture. + +As described in http://arxiv.org/abs/1602.07261. + + Inception-v4, Inception-ResNet and the Impact of Residual Connections + on Learning + Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from frontends import inception_utils + +slim = tf.contrib.slim + + +def block_inception_a(inputs, scope=None, reuse=None): + """Builds Inception-A block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) + + +def block_reduction_a(inputs, scope=None, reuse=None): + """Builds Reduction-A block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') + branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) + + +def block_inception_b(inputs, scope=None, reuse=None): + """Builds Inception-B block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, 224, [1, 
7], scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) + + +def block_reduction_b(inputs, scope=None, reuse=None): + """Builds Reduction-B block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) + + +def block_inception_c(inputs, scope=None, reuse=None): + """Builds Inception-C block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = tf.concat(axis=3, values=[ + slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'), + slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')]) + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1') + branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3') + branch_2 = tf.concat(axis=3, values=[ + slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'), + slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')]) + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) + + +def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None): + """Creates the Inception V4 network up to the given final endpoint. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + final_endpoint: specifies the endpoint to construct the network up to. + It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', + 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', + 'Mixed_7d'] + scope: Optional variable_scope. 
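+
+  Example (a minimal sketch; the 299x299 placeholder below is an assumption
+  made for illustration only):
+
+    import tensorflow as tf
+    from frontends import inception_v4
+    slim = tf.contrib.slim
+
+    images = tf.placeholder(tf.float32, [None, 299, 299, 3])
+    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
+      # Stop early at 'Mixed_6h'; the coarser maps produced so far are also
+      # exposed through end_points['pool1'] ... end_points['pool4'].
+      net, end_points = inception_v4.inception_v4_base(
+          images, final_endpoint='Mixed_6h')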
+ + Returns: + logits: the logits outputs of the model. + end_points: the set of end_points from the inception model. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + """ + end_points = {} + + def add_and_check_final(name, net): + end_points[name] = net + return name == final_endpoint + + with tf.variable_scope(scope, 'InceptionV4', [inputs]): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # 299 x 299 x 3 + net = slim.conv2d(inputs, 32, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + + end_points["pool1"] = net + + if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points + # 149 x 149 x 32 + net = slim.conv2d(net, 32, [3, 3], padding='VALID', + scope='Conv2d_2a_3x3') + if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points + # 147 x 147 x 32 + net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3') + if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points + # 147 x 147 x 64 + with tf.variable_scope('Mixed_3a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_0a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID', + scope='Conv2d_0a_3x3') + net = tf.concat(axis=3, values=[branch_0, branch_1]) + + end_points["pool2"] = net + + if add_and_check_final('Mixed_3a', net): return net, end_points + + # 73 x 73 x 160 + with tf.variable_scope('Mixed_4a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID', + scope='Conv2d_1a_3x3') + net = tf.concat(axis=3, values=[branch_0, branch_1]) + if add_and_check_final('Mixed_4a', net): return net, end_points + + # 71 x 71 x 192 + with tf.variable_scope('Mixed_5a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(axis=3, values=[branch_0, branch_1]) + + end_points["pool3"] = net + + if add_and_check_final('Mixed_5a', net): return net, end_points + + + + # 35 x 35 x 384 + # 4 x Inception-A blocks + for idx in range(4): + block_scope = 'Mixed_5' + chr(ord('b') + idx) + net = block_inception_a(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + + # 35 x 35 x 384 + # Reduction-A block + net = block_reduction_a(net, 'Mixed_6a') + + end_points["pool4"] = net + + if add_and_check_final('Mixed_6a', net): return net, end_points + + # 17 x 17 x 1024 + # 7 x Inception-B blocks + for idx in range(7): + block_scope = 'Mixed_6' + chr(ord('b') + idx) + net = block_inception_b(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + + # 17 x 17 x 1024 + # Reduction-B block + net = block_reduction_b(net, 'Mixed_7a') + + end_points["pool5"] = net + + if add_and_check_final('Mixed_7a', net): return net, end_points + + # 8 x 8 x 1536 + # 3 x Inception-C blocks + 
for idx in range(3): + block_scope = 'Mixed_7' + chr(ord('b') + idx) + net = block_inception_c(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def inception_v4(inputs, num_classes=1001, is_training=True, + dropout_keep_prob=0.8, + reuse=None, + scope='InceptionV4', + create_aux_logits=True): + """Creates the Inception V4 model. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + num_classes: number of predicted classes. If 0 or None, the logits layer + is omitted and the input features to the logits layer (before dropout) + are returned instead. + is_training: whether is training or not. + dropout_keep_prob: float, the fraction to keep before final layer. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + create_aux_logits: Whether to include the auxiliary logits. + + Returns: + net: a Tensor with the logits (pre-softmax activations) if num_classes + is a non-zero integer, or the non-dropped input to the logits layer + if num_classes is 0 or None. + end_points: the set of end_points from the inception model. + """ + end_points = {} + with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = inception_v4_base(inputs, scope=scope) + + # with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + # stride=1, padding='SAME'): + # # Auxiliary Head logits + # if create_aux_logits and num_classes: + # with tf.variable_scope('AuxLogits'): + # # 17 x 17 x 1024 + # aux_logits = end_points['Mixed_6h'] + # aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, + # padding='VALID', + # scope='AvgPool_1a_5x5') + # aux_logits = slim.conv2d(aux_logits, 128, [1, 1], + # scope='Conv2d_1b_1x1') + # aux_logits = slim.conv2d(aux_logits, 768, + # aux_logits.get_shape()[1:3], + # padding='VALID', scope='Conv2d_2a') + # aux_logits = slim.flatten(aux_logits) + # aux_logits = slim.fully_connected(aux_logits, num_classes, + # activation_fn=None, + # scope='Aux_logits') + # end_points['AuxLogits'] = aux_logits + + # # Final pooling and prediction + # # TODO(sguada,arnoegw): Consider adding a parameter global_pool which + # # can be set to False to disable pooling here (as in resnet_*()). 
+ # with tf.variable_scope('Logits'): + # # 8 x 8 x 1536 + # kernel_size = net.get_shape()[1:3] + # if kernel_size.is_fully_defined(): + # net = slim.avg_pool2d(net, kernel_size, padding='VALID', + # scope='AvgPool_1a') + # else: + # net = tf.reduce_mean(net, [1, 2], keep_dims=True, + # name='global_pool') + # end_points['global_pool'] = net + # if not num_classes: + # return net, end_points + # # 1 x 1 x 1536 + # net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') + # net = slim.flatten(net, scope='PreLogitsFlatten') + # end_points['PreLogitsFlatten'] = net + # # 1536 + # logits = slim.fully_connected(net, num_classes, activation_fn=None, + # scope='Logits') + # end_points['Logits'] = logits + # end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') + return net, end_points +inception_v4.default_image_size = 299 + + +inception_v4_arg_scope = inception_utils.inception_arg_scope \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_base.py b/TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_base.py new file mode 100644 index 000000000..38dfffef2 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_base.py @@ -0,0 +1,479 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Mobilenet Base Class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import collections +import contextlib +import copy +import os + +import tensorflow as tf + + +slim = tf.contrib.slim + + +@slim.add_arg_scope +def apply_activation(x, name=None, activation_fn=None): + return activation_fn(x, name=name) if activation_fn else x + + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). 
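+
+  Example (a small worked sketch, assuming kernel_size=[3, 3] and rate=2): the
+  effective kernel size is 5, so pad_total is [4, 4] and two rows/columns of
+  zeros are added on every spatial side:
+
+    x = tf.placeholder(tf.float32, [1, 64, 64, 32])
+    y = _fixed_padding(x, [3, 3], rate=2)   # shape: [1, 68, 68, 32]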
+ """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +@contextlib.contextmanager +def _set_arg_scope_defaults(defaults): + """Sets arg scope defaults for all items present in defaults. + + Args: + defaults: dictionary/list of pairs, containing a mapping from + function to a dictionary of default args. + + Yields: + context manager where all defaults are set. + """ + if hasattr(defaults, 'items'): + items = list(defaults.items()) + else: + items = defaults + if not items: + yield + else: + func, default_arg = items[0] + with slim.arg_scope(func, **default_arg): + with _set_arg_scope_defaults(items[1:]): + yield + + +@slim.add_arg_scope +def depth_multiplier(output_params, + multiplier, + divisible_by=8, + min_depth=8, + **unused_kwargs): + if 'num_outputs' not in output_params: + return + d = output_params['num_outputs'] + output_params['num_outputs'] = _make_divisible(d * multiplier, divisible_by, + min_depth) + + +_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func']) + + +def op(opfunc, **params): + multiplier = params.pop('multiplier_transorm', depth_multiplier) + return _Op(opfunc, params=params, multiplier_func=multiplier) + + +class NoOpScope(object): + """No-op context manager.""" + + def __enter__(self): + return None + + def __exit__(self, exc_type, exc_value, traceback): + return False + + +def safe_arg_scope(funcs, **kwargs): + """Returns `slim.arg_scope` with all None arguments removed. + + Arguments: + funcs: Functions to pass to `arg_scope`. + **kwargs: Arguments to pass to `arg_scope`. + + Returns: + arg_scope or No-op context manager. + + Note: can be useful if None value should be interpreted as "do not overwrite + this parameter value". + """ + filtered_args = {name: value for name, value in kwargs.items() + if value is not None} + if filtered_args: + return slim.arg_scope(funcs, **filtered_args) + else: + return NoOpScope() + + +@slim.add_arg_scope +def mobilenet_base( # pylint: disable=invalid-name + inputs, + conv_defs, + multiplier=1.0, + final_endpoint=None, + output_stride=None, + use_explicit_padding=False, + scope=None, + is_training=False): + """Mobilenet base network. + + Constructs a network from inputs to the given final endpoint. By default + the network is constructed in inference mode. To create network + in training mode use: + + with slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_base(...) + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + conv_defs: A list of op(...) layers specifying the net architecture. + multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. 
+ final_endpoint: The name of last layer, for early termination for + for V1-based networks: last layer is "layer_14", for V2: "layer_20" + output_stride: An integer that specifies the requested ratio of input to + output spatial resolution. If not None, then we invoke atrous convolution + if necessary to prevent the network from reducing the spatial resolution + of the activation maps. Allowed values are 1 or any even number, excluding + zero. Typical values are 8 (accurate fully convolutional mode), 16 + (fast fully convolutional mode), and 32 (classification mode). + + NOTE- output_stride relies on all consequent operators to support dilated + operators via "rate" parameter. This might require wrapping non-conv + operators to operate properly. + + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + scope: optional variable scope. + is_training: How to setup batch_norm and other ops. Note: most of the time + this does not need be set directly. Use mobilenet.training_scope() to set + up training instead. This parameter is here for backward compatibility + only. It is safe to set it to the value matching + training_scope(is_training=...). It is also safe to explicitly set + it to False, even if there is outer training_scope set to to training. + (The network will be built in inference mode). If this is set to None, + no arg_scope is added for slim.batch_norm's is_training parameter. + + Returns: + tensor_out: output tensor. + end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: depth_multiplier <= 0, or the target output_stride is not + allowed. + """ + if multiplier <= 0: + raise ValueError('multiplier is not greater than zero.') + + # Set conv defs defaults and overrides. + conv_defs_defaults = conv_defs.get('defaults', {}) + conv_defs_overrides = conv_defs.get('overrides', {}) + if use_explicit_padding: + conv_defs_overrides = copy.deepcopy(conv_defs_overrides) + conv_defs_overrides[ + (slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'} + + if output_stride is not None: + if output_stride == 0 or (output_stride > 1 and output_stride % 2): + raise ValueError('Output stride must be None, 1 or a multiple of 2.') + + # a) Set the tensorflow scope + # b) set padding to default: note we might consider removing this + # since it is also set by mobilenet_scope + # c) set all defaults + # d) set all extra overrides. + with _scope_all(scope, default_scope='Mobilenet'), \ + safe_arg_scope([slim.batch_norm], is_training=is_training), \ + _set_arg_scope_defaults(conv_defs_defaults), \ + _set_arg_scope_defaults(conv_defs_overrides): + # The current_stride variable keeps track of the output stride of the + # activations, i.e., the running product of convolution strides up to the + # current network layer. This allows us to invoke atrous convolution + # whenever applying the next convolution would result in the activations + # having output stride larger than the target output_stride. + + ####################################### + ####################################### + ####################################### + downsample_count = 0 + + + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + net = inputs + # Insert default parameters before the base scope which includes + # any custom overrides set in mobilenet. 
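+    # Worked sketch of the bookkeeping below (assuming a conv_defs spec whose
+    # strides are [2, 2, 2, 2, 2] and output_stride=16): the first four
+    # stride-2 layers run unchanged while current_stride grows 2 -> 4 -> 8 ->
+    # 16; any later stride-2 layer is then forced to layer_stride=1 and its
+    # skipped stride is folded into the atrous rate of subsequent layers, so
+    # the activations stay at 1/16 resolution while the receptive field keeps
+    # growing.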
+ end_points = {} + scopes = {} + for i, opdef in enumerate(conv_defs['spec']): + params = dict(opdef.params) + opdef.multiplier_func(params, multiplier) + stride = params.get('stride', 1) + if output_stride is not None and current_stride == output_stride: + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. + layer_stride = 1 + layer_rate = rate + rate *= stride + else: + layer_stride = stride + layer_rate = 1 + current_stride *= stride + # Update params. + params['stride'] = layer_stride + # Only insert rate to params if rate > 1. + if layer_rate > 1: + params['rate'] = layer_rate + # Set padding + if use_explicit_padding: + if 'kernel_size' in params: + net = _fixed_padding(net, params['kernel_size'], layer_rate) + else: + params['use_explicit_padding'] = True + + end_point = 'layer_%d' % (i + 1) + try: + net = opdef.op(net, **params) + except Exception: + print('Failed to create op %i: %r params: %r' % (i, opdef, params)) + raise + + if layer_stride == 2: + downsample_count += 1 + end_points["pool" + str(downsample_count)] = net + + end_points[end_point] = net + scope = os.path.dirname(net.name) + scopes[scope] = end_point + if final_endpoint is not None and end_point == final_endpoint: + break + + # Add all tensors that end with 'output' to + # endpoints + for t in net.graph.get_operations(): + scope = os.path.dirname(t.name) + bn = os.path.basename(t.name) + if scope in scopes and t.name.endswith('output'): + end_points[scopes[scope] + '/' + bn] = t.outputs[0] + return net, end_points + + +@contextlib.contextmanager +def _scope_all(scope, default_scope=None): + with tf.variable_scope(scope, default_name=default_scope) as s,\ + tf.name_scope(s.original_name_scope): + yield s + + +@slim.add_arg_scope +def mobilenet(inputs, + num_classes=1001, + prediction_fn=slim.softmax, + reuse=None, + scope='Mobilenet', + base_only=False, + **mobilenet_args): + """Mobilenet model for classification, supports both V1 and V2. + + Note: default mode is inference, use mobilenet.training_scope to create + training network. + + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + num_classes: number of predicted classes. If 0 or None, the logits layer + is omitted and the input features to the logits layer (before dropout) + are returned instead. + prediction_fn: a function to get predictions out of logits + (default softmax). + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + base_only: if True will only create the base of the network (no pooling + and no logits). + **mobilenet_args: passed to mobilenet_base verbatim. + - conv_defs: list of conv defs + - multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + - output_stride: will ensure that the last layer has at most total stride. + If the architecture calls for more stride than that provided + (e.g. output_stride=16, but the architecture has 5 stride=2 operators), + it will replace output_stride with fractional convolutions using Atrous + Convolutions. 
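+
+  Example (a minimal sketch; the two-layer spec and the 224x224 placeholder
+  are made up for illustration only):
+
+    import tensorflow as tf
+    slim = tf.contrib.slim
+
+    toy_defs = dict(
+        defaults={
+            (slim.conv2d,): {'normalizer_fn': slim.batch_norm,
+                             'activation_fn': tf.nn.relu6},
+        },
+        spec=[
+            op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
+            op(slim.conv2d, stride=1, num_outputs=64, kernel_size=[3, 3]),
+        ])
+    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
+    logits, end_points = mobilenet(images, num_classes=10, conv_defs=toy_defs)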
+ + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation tensor. + + Raises: + ValueError: Input rank is invalid. + """ + is_training = mobilenet_args.get('is_training', False) + input_shape = inputs.get_shape().as_list() + if len(input_shape) != 4: + raise ValueError('Expected rank 4 input, was: %d' % len(input_shape)) + + with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope: + inputs = tf.identity(inputs, 'input') + net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args) + if base_only: + return net, end_points + + net = tf.identity(net, name='embedding') + + with tf.variable_scope('Logits'): + net = global_pool(net) + end_points['global_pool'] = net + if not num_classes: + return net, end_points + net = slim.dropout(net, scope='Dropout', is_training=is_training) + # 1 x 1 x num_classes + # Note: legacy scope name. + logits = slim.conv2d( + net, + num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + biases_initializer=tf.zeros_initializer(), + scope='Conv2d_1c_1x1') + + logits = tf.squeeze(logits, [1, 2]) + + logits = tf.identity(logits, name='output') + end_points['Logits'] = logits + if prediction_fn: + end_points['Predictions'] = prediction_fn(logits, 'Predictions') + return logits, end_points + + +def global_pool(input_tensor, pool_op=tf.nn.avg_pool2d): + """Applies avg pool to produce 1x1 output. + + NOTE: This function is funcitonally equivalenet to reduce_mean, but it has + baked in average pool which has better support across hardware. + + Args: + input_tensor: input tensor + pool_op: pooling op (avg pool is default) + Returns: + a tensor batch_size x 1 x 1 x depth. + """ + shape = input_tensor.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size = tf.convert_to_tensor( + [1, tf.shape(input_tensor)[1], + tf.shape(input_tensor)[2], 1]) + else: + kernel_size = [1, shape[1], shape[2], 1] + output = pool_op( + input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID') + # Recover output shape, for unknown shape. + output.set_shape([None, 1, 1, None]) + return output + + +def training_scope(is_training=True, + weight_decay=0.00004, + stddev=0.09, + dropout_keep_prob=0.8, + bn_decay=0.997): + """Defines Mobilenet training scope. + + Usage: + with tf.contrib.slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + # the network created will be trainble with dropout/batch norm + # initialized appropriately. + Args: + is_training: if set to False this will ensure that all customizations are + set to non-training mode. This might be helpful for code that is reused + across both training/evaluation, but most of the time training_scope with + value False is not needed. If this is set to None, the parameters is not + added to the batch_norm arg_scope. + + weight_decay: The weight decay to use for regularizing the model. + stddev: Standard deviation for initialization, if negative uses xavier. + dropout_keep_prob: dropout keep probability (not set if equals to None). + bn_decay: decay for the batch norm moving averages (not set if equals to + None). + + Returns: + An argument scope to use via arg_scope. + """ + # Note: do not introduce parameters that would change the inference + # model here (for example whether to use bias), modify conv_def instead. 
+ batch_norm_params = { + 'decay': bn_decay, + 'is_training': is_training + } + if stddev < 0: + weight_intitializer = slim.initializers.xavier_initializer() + else: + weight_intitializer = tf.truncated_normal_initializer(stddev=stddev) + + # Set weight_decay for weights in Conv and FC layers. + with slim.arg_scope( + [slim.conv2d, slim.fully_connected, slim.separable_conv2d], + weights_initializer=weight_intitializer, + normalizer_fn=slim.batch_norm), \ + slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training),\ + safe_arg_scope([slim.batch_norm], **batch_norm_params), \ + safe_arg_scope([slim.dropout], is_training=is_training, + keep_prob=dropout_keep_prob), \ + slim.arg_scope([slim.conv2d], \ + weights_regularizer=slim.l2_regularizer(weight_decay)), \ + slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s: + return s \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_v2.py b/TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_v2.py new file mode 100644 index 000000000..3c1802c0e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/mobilenet_v2.py @@ -0,0 +1,205 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of Mobilenet V2. + +Architecture: https://arxiv.org/abs/1801.04381 + +The base model gives 72.2% accuracy on ImageNet, with 300MMadds, +3.4 M parameters. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import functools + +import tensorflow as tf + +from frontends import conv_blocks as ops +from frontends import mobilenet_base as lib + +slim = tf.contrib.slim +op = lib.op + +expand_input = ops.expand_input_by_factor + +# pyformat: disable +# Architecture: https://arxiv.org/abs/1801.04381 +V2_DEF = dict( + defaults={ + # Note: these parameters of batch norm affect the architecture + # that's why they are here and not in training_scope. 
+ (slim.batch_norm,): {'center': True, 'scale': True}, + (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { + 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 + }, + (ops.expanded_conv,): { + 'expansion_size': expand_input(6), + 'split_expansion': 1, + 'normalizer_fn': slim.batch_norm, + 'residual': True + }, + (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} + }, + spec=[ + op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), + op(ops.expanded_conv, + expansion_size=expand_input(1, divisible_by=1), + num_outputs=16), + op(ops.expanded_conv, stride=2, num_outputs=24), + op(ops.expanded_conv, stride=1, num_outputs=24), + op(ops.expanded_conv, stride=2, num_outputs=32), + op(ops.expanded_conv, stride=1, num_outputs=32), + op(ops.expanded_conv, stride=1, num_outputs=32), + op(ops.expanded_conv, stride=2, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=2, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=320), + op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280) + ], +) +# pyformat: enable + + +@slim.add_arg_scope +def mobilenet(input_tensor, + num_classes=1001, + depth_multiplier=1.0, + scope='MobilenetV2', + conv_defs=None, + finegrain_classification_mode=False, + min_depth=None, + divisible_by=None, + **kwargs): + """Creates mobilenet V2 network. + + Inference mode is created by default. To create training use training_scope + below. + + with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + Args: + input_tensor: The input tensor + num_classes: number of classes + depth_multiplier: The multiplier applied to scale number of + channels in each layer. Note: this is called depth multiplier in the + paper but the name is kept for consistency with slim's model builder. + scope: Scope of the operator + conv_defs: Allows to override default conv def. + finegrain_classification_mode: When set to True, the model + will keep the last layer large even for small multipliers. Following + https://arxiv.org/abs/1801.04381 + suggests that it improves performance for ImageNet-type of problems. + *Note* ignored if final_endpoint makes the builder exit earlier. + min_depth: If provided, will ensure that all layers will have that + many channels after application of depth multiplier. + divisible_by: If provided will ensure that all layers # channels + will be divisible by this number. + **kwargs: passed directly to mobilenet.mobilenet: + prediction_fn- what prediction function to use. + reuse-: whether to reuse variables (if reuse set to true, scope + must be given). 
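+
+  Example (a minimal sketch; the 224x224 placeholder is assumed for
+  illustration only):
+
+    import tensorflow as tf
+    from frontends import mobilenet_v2
+
+    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
+    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
+      logits, end_points = mobilenet_v2.mobilenet(
+          images, num_classes=1001, depth_multiplier=0.5,
+          finegrain_classification_mode=True)
+    # With finegrain_classification_mode=True the final 1280-channel layer
+    # keeps its full width even though every other layer is halved.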
+ Returns: + logits/endpoints pair + + Raises: + ValueError: On invalid arguments + """ + if conv_defs is None: + conv_defs = V2_DEF + if 'multiplier' in kwargs: + raise ValueError('mobilenetv2 doesn\'t support generic ' + 'multiplier parameter use "depth_multiplier" instead.') + if finegrain_classification_mode: + conv_defs = copy.deepcopy(conv_defs) + if depth_multiplier < 1: + conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier + + depth_args = {} + # NB: do not set depth_args unless they are provided to avoid overriding + # whatever default depth_multiplier might have thanks to arg_scope. + if min_depth is not None: + depth_args['min_depth'] = min_depth + if divisible_by is not None: + depth_args['divisible_by'] = divisible_by + + with slim.arg_scope((lib.depth_multiplier,), **depth_args): + return lib.mobilenet( + input_tensor, + num_classes=num_classes, + conv_defs=conv_defs, + scope=scope, + multiplier=depth_multiplier, + **kwargs) + + +def wrapped_partial(func, *args, **kwargs): + partial_func = functools.partial(func, *args, **kwargs) + functools.update_wrapper(partial_func, func) + return partial_func + + +# Wrappers for mobilenet v2 with depth-multipliers. Be noticed that +# 'finegrain_classification_mode' is set to True, which means the embedding +# layer will not be shrinked when given a depth-multiplier < 1.0. +mobilenet_v2_140 = wrapped_partial(mobilenet, depth_multiplier=1.4) +mobilenet_v2_050 = wrapped_partial(mobilenet, depth_multiplier=0.50, + finegrain_classification_mode=True) +mobilenet_v2_035 = wrapped_partial(mobilenet, depth_multiplier=0.35, + finegrain_classification_mode=True) + + +@slim.add_arg_scope +def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs): + """Creates base of the mobilenet (no pooling and no logits) .""" + return mobilenet(input_tensor, + depth_multiplier=depth_multiplier, + base_only=True, **kwargs) + + +def training_scope(**kwargs): + """Defines MobilenetV2 training scope. + + Usage: + with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + with slim. + + Args: + **kwargs: Passed to mobilenet.training_scope. The following parameters + are supported: + weight_decay- The weight decay to use for regularizing the model. + stddev- Standard deviation for initialization, if negative uses xavier. + dropout_keep_prob- dropout keep probability + bn_decay- decay for the batch norm moving averages. + + Returns: + An `arg_scope` to use for the mobilenet v2 model. + """ + return lib.training_scope(**kwargs) + + +__all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF'] \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_utils.py b/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_utils.py new file mode 100644 index 000000000..99d993a55 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_utils.py @@ -0,0 +1,254 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Contains building blocks for various versions of Residual Networks. + +Residual networks (ResNets) were proposed in: + Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 + +More variants were introduced in: + Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 + +We can obtain different ResNet variants by changing the network depth, width, +and form of residual unit. This module implements the infrastructure for +building them. Concrete ResNet units and full ResNet networks are implemented in +the accompanying resnet_v1.py and resnet_v2.py modules. + +Compared to https://github.com/KaimingHe/deep-residual-networks, in the current +implementation we subsample the output activations in the last residual unit of +each block, instead of subsampling the input activations in the first residual +unit of each block. The two implementations give identical results but our +implementation is more memory efficient. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import tensorflow as tf + +slim = tf.contrib.slim + + +class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): + """A named tuple describing a ResNet block. + + Its parts are: + scope: The scope of the `Block`. + unit_fn: The ResNet unit function which takes as input a `Tensor` and + returns another `Tensor` with the output of the ResNet unit. + args: A list of length equal to the number of units in the `Block`. The list + contains one (depth, depth_bottleneck, stride) tuple for each unit in the + block to serve as argument to unit_fn. + """ + + +def subsample(inputs, factor, scope=None): + """Subsamples the input along the spatial dimensions. + + Args: + inputs: A `Tensor` of size [batch, height_in, width_in, channels]. + factor: The subsampling factor. + scope: Optional variable_scope. + + Returns: + output: A `Tensor` of size [batch, height_out, width_out, channels] with the + input, either intact (if factor == 1) or subsampled (if factor > 1). + """ + if factor == 1: + return inputs + else: + return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) + + +def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): + """Strided 2-D convolution with 'SAME' padding. + + When stride > 1, then we do explicit zero-padding, followed by conv2d with + 'VALID' padding. + + Note that + + net = conv2d_same(inputs, num_outputs, 3, stride=stride) + + is equivalent to + + net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME') + net = subsample(net, factor=stride) + + whereas + + net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME') + + is different when the input's height or width is even, which is why we add the + current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). + + Args: + inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. + num_outputs: An integer, the number of output filters. + kernel_size: An int with the kernel_size of the filters. + stride: An integer, the output stride. + rate: An integer, rate for atrous convolution. + scope: Scope. + + Returns: + output: A 4-D tensor of size [batch, height_out, width_out, channels] with + the convolution output. 
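+
+  Example (a small worked sketch, assuming kernel_size=3, stride=2 and
+  rate=1): pad_total is 2, so one row/column of zeros is added on each
+  spatial side and an even-sized input keeps the 'SAME'-style output size of
+  height_in / 2:
+
+    x = tf.placeholder(tf.float32, [1, 224, 224, 3])
+    y = conv2d_same(x, 64, 3, stride=2)   # shape: [1, 112, 112, 64]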
+ """ + if stride == 1: + return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, + padding='SAME', scope=scope) + else: + kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) + pad_total = kernel_size_effective - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + inputs = tf.pad(inputs, + [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) + return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, + rate=rate, padding='VALID', scope=scope) + + +@slim.add_arg_scope +def stack_blocks_dense(net, blocks, multi_grid, output_stride=None, + outputs_collections=None): + """Stacks ResNet `Blocks` and controls output feature density. + + First, this function creates scopes for the ResNet in the form of + 'block_name/unit_1', 'block_name/unit_2', etc. + + Second, this function allows the user to explicitly control the ResNet + output_stride, which is the ratio of the input to output spatial resolution. + This is useful for dense prediction tasks such as semantic segmentation or + object detection. + + Most ResNets consist of 4 ResNet blocks and subsample the activations by a + factor of 2 when transitioning between consecutive ResNet blocks. This results + to a nominal ResNet output_stride equal to 8. If we set the output_stride to + half the nominal network stride (e.g., output_stride=4), then we compute + responses twice. + + Control of the output feature density is implemented by atrous convolution. + + Args: + net: A `Tensor` of size [batch, height, width, channels]. + blocks: A list of length equal to the number of ResNet `Blocks`. Each + element is a ResNet `Block` object describing the units in the `Block`. + output_stride: If `None`, then the output will be computed at the nominal + network stride. If output_stride is not `None`, it specifies the requested + ratio of input to output spatial resolution, which needs to be equal to + the product of unit strides from the start up to some level of the ResNet. + For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, + then valid values for the output_stride are 1, 2, 6, 24 or None (which + is equivalent to output_stride=24). + outputs_collections: Collection to add the ResNet block outputs. + + Returns: + net: Output tensor with stride equal to the specified output_stride. + + Raises: + ValueError: If the target output_stride is not valid. + """ + # The current_stride variable keeps track of the effective stride of the + # activations. This allows us to invoke atrous convolution whenever applying + # the next residual unit would result in the activations having stride larger + # than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + for block in blocks: + with tf.variable_scope(block.scope, 'block', [net]) as sc: + for i, unit in enumerate(block.args): + if output_stride is not None and current_stride > output_stride: + raise ValueError('The target output_stride cannot be reached.') + + with tf.variable_scope('unit_%d' % (i + 1), values=[net]): + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. 
+ if output_stride is not None and current_stride == output_stride: + # Only uses atrous convolutions with multi-graid rates in the last (block4) block + if block.scope == "block4": + net = block.unit_fn(net, rate=rate * multi_grid[i], **dict(unit, stride=1)) + else: + net = block.unit_fn(net, rate=rate, **dict(unit, stride=1)) + rate *= unit.get('stride', 1) + else: + net = block.unit_fn(net, rate=1, **unit) + current_stride *= unit.get('stride', 1) + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + if output_stride is not None and current_stride != output_stride: + raise ValueError('The target output_stride cannot be reached.') + + return net + + +def resnet_arg_scope(weight_decay=0.0001, + is_training=True, + batch_norm_decay=0.9, + batch_norm_epsilon=1e-5, + batch_norm_scale=True, + activation_fn=tf.nn.relu, + use_batch_norm=True): + """Defines the default ResNet arg scope. + + TODO(gpapan): The batch-normalization related default values above are + appropriate for use in conjunction with the reference ResNet models + released at https://github.com/KaimingHe/deep-residual-networks. When + training ResNets from scratch, they might need to be tuned. + + Args: + weight_decay: The weight decay to use for regularizing the model. + batch_norm_decay: The moving average decay when estimating layer activation + statistics in batch normalization. + batch_norm_epsilon: Small constant to prevent division by zero when + normalizing activations by their variance in batch normalization. + batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the + activations in the batch normalization layer. + activation_fn: The activation function which is used in ResNet. + use_batch_norm: Whether or not to use batch normalization. + + Returns: + An `arg_scope` to use for the resnet models. + """ + batch_norm_params = { + 'decay': batch_norm_decay, + 'epsilon': batch_norm_epsilon, + 'scale': batch_norm_scale, + 'updates_collections': tf.GraphKeys.UPDATE_OPS, + 'is_training': is_training, + 'fused': True, # Use fused batch norm if possible. + } + + with slim.arg_scope( + [slim.conv2d], + weights_regularizer=slim.l2_regularizer(weight_decay), + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=activation_fn, + normalizer_fn=slim.batch_norm if use_batch_norm else None, + normalizer_params=batch_norm_params): + with slim.arg_scope([slim.batch_norm], **batch_norm_params): + # The following implies padding='SAME' for pool1, which makes feature + # alignment easier for dense prediction tasks. This is also used in + # https://github.com/facebook/fb.resnet.torch. However the accompanying + # code of 'Deep Residual Learning for Image Recognition' uses + # padding='VALID' for pool1. You can switch to that choice by setting + # slim.arg_scope([slim.max_pool2d], padding='VALID'). + with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: + return arg_sc \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v1.py b/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v1.py new file mode 100644 index 000000000..ae4666a2b --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v1.py @@ -0,0 +1,271 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import tensorflow as tf +from tensorflow.contrib import slim +from frontends import resnet_utils + +resnet_arg_scope = resnet_utils.resnet_arg_scope + +@slim.add_arg_scope +def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, + outputs_collections=None, scope=None): + """Bottleneck residual unit variant with BN after convolutions. + This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for + its definition. Note that we use here the bottleneck variant which has an + extra bottleneck layer. + When putting together two consecutive ResNet blocks that use this unit, one + should use stride = 2 in the last unit of the first block. + Args: + inputs: A tensor of size [batch, height, width, channels]. + depth: The depth of the ResNet unit output. + depth_bottleneck: The depth of the bottleneck layers. + stride: The ResNet unit's stride. Determines the amount of downsampling of + the units output compared to its input. + rate: An integer, rate for atrous convolution. + outputs_collections: Collection to add the ResNet unit output. + scope: Optional variable_scope. + Returns: + The ResNet unit's output. + """ + with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: + depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) + if depth == depth_in: + shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') + else: + shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, + activation_fn=None, scope='shortcut') + residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, + scope='conv1') + residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, + rate=rate, scope='conv2') + residual = slim.conv2d(residual, depth, [1, 1], stride=1, + activation_fn=None, scope='conv3') + + output = tf.nn.relu(shortcut + residual) + + return slim.utils.collect_named_outputs(outputs_collections, + sc.original_name_scope, + output) + + +def resnet_v1(inputs, + blocks, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + spatial_squeeze=True, + reuse=None, + scope=None): + """Generator for v1 ResNet models. + + This function generates a family of ResNet v1 models. See the resnet_v1_*() + methods for specific model instantiations, obtained by selecting different + block instantiations that produce ResNets of various depths. + + Training for image classification on Imagenet is usually done with [224, 224] + inputs, resulting in [7, 7] feature maps at the output of the last ResNet + block for the ResNets defined in [1] that have nominal stride equal to 32. + However, for dense prediction tasks we advise that one uses inputs with + spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In + this case the feature maps at the ResNet output will have spatial shape + [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] + and corners exactly aligned with the input image corners, which greatly + facilitates alignment of the features to the image. 
Using as input [225, 225] + images results in [8, 8] feature maps at the output of the last ResNet block. + + For dense prediction tasks, the ResNet needs to run in fully-convolutional + (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all + have nominal stride equal to 32 and a good choice in FCN mode is to use + output_stride=16 in order to increase the density of the computed features at + small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + blocks: A list of length equal to the number of ResNet blocks. Each element + is a resnet_utils.Block object describing the units in the block. + num_classes: Number of predicted classes for classification tasks. If None + we return the features before the logit layer. + is_training: whether is training or not. + global_pool: If True, we perform global average pooling before computing the + logits. Set to True for image classification, False for dense prediction. + output_stride: If None, then the output will be computed at the nominal + network stride. If output_stride is not None, it specifies the requested + ratio of input to output spatial resolution. + include_root_block: If True, include the initial convolution followed by + max-pooling, if False excludes it. + spatial_squeeze: if True, logits is of shape [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. + If global_pool is False, then height_out and width_out are reduced by a + factor of output_stride compared to the respective height_in and width_in, + else both height_out and width_out equal one. If num_classes is None, then + net is the output of the last ResNet block, potentially after global + average pooling. If num_classes is not None, net contains the pre-softmax + activations. + end_points: A dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: If the target output_stride is not valid. 
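+
+  Example (illustrative sketch; assumes a TF 1.x environment with
+  tf.contrib.slim, and the input size below is arbitrary):
+
+    inputs = tf.placeholder(tf.float32, [None, 512, 512, 3])
+    with slim.arg_scope(resnet_arg_scope(is_training=True)):
+      # Fully-convolutional mode: keep features at 1/16 of the input size.
+      net, end_points = resnet_v1_50(inputs, global_pool=False,
+                                     output_stride=16)
+    # end_points['pool3'], end_points['pool4'] and end_points['pool5'] hold the
+    # intermediate feature maps exposed to the BiSeNet context path.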
+ """ + with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: + end_points_collection = sc.name + '_end_points' + with slim.arg_scope([slim.conv2d, bottleneck, + resnet_utils.stack_blocks_dense], + outputs_collections=end_points_collection): + with slim.arg_scope([slim.batch_norm], is_training=is_training): + net = inputs + if include_root_block: + if output_stride is not None: + if output_stride % 4 != 0: + raise ValueError('The output_stride needs to be a multiple of 4.') + output_stride /= 4 + net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') + net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') + + net = slim.utils.collect_named_outputs(end_points_collection, 'pool2', net) + + net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + + end_points['pool3'] = end_points[scope + '/block1'] + end_points['pool4'] = end_points[scope + '/block2'] + end_points['pool5'] = net + return net, end_points + + +resnet_v1.default_image_size = 224 + +def resnet_v1_50(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_50'): + """ResNet-50 model of [1]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) + + +resnet_v1_50.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_101(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_101'): + """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) + + +resnet_v1_101.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_152(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_152'): + """ResNet-152 model of [1]. 
See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) + + +resnet_v1_152.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_200(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_200'): + """ResNet-200 model of [2]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) + + +resnet_v1_200.default_image_size = resnet_v1.default_image_size + + +if __name__ == '__main__': + input = tf.placeholder(tf.float32, shape=(None, 224, 224, 3), name='input') + with slim.arg_scope(resnet_arg_scope()) as sc: + logits = resnet_v1_50(input) \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v2.py b/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v2.py new file mode 100644 index 000000000..a3be4bdf3 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/resnet_v2.py @@ -0,0 +1,329 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains definitions for the preactivation form of Residual Networks. + +Residual networks (ResNets) were originally proposed in: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 + +The full preactivation 'v2' ResNet variant implemented in this module was +introduced by: +[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 + +The key difference of the full preactivation 'v2' variant compared to the +'v1' variant in [1] is the use of batch normalization before every weight layer. 
+ +Typical use: + + from tensorflow.contrib.slim.nets import resnet_v2 + +ResNet-101 for image classification into 1000 classes: + + # inputs has shape [batch, 224, 224, 3] + with slim.arg_scope(resnet_v2.resnet_arg_scope()): + net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False) + +ResNet-101 for semantic segmentation into 21 classes: + + # inputs has shape [batch, 513, 513, 3] + with slim.arg_scope(resnet_v2.resnet_arg_scope()): + net, end_points = resnet_v2.resnet_v2_101(inputs, + 21, + is_training=False, + global_pool=False, + output_stride=16) +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from frontends import resnet_utils + +slim = tf.contrib.slim +resnet_arg_scope = resnet_utils.resnet_arg_scope + + +@slim.add_arg_scope +def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, + outputs_collections=None, scope=None): + """Bottleneck residual unit variant with BN before convolutions. + + This is the full preactivation residual unit variant proposed in [2]. See + Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck + variant which has an extra bottleneck layer. + + When putting together two consecutive ResNet blocks that use this unit, one + should use stride = 2 in the last unit of the first block. + + Args: + inputs: A tensor of size [batch, height, width, channels]. + depth: The depth of the ResNet unit output. + depth_bottleneck: The depth of the bottleneck layers. + stride: The ResNet unit's stride. Determines the amount of downsampling of + the units output compared to its input. + rate: An integer, rate for atrous convolution. + outputs_collections: Collection to add the ResNet unit output. + scope: Optional variable_scope. + + Returns: + The ResNet unit's output. + """ + with tf.compat.v1.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: + depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) + preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact') + if depth == depth_in: + shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') + else: + shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, + normalizer_fn=None, activation_fn=None, + scope='shortcut') + + residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, + scope='conv1') + residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, + rate=rate, scope='conv2') + residual = slim.conv2d(residual, depth, [1, 1], stride=1, + normalizer_fn=None, activation_fn=None, + scope='conv3') + + output = shortcut + residual + + return slim.utils.collect_named_outputs(outputs_collections, + sc.name, + output) + + +def resnet_v2(inputs, + blocks, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + spatial_squeeze=True, + reuse=None, + scope=None): + """Generator for v2 (preactivation) ResNet models. + + This function generates a family of ResNet v2 models. See the resnet_v2_*() + methods for specific model instantiations, obtained by selecting different + block instantiations that produce ResNets of various depths. + + Training for image classification on Imagenet is usually done with [224, 224] + inputs, resulting in [7, 7] feature maps at the output of the last ResNet + block for the ResNets defined in [1] that have nominal stride equal to 32. 
+ However, for dense prediction tasks we advise that one uses inputs with + spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In + this case the feature maps at the ResNet output will have spatial shape + [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] + and corners exactly aligned with the input image corners, which greatly + facilitates alignment of the features to the image. Using as input [225, 225] + images results in [8, 8] feature maps at the output of the last ResNet block. + + For dense prediction tasks, the ResNet needs to run in fully-convolutional + (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all + have nominal stride equal to 32 and a good choice in FCN mode is to use + output_stride=16 in order to increase the density of the computed features at + small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + blocks: A list of length equal to the number of ResNet blocks. Each element + is a resnet_utils.Block object describing the units in the block. + num_classes: Number of predicted classes for classification tasks. + If 0 or None, we return the features before the logit layer. + is_training: whether batch_norm layers are in training mode. + global_pool: If True, we perform global average pooling before computing the + logits. Set to True for image classification, False for dense prediction. + output_stride: If None, then the output will be computed at the nominal + network stride. If output_stride is not None, it specifies the requested + ratio of input to output spatial resolution. + include_root_block: If True, include the initial convolution followed by + max-pooling, if False excludes it. If excluded, `inputs` should be the + results of an activation-less convolution. + spatial_squeeze: if True, logits is of shape [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + To use this parameter, the input images must be smaller than 300x300 + pixels, in which case the output logit layer does not contain spatial + information and can be removed. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + + Returns: + net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. + If global_pool is False, then height_out and width_out are reduced by a + factor of output_stride compared to the respective height_in and width_in, + else both height_out and width_out equal one. If num_classes is 0 or None, + then net is the output of the last ResNet block, potentially after global + average pooling. If num_classes is a non-zero integer, net contains the + pre-softmax activations. + end_points: A dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: If the target output_stride is not valid. 
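+
+  Example (illustrative sketch; assumes a TF 1.x environment with
+  tf.contrib.slim):
+
+    # Dense-prediction mode with the recommended input size 321 = 32 * 10 + 1;
+    # with output_stride=16 the last block yields feature maps of spatial size
+    # (321 - 1) / 16 + 1 = 21.
+    inputs = tf.placeholder(tf.float32, [None, 321, 321, 3])
+    with slim.arg_scope(resnet_arg_scope()):
+      net, end_points = resnet_v2_50(inputs, global_pool=False,
+                                     output_stride=16)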
+ """ + with tf.compat.v1.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: + end_points_collection = sc.original_name_scope + '_end_points' + with slim.arg_scope([slim.conv2d, bottleneck, + resnet_utils.stack_blocks_dense], + outputs_collections=end_points_collection): + with slim.arg_scope([slim.batch_norm], is_training=is_training): + net = inputs + if include_root_block: + if output_stride is not None: + if output_stride % 4 != 0: + raise ValueError('The output_stride needs to be a multiple of 4.') + output_stride /= 4 + # We do not include batch normalization or activation functions in + # conv1 because the first ResNet unit will perform these. Cf. + # Appendix of [2]. + with slim.arg_scope([slim.conv2d], + activation_fn=None, normalizer_fn=None): + net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') + net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') + + net = slim.utils.collect_named_outputs(end_points_collection, 'pool2', net) + net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) + # This is needed because the pre-activation variant does not have batch + # normalization or activation functions in the residual unit output. See + # Appendix of [2]. + net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') + # Convert end_points_collection into a dictionary of end_points. + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + + end_points['pool3'] = end_points[scope + '/block1'] + end_points['pool4'] = end_points[scope + '/block2'] + end_points['pool5'] = net + return net, end_points +resnet_v2.default_image_size = 224 + + +def resnet_v2_block(scope, base_depth, num_units, stride): + """Helper function for creating a resnet_v2 bottleneck block. + + Args: + scope: The scope of the block. + base_depth: The depth of the bottleneck layer for each unit. + num_units: The number of units in the block. + stride: The stride of the block, implemented as a stride in the last unit. + All other units have stride=1. + + Returns: + A resnet_v2 bottleneck block. + """ + return resnet_utils.Block(scope, bottleneck, [{ + 'depth': base_depth * 4, + 'depth_bottleneck': base_depth, + 'stride': 1 + }] * (num_units - 1) + [{ + 'depth': base_depth * 4, + 'depth_bottleneck': base_depth, + 'stride': stride + }]) +resnet_v2.default_image_size = 224 + + +def resnet_v2_50(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v2_50'): + """ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" + blocks = [ + resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), + resnet_v2_block('block2', base_depth=128, num_units=4, stride=2), + resnet_v2_block('block3', base_depth=256, num_units=6, stride=2), + resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), + ] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) +resnet_v2_50.default_image_size = resnet_v2.default_image_size + + +def resnet_v2_101(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v2_101'): + """ResNet-101 model of [1]. 
See resnet_v2() for arg and return description.""" + blocks = [ + resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), + resnet_v2_block('block2', base_depth=128, num_units=4, stride=2), + resnet_v2_block('block3', base_depth=256, num_units=23, stride=2), + resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), + ] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) +resnet_v2_101.default_image_size = resnet_v2.default_image_size + + +def resnet_v2_152(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v2_152'): + """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" + blocks = [ + resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), + resnet_v2_block('block2', base_depth=128, num_units=8, stride=2), + resnet_v2_block('block3', base_depth=256, num_units=36, stride=2), + resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), + ] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) +resnet_v2_152.default_image_size = resnet_v2.default_image_size + + +def resnet_v2_200(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v2_200'): + """ResNet-200 model of [2]. See resnet_v2() for arg and return description.""" + blocks = [ + resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), + resnet_v2_block('block2', base_depth=128, num_units=24, stride=2), + resnet_v2_block('block3', base_depth=256, num_units=36, stride=2), + resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), + ] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, spatial_squeeze=spatial_squeeze, + reuse=reuse, scope=scope) +resnet_v2_200.default_image_size = resnet_v2.default_image_size diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/se_resnext.py b/TensorFlow/contrib/cv/BiSeNet/frontends/se_resnext.py new file mode 100644 index 000000000..d66e62191 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/se_resnext.py @@ -0,0 +1,202 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import tensorflow as tf + +import math + +USE_FUSED_BN = True +BN_EPSILON = 9.999999747378752e-06 +BN_MOMENTUM = 0.99 + +VAR_LIST = [] + +# input image order: BGR, range [0-255] +# mean_value: 104, 117, 123 +# only subtract mean is used +def constant_xavier_initializer(shape, group, dtype=tf.float32, uniform=True): + """Initializer function.""" + if not dtype.is_floating: + raise TypeError('Cannot create initializer for non-floating point type.') + # Estimating fan_in and fan_out is not possible to do perfectly, but we try. + # This is the right thing for matrix multiply and convolutions. + if shape: + fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1]) + fan_out = float(shape[-1])/group + else: + fan_in = 1.0 + fan_out = 1.0 + for dim in shape[:-2]: + fan_in *= float(dim) + fan_out *= float(dim) + + # Average number of inputs and output connections. + n = (fan_in + fan_out) / 2.0 + if uniform: + # To get stddev = math.sqrt(factor / n) need to adjust for uniform. + limit = math.sqrt(3.0 * 1.0 / n) + return tf.random_uniform(shape, -limit, limit, dtype, seed=None) + else: + # To get stddev = math.sqrt(factor / n) need to adjust for truncated. + trunc_stddev = math.sqrt(1.3 * 1.0 / n) + return tf.truncated_normal(shape, 0.0, trunc_stddev, dtype, seed=None) + +# for root block, use dummy input_filters, e.g. 128 rather than 64 for the first block +def se_bottleneck_block(inputs, input_filters, name_prefix, is_training, group, data_format='channels_last', need_reduce=True, is_root=False, reduced_scale=16): + bn_axis = -1 if data_format == 'channels_last' else 1 + strides_to_use = 1 + residuals = inputs + if need_reduce: + strides_to_use = 1 if is_root else 2 + proj_mapping = tf.layers.conv2d(inputs, input_filters, (1, 1), use_bias=False, + name=name_prefix + '_1x1_proj', strides=(strides_to_use, strides_to_use), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + residuals = tf.layers.batch_normalization(proj_mapping, momentum=BN_MOMENTUM, + name=name_prefix + '_1x1_proj/bn', axis=bn_axis, + epsilon=BN_EPSILON, training=is_training, reuse=None, fused=USE_FUSED_BN) + + reduced_inputs = tf.layers.conv2d(inputs, input_filters // 2, (1, 1), use_bias=False, + name=name_prefix + '_1x1_reduce', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + reduced_inputs_bn = tf.layers.batch_normalization(reduced_inputs, momentum=BN_MOMENTUM, + name=name_prefix + '_1x1_reduce/bn', axis=bn_axis, + epsilon=BN_EPSILON, training=is_training, reuse=None, fused=USE_FUSED_BN) + reduced_inputs_relu = tf.nn.relu(reduced_inputs_bn, name=name_prefix + '_1x1_reduce/relu') + + if data_format == 'channels_first': + reduced_inputs_relu = tf.pad(reduced_inputs_relu, paddings = [[0, 0], [0, 0], [1, 1], [1, 1]]) + weight_shape = [3, 3, reduced_inputs_relu.get_shape().as_list()[1]//group, input_filters // 2] + weight_ = tf.Variable(constant_xavier_initializer(weight_shape, group=group, dtype=tf.float32), trainable=is_training, name=name_prefix + '_3x3/kernel') + weight_groups = tf.split(weight_, num_or_size_splits=group, axis=-1, name=name_prefix + '_weight_split') + xs = tf.split(reduced_inputs_relu, num_or_size_splits=group, axis=1, name=name_prefix + '_inputs_split') + else: + 
reduced_inputs_relu = tf.pad(reduced_inputs_relu, paddings = [[0, 0], [1, 1], [1, 1], [0, 0]]) + weight_shape = [3, 3, reduced_inputs_relu.get_shape().as_list()[-1]//group, input_filters // 2] + weight_ = tf.Variable(constant_xavier_initializer(weight_shape, group=group, dtype=tf.float32), trainable=is_training, name=name_prefix + '_3x3/kernel') + weight_groups = tf.split(weight_, num_or_size_splits=group, axis=-1, name=name_prefix + '_weight_split') + xs = tf.split(reduced_inputs_relu, num_or_size_splits=group, axis=-1, name=name_prefix + '_inputs_split') + + convolved = [tf.nn.convolution(x, weight, padding='VALID', strides=[strides_to_use, strides_to_use], name=name_prefix + '_group_conv', + data_format=('NCHW' if data_format == 'channels_first' else 'NHWC')) for (x, weight) in zip(xs, weight_groups)] + + if data_format == 'channels_first': + conv3_inputs = tf.concat(convolved, axis=1, name=name_prefix + '_concat') + else: + conv3_inputs = tf.concat(convolved, axis=-1, name=name_prefix + '_concat') + + conv3_inputs_bn = tf.layers.batch_normalization(conv3_inputs, momentum=BN_MOMENTUM, name=name_prefix + '_3x3/bn', + axis=bn_axis, epsilon=BN_EPSILON, training=is_training, reuse=None, fused=USE_FUSED_BN) + conv3_inputs_relu = tf.nn.relu(conv3_inputs_bn, name=name_prefix + '_3x3/relu') + + + increase_inputs = tf.layers.conv2d(conv3_inputs_relu, input_filters, (1, 1), use_bias=False, + name=name_prefix + '_1x1_increase', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + increase_inputs_bn = tf.layers.batch_normalization(increase_inputs, momentum=BN_MOMENTUM, + name=name_prefix + '_1x1_increase/bn', axis=bn_axis, + epsilon=BN_EPSILON, training=is_training, reuse=None, fused=USE_FUSED_BN) + + if data_format == 'channels_first': + pooled_inputs = tf.reduce_mean(increase_inputs_bn, [2, 3], name=name_prefix + '_global_pool', keep_dims=True) + else: + pooled_inputs = tf.reduce_mean(increase_inputs_bn, [1, 2], name=name_prefix + '_global_pool', keep_dims=True) + + down_inputs = tf.layers.conv2d(pooled_inputs, input_filters // reduced_scale, (1, 1), use_bias=True, + name=name_prefix + '_1x1_down', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + down_inputs_relu = tf.nn.relu(down_inputs, name=name_prefix + '_1x1_down/relu') + + up_inputs = tf.layers.conv2d(down_inputs_relu, input_filters, (1, 1), use_bias=True, + name=name_prefix + '_1x1_up', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + prob_outputs = tf.nn.sigmoid(up_inputs, name=name_prefix + '_prob') + + rescaled_feat = tf.multiply(prob_outputs, increase_inputs_bn, name=name_prefix + '_mul') + pre_act = tf.add(residuals, rescaled_feat, name=name_prefix + '_add') + return tf.nn.relu(pre_act, name=name_prefix + '/relu') + #return tf.nn.relu(residuals + prob_outputs * increase_inputs_bn, name=name_prefix + '/relu') + +def se_resnext(input_image, scope, is_training = False, group=16, data_format='channels_last', net_depth=50): + end_points = dict() + + bn_axis = -1 if data_format == 'channels_last' else 1 + # the input image should in BGR order, note that this is not the common case in Tensorflow + # convert from RGB to BGR + if data_format == 
'channels_last': + image_channels = tf.unstack(input_image, axis=-1) + swaped_input_image = tf.stack([image_channels[2], image_channels[1], image_channels[0]], axis=-1) + else: + image_channels = tf.unstack(input_image, axis=1) + swaped_input_image = tf.stack([image_channels[2], image_channels[1], image_channels[0]], axis=1) + #swaped_input_image = input_image + + if net_depth not in [50, 101]: + raise TypeError('Only ResNeXt50 or ResNeXt101 are currently supported.') + input_depth = [256, 512, 1024, 2048] # the input depth of the the first block is dummy input + num_units = [3, 4, 6, 3] if net_depth==50 else [3, 4, 23, 3] + + block_name_prefix = ['conv2_{}', 'conv3_{}', 'conv4_{}', 'conv5_{}'] + + if data_format == 'channels_first': + swaped_input_image = tf.pad(swaped_input_image, paddings = [[0, 0], [0, 0], [3, 3], [3, 3]]) + else: + swaped_input_image = tf.pad(swaped_input_image, paddings = [[0, 0], [3, 3], [3, 3], [0, 0]]) + + inputs_features = tf.layers.conv2d(swaped_input_image, input_depth[0]//4, (7, 7), use_bias=False, + name='conv1/7x7_s2', strides=(2, 2), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + VAR_LIST.append('conv1/7x7_s2') + + inputs_features = tf.layers.batch_normalization(inputs_features, momentum=BN_MOMENTUM, + name='conv1/7x7_s2/bn', axis=bn_axis, + epsilon=BN_EPSILON, training=is_training, reuse=None, fused=USE_FUSED_BN) + inputs_features = tf.nn.relu(inputs_features, name='conv1/relu_7x7_s2') + + inputs_features = tf.layers.max_pooling2d(inputs_features, [3, 3], [2, 2], padding='same', data_format=data_format, name='pool1/3x3_s2') + + is_root = True + for ind, num_unit in enumerate(num_units): + need_reduce = True + for unit_index in range(1, num_unit+1): + inputs_features = se_bottleneck_block(inputs_features, input_depth[ind], block_name_prefix[ind].format(unit_index), is_training=is_training, group=group, data_format=data_format, need_reduce=need_reduce, is_root=is_root) + need_reduce = False + end_points['pool' + str(ind)] = inputs_features + is_root = False + + if data_format == 'channels_first': + pooled_inputs = tf.reduce_mean(inputs_features, [2, 3], name='pool5/7x7_s1', keep_dims=True) + else: + pooled_inputs = tf.reduce_mean(inputs_features, [1, 2], name='pool5/7x7_s1', keep_dims=True) + + pooled_inputs = tf.layers.flatten(pooled_inputs) + + # logits_output = tf.layers.dense(pooled_inputs, num_classes, + # kernel_initializer=tf.contrib.layers.xavier_initializer(), + # bias_initializer=tf.zeros_initializer(), use_bias=True) + + logits_output = None + + return logits_output, end_points, VAR_LIST diff --git a/TensorFlow/contrib/cv/BiSeNet/frontends/xception.py b/TensorFlow/contrib/cv/BiSeNet/frontends/xception.py new file mode 100644 index 000000000..51d37cb26 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/frontends/xception.py @@ -0,0 +1,131 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from npu_bridge.npu_init import * + +slim = tf.contrib.slim + +''' +================================================================== +Based on the Xception Paper (https://arxiv.org/pdf/1610.02357.pdf) +================================================================== +''' + + +def block(input, mid_out_channels, has_proj, stride, dilation=1, expansion=4): + if has_proj: + shortcut = slim.separable_conv2d(input, mid_out_channels*expansion, [3, 3], stride=stride) + shortcut = slim.batch_norm(shortcut) + else: + shortcut = input + residual = slim.separable_conv2d(input, mid_out_channels, [3, 3], stride=stride, rate=dilation) + residual = slim.batch_norm(residual) + residual = tf.nn.relu(residual) + residual = slim.separable_conv2d(residual, mid_out_channels, [3, 3]) + residual = slim.batch_norm(residual) + residual = tf.nn.relu(residual) + residual = slim.separable_conv2d(residual, mid_out_channels*expansion, [3, 3]) + residual = slim.batch_norm(residual) + output = tf.nn.relu(residual+shortcut) + return output + + +@slim.add_arg_scope +def make_layers(input, layers, channel, stride, scope, outputs_collections=None): + with tf.compat.v1.variable_scope(scope, 'stage', [input]) as sc: + has_proj = True if stride > 1 else False + with tf.compat.v1.variable_scope('block1'): + net = block(input, channel, has_proj, stride) + for i in range(1, layers): + with tf.compat.v1.variable_scope('block'+str(i+1)): + net = block(net, channel, False, stride=1) + return slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + +def xception(inputs, layers, channels, + is_training=True, + reuse=False, + scope='Xception'): + ''' + The Xception Model! + + Note: + The padding is included by default in slim.conv2d to preserve spatial dimensions. + INPUTS: + - inputs(Tensor): a 4D Tensor input of shape [batch_size, height, width, num_channels] + - num_classes(int): the number of classes to predict + - is_training(bool): Whether or not to train + OUTPUTS: + - logits (Tensor): raw, unactivated outputs of the final layer + - end_points(dict): dictionary containing the outputs for each layer, including the 'Predictions' + containing the probabilities of each output. 
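+
+    Example (illustrative sketch; assumes a TF 1.x environment with
+    tf.contrib.slim, and the input size below is arbitrary):
+
+      inputs = tf.placeholder(tf.float32, [None, 512, 512, 3])
+      with slim.arg_scope(xception_arg_scope()):
+        net, end_points = xception39(inputs, is_training=True)
+      # end_points['pool3'], end_points['pool4'] and end_points['pool5'] expose
+      # the stage outputs that can serve as a lightweight context-path backbone.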
+ ''' + with tf.compat.v1.variable_scope(scope, 'xception', [inputs], reuse=reuse) as sc: + end_points_collection = sc.name + '_end_points' + + with slim.arg_scope([slim.separable_conv2d], depth_multiplier=1), \ + slim.arg_scope([make_layers], outputs_collections=end_points_collection), \ + slim.arg_scope([slim.batch_norm], is_training=is_training): + # ===========ENTRY FLOW============== + net = slim.conv2d(inputs, 8, [3, 3], stride=2, padding='same', scope='pool1') + net = slim.batch_norm(net, scope='pool1_bn1') + net = tf.nn.relu(net, name='pool1_relu1') + net = slim.max_pool2d(net, [3, 3], stride=2, padding='same', scope='pool2') + + # =========== STAGE ============== + for i in range(len(layers)): + net = make_layers(net, layers[i], channels[i], stride=2, scope='stage'+str(i+1)) + + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + end_points['pool3'] = end_points[scope + '/stage1'] + end_points['pool4'] = end_points[scope + '/stage2'] + end_points['pool5'] = net + + return net, end_points + + +def xception39(inputs, is_training=True, reuse=None, scope='Xception39'): + layers = [4, 8, 4] + channels = [16, 32, 64] + return xception(inputs, layers, channels, is_training=is_training, reuse=reuse, scope=scope) + + +def xception_arg_scope(weight_decay=0.00001, + batch_norm_decay=0.9, + batch_norm_epsilon=1e-5): + ''' + The arg scope for xception model. The weight decay is 1e-5 as seen in the paper. + INPUTS: + - weight_decay(float): the weight decay for weights variables in conv2d and separable conv2d + - batch_norm_decay(float): decay for the moving average of batch_norm momentums. + - batch_norm_epsilon(float): small float added to variance to avoid dividing by zero. + OUTPUTS: + - scope(arg_scope): a tf-slim arg_scope with the parameters needed for xception. + ''' + # Set weight_decay for weights in conv2d and separable_conv2d layers. + with slim.arg_scope([slim.conv2d, slim.separable_conv2d], + weights_regularizer=slim.l2_regularizer(weight_decay), + biases_initializer=None, + activation_fn=None): + # Set parameters for batch_norm. Note: Do not set activation function as it's preset to None already. + with slim.arg_scope([slim.batch_norm], + decay=batch_norm_decay, + epsilon=batch_norm_epsilon) as scope: + return scope diff --git a/TensorFlow/contrib/cv/BiSeNet/models/__init__.py b/TensorFlow/contrib/cv/BiSeNet/models/__init__.py new file mode 100644 index 000000000..6a1eaa12e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/models/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/models/bisenet.py b/TensorFlow/contrib/cv/BiSeNet/models/bisenet.py new file mode 100644 index 000000000..4342434b5 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/models/bisenet.py @@ -0,0 +1,305 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.contrib import slim +from Dataset.dataset import DataLoader +from builders import frontend_builder +import numpy as np + +colors = np.array([[64,128,64], +[192,0,128], +[0,128, 192], +[0, 128, 64], +[128, 0, 0], +[64, 0, 128], +[64, 0, 192], +[192, 128, 64], +[192, 192, 128], +[64, 64, 128], +[128, 0, 192], +[192, 0, 64], +[128, 128, 64], +[192, 0, 192], +[128, 64, 64], +[64, 192, 128], +[64, 64, 0], +[128, 64, 128], +[128, 128, 192], +[0, 0, 192], +[192, 128, 128], +[128, 128, 128], +[64, 128,192], +[0, 0, 64], +[0, 64, 64], +[192, 64, 128], +[128, 128, 0], +[192, 128, 192], +[64, 0, 64], +[192, 192, 0], +[0, 0, 0], +[64, 192, 0]], dtype=np.float32) + + +def Upsampling(inputs, scale): + return tf.compat.v1.image.resize_bilinear(inputs, size=[tf.shape(inputs)[1] * scale, tf.shape(inputs)[2] * scale]) + + +def ConvBlock(inputs, n_filters, kernel_size=[3, 3], strides=1): + """ + Basic conv block for Encoder-Decoder + Apply successivly Convolution, BatchNormalization, ReLU nonlinearity + """ + net = slim.conv2d(inputs, n_filters, kernel_size, stride=[strides, strides], activation_fn=None) + net = tf.nn.relu(slim.batch_norm(net, fused=True)) + return net + + +def AttentionRefinementModule(inputs, n_filters): + inputs = slim.conv2d(inputs, n_filters, [3, 3], activation_fn=None) + inputs = tf.nn.relu(slim.batch_norm(inputs, fused=True)) + + # Global average pooling + net = tf.reduce_mean(inputs, [1, 2], keep_dims=True) + + net = slim.conv2d(net, n_filters, kernel_size=[1, 1]) + net = slim.batch_norm(net, fused=True) + net = tf.sigmoid(net) + + net = tf.multiply(inputs, net) + + return net + + +def FeatureFusionModule(input_1, input_2, n_filters): + inputs = tf.concat([input_1, input_2], axis=-1) + inputs = ConvBlock(inputs, n_filters=n_filters, kernel_size=[3, 3]) + + # Global average pooling + net = tf.reduce_mean(inputs, [1, 2], keep_dims=True) + + net = slim.conv2d(net, n_filters, kernel_size=[1, 1]) + net = tf.nn.relu(net) + net = slim.conv2d(net, n_filters, kernel_size=[1, 1]) + net = tf.sigmoid(net) + + net = tf.multiply(inputs, net) + + net = tf.add(inputs, net) + + return net + + +class BiseNet(object): + def __init__(self, model_config, train_config, num_classes, mode): + self.model_config = model_config + self.train_config = train_config + self.num_classes = num_classes + self.mode = mode + assert mode in ['train', 
'validation', 'inference', 'test'] + if self.mode == 'train': + self.data_config = self.train_config['train_data_config'] + elif self.mode == 'validation': + self.data_config = self.train_config['validation_data_config'] + elif self.mode == 'test': + self.data_config = self.train_config['test_data_config'] + + self.images = None + self.images_feed = None + self.labels = None + self.net = None + self.sup1 = None + self.sup2 = None + self.init_fn = None + self.loss = None + self.total_loss = None + self.response = None + + def build_inputs(self): + """Input fetching and batching + + Outputs: + self.images: image batch of shape [batch, hz, wz, 3] + labels: image batch of shape [batch, hx, wx, num_classes] + """ + if self.mode in ['train', 'validation', 'test']: + # Put data loading and preprocessing in CPU is substantially faster + # DataSet prepare + with tf.device("/cpu:0"): + dataset = DataLoader(self.data_config, self.train_config['DataSet'], self.train_config['class_dict']) + self.images, labels = dataset.get_one_batch() + self.labels = tf.one_hot(labels, self.num_classes) + + else: + self.images_feed = tf.placeholder(shape=[None, None, None, 3], + dtype=tf.uint8, name='images_input') + + self.images = tf.cast(self.images_feed, dtype=tf.float32)/255 + + def is_training(self): + """Returns true if the model is built for training mode""" + return self.mode == 'train' + + def setup_global_step(self): + global_step = tf.Variable( + initial_value=0, + name='global_step', + trainable=False, + collections=[tf.compat.v1.GraphKeys.GLOBAL_STEP, tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) + + self.global_step = global_step + + def build_bisenet(self, reuse=False): + """ + Builds the BiSeNet model. + + Arguments: + reuse: Reuse variable or not + + Returns: + BiSeNet model + """ + + ### The spatial path + ### The number of feature maps for each convolution is not specified in the paper + ### It was chosen here to be equal to the number of feature maps of a classification + ### model at each corresponding stage + batch_norm_params = self.model_config['batch_norm_params'] + init_method = self.model_config['conv_config']['init_method'] + + if init_method == 'kaiming_normal': + initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False) + else: + initializer = slim.xavier_initializer() + + with tf.compat.v1.variable_scope('spatial_net', reuse=reuse): + with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer): + with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params): + spatial_net = ConvBlock(self.images, n_filters=64, kernel_size=[7, 7], strides=2) + spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2) + spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2) + spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[1, 1]) + + frontend_config = self.model_config['frontend_config'] + ### Context path + logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(self.images, frontend_config, + self.is_training(), reuse) + + ### Combining the paths + with tf.compat.v1.variable_scope('combine_path', reuse=reuse): + with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer): + with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params): + # tail part + size = tf.shape(end_points['pool5'])[1:3] + global_context = tf.reduce_mean(end_points['pool5'], [1, 2], 
keep_dims=True) + global_context = slim.conv2d(global_context, 128, 1, [1, 1], activation_fn=None) + global_context = tf.nn.relu(slim.batch_norm(global_context, fused=True)) + global_context = tf.compat.v1.image.resize_bilinear(global_context, size=size) + + net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128) + net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128) + + net_5 = tf.add(net_5, global_context) + net_5 = Upsampling(net_5, scale=2) + net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3]) + net_4 = tf.add(net_4, net_5) + net_4 = Upsampling(net_4, scale=2) + net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3]) + + context_net = net_4 + + net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256) + net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3]) + net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3]) + net = ConvBlock(net, n_filters=64, kernel_size=[3, 3]) + + # Upsampling + dilation or only Upsampling + net = Upsampling(net, scale=2) + net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu, biases_initializer=None, + normalizer_fn=slim.batch_norm) + + net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits') + self.net = Upsampling(net, 4) + + # net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits') + # self.net = Upsampling(net, scale=8) + + if self.mode in ['train', 'validation', 'test']: + sup1 = slim.conv2d(net_5, self.num_classes, [1, 1], activation_fn=None, scope='supl1') + sup2 = slim.conv2d(net_4, self.num_classes, [1, 1], activation_fn=None, scope='supl2') + self.sup1 = Upsampling(sup1, scale=16) + self.sup2 = Upsampling(sup2, scale=8) + self.init_fn = init_fn + + def build_loss(self): + loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.net, labels=self.labels)) + loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.sup1, labels=self.labels)) + loss3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.sup2, labels=self.labels)) + loss = loss1+loss2+loss3 + tf.compat.v1.losses.add_loss(loss) + + self.loss = loss1 + self.total_loss = tf.compat.v1.losses.get_total_loss() + + # shape = tf.shape(self.labels) + + # Tensorboard inspection + + """ + tf.compat.v1.summary.image('image', self.images, family=self.mode, max_outputs=1) + tf.compat.v1.summary.image('GT', tf.reshape( + tf.matmul(tf.reshape(self.labels, [-1, 32]), colors), [-1, shape[1], shape[2], 3]), + family=self.mode, max_outputs=1) + tf.compat.v1.summary.image('response', tf.reshape(tf.matmul( + tf.reshape(tf.one_hot(tf.argmax(self.net, -1), self.num_classes), [-1, 32]), colors), + [-1, shape[1], shape[2], 3]), family=self.mode, max_outputs=1) + tf.compat.v1.summary.scalar('total_loss', self.total_loss, family=self.mode) + tf.compat.v1.summary.scalar('loss', self.loss, family=self.mode) + """ + + self.accuracy = tf.compat.v1.metrics.accuracy(predictions = tf.argmax(self.net, -1), + labels= tf.argmax(self.labels, -1)) + self.mean_IOU = tf.compat.v1.metrics.mean_iou(predictions=tf.argmax(self.net, -1), + labels=tf.argmax(self.labels, -1), + num_classes=self.num_classes) + + """ + with tf.control_dependencies([accuracy_update, mean_IOU_update]): + tf.compat.v1.summary.scalar('accuracy', accuracy, family=self.mode) + tf.compat.v1.summary.scalar('mean_IOU', mean_IOU, family=self.mode) + """ + + def predict(self): + self.response = self.net + + def build(self, reuse=False): + """Creates all ops 
for training and evaluation""" + with tf.name_scope(self.mode): + self.build_inputs() + self.build_bisenet(reuse=reuse) + if self.mode in ['train', 'validation', 'test']: + self.build_loss() + else: + self.predict() + + if self.is_training(): + self.setup_global_step() + + + + diff --git a/TensorFlow/contrib/cv/BiSeNet/test/train_full_1p.sh b/TensorFlow/contrib/cv/BiSeNet/test/train_full_1p.sh new file mode 100644 index 000000000..e17504013 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/test/train_full_1p.sh @@ -0,0 +1,176 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path="" +output_path="" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} + +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 +train_epochs=2000 +batch_size=8 + +python3.7 train_npu.py \ + --data_path=${data_path} \ + --output_path=${output_path} \ + --train_epochs=${train_epochs} \ + --batch_size=${batch_size} \ + 1>${print_log} 2>&1 + +# 性能相关数据计算 +StepTime=`grep -a 'step time' 
${print_log}|awk -F " " '{print $21}'|awk 'END {print}'` +FPS=`grep -a 'FPS' ${print_log}|awk -F " " '{print $17}'|awk 'END {print}'` + +# 精度相关数据计算 +train_accuracy=`grep -a 'accuracy' ${print_log}|awk -F " " '{print $10}'|awk 'END {print}'` +# 提取所有loss打印信息 +grep "loss =" ${print_log} | awk -F " " '{print $7}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/test/train_performance_1p.sh b/TensorFlow/contrib/cv/BiSeNet/test/train_performance_1p.sh new file mode 100644 index 000000000..211aaa201 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/test/train_performance_1p.sh @@ -0,0 +1,176 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" 
'{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path="" +output_path="" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} + +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 +train_epochs=2 +batch_size=8 + +python3.7 train_npu.py \ + --data_path=${data_path} \ + --output_path=${output_path} \ + --train_epochs=${train_epochs} \ + --batch_size=${batch_size} \ + 1>${print_log} 2>&1 + +# 性能相关数据计算 +StepTime=`grep -a 'step time' ${print_log}|awk -F " " '{print $21}'|awk 'END {print}'` +FPS=`grep -a 'FPS' ${print_log}|awk -F " " '{print $17}'|awk 'END {print}'` + +# 精度相关数据计算 +train_accuracy=`grep -a 'accuracy' ${print_log}|awk -F " " '{print $10}'|awk 'END {print}'` +# 提取所有loss打印信息 +grep "loss =" ${print_log} | awk -F " " '{print $7}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." 
+ echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/test_npu.py b/TensorFlow/contrib/cv/BiSeNet/test_npu.py new file mode 100644 index 000000000..c1a6cdfaf --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/test_npu.py @@ -0,0 +1,65 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# import npu_bridge +from npu_bridge.npu_init import * + +import tensorflow as tf +from models.bisenet import BiseNet +import configuration +import logging + +logging.getLogger().setLevel(logging.INFO) + +if __name__ == '__main__': + model_config = configuration.MODEL_CONFIG + train_config = configuration.TRAIN_CONFIG + + g = tf.Graph() + with g.as_default(): + # Build the test model + model = BiseNet(model_config, train_config, 32, 'test') + model.build() + + saver = tf.compat.v1.train.Saver() + + config = tf.ConfigProto() + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["use_off_line"].b = True + + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("force_fp32") + config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # 必须显式关闭 + config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF # 必须显式关闭 + + sess = tf.compat.v1.Session(config=config) + model_path = tf.train.latest_checkpoint(train_config['train_dir']) + + config = train_config['test_data_config'] + total_steps = config['num_examples_per_epoch']//config['batch_size'] + logging.info('Train for {} steps'.format(total_steps)) + + local_variables_init_op = tf.local_variables_initializer() + + sess.run(local_variables_init_op) + saver.restore(sess, model_path) + + for step in range(total_steps): + predict_loss, loss, accuracy, mean_IOU = sess.run([model.loss, model.total_loss, model.accuracy, model.mean_IOU]) + format_str = 'step %d, loss = %.2f, accuracy = %.2f, mean_IOU = %.2f' + logging.info(format_str % (step, loss, accuracy[0], mean_IOU[0])) diff --git a/TensorFlow/contrib/cv/BiSeNet/train_npu.py b/TensorFlow/contrib/cv/BiSeNet/train_npu.py new file mode 100644 index 000000000..82c2503c3 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/train_npu.py @@ -0,0 +1,238 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
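test_npu.py above and train_npu.py below build an almost identical NpuOptimizer session configuration; the only difference is the precision mode ("force_fp32" for evaluation, "allow_mix_precision" for training). A shared helper along the following lines could remove the duplication. build_npu_session_config is a sketch, not part of the patch:

    # Sketch of a shared NPU session-config builder, assuming TF 1.15-style APIs
    # as used elsewhere in this patch.
    import tensorflow as tf
    from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig

    def build_npu_session_config(precision_mode="allow_mix_precision"):
        config = tf.compat.v1.ConfigProto()
        custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
        custom_op.name = "NpuOptimizer"
        custom_op.parameter_map["use_off_line"].b = True
        custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(precision_mode)
        # Remapping and memory optimization must be switched off explicitly on NPU.
        config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
        config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
        return config

    # test_npu.py would call build_npu_session_config("force_fp32"),
    # train_npu.py would call build_npu_session_config("allow_mix_precision").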
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import os + +import random +import time +from datetime import datetime + +# import npu_bridge +from npu_bridge.npu_init import * + +import numpy as np +import tensorflow as tf + +from models.bisenet import BiseNet + +import configuration +from utils.misc_utils import mkdir_p, save_cfgs + +import argparse + + +def _configure_learning_rate(train_config, global_step): + lr_config = train_config['lr_config'] + + num_batches_per_epoch = \ + int(train_config['train_data_config']['num_examples_per_epoch'] + / train_config['train_data_config']['batch_size']) + + lr_policy = lr_config['policy'] + if lr_policy == 'piecewise_constant': + lr_boundaries = [int(e * num_batches_per_epoch) + for e in lr_config['lr_boundaries']] + return tf.train.piecewise_constant(global_step, + lr_boundaries, + lr_config['lr_values']) + elif lr_policy == 'exponential': + decay_steps = int(num_batches_per_epoch) \ + * lr_config['num_epochs_per_decay'] + return tf.train.exponential_decay(lr_config['initial_lr'], + global_step, + decay_steps=decay_steps, + decay_rate=lr_config['lr_decay_factor'], + staircase=lr_config['staircase']) + elif lr_policy == 'polynomial': + T_total = (int(num_batches_per_epoch)+1) \ + * train_config['train_data_config']['epoch'] + return lr_config['initial_lr'] * (1 - tf.cast(global_step, dtype=tf.float32)/T_total)**lr_config['power'] + elif lr_policy == 'cosine': + T_total = train_config['train_data_config']['epoch'] \ + * num_batches_per_epoch + return 0.5 * lr_config['initial_lr'] * (1 + tf.cos(np.pi * tf.cast(global_step, dtype=tf.float32) / T_total)) + else: + raise ValueError( + 'Learning rate policy [%s] was not recognized', lr_policy) + + +def _configure_optimizer(train_config, learning_rate): + optimizer_config = train_config['optimizer_config'] + optimizer_name = optimizer_config['optimizer'].upper() + if optimizer_name == 'MOMENTUM': + optimizer = tf.compat.v1.train.MomentumOptimizer( + learning_rate, + momentum=optimizer_config['momentum'], + use_nesterov=optimizer_config['use_nesterov'], + name='Momentum') + elif optimizer_name == 'SGD': + optimizer = tf.train.GradientDescentOptimizer(learning_rate) + elif optimizer_name == 'RMSProp': + optimizer = tf.train.RMSPropOptimizer( + learning_rate, optimizer_config['decay'], optimizer_config['momentum']) + else: + raise ValueError( + 'Optimizer [%s] was not recognized', optimizer_config['optimizer']) + return optimizer + + +def main(): + logging.basicConfig(level=logging.DEBUG) + + parser = argparse.ArgumentParser() + parser.add_argument("--data_path", type=str, default="CamVid") + parser.add_argument("--class_dict", type=str, default="./CamVid/class_dict.csv") + parser.add_argument("--output_path", type=str, default="") + parser.add_argument("--batch_size", type=int, default=8) + parser.add_argument("--train_epochs", type=int, default=2) + parser.add_argument("--load_checkpoint", type=bool, default=False) + parser.add_argument("--checkpoint_path", type=str, default=None) + args = parser.parse_args() + + model_config = configuration.MODEL_CONFIG + train_config = configuration.TRAIN_CONFIG + + train_config['DataSet'] = args.data_path + train_config['class_dict'] = args.class_dict + train_config['train_data_config']['batch_size'] = args.batch_size + train_config['validation_data_config']['batch_size'] = args.batch_size + 
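The polynomial and cosine policies in _configure_learning_rate below are written as inline TensorFlow expressions rather than via tf.train.polynomial_decay / tf.train.cosine_decay. Restated in plain Python for clarity (illustrative only; the learning rate, power and step counts in the comments are made-up values, the real ones come from lr_config in configuration.py):

    import math

    def polynomial_lr(step, initial_lr, total_steps, power):
        # lr(t) = initial_lr * (1 - t / T)^power
        return initial_lr * (1.0 - step / float(total_steps)) ** power

    def cosine_lr(step, initial_lr, total_steps):
        # lr(t) = 0.5 * initial_lr * (1 + cos(pi * t / T))
        return 0.5 * initial_lr * (1.0 + math.cos(math.pi * step / float(total_steps)))

    # e.g. with initial_lr=0.01 and total_steps=1000:
    #   polynomial_lr(500, 0.01, 1000, power=0.9) ~= 0.0054
    #   cosine_lr(500, 0.01, 1000)                 = 0.005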
train_config['test_data_config']['batch_size'] = args.batch_size + train_config['train_data_config']['epoch'] = args.train_epochs + + num_classes = 32 # TODO: num_classes need to fix + + train_dir = train_config['train_dir'] + if not os.path.isdir(train_dir): + logging.info('Creating training directory: %s', train_dir) + mkdir_p(train_dir) + + g = tf.Graph() + with g.as_default(): + # Set fixed seed for reproducible experiments + random.seed(train_config['seed']) + np.random.seed(train_config['seed']) + tf.compat.v1.set_random_seed(train_config['seed']) + + # Build the training and validation model + model = BiseNet(model_config, train_config, num_classes, mode="train") + model.build() + model_va = BiseNet(model_config, train_config, + num_classes, mode="validation") + model_va.build(reuse=True) + + # Save configurations for future reference + save_cfgs(train_dir, model_config, train_config) + + learning_rate = _configure_learning_rate( + train_config, model.global_step) + optimizer = _configure_optimizer(train_config, learning_rate) + tf.compat.v1.summary.scalar('learning_rate', learning_rate) + update_ops = tf.compat.v1.get_collection( + tf.compat.v1.GraphKeys.UPDATE_OPS) + + opt_tmp = optimizer + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2**32, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5) + optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager) + with tf.control_dependencies(update_ops): + train_op = tf.contrib.layers.optimize_loss(loss=model.total_loss, + global_step=model.global_step, + learning_rate=learning_rate, + optimizer=optimizer, + clip_gradients=train_config['clip_gradients'], + learning_rate_decay_fn=None, + summaries=['learning_rate']) + + saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables(), + max_to_keep=train_config['max_checkpoints_to_keep']) + + global_variables_init_op = tf.compat.v1.global_variables_initializer() + local_variables_init_op = tf.compat.v1.local_variables_initializer() + g.finalize() # Finalize graph to avoid adding ops by mistake + + config = tf.ConfigProto() + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + + custom_op.parameter_map["use_off_line"].b = True + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + + config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # 必须显式关闭 + config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF # 必须显式关闭 + + sess = tf.compat.v1.Session(config=config) + model_path = tf.train.latest_checkpoint(train_config['train_dir']) + + if args.load_checkpoint: + if not model_path: + raise FileNotFoundError + logging.info('Restore from last checkpoint: {}'.format(model_path)) + sess.run(local_variables_init_op) + saver.restore(sess, model_path) + start_step = tf.compat.v1.train.global_step( + sess, model.global_step.name) + 1 + else: + sess.run(global_variables_init_op) + sess.run(local_variables_init_op) + start_step = 0 + + if model_config['frontend_config']['pretrained_dir'] and model.init_fn: + model.init_fn(sess) + + data_config = train_config['train_data_config'] + + total_steps = int(data_config['epoch'] + * data_config['num_examples_per_epoch'] + / data_config['batch_size']) + + logging.info('Train for {} steps'.format(total_steps)) + + begin_time = time.time() + + for step in range(start_step, total_steps): + start_time = time.time() + _, predict_loss, loss, accuracy, mean_IOU = sess.run( + [train_op, model.loss, 
model.total_loss, model.accuracy, model.mean_IOU]) + duration = time.time() - start_time + + # 打印日志 + if step % 10 == 0: + FPS = data_config['batch_size'] / float(duration) + time_remain = data_config['batch_size'] * (total_steps - step) / FPS + m, s = divmod(time_remain, 60) + h, m = divmod(m, 60) + format_str = ('%s: step %d ' + 'loss = %.2f ' # predict loss = %.2f + 'accuracy = %.2f mean IOU = %.2f ' + 'FPS = %.1f step time = %.2f ' + '(%dh:%02dm:%02ds remains)') + logging.info(format_str % (datetime.now(), step, loss, # predict_loss, + accuracy[0], mean_IOU[0], + FPS, duration, + h, m, s)) + + if step % train_config['save_model_every_n_step'] == 0 or (step + 1) == total_steps: + checkpoint_path = os.path.join( + train_config['train_dir'], 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + + logging.info('%s: Train finish after %d steps' % (datetime.now(), total_steps)) + sess.close() + +if __name__ == "__main__": + main() diff --git a/TensorFlow/contrib/cv/BiSeNet/utils/__init__.py b/TensorFlow/contrib/cv/BiSeNet/utils/__init__.py new file mode 100644 index 000000000..6a1eaa12e --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/TensorFlow/contrib/cv/BiSeNet/utils/get_pretrained_checkpoints.py b/TensorFlow/contrib/cv/BiSeNet/utils/get_pretrained_checkpoints.py new file mode 100644 index 000000000..0c44abc7b --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/utils/get_pretrained_checkpoints.py @@ -0,0 +1,67 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
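The ExponentialUpdateLossScaleManager / NPULossScaleOptimizer pair configured in train_npu.py above provides dynamic loss scaling for the allow_mix_precision mode: the scale starts at 2**32, grows after a long run of finite-gradient steps and shrinks after repeated NaN/Inf gradients. The class below is a plain-Python restatement of that policy for clarity; it is not the npu_bridge implementation:

    # Illustrative dynamic loss-scale policy mirroring the parameters used above:
    # init_loss_scale=2**32, incr_every_n_steps=1000,
    # decr_every_n_nan_or_inf=2, decr_ratio=0.5.
    class DynamicLossScale(object):
        def __init__(self, init_scale=2 ** 32, incr_every_n_steps=1000,
                     decr_every_n_nan_or_inf=2, decr_ratio=0.5, incr_ratio=2.0):
            self.scale = float(init_scale)
            self.incr_every_n_steps = incr_every_n_steps
            self.decr_every_n_nan_or_inf = decr_every_n_nan_or_inf
            self.decr_ratio = decr_ratio
            self.incr_ratio = incr_ratio
            self.good_steps = 0
            self.bad_steps = 0

        def update(self, grads_finite):
            if grads_finite:
                self.good_steps += 1
                if self.good_steps >= self.incr_every_n_steps:
                    self.scale *= self.incr_ratio  # grow after a run of finite gradients
                    self.good_steps = 0
                    self.bad_steps = 0
            else:
                self.bad_steps += 1
                self.good_steps = 0
                if self.bad_steps >= self.decr_every_n_nan_or_inf:
                    self.scale = max(self.scale * self.decr_ratio, 1.0)  # back off on overflow
                    self.bad_steps = 0
            return self.scale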
+# ============================================================================== +import subprocess +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--model', type=str, default="ALL", help='Which model weights to download') +args = parser.parse_args() + + +if args.model == "ResNet50" or args.model == "ALL": + subprocess.check_output(['wget','http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz', "-P", "pretrain"]) + try: + subprocess.check_output(['tar', '-xvf', 'pretrain/resnet_v2_50_2017_04_14.tar.gz', "-C", "pretrain"]) + subprocess.check_output(['rm', 'pretrain/resnet_v2_50_2017_04_14.tar.gz']) + except Exception as e: + print(e) + pass + +if args.model == "ResNet101" or args.model == "ALL": + subprocess.check_output(['wget','http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz', "-P", "pretrain"]) + try: + subprocess.check_output(['tar', '-xvf', 'pretrain/resnet_v2_101_2017_04_14.tar.gz', "-C", "pretrain"]) + subprocess.check_output(['rm', 'pretrain/resnet_v2_101_2017_04_14.tar.gz']) + except Exception as e: + print(e) + pass + +if args.model == "ResNet152" or args.model == "ALL": + subprocess.check_output(['wget','http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz', "-P", "pretrain"]) + try: + subprocess.check_output(['tar', '-xvf', 'pretrain/resnet_v2_152_2017_04_14.tar.gz', "-C", "pretrain"]) + subprocess.check_output(['rm', 'pretrain/resnet_v2_152_2017_04_14.tar.gz']) + except Exception as e: + print(e) + pass + +if args.model == "MobileNetV2" or args.model == "ALL": + subprocess.check_output(['wget','https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz', "-P", "pretrain"]) + try: + subprocess.check_output(['tar', '-xvf', 'pretrain/mobilenet_v2_1.4_224.tgz', "-C", "pretrain"]) + subprocess.check_output(['rm', 'pretrain/mobilenet_v2_1.4_224.tgz']) + except Exception as e: + print(e) + pass + +if args.model == "InceptionV4" or args.model == "ALL": + subprocess.check_output( + ['wget', 'http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz', "-P", "pretrain"]) + try: + subprocess.check_output(['tar', '-xvf', 'pretrain/inception_v4_2016_09_09.tar.gz', "-C", "pretrain"]) + subprocess.check_output(['rm', 'pretrain/inception_v4_2016_09_09.tar.gz']) + except Exception as e: + print(e) + pass diff --git a/TensorFlow/contrib/cv/BiSeNet/utils/misc_utils.py b/TensorFlow/contrib/cv/BiSeNet/utils/misc_utils.py new file mode 100644 index 000000000..f62afd041 --- /dev/null +++ b/TensorFlow/contrib/cv/BiSeNet/utils/misc_utils.py @@ -0,0 +1,200 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
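get_pretrained_checkpoints.py above shells out to wget and tar through subprocess, so it fails on machines where those binaries are missing. A standard-library-only variant would look roughly like the sketch below; fetch_checkpoint is illustrative and not part of the patch, and the URL in the usage comment is the ResNet50 checkpoint already referenced above:

    import os
    import tarfile
    import urllib.request

    def fetch_checkpoint(url, dest_dir="pretrain"):
        os.makedirs(dest_dir, exist_ok=True)
        archive = os.path.join(dest_dir, os.path.basename(url))
        urllib.request.urlretrieve(url, archive)   # download the tarball
        with tarfile.open(archive) as tar:
            tar.extractall(dest_dir)               # unpack next to the archive
        os.remove(archive)                         # mirror the 'rm' in the script

    # e.g. fetch_checkpoint('http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz')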
+# ============================================================================== +"""Miscellaneous Utilities.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import errno +import json +import logging +import os +import re +import sys +from os import path as osp +import csv +import numpy as np + + +try: + import pynvml # nvidia-ml provides utility for NVIDIA management + + HAS_NVML = True +except: + HAS_NVML = False + + +def auto_select_gpu(): + """Select gpu which has largest free memory""" + if HAS_NVML: + pynvml.nvmlInit() + deviceCount = pynvml.nvmlDeviceGetCount() + largest_free_mem = 0 + largest_free_idx = 0 + for i in range(deviceCount): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + info = pynvml.nvmlDeviceGetMemoryInfo(handle) + if info.free > largest_free_mem: + largest_free_mem = info.free + largest_free_idx = i + pynvml.nvmlShutdown() + largest_free_mem = largest_free_mem / 1024. / 1024. # Convert to MB + + idx_to_gpu_id = {} + for i in range(deviceCount): + idx_to_gpu_id[i] = '{}'.format(i) + + gpu_id = idx_to_gpu_id[largest_free_idx] + logging.info('Using largest free memory GPU {} with free memory {}MB'.format(gpu_id, largest_free_mem)) + return gpu_id + else: + logging.info('nvidia-ml-py is not installed, automatically select gpu is disabled!') + return '0' + + +def get_center(x): + return (x - 1.) / 2. + + +def get(config, key, default): + """Get value in config by key, use default if key is not set + + This little function is useful for dynamical experimental settings. + For example, we can add a new configuration without worrying compatibility with older versions. + You can also achieve this by just calling config.get(key, default), but add a warning is even better : ) + """ + val = config.get(key) + if val is None: + logging.warning('{} is not explicitly specified, using default value: {}'.format(key, default)) + val = default + return val + + +def mkdir_p(path): + """mimic the behavior of mkdir -p in bash""" + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def tryfloat(s): + try: + return float(s) + except: + return s + + +def alphanum_key(s): + """ Turn a string into a list of string and number chunks. 
+ "z23a" -> ["z", 23, "a"] + """ + return [tryfloat(c) for c in re.split('([0-9.]+)', s)] + + +def sort_nicely(l): + """Sort the given list in the way that humans expect.""" + return sorted(l, key=alphanum_key) + + +class Tee(object): + """Mimic the behavior of tee in bash + + From: http://web.archive.org/web/20141016185743/https://mail.python.org/pipermail/python-list/2007-May/460639.html + Usage: + tee=Tee('logfile', 'w') + print 'abcdefg' + print 'another line' + tee.close() + print 'screen only' + del tee # should do nothing + """ + + def __init__(self, name, mode): + self.file = open(name, mode) + self.stdout = sys.stdout + sys.stdout = self + + def close(self): + if self.stdout is not None: + sys.stdout = self.stdout + self.stdout = None + if self.file is not None: + self.file.close() + self.file = None + + def write(self, data): + self.file.write(data) + self.stdout.write(data) + + def flush(self): + self.file.flush() + self.stdout.flush() + + def __del__(self): + self.close() + + +def save_cfgs(train_dir, model_config, train_config): + """Save all configurations in JSON format for future reference""" + with open(osp.join(train_dir, 'model_config.json'), 'w') as f: + json.dump(model_config, f, indent=2) + with open(osp.join(train_dir, 'train_config.json'), 'w') as f: + json.dump(train_config, f, indent=2) + + +def load_cfgs(checkpoint): + if osp.isdir(checkpoint): + train_dir = checkpoint + else: + train_dir = osp.dirname(checkpoint) + + with open(osp.join(train_dir, 'model_config.json'), 'r') as f: + model_config = json.load(f) + with open(osp.join(train_dir, 'train_config.json'), 'r') as f: + train_config = json.load(f) + + return model_config, train_config + + +def get_label_info(csv_path): + """ + Retrieve the class names and label values for the selected dataset. + Must be in CSV format! + + # Arguments + csv_path: The file path of the class dictionairy + + # Returns + Two lists: one for the class names and the other for the label values + """ + filename, file_extension = os.path.splitext(csv_path) + if not file_extension == ".csv": + return ValueError("File is not a CSV!") + + class_names = [] + label_values = [] + with open(csv_path, 'r') as csvfile: + file_reader = csv.reader(csvfile, delimiter=',') + header = next(file_reader) + for row in file_reader: + class_names.append(row[0]) + label_values.append([int(row[1]), int(row[2]), int(row[3])]) + # print(class_dict) + return class_names, label_values + -- Gitee
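The class-dictionary reader at the end of misc_utils.py is the entry point train_npu.py relies on via its --class_dict argument (defaulting to ./CamVid/class_dict.csv, a CSV with a "name,r,g,b" header row). A minimal usage sketch, with illustrative class names and colours rather than the exact CamVid contents:

    from utils.misc_utils import get_label_info

    class_names, label_values = get_label_info('CamVid/class_dict.csv')
    # class_names  -> e.g. ['Sky', 'Building', 'Pole', ...]
    # label_values -> e.g. [[128, 128, 128], [128, 0, 0], [192, 192, 128], ...]
    num_classes = len(class_names)

    # Caveat: for a path that does not end in .csv the function returns (rather
    # than raises) a ValueError, so the unpacking above fails with a TypeError
    # instead of a clear error message.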