From ed34246ddd154537608826e44a316ccee3b4eef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=AD=90=E6=B5=A9?= Date: Sun, 24 Jul 2022 07:36:23 +0000 Subject: [PATCH 1/2] =?UTF-8?q?polygen=E6=BA=90=E7=A0=81=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/Polygen_ID2061_for_TensorFlow/LICENSE | 202 +++ .../Polygen_ID2061_for_TensorFlow/README.md | 208 +++ .../check_result.tf.json | 40 + .../data_utils.py | 462 +++++ .../fusion_result.json | 661 +++++++ .../meshes/cone.obj | 66 + .../meshes/cube.obj | 14 + .../meshes/cylinder.obj | 98 ++ .../meshes/icosphere.obj | 122 ++ .../model_test.py | 191 ++ .../modelzoo_level.txt | 6 + .../Polygen_ID2061_for_TensorFlow/modules.py | 1535 +++++++++++++++++ .../requirements.txt | 5 + .../test/model_test.sh | 19 + .../test/train_full_1p.sh | 49 + .../cv/Polygen_ID2061_for_TensorFlow/train.py | 223 +++ 16 files changed, 3901 insertions(+) create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/LICENSE create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/README.md create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/check_result.tf.json create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/data_utils.py create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/fusion_result.json create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cone.obj create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cube.obj create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cylinder.obj create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/icosphere.obj create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/model_test.py create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modelzoo_level.txt create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modules.py create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/requirements.txt create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/model_test.sh create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_full_1p.sh create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/train.py diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/LICENSE b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/LICENSE new file mode 100644 index 000000000..9b5e4019d --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/README.md b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/README.md new file mode 100644 index 000000000..6c1382fc3 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/README.md @@ -0,0 +1,208 @@ +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) +- [训练结果](#训练结果.md) +- [高级参考](#高级参考.md) + +

+<h2 id="基本信息.md">基本信息</h2>
+ +**发布者(Publisher): Huawei** + +**应用领域(Application Domain): Computer Vision** + +**版本(Version):1.0** + +**修改时间(Modified) :2022.07.24** + +**大小(Size):126kb** + +**框架(Framework): TensorFlow 1.15.0** + +**模型格式(Model Format):ckpt** + +**精度(Precision): Mixed** + +**处理器(Processor):昇腾910** + +**应用级别(Categories): Research** + +**描述(Description):基于TensorFlow框架的Polygen网络训练代码** + +

+<h2 id="概述.md">概述</h2>
+
+PolyGen是三维网格的生成模型,可顺序输出网格顶点和面。PolyGen由两部分组成:一个是顶点模型,它无条件地对网格顶点进行建模,另一个是面模型,它对以输入顶点为条件的网格面进行建模。顶点模型使用一个masked Transformer解码器来表示顶点序列上的分布。对于面模型,PolyGen将Transformer与pointer network相结合,以表示可变长度顶点序列上的分布。
+
+- 参考论文:
+
+    [[2002.10880\] PolyGen: An Autoregressive Generative Model of 3D Meshes (arxiv.org)](https://arxiv.org/abs/2002.10880)
+
+- 参考实现:
+  [https://github.com/deepmind/deepmind-research/tree/master/polygen](https://github.com/deepmind/deepmind-research/tree/master/polygen)
+
+- 适配昇腾 AI 处理器的实现:
+  https://gitee.com/ascend/ModelZoo-TensorFlow/tree/master/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow
+
+
+- 通过Git获取对应commit\_id的代码方法如下:
+
+    ```
+    git clone {repository_url}        # 克隆仓库的代码
+    cd {repository_name}              # 切换到模型的代码仓目录
+    git checkout {branch}             # 切换到对应分支
+    git reset --hard {commit_id}      # 代码设置到对应的commit_id
+    cd {code_path}                    # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换
+    ```
+
+## 默认配置
+
+
+- 训练超参
+  * Batch size:1
+  * Training step:5000
+  * Learning rate: 5e-4
+
+- 编码器与解码器结构超参
+
+  - dropout rate:0
+  - number of layers: 3
+  - hidden layer: 128
+  - fc layer: 512
+
+## 支持特性
+| 特性列表 | 是否支持 |
+|-------|------|
+| 分布式训练 | 否 |
+| 混合精度 | 是 |
+| 数据并行 | 否 |
+
+## 混合精度训练
+昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。
+
+## 开启混合精度
+``` python
+config_proto = tf.ConfigProto()
+custom_op = config_proto.graph_options.rewrite_options.custom_optimizers.add()
+custom_op.name = 'NpuOptimizer'
+custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
+config = npu_config_proto(config_proto=config_proto)
+with tf.Session(config=config) as sess:
+```
+
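+## 模型接口示例(参考)
+
+结合上文对顶点模型与面模型的描述,下面给出一个基于本仓库 `data_utils.py` 与 `modules.py` 公开接口的最小调用示意。注意这只是一个参考草图:其中的网格路径与各项网络超参数均为示例取值,并非 train.py 中的实际配置;`data_utils.py` 顶部会导入 npu_bridge,因此需在 NPU 环境中运行。
+
+``` python
+import tensorflow.compat.v1 as tf
+
+import data_utils   # 内部依赖 npu_bridge,需在 NPU 环境中运行
+import modules
+
+# 读取并预处理 .obj 网格:居中、归一化、8-bit 量化并展平面序列
+mesh, _ = data_utils.load_process_mesh('meshes/cube.obj', quantization_bits=8)
+
+ds = tf.data.Dataset.from_tensors(mesh)
+vertex_ds = data_utils.make_vertex_model_dataset(ds)
+face_ds = data_utils.make_face_model_dataset(ds)
+
+# 顶点模型:masked Transformer 解码器,对展平后的顶点序列建模
+vertex_model = modules.VertexModel(
+    decoder_config={'num_layers': 3, 'hidden_size': 128, 'fc_size': 512},
+    class_conditional=False,
+    num_classes=55,               # 示例值
+    max_num_input_verts=250,      # 示例值,实际取值以 train.py 为准
+    quantization_bits=8,
+    use_discrete_embeddings=True)
+
+# 面模型:Transformer 与 pointer network 结合,以顶点为条件对面序列建模
+face_model = modules.FaceModel(
+    encoder_config={'num_layers': 3, 'hidden_size': 128, 'fc_size': 512},
+    decoder_config={'num_layers': 3, 'hidden_size': 128, 'fc_size': 512},
+    class_conditional=False,
+    max_seq_length=500,           # 示例值
+    decoder_cross_attention=True,
+    use_discrete_vertex_embeddings=True,
+    quantization_bits=8)
+
+vertex_batch = vertex_ds.batch(1).make_one_shot_iterator().get_next()
+face_batch = face_ds.batch(1).make_one_shot_iterator().get_next()
+
+# 前向计算得到预测分布(用法与 model_test.py 一致),其负对数似然即为训练损失
+vertex_dist = vertex_model(vertex_batch, is_training=False)
+face_dist = face_model(face_batch, is_training=False)
+
+with tf.Session() as sess:   # NPU 上的 session 配置参见上文"开启混合精度"
+    sess.run(tf.global_variables_initializer())
+    sess.run([vertex_dist.logits, face_dist.logits])
+```
+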

+<h2 id="训练环境准备.md">训练环境准备</h2>
+1. 准备裸机环境
+
+   Atlas服务器包含昇腾AI处理器,可用于模型训练,训练前请参考《[CANN软件安装指南](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/51RC2alpha007/softwareinstall/instg/atlasdeploy_03_0002.html)》进行环境搭建。除OS依赖、固件、驱动、CANN等软件包之外,用户还需参考文档在裸机环境中安装TensorFlow框架相关模块。
+
+2. Requirements
+```
+python==3.7.5
+dm-sonnet==1.36
+numpy==1.18.0
+tensor2tensor==1.14
+tensorboard==1.15.0
+tensorflow==1.15.0
+```
+
+

+<h2 id="快速上手.md">快速上手</h2>
+
+## 数据集准备
+在训练脚本中已指定数据集路径,可正常使用。
+
+## 模型训练
+- 选择合适的下载方式下载源码与数据集,并上传到裸机环境。
+
+- 启动训练之前,首先要配置程序运行相关环境变量。
+
+  环境变量配置信息参见:
+
+  [Ascend 910训练平台环境变量设置 - Wiki - Gitee.com](https://gitee.com/ascend/modelzoo/wikis/其他案例/Ascend 910训练平台环境变量设置)
+
+
+- 单卡训练
+
+  1. 配置训练参数。
+
+     首先在脚本test/train_full_1p.sh中,配置training_steps、precision_mode等参数,请用户根据实际路径配置data_path,或者在启动训练的命令行中以参数形式下发。
+
+     ```
+     training_steps=5000
+     data_path="../meshes"
+     ```
+
+  2. 启动训练。
+
+     启动单卡训练(脚本为 Polygen_ID2061_for_TensorFlow/test/train_full_1p.sh)
+
+     ```
+     bash train_full_1p.sh --data_path=../meshes --training_steps=5000 --precision_mode=mix
+     ```
+
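+## 采样与网格导出(参考)
+
+训练结束后,可参照如下示意代码从 checkpoint 恢复权重、采样顶点与面,并导出为 .obj 文件。注意这只是一个假设性的草图:checkpoint 路径(./output)、变量作用域、采样长度与模型超参数均为假设,`sample()` 返回值的具体字段与形状请以 modules.py / train.py 的实现为准。
+
+``` python
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+import data_utils
+import modules
+
+# 模型结构须与训练(checkpoint)保持一致,以下超参数仅为示例
+cfg = {'num_layers': 3, 'hidden_size': 128, 'fc_size': 512}
+vertex_model = modules.VertexModel(
+    decoder_config=cfg, class_conditional=False, num_classes=55,
+    max_num_input_verts=250, quantization_bits=8, use_discrete_embeddings=True)
+face_model = modules.FaceModel(
+    encoder_config=cfg, decoder_config=cfg, class_conditional=False,
+    max_seq_length=500, decoder_cross_attention=True,
+    use_discrete_vertex_embeddings=True, quantization_bits=8)
+
+# 顶点采样:假设 class_conditional=False 时 context 可以为 None
+v_samples = vertex_model.sample(4, max_sample_length=250, context=None)
+
+# 面模型采样的上下文通过 placeholder 喂入(与 model_test.py 的做法一致)
+verts_ph = tf.placeholder(tf.float32, [None, None, 3])
+mask_ph = tf.placeholder(tf.float32, [None, None])
+f_samples = face_model.sample(
+    {'vertices': verts_ph, 'vertices_mask': mask_ph}, max_sample_length=500)
+
+with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+    # 假设 train.py 将 checkpoint 保存在 ./output,且变量作用域与此处一致
+    tf.train.Saver().restore(sess, tf.train.latest_checkpoint('./output'))
+
+    v_np = sess.run(v_samples)
+    # 采样得到的顶点为量化后的整数坐标(取值范围见 model_test.py),先反量化
+    verts = data_utils.dequantize_verts(v_np['vertices'], n_bits=8)
+    # 为简化示例,这里未按采样停止位截断顶点,实际使用时应结合返回的掩码截断
+    mask = np.ones(verts.shape[:2], dtype=np.float32)
+
+    f_np = sess.run(f_samples, {verts_ph: verts, mask_ph: mask})
+
+    # 展开面序列并将第一个样本写出为 .obj(write_obj 来自 data_utils.py)
+    faces = data_utils.unflatten_faces(np.array(f_np['faces'][0]))
+    data_utils.write_obj(verts[0], faces, 'sample_0.obj')
+```
+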

+<h2 id="训练结果.md">训练结果</h2>
+ +## 结果比对 + +精度结果对比 + +| Platform | Loss(vertices) | Loss(faces) | +| ------------------- | -------------- | ----------- | +| GPU | 0.01837750 | 0.01971974 | +| NPU(不加混合精度) | 0.01822554 | 0.01276514 | +| NPU(加混合精度) | 0.07918512 | 0.04801641 | + +性能结果比对 + +Platform | second per step | TimeToTrain(5000 steps) +--- | --- | --- +GPU | 1.4925 seconds | 124min 22s +NPU(不加混合精度) | 2.2745 seconds | 189min 32s +NPU(加混合精度) | 0.1096 seconds | 9min 48s + + + +

+<h2 id="高级参考.md">高级参考</h2>
+ +## 脚本和示例代码 + +```bash +Polygen +└─ + ├─meshes 数据集 + | ├─cone.obj + | ├─cube.obj + | ├─cylinder.obj + | ├─icosphere.obj + ├─test 测试脚本 + | ├─model_test.sh + | ├─train_full_1p.sh + ├─data_utils.py + ├─model_test.py 模型测试代码 + ├─modelzoo_level.txt + ├─module.py 自定义网络模型 + ├─README.md + ├─train.py 执行训练主函数 + ├─requirements.txt 依赖需求 +``` + +## 脚本参数 + +| 参数 | 默认值 | 说明| +|---| ---|---| +|--training_steps|5000|number of training steps| +|--precision_mode|mix|开启混合精度| +|--data_path|"../meshes"|设置数据集路径| + +## 训练过程 + +1. 通过“模型训练”中的训练指令启动单卡训练。 + +2. 参考脚本的模型存储路径为./output。 + + + + + diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/check_result.tf.json b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/check_result.tf.json new file mode 100644 index 000000000..cee769ce3 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/check_result.tf.json @@ -0,0 +1,40 @@ +{ + "op": [ + { + "is_support": false, + "name": "IteratorGetNext", + "not_support_reason": { + "code": 1, + "message": "This op is not exsit on npu." + }, + "type": "IteratorGetNext" + }, + { + "is_support": false, + "name": "IteratorGetNext_1", + "not_support_reason": { + "code": 1, + "message": "This op is not exsit on npu." + }, + "type": "IteratorGetNext" + }, + { + "is_support": false, + "name": "IteratorGetNext_2", + "not_support_reason": { + "code": 1, + "message": "This op is not exsit on npu." + }, + "type": "IteratorGetNext" + }, + { + "is_support": false, + "name": "OptimizeDataset/TensorDataset", + "not_support_reason": { + "code": 1, + "message": "This op is not exsit on npu." + }, + "type": "TensorDataset" + } + ] +} \ No newline at end of file diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/data_utils.py b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/data_utils.py new file mode 100644 index 000000000..d0a760d0e --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/data_utils.py @@ -0,0 +1,462 @@ +# Copyright 2020 Deepmind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mesh data utilities.""" +from npu_bridge.npu_init import * +import matplotlib.pyplot as plt +import modules +from mpl_toolkits import mplot3d # pylint: disable=unused-import +from mpl_toolkits.mplot3d.art3d import Poly3DCollection +import networkx as nx +import numpy as np +import six +from six.moves import range +import tensorflow as tf +import tensorflow.compat.v1 as tf +import tensorflow_probability as tfp + +tfd = tfp.distributions + +class_dict = {'02691156': 0, '02747177': 1, '02773838': 2, '02801938': 3, '02808440': 4, '02818832': 5, '02828884': 6, + '02843684': 7, '02871439': 8, '02876657': 9, '02880940': 10, '02924116': 11, '02933112': 12, + '02942699': 13, '02946921': 14, '02954340': 15, '02958343': 16, '02992529': 17, '03001627': 18, + '03046257': 19, '03085013': 20, '03207941': 21, '03211117': 22, '03261776': 23, '03325088': 24, + '03337140': 25, '03467517': 26, '03513137': 27, '03593526': 28, '03624134': 29, '03636649': 30, + '03642806': 31, '03691459': 32, '03710193': 33, '03759954': 34, '03761084': 35, '03790512': 36, + '03797390': 37, '03928116': 38, '03938244': 39, '03948459': 40, '03991062': 41, + '04004475': 42, '04074963': 43, '04090263': 44, '04099429': 45, '04225987': 46, + '04256520': 47, '04330267': 48, '04379243': 49, '04401088': 50, '04460130': 51, '04468005': 52, + '04530566': 53, '04554684': 54} + + +def random_shift(vertices, shift_factor=0.25): + """Apply random shift to vertices.""" + max_shift_pos = tf.cast(255 - tf.reduce_max(vertices, axis=0), tf.float32) + max_shift_pos = tf.maximum(max_shift_pos, 1e-9) + + max_shift_neg = tf.cast(tf.reduce_min(vertices, axis=0), tf.float32) + max_shift_neg = tf.maximum(max_shift_neg, 1e-9) + + shift = tfd.TruncatedNormal( + tf.zeros([1, 3]), shift_factor * 255, -max_shift_neg, + max_shift_pos).sample() + shift = tf.cast(shift, tf.int32) + vertices += shift + return vertices + + +def make_vertex_model_dataset(ds, apply_random_shift=False): + """Prepare dataset for vertex model training.""" + + def _vertex_model_map_fn(example): + vertices = example['vertices'] + + # Randomly shift vertices + if apply_random_shift: + vertices = random_shift(vertices) + + # Re-order vertex coordinates as (z, y, x). + vertices_permuted = tf.stack( + [vertices[:, 2], vertices[:, 1], vertices[:, 0]], axis=-1) + + # Flatten quantized vertices, reindex starting from 1, and pad with a + # zero stopping token. + vertices_flat = tf.reshape(vertices_permuted, [-1]) + example['vertices_flat'] = tf.pad(vertices_flat + 1, [[0, 1]]) + + # Create mask to indicate valid tokens after padding and batching. 
+ example['vertices_flat_mask'] = tf.ones_like( + example['vertices_flat'], dtype=tf.float32) + return example + + return ds.map(_vertex_model_map_fn) + + +def make_face_model_dataset( + ds, apply_random_shift=False, shuffle_vertices=True, quantization_bits=8): + """Prepare dataset for face model training.""" + + def _face_model_map_fn(example): + vertices = example['vertices'] + + # Randomly shift vertices + if apply_random_shift: + vertices = random_shift(vertices) + example['num_vertices'] = tf.shape(vertices)[0] + + # Optionally shuffle vertices and re-order faces to match + if shuffle_vertices: + permutation = tf.random_shuffle(tf.range(example['num_vertices'])) + vertices = tf.gather(vertices, permutation) + face_permutation = tf.concat( + [tf.constant([0, 1], dtype=tf.int32), tf.argsort(permutation) + 2], + axis=0) + example['faces'] = tf.cast( + tf.gather(face_permutation, example['faces']), tf.int64) + + # Vertices are quantized. So convert to floats for input to face model + example['vertices'] = modules.dequantize_verts(vertices, quantization_bits) + example['vertices_mask'] = tf.ones_like( + example['vertices'][..., 0], dtype=tf.float32) + example['faces_mask'] = tf.ones_like(example['faces'], dtype=tf.float32) + return example + + return ds.map(_face_model_map_fn) + + +def read_obj(obj_path): + """Read vertices and faces from .obj file.""" + vertex_list = [] + flat_vertices_list = [] + flat_vertices_indices = {} + flat_triangles = [] + + with open(obj_path) as obj_file: + for line in obj_file: + tokens = line.split() + if not tokens: + continue + line_type = tokens[0] + # We skip lines not starting with v or f. + if line_type == 'v': + vertex_list.append([float(x) for x in tokens[1:]]) + elif line_type == 'f': + triangle = [] + for i in range(len(tokens) - 1): + vertex_name = tokens[i + 1].split('/')[0] + if vertex_name in flat_vertices_indices: + triangle.append(flat_vertices_indices[vertex_name]) + continue + flat_vertex = [] + for index in six.ensure_str(vertex_name).split('/'): + if not index: + continue + # obj triangle indices are 1 indexed, so subtract 1 here. + flat_vertex += vertex_list[int(index) - 1] + flat_vertex_index = len(flat_vertices_list) + flat_vertices_list.append(flat_vertex) + flat_vertices_indices[vertex_name] = flat_vertex_index + triangle.append(flat_vertex_index) + flat_triangles.append(triangle) + + return np.array(flat_vertices_list, dtype=np.float32), flat_triangles + + +def write_obj(vertices, faces, file_path, transpose=True, scale=1.): + """Write vertices and faces to obj.""" + if transpose: + vertices = vertices[:, [1, 2, 0]] + vertices *= scale + if faces is not None: + if min(min(faces)) == 0: + f_add = 1 + else: + f_add = 0 + with open(file_path, 'w') as f: + for v in vertices: + f.write('v {} {} {}\n'.format(v[0], v[1], v[2])) + for face in faces: + line = 'f' + for i in face: + line += ' {}'.format(i + f_add) + line += '\n' + f.write(line) + + +def quantize_verts(verts, n_bits=8): + """Convert vertices in [-1., 1.] 
to discrete values in [0, n_bits**2 - 1].""" + min_range = -0.5 + max_range = 0.5 + range_quantize = 2 ** n_bits - 1 + verts_quantize = (verts - min_range) * range_quantize / ( + max_range - min_range) + return verts_quantize.astype('int32') + + +def dequantize_verts(verts, n_bits=8, add_noise=False): + """Convert quantized vertices to floats.""" + min_range = -0.5 + max_range = 0.5 + range_quantize = 2 ** n_bits - 1 + verts = verts.astype('float32') + verts = verts * (max_range - min_range) / range_quantize + min_range + if add_noise: + verts += np.random.uniform(size=verts.shape) * (1 / range_quantize) + return verts + + +def face_to_cycles(face): + """Find cycles in face.""" + g = nx.Graph() + for v in range(len(face) - 1): + g.add_edge(face[v], face[v + 1]) + g.add_edge(face[-1], face[0]) + return list(nx.cycle_basis(g)) + + +def flatten_faces(faces): + """Converts from list of faces to flat face array with stopping indices.""" + if not faces: + return np.array([0]) + else: + l = [f + [-1] for f in faces[:-1]] + l += [faces[-1] + [-2]] + return np.array([item for sublist in l for item in sublist]) + 2 # pylint: disable=g-complex-comprehension + + +def unflatten_faces(flat_faces): + """Converts from flat face sequence to a list of separate faces.""" + + def group(seq): + g = [] + for el in seq: + if el == 0 or el == -1: + yield g + g = [] + else: + g.append(el - 1) + yield g + + outputs = list(group(flat_faces - 1))[:-1] + # Remove empty faces + return [o for o in outputs if len(o) > 2] + + +def center_vertices(vertices): + """Translate the vertices so that bounding box is centered at zero.""" + vert_min = vertices.min(axis=0) + vert_max = vertices.max(axis=0) + vert_center = 0.5 * (vert_min + vert_max) + return vertices - vert_center + + +def normalize_vertices_scale(vertices): + """Scale the vertices so that the long diagonal of the bounding box is one.""" + vert_min = vertices.min(axis=0) + vert_max = vertices.max(axis=0) + extents = vert_max - vert_min + scale = np.sqrt(np.sum(extents ** 2)) + return vertices / scale + + +def quantize_process_mesh(vertices, faces, tris=None, quantization_bits=8): + """Quantize vertices, remove resulting duplicates and reindex faces.""" + vertices = quantize_verts(vertices, quantization_bits) + vertices, inv = np.unique(vertices, axis=0, return_inverse=True) + + # Sort vertices by z then y then x. + sort_inds = np.lexsort(vertices.T) + vertices = vertices[sort_inds] + + # Re-index faces and tris to re-ordered vertices. + faces = [np.argsort(sort_inds)[inv[f]] for f in faces] + if tris is not None: + tris = np.array([np.argsort(sort_inds)[inv[t]] for t in tris]) + + # Merging duplicate vertices and re-indexing the faces causes some faces to + # contain loops (e.g [2, 3, 5, 2, 4]). Split these faces into distinct + # sub-faces. + sub_faces = [] + for f in faces: + cliques = face_to_cycles(f) + for c in cliques: + c_length = len(c) + # Only append faces with more than two verts. + if c_length > 2: + d = np.argmin(c) + # Cyclically permute faces just that first index is the smallest. + sub_faces.append([c[(d + i) % c_length] for i in range(c_length)]) + faces = sub_faces + if tris is not None: + tris = np.array([v for v in tris if len(set(v)) == len(v)]) + + # Sort faces by lowest vertex indices. If two faces have the same lowest + # index then sort by next lowest and so on. 
+ faces.sort(key=lambda f: tuple(sorted(f))) + if tris is not None: + tris = tris.tolist() + tris.sort(key=lambda f: tuple(sorted(f))) + tris = np.array(tris) + + # After removing degenerate faces some vertices are now unreferenced. + # Remove these. + num_verts = vertices.shape[0] + vert_connected = np.equal( + np.arange(num_verts)[:, None], np.hstack(faces)[None]).any(axis=-1) + vertices = vertices[vert_connected] + + # Re-index faces and tris to re-ordered vertices. + vert_indices = ( + np.arange(num_verts) - np.cumsum(1 - vert_connected.astype('int'))) + faces = [vert_indices[f].tolist() for f in faces] + if tris is not None: + tris = np.array([vert_indices[t].tolist() for t in tris]) + + return vertices, faces, tris + + +def load_process_mesh(mesh_obj_path, quantization_bits=8): + """Load obj file and process.""" + # Load mesh + vertices, faces = read_obj(mesh_obj_path) + # Transpose so that z-axis is vertical. + vertices = vertices[:, [2, 0, 1]] + + # Translate the vertices so that bounding box is centered at zero. + vertices = center_vertices(vertices) + + # Scale the vertices so that the long diagonal of the bounding box is equal + # to one. + vertices = normalize_vertices_scale(vertices) + + # Quantize and sort vertices, remove resulting duplicates, sort and reindex + # faces. + vertices, faces, _ = quantize_process_mesh( + vertices, faces, quantization_bits=quantization_bits) + + # Flatten faces and add 'new face' = 1 and 'stop' = 0 tokens. + faces = flatten_faces(faces) + # Discard degenerate meshes without faces. + if len(vertices) >= 800 or len(faces) >= 2800: + return { + 'vertices': vertices, + 'faces': faces, + }, False + else: + return { + 'vertices': vertices, + 'faces': faces, + }, True + + +def plot_meshes(mesh_list, + ax_lims=0.3, + fig_size=4, + el=30, + rot_start=120, + vert_size=10, + vert_alpha=0.75, + n_cols=4): + """Plots mesh data using matplotlib.""" + + n_plot = len(mesh_list) + n_cols = np.minimum(n_plot, n_cols) + n_rows = np.ceil(n_plot / n_cols).astype('int') + fig = plt.figure(figsize=(fig_size * n_cols, fig_size * n_rows)) + for p_inc, mesh in enumerate(mesh_list): + + for key in [ + 'vertices', 'faces', 'vertices_conditional', 'pointcloud', 'class_name' + ]: + if key not in list(mesh.keys()): + mesh[key] = None + + ax = fig.add_subplot(n_rows, n_cols, p_inc + 1, projection='3d') + + if mesh['faces'] is not None: + if mesh['vertices_conditional'] is not None: + face_verts = np.concatenate( + [mesh['vertices_conditional'], mesh['vertices']], axis=0) + else: + face_verts = mesh['vertices'] + collection = [] + for f in mesh['faces']: + collection.append(face_verts[f]) + plt_mesh = Poly3DCollection(collection) + plt_mesh.set_edgecolor((0., 0., 0., 0.3)) + plt_mesh.set_facecolor((1, 0, 0, 0.2)) + ax.add_collection3d(plt_mesh) + + if mesh['vertices'] is not None: + ax.scatter3D( + mesh['vertices'][:, 0], + mesh['vertices'][:, 1], + mesh['vertices'][:, 2], + lw=0., + s=vert_size, + c='g', + alpha=vert_alpha) + + if mesh['vertices_conditional'] is not None: + ax.scatter3D( + mesh['vertices_conditional'][:, 0], + mesh['vertices_conditional'][:, 1], + mesh['vertices_conditional'][:, 2], + lw=0., + s=vert_size, + c='b', + alpha=vert_alpha) + + if mesh['pointcloud'] is not None: + ax.scatter3D( + mesh['pointcloud'][:, 0], + mesh['pointcloud'][:, 1], + mesh['pointcloud'][:, 2], + lw=0., + s=2.5 * vert_size, + c='b', + alpha=1.) 
+ + ax.set_xlim(-ax_lims, ax_lims) + ax.set_ylim(-ax_lims, ax_lims) + ax.set_zlim(-ax_lims, ax_lims) + + ax.view_init(el, rot_start) + + display_string = '' + if mesh['faces'] is not None: + display_string += 'Num. faces: {}\n'.format(len(collection)) + if mesh['vertices'] is not None: + num_verts = mesh['vertices'].shape[0] + if mesh['vertices_conditional'] is not None: + num_verts += mesh['vertices_conditional'].shape[0] + display_string += 'Num. verts: {}\n'.format(num_verts) + if mesh['class_name'] is not None: + display_string += 'Synset: {}'.format(mesh['class_name']) + if mesh['pointcloud'] is not None: + display_string += 'Num. pointcloud: {}\n'.format( + mesh['pointcloud'].shape[0]) + ax.text2D(0.05, 0.8, display_string, transform=ax.transAxes) + plt.subplots_adjust( + left=0., right=1., bottom=0., top=1., wspace=0.025, hspace=0.025) + plt.show() + diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/fusion_result.json b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/fusion_result.json new file mode 100644 index 000000000..5938bdf61 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/fusion_result.json @@ -0,0 +1,661 @@ +{ + "graph_fusion": { + "MulAddFusionPass": { + "effect_times": "0", + "match_times": "152" + }, + "MulSquareFusionPass": { + "effect_times": "0", + "match_times": "76" + } + }, + "session_and_graph_id": "0_1", + "ub_fusion": { + "AutomaticUbFusion": { + "effect_times": "76", + "match_times": "76" + } + } +}{ + "graph_fusion": { + "AAMatMulNzToNdFusionPass": { + "effect_times": "0", + "match_times": "112" + }, + "AReduceMeanFusionPass": { + "effect_times": "0", + "match_times": "48" + }, + "AReduceSumFusionPass": { + "effect_times": "0", + "match_times": "4" + }, + "ASoftmaxFusionPass": { + "effect_times": "0", + "match_times": "12" + }, + "BatchMatMulFusionPass": { + "effect_times": "12", + "match_times": "93" + }, + "BatchMatMulV2ReduceFusionPass": { + "effect_times": "0", + "match_times": "25" + }, + "BatchMatMulV2ReshapeFusionPass": { + "effect_times": "0", + "match_times": "25" + }, + "CastRemoveFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "ConfusionTransposeNzFusionPass": { + "effect_times": "0", + "match_times": "18" + }, + "ConstToAttrGatherV2Fusion": { + "effect_times": "0", + "match_times": "9" + }, + "ConstToAttrPass": { + "effect_times": "120", + "match_times": "120" + }, + "ConstToAttrReduceSumFusion": { + "effect_times": "4", + "match_times": "4" + }, + "ConstToAttrStridedSliceFusion": { + "effect_times": "11", + "match_times": "11" + }, + "ConvConcatFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "ConvWeightCompressFusionPass": { + "effect_times": "0", + "match_times": "68" + }, + "FIXPIPEAPREQUANTFUSIONPASS": { + "effect_times": "0", + "match_times": "93" + }, + "FIXPIPEFUSIONPASS": { + "effect_times": "0", + "match_times": "93" + }, + "ForceFp16CastFusionPass": { + "effect_times": "1", + "match_times": "3" + }, + "LayerNormFusionPass": { + "effect_times": "0", + "match_times": "24" + }, + "MatMulReshapeBiasAddFusionPass": { + "effect_times": "20", + "match_times": "20" + }, + "MatMulV2FusionPass": { + "effect_times": "0", + "match_times": "20" + }, + "MulAddFusionPass": { + "effect_times": "0", + "match_times": "89" + }, + "MulGradFusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "MulSquareFusionPass": { + "effect_times": "0", + "match_times": "95" + }, + "PadV2FusionPass": { + "effect_times": "1", + "match_times": "1" + }, + 
"RefreshInt64ToInt32FusionPass": { + "effect_times": "1", + "match_times": "1" + }, + "ReshapeTransposeFusionPass": { + "effect_times": "12", + "match_times": "36" + }, + "SoftmaxFusionPass": { + "effect_times": "0", + "match_times": "12" + }, + "SparseSoftMaxFusionPass": { + "effect_times": "2", + "match_times": "2" + }, + "SplitConvConcatFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "StridedSliceRemovePass": { + "effect_times": "0", + "match_times": "11" + }, + "SubFusionPass": { + "effect_times": "0", + "match_times": "27" + }, + "TileConstToAttrFusion": { + "effect_times": "2", + "match_times": "2" + }, + "TransdataCastFusionPass": { + "effect_times": "0", + "match_times": "252" + }, + "TransposeReshapeFusionPass": { + "effect_times": "6", + "match_times": "12" + }, + "TransposedUpdateFusionPass": { + "effect_times": "42", + "match_times": "42" + }, + "ZConcatExt2FusionPass": { + "effect_times": "3", + "match_times": "3" + }, + "ZConcatv2dFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "ZReduceMeanVarianceFusionPass": { + "effect_times": "0", + "match_times": "24" + }, + "softmaxTransFusionPass": { + "effect_times": "0", + "match_times": "12" + } + }, + "session_and_graph_id": "0_11", + "ub_fusion": { + "AutomaticUbFusion": { + "effect_times": "34", + "match_times": "34" + }, + "BatchMatmulConfusiontransposeUbFusion": { + "effect_times": "0", + "match_times": "6" + }, + "MatmulConfusiontransposeUbFusion": { + "effect_times": "0", + "match_times": "12" + }, + "MatmulTransdataFusionPass": { + "effect_times": "16", + "match_times": "16" + }, + "TbeEltwiseCastFusionPass": { + "effect_times": "21", + "match_times": "22" + }, + "TbeEltwiseFusionPass": { + "effect_times": "24", + "match_times": "24" + }, + "TbeFullyconnectionElemwiseDequantFusionPass": { + "effect_times": "1", + "match_times": "1" + }, + "TbeMultiOutputFusionPass": { + "effect_times": "1", + "match_times": "1" + } + } +}{ + "graph_fusion": { + "AAMatMulNzToNdFusionPass": { + "effect_times": "0", + "match_times": "172" + }, + "AReduceAllFusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "AReduceAnyFusionPass": { + "effect_times": "0", + "match_times": "4" + }, + "AReduceMaxFusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "AReduceMeanFusionPass": { + "effect_times": "0", + "match_times": "48" + }, + "AReduceProdFusionPass": { + "effect_times": "45", + "match_times": "90" + }, + "AReduceSumFusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "ASoftmaxFusionPass": { + "effect_times": "0", + "match_times": "14" + }, + "ArgMaxV2FusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "BatchMatMulFusionPass": { + "effect_times": "4", + "match_times": "93" + }, + "BatchMatMulV2ReduceFusionPass": { + "effect_times": "0", + "match_times": "25" + }, + "BatchMatMulV2ReshapeFusionPass": { + "effect_times": "0", + "match_times": "25" + }, + "CastRemoveFusionPass": { + "effect_times": "0", + "match_times": "12" + }, + "ConstToAttrGatherV2Fusion": { + "effect_times": "0", + "match_times": "101" + }, + "ConstToAttrPass": { + "effect_times": "4", + "match_times": "185" + }, + "ConstToAttrReduceSumFusion": { + "effect_times": "2", + "match_times": "2" + }, + "ConstToAttrStridedSliceFusion": { + "effect_times": "127", + "match_times": "145" + }, + "ConvConcatFusionPass": { + "effect_times": "0", + "match_times": "66" + }, + "ConvWeightCompressFusionPass": { + "effect_times": "0", + "match_times": "68" + }, + "FIXPIPEAPREQUANTFUSIONPASS": { + 
"effect_times": "0", + "match_times": "109" + }, + "FIXPIPEFUSIONPASS": { + "effect_times": "0", + "match_times": "109" + }, + "ForceFp16CastFusionPass": { + "effect_times": "2", + "match_times": "12" + }, + "MatMulReshapeBiasAddFusionPass": { + "effect_times": "0", + "match_times": "20" + }, + "MulAddFusionPass": { + "effect_times": "0", + "match_times": "85" + }, + "MulSquareFusionPass": { + "effect_times": "0", + "match_times": "97" + }, + "PackFusionPass": { + "effect_times": "0", + "match_times": "134" + }, + "PadFusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "PadV2FusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "RangeFusionPass": { + "effect_times": "0", + "match_times": "5" + }, + "RefreshInt64ToInt32FusionPass": { + "effect_times": "1", + "match_times": "1" + }, + "ReshapeTransposeFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "SoftmaxFusionPass": { + "effect_times": "0", + "match_times": "14" + }, + "SplitConvConcatFusionPass": { + "effect_times": "0", + "match_times": "66" + }, + "StridedSliceRemovePass": { + "effect_times": "1", + "match_times": "145" + }, + "SubFusionPass": { + "effect_times": "0", + "match_times": "33" + }, + "TileConstToAttrFusion": { + "effect_times": "0", + "match_times": "4" + }, + "TopKFusionPass": { + "effect_times": "0", + "match_times": "2" + }, + "TransdataCastFusionPass": { + "effect_times": "0", + "match_times": "344" + }, + "TransposedUpdateFusionPass": { + "effect_times": "3", + "match_times": "3" + }, + "ZConcatExt2FusionPass": { + "effect_times": "66", + "match_times": "66" + }, + "ZConcatv2dFusionPass": { + "effect_times": "0", + "match_times": "66" + }, + "softmaxTransFusionPass": { + "effect_times": "0", + "match_times": "14" + } + }, + "session_and_graph_id": "0_21", + "ub_fusion": { + "AutomaticUbFusion": { + "effect_times": "2", + "match_times": "2" + }, + "TbeDynamicElemwiseBroadcastFusionPass": { + "effect_times": "76", + "match_times": "76" + }, + "TbeDynamicElemwiseReduceFusionPass": { + "effect_times": "2", + "match_times": "2" + } + } +}{ + "graph_fusion": { + "AAMatMulNzToNdFusionPass": { + "effect_times": "0", + "match_times": "261" + }, + "AReduceMeanFusionPass": { + "effect_times": "0", + "match_times": "48" + }, + "AReduceSumFusionPass": { + "effect_times": "227", + "match_times": "386" + }, + "ASoftmaxFusionPass": { + "effect_times": "0", + "match_times": "12" + }, + "AddNFusionPass": { + "effect_times": "0", + "match_times": "59" + }, + "ApplyAddOutputPass": { + "effect_times": "167", + "match_times": "167" + }, + "BatchMatMulFusionPass": { + "effect_times": "0", + "match_times": "279" + }, + "BatchMatMulV2ReduceFusionPass": { + "effect_times": "0", + "match_times": "75" + }, + "BatchMatMulV2ReshapeFusionPass": { + "effect_times": "0", + "match_times": "75" + }, + "CastCastFusionPass": { + "effect_times": "0", + "match_times": "20" + }, + "CastRemoveFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "ConfusionTransposeNzFusionPass": { + "effect_times": "0", + "match_times": "30" + }, + "ConstToAttrGatherV2Fusion": { + "effect_times": "0", + "match_times": "9" + }, + "ConstToAttrPass": { + "effect_times": "367", + "match_times": "374" + }, + "ConstToAttrReduceSumFusion": { + "effect_times": "159", + "match_times": "159" + }, + "ConstToAttrSliceFusion": { + "effect_times": "0", + "match_times": "6" + }, + "ConstToAttrStridedSliceFusion": { + "effect_times": "11", + "match_times": "11" + }, + "ConvConcatFusionPass": { + "effect_times": "0", + "match_times": 
"3" + }, + "ConvWeightCompressFusionPass": { + "effect_times": "0", + "match_times": "204" + }, + "DreluFusionPass": { + "effect_times": "0", + "match_times": "9" + }, + "FCTransdataMergePass": { + "effect_times": "2", + "match_times": "2" + }, + "FIXPIPEAPREQUANTFUSIONPASS": { + "effect_times": "0", + "match_times": "279" + }, + "FIXPIPEFUSIONPASS": { + "effect_times": "0", + "match_times": "279" + }, + "ForceFp16CastFusionPass": { + "effect_times": "1", + "match_times": "3" + }, + "MatMulReshapeBiasAddFusionPass": { + "effect_times": "20", + "match_times": "20" + }, + "MatMulV2FusionPass": { + "effect_times": "0", + "match_times": "20" + }, + "MatmulCastFusionPass": { + "effect_times": "77", + "match_times": "78" + }, + "MulAddFusionPass": { + "effect_times": "0", + "match_times": "89" + }, + "MulAddNL2LossFusionPass": { + "effect_times": "0", + "match_times": "25" + }, + "MulAddNPass": { + "effect_times": "0", + "match_times": "25" + }, + "MulGradFusionPass": { + "effect_times": "0", + "match_times": "105" + }, + "MulSquareFusionPass": { + "effect_times": "0", + "match_times": "329" + }, + "PadV2FusionPass": { + "effect_times": "1", + "match_times": "1" + }, + "RealDiv2MulsFusionPass": { + "effect_times": "48", + "match_times": "48" + }, + "ReshapeTransposeFusionPass": { + "effect_times": "18", + "match_times": "48" + }, + "SoftmaxFusionPass": { + "effect_times": "0", + "match_times": "12" + }, + "SoftmaxGradFusionPass": { + "effect_times": "9", + "match_times": "12" + }, + "SparseSoftMaxFusionPass": { + "effect_times": "2", + "match_times": "2" + }, + "SplitConvConcatFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "StridedSliceGradFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "StridedSliceRemovePass": { + "effect_times": "0", + "match_times": "11" + }, + "SubFusionPass": { + "effect_times": "0", + "match_times": "54" + }, + "TileConstToAttrFusion": { + "effect_times": "50", + "match_times": "50" + }, + "TransdataCastFusionPass": { + "effect_times": "0", + "match_times": "864" + }, + "TransposeReshapeFusionPass": { + "effect_times": "12", + "match_times": "48" + }, + "TransposedUpdateFusionPass": { + "effect_times": "114", + "match_times": "114" + }, + "UnsortedSegmentSumFusionPass": { + "effect_times": "0", + "match_times": "9" + }, + "ZConcatExt2FusionPass": { + "effect_times": "3", + "match_times": "3" + }, + "ZConcatv2dFusionPass": { + "effect_times": "0", + "match_times": "3" + }, + "ZConfusionSoftmaxGradFusionPass": { + "effect_times": "3", + "match_times": "3" + }, + "ZReduceMeanVarianceFusionPass": { + "effect_times": "0", + "match_times": "24" + }, + "ZUnsortedSegmentSumUpdateFusionPass": { + "effect_times": "8", + "match_times": "8" + }, + "softmaxTransFusionPass": { + "effect_times": "0", + "match_times": "12" + } + }, + "session_and_graph_id": "0_31", + "ub_fusion": { + "AutomaticUbFusion": { + "effect_times": "77", + "match_times": "77" + }, + "BatchMatmulConfusiontransposeUbFusion": { + "effect_times": "0", + "match_times": "6" + }, + "MatmulAtomicAddUbFusion": { + "effect_times": "38", + "match_times": "38" + }, + "MatmulConfusiontransposeUbFusion": { + "effect_times": "0", + "match_times": "12" + }, + "MatmulTransdataFusionPass": { + "effect_times": "47", + "match_times": "47" + }, + "TbeEltwiseCastFusionPass": { + "effect_times": "6", + "match_times": "7" + }, + "TbeEltwiseFusionPass": { + "effect_times": "26", + "match_times": "26" + }, + "TbeFullyconnectionElemwiseDequantFusionPass": { + "effect_times": "1", + "match_times": "1" + 
}, + "TbeMultiOutputFusionPass": { + "effect_times": "49", + "match_times": "49" + }, + "TbeReduceElemwiseFusionPass": { + "effect_times": "72", + "match_times": "72" + } + } +} \ No newline at end of file diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cone.obj b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cone.obj new file mode 100644 index 000000000..c66b623d0 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cone.obj @@ -0,0 +1,66 @@ +v 0.000000 -1.000000 -1.000000 +v 0.195090 -1.000000 -0.980785 +v 0.382683 -1.000000 -0.923880 +v 0.555570 -1.000000 -0.831470 +v 0.707107 -1.000000 -0.707107 +v 0.831470 -1.000000 -0.555570 +v 0.923880 -1.000000 -0.382683 +v 0.980785 -1.000000 -0.195090 +v 1.000000 -1.000000 -0.000000 +v 0.980785 -1.000000 0.195090 +v 0.923880 -1.000000 0.382683 +v 0.831470 -1.000000 0.555570 +v 0.707107 -1.000000 0.707107 +v 0.555570 -1.000000 0.831470 +v 0.382683 -1.000000 0.923880 +v 0.195090 -1.000000 0.980785 +v -0.000000 -1.000000 1.000000 +v -0.195091 -1.000000 0.980785 +v -0.382684 -1.000000 0.923879 +v -0.555571 -1.000000 0.831469 +v -0.707107 -1.000000 0.707106 +v -0.831470 -1.000000 0.555570 +v -0.923880 -1.000000 0.382683 +v 0.000000 1.000000 0.000000 +v -0.980785 -1.000000 0.195089 +v -1.000000 -1.000000 -0.000001 +v -0.980785 -1.000000 -0.195091 +v -0.923879 -1.000000 -0.382684 +v -0.831469 -1.000000 -0.555571 +v -0.707106 -1.000000 -0.707108 +v -0.555569 -1.000000 -0.831470 +v -0.382682 -1.000000 -0.923880 +v -0.195089 -1.000000 -0.980786 +f 1 24 2 +f 2 24 3 +f 3 24 4 +f 4 24 5 +f 5 24 6 +f 6 24 7 +f 7 24 8 +f 8 24 9 +f 9 24 10 +f 10 24 11 +f 11 24 12 +f 12 24 13 +f 13 24 14 +f 14 24 15 +f 15 24 16 +f 16 24 17 +f 17 24 18 +f 18 24 19 +f 19 24 20 +f 20 24 21 +f 21 24 22 +f 22 24 23 +f 23 24 25 +f 25 24 26 +f 26 24 27 +f 27 24 28 +f 28 24 29 +f 29 24 30 +f 30 24 31 +f 31 24 32 +f 32 24 33 +f 33 24 1 +f 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 25 26 27 28 29 30 31 32 33 diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cube.obj b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cube.obj new file mode 100644 index 000000000..08cc43049 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cube.obj @@ -0,0 +1,14 @@ +v -1.000000 -1.000000 1.000000 +v -1.000000 1.000000 1.000000 +v -1.000000 -1.000000 -1.000000 +v -1.000000 1.000000 -1.000000 +v 1.000000 -1.000000 1.000000 +v 1.000000 1.000000 1.000000 +v 1.000000 -1.000000 -1.000000 +v 1.000000 1.000000 -1.000000 +f 1 2 4 3 +f 3 4 8 7 +f 7 8 6 5 +f 5 6 2 1 +f 3 7 5 1 +f 8 4 2 6 diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cylinder.obj b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cylinder.obj new file mode 100644 index 000000000..df07a4efd --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/cylinder.obj @@ -0,0 +1,98 @@ +v 0.000000 -1.000000 -1.000000 +v 0.000000 1.000000 -1.000000 +v 0.195090 -1.000000 -0.980785 +v 0.195090 1.000000 -0.980785 +v 0.382683 -1.000000 -0.923880 +v 0.382683 1.000000 -0.923880 +v 0.555570 -1.000000 -0.831470 +v 0.555570 1.000000 -0.831470 +v 0.707107 -1.000000 -0.707107 +v 0.707107 1.000000 -0.707107 +v 0.831470 -1.000000 -0.555570 +v 0.831470 1.000000 -0.555570 +v 0.923880 -1.000000 -0.382683 +v 0.923880 1.000000 -0.382683 +v 0.980785 -1.000000 -0.195090 +v 0.980785 1.000000 -0.195090 +v 1.000000 -1.000000 -0.000000 +v 1.000000 1.000000 -0.000000 +v 0.980785 -1.000000 
0.195090 +v 0.980785 1.000000 0.195090 +v 0.923880 -1.000000 0.382683 +v 0.923880 1.000000 0.382683 +v 0.831470 -1.000000 0.555570 +v 0.831470 1.000000 0.555570 +v 0.707107 -1.000000 0.707107 +v 0.707107 1.000000 0.707107 +v 0.555570 -1.000000 0.831470 +v 0.555570 1.000000 0.831470 +v 0.382683 -1.000000 0.923880 +v 0.382683 1.000000 0.923880 +v 0.195090 -1.000000 0.980785 +v 0.195090 1.000000 0.980785 +v -0.000000 -1.000000 1.000000 +v -0.000000 1.000000 1.000000 +v -0.195091 -1.000000 0.980785 +v -0.195091 1.000000 0.980785 +v -0.382684 -1.000000 0.923879 +v -0.382684 1.000000 0.923879 +v -0.555571 -1.000000 0.831469 +v -0.555571 1.000000 0.831469 +v -0.707107 -1.000000 0.707106 +v -0.707107 1.000000 0.707106 +v -0.831470 -1.000000 0.555570 +v -0.831470 1.000000 0.555570 +v -0.923880 -1.000000 0.382683 +v -0.923880 1.000000 0.382683 +v -0.980785 -1.000000 0.195089 +v -0.980785 1.000000 0.195089 +v -1.000000 -1.000000 -0.000001 +v -1.000000 1.000000 -0.000001 +v -0.980785 -1.000000 -0.195091 +v -0.980785 1.000000 -0.195091 +v -0.923879 -1.000000 -0.382684 +v -0.923879 1.000000 -0.382684 +v -0.831469 -1.000000 -0.555571 +v -0.831469 1.000000 -0.555571 +v -0.707106 -1.000000 -0.707108 +v -0.707106 1.000000 -0.707108 +v -0.555569 -1.000000 -0.831470 +v -0.555569 1.000000 -0.831470 +v -0.382682 -1.000000 -0.923880 +v -0.382682 1.000000 -0.923880 +v -0.195089 -1.000000 -0.980786 +v -0.195089 1.000000 -0.980786 +f 1 2 4 3 +f 3 4 6 5 +f 5 6 8 7 +f 7 8 10 9 +f 9 10 12 11 +f 11 12 14 13 +f 13 14 16 15 +f 15 16 18 17 +f 17 18 20 19 +f 19 20 22 21 +f 21 22 24 23 +f 23 24 26 25 +f 25 26 28 27 +f 27 28 30 29 +f 29 30 32 31 +f 31 32 34 33 +f 33 34 36 35 +f 35 36 38 37 +f 37 38 40 39 +f 39 40 42 41 +f 41 42 44 43 +f 43 44 46 45 +f 45 46 48 47 +f 47 48 50 49 +f 49 50 52 51 +f 51 52 54 53 +f 53 54 56 55 +f 55 56 58 57 +f 57 58 60 59 +f 59 60 62 61 +f 4 2 64 62 60 58 56 54 52 50 48 46 44 42 40 38 36 34 32 30 28 26 24 22 20 18 16 14 12 10 8 6 +f 61 62 64 63 +f 63 64 2 1 +f 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63 diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/icosphere.obj b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/icosphere.obj new file mode 100644 index 000000000..2e2be7864 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/meshes/icosphere.obj @@ -0,0 +1,122 @@ +v 0.000000 -1.000000 0.000000 +v 0.723607 -0.447220 0.525725 +v -0.276388 -0.447220 0.850649 +v -0.894426 -0.447216 0.000000 +v -0.276388 -0.447220 -0.850649 +v 0.723607 -0.447220 -0.525725 +v 0.276388 0.447220 0.850649 +v -0.723607 0.447220 0.525725 +v -0.723607 0.447220 -0.525725 +v 0.276388 0.447220 -0.850649 +v 0.894426 0.447216 0.000000 +v 0.000000 1.000000 0.000000 +v -0.162456 -0.850654 0.499995 +v 0.425323 -0.850654 0.309011 +v 0.262869 -0.525738 0.809012 +v 0.850648 -0.525736 0.000000 +v 0.425323 -0.850654 -0.309011 +v -0.525730 -0.850652 0.000000 +v -0.688189 -0.525736 0.499997 +v -0.162456 -0.850654 -0.499995 +v -0.688189 -0.525736 -0.499997 +v 0.262869 -0.525738 -0.809012 +v 0.951058 0.000000 0.309013 +v 0.951058 0.000000 -0.309013 +v 0.000000 0.000000 1.000000 +v 0.587786 0.000000 0.809017 +v -0.951058 0.000000 0.309013 +v -0.587786 0.000000 0.809017 +v -0.587786 0.000000 -0.809017 +v -0.951058 0.000000 -0.309013 +v 0.587786 0.000000 -0.809017 +v 0.000000 0.000000 -1.000000 +v 0.688189 0.525736 0.499997 +v -0.262869 0.525738 0.809012 +v -0.850648 0.525736 0.000000 +v -0.262869 0.525738 -0.809012 +v 0.688189 0.525736 
-0.499997 +v 0.162456 0.850654 0.499995 +v 0.525730 0.850652 0.000000 +v -0.425323 0.850654 0.309011 +v -0.425323 0.850654 -0.309011 +v 0.162456 0.850654 -0.499995 +f 1 14 13 +f 2 14 16 +f 1 13 18 +f 1 18 20 +f 1 20 17 +f 2 16 23 +f 3 15 25 +f 4 19 27 +f 5 21 29 +f 6 22 31 +f 2 23 26 +f 3 25 28 +f 4 27 30 +f 5 29 32 +f 6 31 24 +f 7 33 38 +f 8 34 40 +f 9 35 41 +f 10 36 42 +f 11 37 39 +f 39 42 12 +f 39 37 42 +f 37 10 42 +f 42 41 12 +f 42 36 41 +f 36 9 41 +f 41 40 12 +f 41 35 40 +f 35 8 40 +f 40 38 12 +f 40 34 38 +f 34 7 38 +f 38 39 12 +f 38 33 39 +f 33 11 39 +f 24 37 11 +f 24 31 37 +f 31 10 37 +f 32 36 10 +f 32 29 36 +f 29 9 36 +f 30 35 9 +f 30 27 35 +f 27 8 35 +f 28 34 8 +f 28 25 34 +f 25 7 34 +f 26 33 7 +f 26 23 33 +f 23 11 33 +f 31 32 10 +f 31 22 32 +f 22 5 32 +f 29 30 9 +f 29 21 30 +f 21 4 30 +f 27 28 8 +f 27 19 28 +f 19 3 28 +f 25 26 7 +f 25 15 26 +f 15 2 26 +f 23 24 11 +f 23 16 24 +f 16 6 24 +f 17 22 6 +f 17 20 22 +f 20 5 22 +f 20 21 5 +f 20 18 21 +f 18 4 21 +f 18 19 4 +f 18 13 19 +f 13 3 19 +f 16 17 6 +f 16 14 17 +f 14 1 17 +f 13 15 3 +f 13 14 15 +f 14 2 15 diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/model_test.py b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/model_test.py new file mode 100644 index 000000000..f98802f01 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/model_test.py @@ -0,0 +1,191 @@ +# Copyright 2020 Deepmind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the PolyGen open-source version.""" +from npu_bridge.npu_init import * +from modules import FaceModel +from modules import VertexModel +import numpy as np +import tensorflow as tf +import tensorflow.compat.v1 as tf + +_BATCH_SIZE = 4 +_TRANSFORMER_CONFIG = { + 'num_layers': 2, + 'hidden_size': 64, + 'fc_size': 256 +} +_CLASS_CONDITIONAL = True +_NUM_CLASSES = 4 +_NUM_INPUT_VERTS = 50 +_NUM_PAD_VERTS = 10 +_NUM_INPUT_FACE_INDICES = 200 +_QUANTIZATION_BITS = 8 +_VERTEX_MODEL_USE_DISCRETE_EMBEDDINGS = True +_FACE_MODEL_DECODER_CROSS_ATTENTION = True +_FACE_MODEL_DISCRETE_EMBEDDINGS = True +_MAX_SAMPLE_LENGTH_VERTS = 10 +_MAX_SAMPLE_LENGTH_FACES = 10 + + +def _get_vertex_model_batch(): + """Returns batch with placeholders for vertex model inputs.""" + return { + 'class_label': tf.range(_BATCH_SIZE), + 'vertices_flat': tf.placeholder( + dtype=tf.int32, shape=[_BATCH_SIZE, None]), + } + + +def _get_face_model_batch(): + """Returns batch with placeholders for face model inputs.""" + return { + 'vertices': tf.placeholder( + dtype=tf.float32, shape=[_BATCH_SIZE, None, 3]), + 'vertices_mask': tf.placeholder( + dtype=tf.float32, shape=[_BATCH_SIZE, None]), + 'faces': tf.placeholder( + dtype=tf.int32, shape=[_BATCH_SIZE, None]), + } + + +class VertexModelTest(tf.test.TestCase): + + def setUp(self): + """Defines a vertex model.""" + super(VertexModelTest, self).setUp() + self.model = VertexModel( + decoder_config=_TRANSFORMER_CONFIG, + class_conditional=_CLASS_CONDITIONAL, + num_classes=_NUM_CLASSES, + max_num_input_verts=_NUM_INPUT_VERTS, + quantization_bits=_QUANTIZATION_BITS, + use_discrete_embeddings=_VERTEX_MODEL_USE_DISCRETE_EMBEDDINGS) + + def test_model_runs(self): + """Tests if the model runs without crashing.""" + batch = _get_vertex_model_batch() + pred_dist = self.model(batch, is_training=False) + logits = pred_dist.logits + with self.session() as sess: + sess.run(tf.global_variables_initializer()) + vertices_flat = np.random.randint( + 2 ** _QUANTIZATION_BITS + 1, + size=[_BATCH_SIZE, _NUM_INPUT_VERTS * 3 + 1]) + sess.run(logits, {batch['vertices_flat']: vertices_flat}) + + def test_sample_outputs_range(self): + """Tests if the model produces samples in the correct range.""" + context = {'class_label': tf.zeros((_BATCH_SIZE,), dtype=tf.int32)} + sample_dict = self.model.sample( + _BATCH_SIZE, max_sample_length=_MAX_SAMPLE_LENGTH_VERTS, + context=context) + with self.session() as sess: + sess.run(tf.global_variables_initializer()) + sample_dict_np = sess.run(sample_dict) + in_range = np.logical_and( + 0 <= sample_dict_np['vertices'], + sample_dict_np['vertices'] <= 2 ** _QUANTIZATION_BITS).all() + self.assertTrue(in_range) + + +class FaceModelTest(tf.test.TestCase): + + def setUp(self): + """Defines a face model.""" + super(FaceModelTest, self).setUp() + self.model = FaceModel( + encoder_config=_TRANSFORMER_CONFIG, + decoder_config=_TRANSFORMER_CONFIG, + class_conditional=False, + max_seq_length=_NUM_INPUT_FACE_INDICES, + decoder_cross_attention=_FACE_MODEL_DECODER_CROSS_ATTENTION, + use_discrete_vertex_embeddings=_FACE_MODEL_DISCRETE_EMBEDDINGS, + quantization_bits=_QUANTIZATION_BITS) + + def test_model_runs(self): + """Tests if the model runs without crashing.""" + batch = _get_face_model_batch() + pred_dist = self.model(batch, is_training=False) + logits = pred_dist.logits + with self.session() as sess: + sess.run(tf.global_variables_initializer()) + vertices = np.random.rand(_BATCH_SIZE, _NUM_INPUT_VERTS, 3) - 0.5 + vertices_mask = np.ones([_BATCH_SIZE, 
_NUM_INPUT_VERTS]) + faces = np.random.randint( + _NUM_INPUT_VERTS + 2, size=[_BATCH_SIZE, _NUM_INPUT_FACE_INDICES]) + sess.run( + logits, + {batch['vertices']: vertices, + batch['vertices_mask']: vertices_mask, + batch['faces']: faces} + ) + + def test_sample_outputs_range(self): + """Tests if the model produces samples in the correct range.""" + context = _get_face_model_batch() + del context['faces'] + sample_dict = self.model.sample( + context, max_sample_length=_MAX_SAMPLE_LENGTH_FACES) + with self.session() as sess: + sess.run(tf.global_variables_initializer()) + # Pad the vertices in order to test that the face model only outputs + # vertex indices in the unpadded range + vertices = np.pad( + np.random.rand(_BATCH_SIZE, _NUM_INPUT_VERTS, 3) - 0.5, + [[0, 0], [0, _NUM_PAD_VERTS], [0, 0]], mode='constant') + vertices_mask = np.pad( + np.ones([_BATCH_SIZE, _NUM_INPUT_VERTS]), + [[0, 0], [0, _NUM_PAD_VERTS]], mode='constant') + sample_dict_np = sess.run( + sample_dict, + {context['vertices']: vertices, + context['vertices_mask']: vertices_mask}) + in_range = np.logical_and( + 0 <= sample_dict_np['faces'], + sample_dict_np['faces'] <= _NUM_INPUT_VERTS + 1).all() + self.assertTrue(in_range) + + +if __name__ == '__main__': + tf.test.main() + diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modelzoo_level.txt new file mode 100644 index 000000000..8db1b61c8 --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modelzoo_level.txt @@ -0,0 +1,6 @@ +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:PERFECT \ No newline at end of file diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modules.py b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modules.py new file mode 100644 index 000000000..78e223abc --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/modules.py @@ -0,0 +1,1535 @@ +# Copyright 2020 Deepmind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
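# Illustrative sketch (not called anywhere in this module; helper name and
# sample values are our own): a plain numpy mirror of the [-0.5, 0.5] <-> n-bit
# integer mapping implemented by quantize_verts and dequantize_verts below.
def _example_quantization_round_trip(n_bits=8):
  """Illustrative only: numpy version of the quantize/dequantize round trip."""
  import numpy as np
  verts = np.array([[-0.5, 0.0, 0.5]], dtype=np.float32)
  range_quantize = 2 ** n_bits - 1
  quantized = ((verts + 0.5) * range_quantize).astype(np.int32)    # in [0, 255]
  recovered = quantized.astype(np.float32) / range_quantize - 0.5  # approx. verts
  return quantized, recovered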
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Modules and networks for mesh generation.""" +from npu_bridge.npu_init import * +import sonnet as snt +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_layers +import tensorflow.compat.v1 as tf +from tensorflow.python.framework import function +import tensorflow_probability as tfp + +tfd = tfp.distributions +tfb = tfp.bijectors + + +def dequantize_verts(verts, n_bits, add_noise=False): + """Dequantizes integer vertices to floats in [-0.5, 0.5].""" + min_range = -0.5 + max_range = 0.5 + range_quantize = 2 ** n_bits - 1 + verts = tf.cast(verts, tf.float32) + verts = verts * (max_range - min_range) / range_quantize + min_range + if add_noise: + verts += tf.random_uniform(tf.shape(verts)) * (1 / float(range_quantize)) + return verts + + +def quantize_verts(verts, n_bits): + """Quantizes float vertices and outputs integers with specified n_bits.""" + min_range = -0.5 + max_range = 0.5 + range_quantize = 2 ** n_bits - 1 + verts_quantize = ( + (verts - min_range) * range_quantize / (max_range - min_range)) + return tf.cast(verts_quantize, tf.int32) + + +def top_k_logits(logits, k): + """Masks logits such that logits not in top-k are small.""" + if k == 0: + return logits + else: + values, _ = tf.math.top_k(logits, k=k) + k_largest = tf.reduce_min(values) + logits = tf.where(tf.less_equal(logits, k_largest), + tf.ones_like(logits) * -1e9, logits) + return logits + + +def top_p_logits(logits, p): + """Masks logits using nucleus (top-p) sampling.""" + if p == 1: + return logits + else: + logit_shape = tf.shape(logits) + seq, dim = logit_shape[1], logit_shape[2] + logits = tf.reshape(logits, [-1, dim]) + sort_indices = tf.argsort(logits, axis=-1, direction='DESCENDING') + probs = tf.gather(tf.nn.softmax(logits), sort_indices, batch_dims=1) + cumprobs = tf.cumsum(probs, axis=-1, exclusive=True) + # The exclusive cumsum never masks the top-1 candidate, so at least + # one index is always selected. + sort_mask = tf.cast(tf.greater(cumprobs, p), logits.dtype) + batch_indices = tf.tile( + tf.expand_dims(tf.range(tf.shape(logits)[0]), axis=-1), [1, dim]) + top_p_mask = tf.scatter_nd( + tf.stack([batch_indices, sort_indices], axis=-1), sort_mask, + tf.shape(logits)) + logits -= top_p_mask * 1e9 + return tf.reshape(logits, [-1, seq, dim]) + + +_function_cache = {} # For multihead_self_attention_memory_efficient + + +def multihead_self_attention_memory_efficient(x, + bias, + num_heads, + head_size=None, + cache=None, + epsilon=1e-6, + forget=True, + test_vars=None, + name=None): + """Memory-efficient Multihead scaled-dot-product self-attention. + + Based on Tensor2Tensor version but adds optional caching. + + Returns multihead-self-attention(layer_norm(x)) + + Computes one attention head at a time to avoid exhausting memory. 
+ + If forget=True, then forget all forwards activations and recompute on + the backwards pass. + + Args: + x: a Tensor with shape [batch, length, input_size] + bias: an attention bias tensor broadcastable to [batch, 1, length, length] + num_heads: an integer + head_size: an optional integer - defaults to input_size/num_heads + cache: Optional dict containing tensors which are the results of previous + attentions, used for fast decoding. Expects the dict to contain two + keys ('k' and 'v'), for the initial call the values for these keys + should be empty Tensors of the appropriate shape. + 'k' [batch_size, 0, key_channels] 'v' [batch_size, 0, value_channels] + epsilon: a float, for layer norm + forget: a boolean - forget forwards activations and recompute on backprop + test_vars: optional tuple of variables for testing purposes + name: an optional string + + Returns: + A Tensor. + """ + io_size = x.get_shape().as_list()[-1] + if head_size is None: + assert io_size % num_heads == 0 + head_size = io_size / num_heads + + def forward_internal(x, wqkv, wo, attention_bias, norm_scale, norm_bias): + """Forward function.""" + n = common_layers.layer_norm_compute(x, epsilon, norm_scale, norm_bias) + wqkv_split = tf.unstack(wqkv, num=num_heads) + wo_split = tf.unstack(wo, num=num_heads) + y = 0 + if cache is not None: + cache_k = [] + cache_v = [] + for h in range(num_heads): + with tf.control_dependencies([y] if h > 0 else []): + combined = tf.nn.conv1d(n, wqkv_split[h], 1, 'SAME') + q, k, v = tf.split(combined, 3, axis=2) + if cache is not None: + k = tf.concat([cache['k'][:, h], k], axis=1) + v = tf.concat([cache['v'][:, h], v], axis=1) + cache_k.append(k) + cache_v.append(v) + o = common_attention.scaled_dot_product_attention_simple( + q, k, v, attention_bias) + y += tf.nn.conv1d(o, wo_split[h], 1, 'SAME') + if cache is not None: + cache['k'] = tf.stack(cache_k, axis=1) + cache['v'] = tf.stack(cache_v, axis=1) + return y + + key = ( + 'multihead_self_attention_memory_efficient %s %s' % (num_heads, epsilon)) + if not forget: + forward_fn = forward_internal + elif key in _function_cache: + forward_fn = _function_cache[key] + else: + + @function.Defun(compiled=True) + def grad_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias, dy): + """Custom gradient function.""" + with tf.control_dependencies([dy]): + n = common_layers.layer_norm_compute(x, epsilon, norm_scale, norm_bias) + wqkv_split = tf.unstack(wqkv, num=num_heads) + wo_split = tf.unstack(wo, num=num_heads) + deps = [] + dwqkvs = [] + dwos = [] + dn = 0 + for h in range(num_heads): + with tf.control_dependencies(deps): + combined = tf.nn.conv1d(n, wqkv_split[h], 1, 'SAME') + q, k, v = tf.split(combined, 3, axis=2) + o = common_attention.scaled_dot_product_attention_simple( + q, k, v, attention_bias) + partial_y = tf.nn.conv1d(o, wo_split[h], 1, 'SAME') + pdn, dwqkvh, dwoh = tf.gradients( + ys=[partial_y], + xs=[n, wqkv_split[h], wo_split[h]], + grad_ys=[dy]) + dn += pdn + dwqkvs.append(dwqkvh) + dwos.append(dwoh) + deps = [dn, dwqkvh, dwoh] + dwqkv = tf.stack(dwqkvs) + dwo = tf.stack(dwos) + with tf.control_dependencies(deps): + dx, dnorm_scale, dnorm_bias = tf.gradients( + ys=[n], xs=[x, norm_scale, norm_bias], grad_ys=[dn]) + return (dx, dwqkv, dwo, tf.zeros_like(attention_bias), dnorm_scale, + dnorm_bias) + + @function.Defun( + grad_func=grad_fn, compiled=True, separate_compiled_gradients=True) + def forward_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias): + return forward_internal(x, wqkv, wo, attention_bias, norm_scale, + 
norm_bias) + + _function_cache[key] = forward_fn + + if bias is not None: + bias = tf.squeeze(bias, 1) + with tf.variable_scope(name, default_name='multihead_attention', values=[x]): + if test_vars is not None: + wqkv, wo, norm_scale, norm_bias = list(test_vars) + else: + wqkv = tf.get_variable( + 'wqkv', [num_heads, 1, io_size, 3 * head_size], + initializer=tf.random_normal_initializer(stddev=io_size ** -0.5)) + wo = tf.get_variable( + 'wo', [num_heads, 1, head_size, io_size], + initializer=tf.random_normal_initializer( + stddev=(head_size * num_heads) ** -0.5)) + norm_scale, norm_bias = common_layers.layer_norm_vars(io_size) + y = forward_fn(x, wqkv, wo, bias, norm_scale, norm_bias) + y.set_shape(x.get_shape()) # pytype: disable=attribute-error + return y + + +class TransformerEncoder(snt.AbstractModule): + """Transformer encoder. + + Sonnet Transformer encoder module as described in Vaswani et al. 2017. Uses + the Tensor2Tensor multihead_attention function for full self attention + (no masking). Layer norm is applied inside the residual path as in sparse + transformers (Child 2019). + + This module expects inputs to be already embedded, and does not add position + embeddings. + """ + + def __init__(self, + hidden_size=256, + fc_size=1024, + num_heads=4, + layer_norm=True, + num_layers=8, + dropout_rate=0.2, + re_zero=True, + memory_efficient=False, + name='transformer_encoder'): + """Initializes TransformerEncoder. + + Args: + hidden_size: Size of embedding vectors. + fc_size: Size of fully connected layer. + num_heads: Number of attention heads. + layer_norm: If True, apply layer normalization + num_layers: Number of Transformer blocks, where each block contains a + multi-head attention layer and a MLP. + dropout_rate: Dropout rate applied immediately after the ReLU in each + fully-connected layer. + re_zero: If True, alpha scale residuals with zero init. + memory_efficient: If True, recompute gradients for memory savings. + name: Name of variable scope + """ + super(TransformerEncoder, self).__init__(name=name) + self.hidden_size = hidden_size + self.num_heads = num_heads + self.layer_norm = layer_norm + self.fc_size = fc_size + self.num_layers = num_layers + self.dropout_rate = dropout_rate + self.re_zero = re_zero + self.memory_efficient = memory_efficient + + def _build(self, inputs, is_training=False): + """Passes inputs through Transformer encoder network. + + Args: + inputs: Tensor of shape [batch_size, sequence_length, embed_size]. Zero + embeddings are masked in self-attention. + is_training: If True, dropout is applied. + + Returns: + output: Tensor of shape [batch_size, sequence_length, embed_size]. + """ + if is_training: + dropout_rate = self.dropout_rate + else: + dropout_rate = 0. + + # Identify elements with all zeros as padding, and create bias to mask + # out padding elements in self attention. + encoder_padding = common_attention.embedding_to_padding(inputs) + encoder_self_attention_bias = ( + common_attention.attention_bias_ignore_padding(encoder_padding)) + + x = inputs + for layer_num in range(self.num_layers): + with tf.variable_scope('layer_{}'.format(layer_num)): + + # Multihead self-attention from Tensor2Tensor. 
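        # Illustrative aside (not executed): encoder_self_attention_bias,
        # computed above via embedding_to_padding and
        # attention_bias_ignore_padding, adds a large negative value to the
        # attention logits of padded key positions, so after the softmax those
        # positions receive (near-)zero weight. With made-up numbers:
        #
        #   import numpy as np
        #   scores = np.array([2.0, 1.0, 3.0])   # raw attention logits
        #   bias = np.array([0.0, 0.0, -1e9])    # third key position is padding
        #   weights = np.exp(scores + bias)
        #   weights /= weights.sum()             # -> approx. [0.73, 0.27, 0.0]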
+ res = x + if self.memory_efficient: + res = multihead_self_attention_memory_efficient( + res, + bias=encoder_self_attention_bias, + num_heads=self.num_heads, + head_size=self.hidden_size // self.num_heads, + forget=True if is_training else False, + name='self_attention' + ) + else: + if self.layer_norm: + res = common_layers.layer_norm(res, name='self_attention') + res = common_attention.multihead_attention( + res, + memory_antecedent=None, + bias=encoder_self_attention_bias, + total_key_depth=self.hidden_size, + total_value_depth=self.hidden_size, + output_depth=self.hidden_size, + num_heads=self.num_heads, + dropout_rate=0., + make_image_summary=False, + name='self_attention') + if self.re_zero: + res *= tf.get_variable('self_attention/alpha', initializer=0.) + if dropout_rate: + res = npu_ops.dropout(res, keep_prob=1 - dropout_rate) + x += res + + # MLP + res = x + if self.layer_norm: + res = common_layers.layer_norm(res, name='fc') + res = tf.layers.dense( + res, self.fc_size, activation=tf.nn.relu, name='fc_1') + res = tf.layers.dense(res, self.hidden_size, name='fc_2') + if self.re_zero: + res *= tf.get_variable('fc/alpha', initializer=0.) + if dropout_rate: + res = npu_ops.dropout(res, keep_prob=1 - dropout_rate) + x += res + + if self.layer_norm: + output = common_layers.layer_norm(x, name='output') + else: + output = x + return output + + +class TransformerDecoder(snt.AbstractModule): + """Transformer decoder. + + Sonnet Transformer decoder module as described in Vaswani et al. 2017. Uses + the Tensor2Tensor multihead_attention function for masked self attention, and + non-masked cross attention attention. Layer norm is applied inside the + residual path as in sparse transformers (Child 2019). + + This module expects inputs to be already embedded, and does not + add position embeddings. + """ + + def __init__(self, + hidden_size=256, + fc_size=1024, + num_heads=4, + layer_norm=True, + num_layers=8, + dropout_rate=0.2, + re_zero=True, + memory_efficient=False, + name='transformer_decoder'): + """Initializes TransformerDecoder. + + Args: + hidden_size: Size of embedding vectors. + fc_size: Size of fully connected layer. + num_heads: Number of attention heads. + layer_norm: If True, apply layer normalization. If mem_efficient_attention + is True, then layer norm is always applied. + num_layers: Number of Transformer blocks, where each block contains a + multi-head attention layer and a MLP. + dropout_rate: Dropout rate applied immediately after the ReLU in each + fully-connected layer. + re_zero: If True, alpha scale residuals with zero init. + memory_efficient: If True, recompute gradients for memory savings. + name: Name of variable scope + """ + super(TransformerDecoder, self).__init__(name=name) + self.hidden_size = hidden_size + self.num_heads = num_heads + self.layer_norm = layer_norm + self.fc_size = fc_size + self.num_layers = num_layers + self.dropout_rate = dropout_rate + self.re_zero = re_zero + self.memory_efficient = memory_efficient + + def _build(self, + inputs, + sequential_context_embeddings=None, + is_training=False, + cache=None): + """Passes inputs through Transformer decoder network. + + Args: + inputs: Tensor of shape [batch_size, sequence_length, embed_size]. Zero + embeddings are masked in self-attention. + sequential_context_embeddings: Optional tensor with global context + (e.g image embeddings) of shape + [batch_size, context_seq_length, context_embed_size]. + is_training: If True, dropout is applied. 
+ cache: Optional dict containing tensors which are the results of previous + attentions, used for fast decoding. Expects the dict to contain two + keys ('k' and 'v'), for the initial call the values for these keys + should be empty Tensors of the appropriate shape. + 'k' [batch_size, 0, key_channels] 'v' [batch_size, 0, value_channels] + + Returns: + output: Tensor of shape [batch_size, sequence_length, embed_size]. + """ + if is_training: + dropout_rate = self.dropout_rate + else: + dropout_rate = 0. + + # create bias to mask future elements for causal self-attention. + seq_length = tf.shape(inputs)[1] + decoder_self_attention_bias = common_attention.attention_bias_lower_triangle( + seq_length) + + # If using sequential_context, identify elements with all zeros as padding, + # and create bias to mask out padding elements in self attention. + if sequential_context_embeddings is not None: + encoder_padding = common_attention.embedding_to_padding( + sequential_context_embeddings) + encoder_decoder_attention_bias = ( + common_attention.attention_bias_ignore_padding(encoder_padding)) + + x = inputs + for layer_num in range(self.num_layers): + with tf.variable_scope('layer_{}'.format(layer_num)): + + # If using cached decoding, access cache for current layer, and create + # bias that enables un-masked attention into the cache + if cache is not None: + layer_cache = cache[layer_num] + layer_decoder_bias = tf.zeros([1, 1, 1, 1]) + # Otherwise use standard masked bias + else: + layer_cache = None + layer_decoder_bias = decoder_self_attention_bias + + # Multihead self-attention from Tensor2Tensor. + res = x + if self.memory_efficient: + res = multihead_self_attention_memory_efficient( + res, + bias=layer_decoder_bias, + cache=layer_cache, + num_heads=self.num_heads, + head_size=self.hidden_size // self.num_heads, + forget=True if is_training else False, + name='self_attention' + ) + else: + if self.layer_norm: + res = common_layers.layer_norm(res, name='self_attention') + res = common_attention.multihead_attention( + res, + memory_antecedent=None, + bias=layer_decoder_bias, + total_key_depth=self.hidden_size, + total_value_depth=self.hidden_size, + output_depth=self.hidden_size, + num_heads=self.num_heads, + cache=layer_cache, + dropout_rate=0., + make_image_summary=False, + name='self_attention') + if self.re_zero: + res *= tf.get_variable('self_attention/alpha', initializer=0.) + if dropout_rate: + res = npu_ops.dropout(res, keep_prob=1 - dropout_rate) + x += res + + # Optional cross attention into sequential context + if sequential_context_embeddings is not None: + res = x + if self.layer_norm: + res = common_layers.layer_norm(res, name='cross_attention') + res = common_attention.multihead_attention( + res, + memory_antecedent=sequential_context_embeddings, + bias=encoder_decoder_attention_bias, + total_key_depth=self.hidden_size, + total_value_depth=self.hidden_size, + output_depth=self.hidden_size, + num_heads=self.num_heads, + dropout_rate=0., + make_image_summary=False, + name='cross_attention') + if self.re_zero: + res *= tf.get_variable('cross_attention/alpha', initializer=0.) + if dropout_rate: + res = npu_ops.dropout(res, keep_prob=1 - dropout_rate) + x += res + + # FC layers + res = x + if self.layer_norm: + res = common_layers.layer_norm(res, name='fc') + res = tf.layers.dense( + res, self.fc_size, activation=tf.nn.relu, name='fc_1') + res = tf.layers.dense(res, self.hidden_size, name='fc_2') + if self.re_zero: + res *= tf.get_variable('fc/alpha', initializer=0.) 
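        # Illustrative aside (not executed): with re_zero=True every residual
        # branch (self-attention, cross-attention and the MLP) is multiplied by
        # a learned scalar alpha initialised to 0, so each block starts out as
        # the identity and the branch is switched on gradually during training:
        #
        #   alpha = 0.0                    # learned scalar, zero at init
        #   x = x + alpha * branch(x)      # equals x at initialisation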
+ if dropout_rate: + res = npu_ops.dropout(res, keep_prob=1 - dropout_rate) + x += res + + if self.layer_norm: + output = common_layers.layer_norm(x, name='output') + else: + output = x + return output + + def create_init_cache(self, batch_size): + """Creates empty cache dictionary for use in fast decoding.""" + + def compute_cache_shape_invariants(tensor): + """Helper function to get dynamic shapes for cache tensors.""" + shape_list = tensor.shape.as_list() + if len(shape_list) == 4: + return tf.TensorShape( + [shape_list[0], shape_list[1], None, shape_list[3]]) + elif len(shape_list) == 3: + return tf.TensorShape([shape_list[0], None, shape_list[2]]) + + # Build cache + k = common_attention.split_heads( + tf.zeros([batch_size, 1, self.hidden_size]), self.num_heads) + v = common_attention.split_heads( + tf.zeros([batch_size, 1, self.hidden_size]), self.num_heads) + cache = [{'k': k, 'v': v} for _ in range(self.num_layers)] + shape_invariants = tf.nest.map_structure( + compute_cache_shape_invariants, cache) + return cache, shape_invariants + + +def conv_residual_block(inputs, + output_channels=None, + downsample=False, + kernel_size=3, + re_zero=True, + dropout_rate=0., + name='conv_residual_block'): + """Convolutional block with residual connections for 2D or 3D inputs. + + Args: + inputs: Input tensor of shape [batch_size, height, width, channels] or + [batch_size, height, width, depth, channels]. + output_channels: Number of output channels. + downsample: If True, downsample by 1/2 in this block. + kernel_size: Spatial size of convolutional kernels. + re_zero: If True, alpha scale residuals with zero init. + dropout_rate: Dropout rate applied after second ReLU in residual path. + name: Name for variable scope. + + Returns: + outputs: Output tensor of shape [batch_size, height, width, output_channels] + or [batch_size, height, width, depth, output_channels]. + """ + with tf.variable_scope(name): + input_shape = inputs.get_shape().as_list() + num_dims = len(input_shape) - 2 + + if num_dims == 2: + conv = tf.layers.conv2d + elif num_dims == 3: + conv = tf.layers.conv3d + + input_channels = input_shape[-1] + if output_channels is None: + output_channels = input_channels + if downsample: + shortcut = conv( + inputs, + filters=output_channels, + strides=2, + kernel_size=kernel_size, + padding='same', + name='conv_shortcut') + else: + shortcut = inputs + + res = inputs + res = tf.nn.relu(res) + res = conv( + res, filters=input_channels, kernel_size=kernel_size, padding='same', + name='conv_1') + + res = tf.nn.relu(res) + if dropout_rate: + res = npu_ops.dropout(res, keep_prob=1 - dropout_rate) + if downsample: + out_strides = 2 + else: + out_strides = 1 + res = conv( + res, + filters=output_channels, + kernel_size=kernel_size, + padding='same', + strides=out_strides, + name='conv_2') + if re_zero: + res *= tf.get_variable('alpha', initializer=0.) + return shortcut + res + + +class ResNet(snt.AbstractModule): + """ResNet architecture for 2D image or 3D voxel inputs.""" + + def __init__(self, + num_dims, + hidden_sizes=(64, 256), + num_blocks=(2, 2), + dropout_rate=0.1, + re_zero=True, + name='res_net'): + """Initializes ResNet. + + Args: + num_dims: Number of spatial dimensions. 2 for images or 3 for voxels. + hidden_sizes: Sizes of hidden layers in resnet blocks. + num_blocks: Number of resnet blocks at each size. + dropout_rate: Dropout rate applied immediately after the ReLU in each + fully-connected layer. + re_zero: If True, alpha scale residuals with zero init. 
+ name: Name of variable scope + """ + super(ResNet, self).__init__(name=name) + self.num_dims = num_dims + self.hidden_sizes = hidden_sizes + self.num_blocks = num_blocks + self.dropout_rate = dropout_rate + self.re_zero = re_zero + + def _build(self, inputs, is_training=False): + """Passes inputs through resnet. + + Args: + inputs: Tensor of shape [batch_size, height, width, channels] or + [batch_size, height, width, depth, channels]. + is_training: If True, dropout is applied. + + Returns: + output: Tensor of shape [batch_size, height, width, depth, output_size]. + """ + if is_training: + dropout_rate = self.dropout_rate + else: + dropout_rate = 0. + + # Initial projection with large kernel as in original resnet architecture + if self.num_dims == 3: + conv = tf.layers.conv3d + elif self.num_dims == 2: + conv = tf.layers.conv2d + x = conv( + inputs, + filters=self.hidden_sizes[0], + kernel_size=7, + strides=2, + padding='same', + name='conv_input') + + if self.num_dims == 2: + x = tf.layers.max_pooling2d( + x, strides=2, pool_size=3, padding='same', name='pool_input') + + for d, (hidden_size, + blocks) in enumerate(zip(self.hidden_sizes, self.num_blocks)): + + with tf.variable_scope('resolution_{}'.format(d)): + + # Downsample at the start of each collection of blocks + x = conv_residual_block( + x, + downsample=False if d == 0 else True, + dropout_rate=dropout_rate, + output_channels=hidden_size, + re_zero=self.re_zero, + name='block_1_downsample') + for i in range(blocks - 1): + x = conv_residual_block( + x, + dropout_rate=dropout_rate, + output_channels=hidden_size, + re_zero=self.re_zero, + name='block_{}'.format(i + 2)) + return x + + +class VertexModel(snt.AbstractModule): + """Autoregressive generative model of quantized mesh vertices. + + Operates on flattened vertex sequences with a stopping token: + + [z_0, y_0, x_0, z_1, y_1, x_1, ..., z_n, y_n, z_n, STOP] + + Input vertex coordinates are embedded and tagged with learned coordinate and + position indicators. A transformer decoder outputs logits for a quantized + vertex distribution. + """ + + def __init__(self, + decoder_config, + quantization_bits, + class_conditional=False, + num_classes=55, + max_num_input_verts=2500, + use_discrete_embeddings=True, + name='vertex_model'): + """Initializes VertexModel. + + Args: + decoder_config: Dictionary with TransformerDecoder config + quantization_bits: Number of quantization used in mesh preprocessing. + class_conditional: If True, then condition on learned class embeddings. + num_classes: Number of classes to condition on. + max_num_input_verts: Maximum number of vertices. Used for learned position + embeddings. + use_discrete_embeddings: If True, use discrete rather than continuous + vertex embeddings. 
+ name: Name of variable scope + """ + super(VertexModel, self).__init__(name=name) + self.embedding_dim = decoder_config['hidden_size'] + self.class_conditional = class_conditional + self.num_classes = num_classes + self.max_num_input_verts = max_num_input_verts + self.quantization_bits = quantization_bits + self.use_discrete_embeddings = use_discrete_embeddings + + with self._enter_variable_scope(): + self.decoder = TransformerDecoder(**decoder_config) + + @snt.reuse_variables + def _embed_class_label(self, labels): + """Embeds class label with learned embedding matrix.""" + init_dict = {'embeddings': tf.glorot_uniform_initializer} + return snt.Embed( + vocab_size=self.num_classes, + embed_dim=self.embedding_dim, + initializers=init_dict, + densify_gradients=True, + name='class_label')(labels) + + @snt.reuse_variables + def _prepare_context(self, context, is_training=False): + """Prepare class label context.""" + if self.class_conditional: + global_context_embedding = self._embed_class_label(context['class_label']) + else: + global_context_embedding = None + return global_context_embedding, None + + @snt.reuse_variables + def _embed_inputs(self, vertices, global_context_embedding=None): + """Embeds flat vertices and adds position and coordinate information.""" + # Dequantize inputs and get shapes + input_shape = tf.shape(vertices) + batch_size, seq_length = input_shape[0], input_shape[1] + + # Coord indicators (x, y, z) + coord_embeddings = snt.Embed( + vocab_size=3, + embed_dim=self.embedding_dim, + initializers={'embeddings': tf.glorot_uniform_initializer}, + densify_gradients=True, + name='coord_embeddings')(tf.mod(tf.range(seq_length), 3)) + + # Position embeddings + pos_embeddings = snt.Embed( + vocab_size=self.max_num_input_verts, + embed_dim=self.embedding_dim, + initializers={'embeddings': tf.glorot_uniform_initializer}, + densify_gradients=True, + name='coord_embeddings')(tf.floordiv(tf.range(seq_length), 3)) + + # Discrete vertex value embeddings + if self.use_discrete_embeddings: + vert_embeddings = snt.Embed( + vocab_size=2 ** self.quantization_bits + 1, + embed_dim=self.embedding_dim, + initializers={'embeddings': tf.glorot_uniform_initializer}, + densify_gradients=True, + name='value_embeddings')(vertices) + # Continuous vertex value embeddings + else: + vert_embeddings = tf.layers.dense( + dequantize_verts(vertices[..., None], self.quantization_bits), + self.embedding_dim, + use_bias=True, + name='value_embeddings') + + # Step zero embeddings + if global_context_embedding is None: + zero_embed = tf.get_variable( + 'embed_zero', shape=[1, 1, self.embedding_dim]) + zero_embed_tiled = tf.tile(zero_embed, [batch_size, 1, 1]) + else: + zero_embed_tiled = global_context_embedding[:, None] + + # Aggregate embeddings + embeddings = vert_embeddings + (coord_embeddings + pos_embeddings)[None] + embeddings = tf.concat([zero_embed_tiled, embeddings], axis=1) + + return embeddings + + @snt.reuse_variables + def _project_to_logits(self, inputs): + """Projects transformer outputs to logits for predictive distribution.""" + return tf.layers.dense( + inputs, + 2 ** self.quantization_bits + 1, # + 1 for stopping token + use_bias=True, + kernel_initializer=tf.zeros_initializer(), + name='project_to_logits') + + @snt.reuse_variables + def _create_dist(self, + vertices, + global_context_embedding=None, + sequential_context_embeddings=None, + temperature=1., + top_k=0, + top_p=1., + is_training=False, + cache=None): + """Outputs categorical dist for quantized vertex coordinates.""" + + # Embed 
inputs + decoder_inputs = self._embed_inputs(vertices, global_context_embedding) + if cache is not None: + decoder_inputs = decoder_inputs[:, -1:] + + # pass through decoder + outputs = self.decoder( + decoder_inputs, cache=cache, + sequential_context_embeddings=sequential_context_embeddings, + is_training=is_training) + + # Get logits and optionally process for sampling + logits = self._project_to_logits(outputs) + logits /= temperature + logits = top_k_logits(logits, top_k) + logits = top_p_logits(logits, top_p) + cat_dist = tfd.Categorical(logits=logits) + return cat_dist + + def _build(self, batch, is_training=False): + """Pass batch through vertex model and get log probabilities under model. + + Args: + batch: Dictionary containing: + 'vertices_flat': int32 vertex tensors of shape [batch_size, seq_length]. + is_training: If True, use dropout. + + Returns: + pred_dist: tfd.Categorical predictive distribution with batch shape + [batch_size, seq_length]. + """ + global_context, seq_context = self._prepare_context( + batch, is_training=is_training) + pred_dist = self._create_dist( + batch['vertices_flat'][:, :-1], # Last element not used for preds + global_context_embedding=global_context, + sequential_context_embeddings=seq_context, + is_training=is_training) + return pred_dist + + def sample(self, + num_samples, + context=None, + max_sample_length=None, + temperature=1., + top_k=0, + top_p=1., + recenter_verts=True, + only_return_complete=True): + """Autoregressive sampling with caching. + + Args: + num_samples: Number of samples to produce. + context: Dictionary of context, such as class labels. See _prepare_context + for details. + max_sample_length: Maximum length of sampled vertex sequences. Sequences + that do not complete are truncated. + temperature: Scalar softmax temperature > 0. + top_k: Number of tokens to keep for top-k sampling. + top_p: Proportion of probability mass to keep for top-p sampling. + recenter_verts: If True, center vertex samples around origin. This should + be used if model is trained using shift augmentations. + only_return_complete: If True, only return completed samples. Otherwise + return all samples along with completed indicator. + + Returns: + outputs: Output dictionary with fields: + 'completed': Boolean tensor of shape [num_samples]. If True then + corresponding sample completed within max_sample_length. + 'vertices': Tensor of samples with shape [num_samples, num_verts, 3]. + 'num_vertices': Tensor indicating number of vertices for each example + in padded vertex samples. + 'vertices_mask': Tensor of shape [num_samples, num_verts] that masks + corresponding invalid elements in 'vertices'. + """ + # Obtain context for decoder + global_context, seq_context = self._prepare_context( + context, is_training=False) + + # num_samples is the minimum value of num_samples and the batch size of + # context inputs (if present). 
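    # Illustrative aside (not executed): the while-loop below emits one token
    # per step until every sequence has produced the stopping token 0. Sampled
    # tokens are quantized coordinate values shifted by +1 (0 is reserved for
    # stopping), and num_vertices = stop_index // 3. Decoding a single flat
    # sample back into vertices roughly works like this (dequantization
    # omitted; numbers are made up):
    #
    #   import numpy as np
    #   flat = np.array([3, 7, 5, 9, 2, 8, 0])   # 0 marks the end of the sample
    #   flat = flat[:np.argmax(flat == 0)] - 1    # drop stop token, undo the +1
    #   verts = flat.reshape(-1, 3)               # (z, y, x) triples
    #   verts = verts[:, ::-1]                    # reorder to (x, y, z)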
+ if global_context is not None: + num_samples = tf.minimum(num_samples, tf.shape(global_context)[0]) + global_context = global_context[:num_samples] + if seq_context is not None: + seq_context = seq_context[:num_samples] + elif seq_context is not None: + num_samples = tf.minimum(num_samples, tf.shape(seq_context)[0]) + seq_context = seq_context[:num_samples] + + def _loop_body(i, samples, cache): + """While-loop body for autoregression calculation.""" + cat_dist = self._create_dist( + samples, + global_context_embedding=global_context, + sequential_context_embeddings=seq_context, + cache=cache, + temperature=temperature, + top_k=top_k, + top_p=top_p) + next_sample = cat_dist.sample() + samples = tf.concat([samples, next_sample], axis=1) + return i + 1, samples, cache + + def _stopping_cond(i, samples, cache): + """Stopping condition for sampling while-loop.""" + del i, cache # Unused + return tf.reduce_any(tf.reduce_all(tf.not_equal(samples, 0), axis=-1)) + + # Initial values for loop variables + samples = tf.zeros([num_samples, 1], dtype=tf.int32) + max_sample_length = max_sample_length or self.max_num_input_verts + cache, cache_shape_invariants = self.decoder.create_init_cache(num_samples) + _, v, _ = tf.while_loop( + cond=_stopping_cond, + body=_loop_body, + loop_vars=(0, samples, cache), + shape_invariants=(tf.TensorShape([]), tf.TensorShape([None, None]), + cache_shape_invariants), + maximum_iterations=max_sample_length * 3 + 1, + back_prop=False, + parallel_iterations=1) + + # Check if samples completed. Samples are complete if the stopping token + # is produced. + completed = tf.reduce_any(tf.equal(v, 0), axis=-1) + + # Get the number of vertices in the sample. This requires finding the + # index of the stopping token. For complete samples use to argmax to get + # first nonzero index. + stop_index_completed = tf.argmax( + tf.cast(tf.equal(v, 0), tf.int32), axis=-1, output_type=tf.int32) + # For incomplete samples the stopping index is just the maximum index. + stop_index_incomplete = ( + max_sample_length * 3 * tf.ones_like(stop_index_completed)) + stop_index = tf.where( + completed, stop_index_completed, stop_index_incomplete) + num_vertices = tf.floordiv(stop_index, 3) + + # Convert to 3D vertices by reshaping and re-ordering x -> y -> z + v = v[:, :(tf.reduce_max(num_vertices) * 3)] - 1 + verts_dequantized = dequantize_verts(v, self.quantization_bits) + vertices = tf.reshape(verts_dequantized, [num_samples, -1, 3]) + vertices = tf.stack( + [vertices[..., 2], vertices[..., 1], vertices[..., 0]], axis=-1) + + # Pad samples to max sample length. This is required in order to concatenate + # Samples across different replicator instances. Pad with stopping tokens + # for incomplete samples. + pad_size = max_sample_length - tf.shape(vertices)[1] + vertices = tf.pad(vertices, [[0, 0], [0, pad_size], [0, 0]]) + + # 3D Vertex mask + vertices_mask = tf.cast( + tf.range(max_sample_length)[None] < num_vertices[:, None], tf.float32) + + if recenter_verts: + vert_max = tf.reduce_max( + vertices - 1e10 * (1. - vertices_mask)[..., None], axis=1, + keepdims=True) + vert_min = tf.reduce_min( + vertices + 1e10 * (1. 
- vertices_mask)[..., None], axis=1, + keepdims=True) + vert_centers = 0.5 * (vert_max + vert_min) + vertices -= vert_centers + vertices *= vertices_mask[..., None] + + if only_return_complete: + vertices = tf.boolean_mask(vertices, completed) + num_vertices = tf.boolean_mask(num_vertices, completed) + vertices_mask = tf.boolean_mask(vertices_mask, completed) + completed = tf.boolean_mask(completed, completed) + + # Outputs + outputs = { + 'completed': completed, + 'vertices': vertices, + 'num_vertices': num_vertices, + 'vertices_mask': vertices_mask, + } + return outputs + + +class ImageToVertexModel(VertexModel): + """Generative model of quantized mesh vertices with image conditioning. + + Operates on flattened vertex sequences with a stopping token: + + [z_0, y_0, x_0, z_1, y_1, x_1, ..., z_n, y_n, z_n, STOP] + + Input vertex coordinates are embedded and tagged with learned coordinate and + position indicators. A transformer decoder outputs logits for a quantized + vertex distribution. Image inputs are encoded and used to condition the + vertex decoder. + """ + + def __init__(self, + res_net_config, + decoder_config, + quantization_bits, + use_discrete_embeddings=True, + max_num_input_verts=2500, + name='image_to_vertex_model'): + """Initializes VoxelToVertexModel. + + Args: + res_net_config: Dictionary with ResNet config. + decoder_config: Dictionary with TransformerDecoder config. + quantization_bits: Number of quantization used in mesh preprocessing. + use_discrete_embeddings: If True, use discrete rather than continuous + vertex embeddings. + max_num_input_verts: Maximum number of vertices. Used for learned position + embeddings. + name: Name of variable scope + """ + super(ImageToVertexModel, self).__init__( + decoder_config=decoder_config, + quantization_bits=quantization_bits, + max_num_input_verts=max_num_input_verts, + use_discrete_embeddings=use_discrete_embeddings, + name=name) + + with self._enter_variable_scope(): + self.res_net = ResNet(num_dims=2, **res_net_config) + + @snt.reuse_variables + def _prepare_context(self, context, is_training=False): + # Pass images through encoder + image_embeddings = self.res_net( + context['image'] - 0.5, is_training=is_training) + + # Add 2D coordinate grid embedding + processed_image_resolution = tf.shape(image_embeddings)[1] + x = tf.linspace(-1., 1., processed_image_resolution) + image_coords = tf.stack(tf.meshgrid(x, x), axis=-1) + image_coord_embeddings = tf.layers.dense( + image_coords, + self.embedding_dim, + use_bias=True, + name='image_coord_embeddings') + image_embeddings += image_coord_embeddings[None] + + # Reshape spatial grid to sequence + batch_size = tf.shape(image_embeddings)[0] + sequential_context_embedding = tf.reshape( + image_embeddings, [batch_size, -1, self.embedding_dim]) + + return None, sequential_context_embedding + + +class VoxelToVertexModel(VertexModel): + """Generative model of quantized mesh vertices with voxel conditioning. + + Operates on flattened vertex sequences with a stopping token: + + [z_0, y_0, x_0, z_1, y_1, x_1, ..., z_n, y_n, z_n, STOP] + + Input vertex coordinates are embedded and tagged with learned coordinate and + position indicators. A transformer decoder outputs logits for a quantized + vertex distribution. Image inputs are encoded and used to condition the + vertex decoder. + """ + + def __init__(self, + res_net_config, + decoder_config, + quantization_bits, + use_discrete_embeddings=True, + max_num_input_verts=2500, + name='voxel_to_vertex_model'): + """Initializes VoxelToVertexModel. 
+ + Args: + res_net_config: Dictionary with ResNet config. + decoder_config: Dictionary with TransformerDecoder config. + quantization_bits: Integer number of bits used for vertex quantization. + use_discrete_embeddings: If True, use discrete rather than continuous + vertex embeddings. + max_num_input_verts: Maximum number of vertices. Used for learned position + embeddings. + name: Name of variable scope + """ + super(VoxelToVertexModel, self).__init__( + decoder_config=decoder_config, + quantization_bits=quantization_bits, + max_num_input_verts=max_num_input_verts, + use_discrete_embeddings=use_discrete_embeddings, + name=name) + + with self._enter_variable_scope(): + self.res_net = ResNet(num_dims=3, **res_net_config) + + @snt.reuse_variables + def _prepare_context(self, context, is_training=False): + # Embed binary input voxels + voxel_embeddings = snt.Embed( + vocab_size=2, + embed_dim=self.pre_embed_dim, + initializers={'embeddings': tf.glorot_uniform_initializer}, + densify_gradients=True, + name='voxel_embeddings')(context['voxels']) + + # Pass embedded voxels through voxel encoder + voxel_embeddings = self.res_net( + voxel_embeddings, is_training=is_training) + + # Add 3D coordinate grid embedding + processed_voxel_resolution = tf.shape(voxel_embeddings)[1] + x = tf.linspace(-1., 1., processed_voxel_resolution) + voxel_coords = tf.stack(tf.meshgrid(x, x, x), axis=-1) + voxel_coord_embeddings = tf.layers.dense( + voxel_coords, + self.embedding_dim, + use_bias=True, + name='voxel_coord_embeddings') + voxel_embeddings += voxel_coord_embeddings[None] + + # Reshape spatial grid to sequence + batch_size = tf.shape(voxel_embeddings)[0] + sequential_context_embedding = tf.reshape( + voxel_embeddings, [batch_size, -1, self.embedding_dim]) + + return None, sequential_context_embedding + + +class FaceModel(snt.AbstractModule): + """Autoregressive generative model of n-gon meshes. + + Operates on sets of input vertices as well as flattened face sequences with + new face and stopping tokens: + + [f_0^0, f_0^1, f_0^2, NEW, f_1^0, f_1^1, ..., STOP] + + Input vertices are encoded using a Transformer encoder. + + Input face sequences are embedded and tagged with learned position indicators, + as well as their corresponding vertex embeddings. A transformer decoder + outputs a pointer which is compared to each vertex embedding to obtain a + distribution over vertex indices. + """ + + def __init__(self, + encoder_config, + decoder_config, + class_conditional=True, + num_classes=55, + decoder_cross_attention=True, + use_discrete_vertex_embeddings=True, + quantization_bits=8, + max_seq_length=5000, + name='face_model'): + """Initializes FaceModel. + + Args: + encoder_config: Dictionary with TransformerEncoder config. + decoder_config: Dictionary with TransformerDecoder config. + class_conditional: If True, then condition on learned class embeddings. + num_classes: Number of classes to condition on. + decoder_cross_attention: If True, the use cross attention from decoder + querys into encoder outputs. + use_discrete_vertex_embeddings: If True, use discrete vertex embeddings. + quantization_bits: Number of quantization bits for discrete vertex + embeddings. + max_seq_length: Maximum face sequence length. Used for learned position + embeddings. 
+ name: Name of variable scope + """ + super(FaceModel, self).__init__(name=name) + self.embedding_dim = decoder_config['hidden_size'] + self.class_conditional = class_conditional + self.num_classes = num_classes + self.max_seq_length = max_seq_length + self.decoder_cross_attention = decoder_cross_attention + self.use_discrete_vertex_embeddings = use_discrete_vertex_embeddings + self.quantization_bits = quantization_bits + + with self._enter_variable_scope(): + self.decoder = TransformerDecoder(**decoder_config) + self.encoder = TransformerEncoder(**encoder_config) + + @snt.reuse_variables + def _embed_class_label(self, labels): + """Embeds class label with learned embedding matrix.""" + init_dict = {'embeddings': tf.glorot_uniform_initializer} + return snt.Embed( + vocab_size=self.num_classes, + embed_dim=self.embedding_dim, + initializers=init_dict, + densify_gradients=True, + name='class_label')(labels) + + @snt.reuse_variables + def _prepare_context(self, context, is_training=False): + """Prepare class label and vertex context.""" + if self.class_conditional: + global_context_embedding = self._embed_class_label(context['class_label']) + else: + global_context_embedding = None + vertex_embeddings = self._embed_vertices( + context['vertices'], context['vertices_mask'], + is_training=is_training) + if self.decoder_cross_attention: + sequential_context_embeddings = ( + vertex_embeddings * + tf.pad(context['vertices_mask'], [[0, 0], [2, 0]], + constant_values=1)[..., None]) + else: + sequential_context_embeddings = None + return (vertex_embeddings, global_context_embedding, + sequential_context_embeddings) + + @snt.reuse_variables + def _embed_vertices(self, vertices, vertices_mask, is_training=False): + """Embeds vertices with transformer encoder.""" + # num_verts = tf.shape(vertices)[1] + if self.use_discrete_vertex_embeddings: + vertex_embeddings = 0. 
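      # Illustrative aside (not executed): in the discrete case each of the
      # three coordinate channels is quantized and embedded with its own
      # 'coord_{c}' table, and the embeddings are summed into a single
      # per-vertex embedding by the loop below:
      #
      #   e(v) = E_0[q(v[0])] + E_1[q(v[1])] + E_2[q(v[2])]
      #
      # where q is quantize_verts with self.quantization_bits.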
+ verts_quantized = quantize_verts(vertices, self.quantization_bits) + for c in range(3): + vertex_embeddings += snt.Embed( + vocab_size=256, + embed_dim=self.embedding_dim, + initializers={'embeddings': tf.glorot_uniform_initializer}, + densify_gradients=True, + name='coord_{}'.format(c))(verts_quantized[..., c]) + else: + vertex_embeddings = tf.layers.dense( + vertices, self.embedding_dim, use_bias=True, name='vertex_embeddings') + vertex_embeddings *= vertices_mask[..., None] + + # Pad vertex embeddings with learned embeddings for stopping and new face + # tokens + stopping_embeddings = tf.get_variable( + 'stopping_embeddings', shape=[1, 2, self.embedding_dim]) + stopping_embeddings = tf.tile(stopping_embeddings, + [tf.shape(vertices)[0], 1, 1]) + vertex_embeddings = tf.concat( + [stopping_embeddings, vertex_embeddings], axis=1) + + # Pass through Transformer encoder + vertex_embeddings = self.encoder(vertex_embeddings, is_training=is_training) + return vertex_embeddings + + @snt.reuse_variables + def _embed_inputs(self, faces_long, vertex_embeddings, + global_context_embedding=None): + """Embeds face sequences and adds within and between face positions.""" + + # Face value embeddings are gathered vertex embeddings + face_embeddings = tf.gather(vertex_embeddings, faces_long, batch_dims=1) + + # Position embeddings + pos_embeddings = snt.Embed( + vocab_size=self.max_seq_length, + embed_dim=self.embedding_dim, + initializers={'embeddings': tf.glorot_uniform_initializer}, + densify_gradients=True, + name='coord_embeddings')(tf.range(tf.shape(faces_long)[1])) + + # Step zero embeddings + batch_size = tf.shape(face_embeddings)[0] + if global_context_embedding is None: + zero_embed = tf.get_variable( + 'embed_zero', shape=[1, 1, self.embedding_dim]) + zero_embed_tiled = tf.tile(zero_embed, [batch_size, 1, 1]) + else: + zero_embed_tiled = global_context_embedding[:, None] + + # Aggregate embeddings + embeddings = face_embeddings + pos_embeddings[None] + embeddings = tf.concat([zero_embed_tiled, embeddings], axis=1) + + return embeddings + + @snt.reuse_variables + def _project_to_pointers(self, inputs): + """Projects transformer outputs to pointer vectors.""" + return tf.layers.dense( + inputs, + self.embedding_dim, + use_bias=True, + kernel_initializer=tf.zeros_initializer(), + name='project_to_pointers' + ) + + @snt.reuse_variables + def _create_dist(self, + vertex_embeddings, + vertices_mask, + faces_long, + global_context_embedding=None, + sequential_context_embeddings=None, + temperature=1., + top_k=0, + top_p=1., + is_training=False, + cache=None): + """Outputs categorical dist for vertex indices.""" + + # Embed inputs + decoder_inputs = self._embed_inputs( + faces_long, vertex_embeddings, global_context_embedding) + + # Pass through Transformer decoder + if cache is not None: + decoder_inputs = decoder_inputs[:, -1:] + decoder_outputs = self.decoder( + decoder_inputs, + cache=cache, + sequential_context_embeddings=sequential_context_embeddings, + is_training=is_training) + + # Get pointers + pred_pointers = self._project_to_pointers(decoder_outputs) + + # Get logits and mask + logits = tf.matmul(pred_pointers, vertex_embeddings, transpose_b=True) + logits /= tf.sqrt(float(self.embedding_dim)) + f_verts_mask = tf.pad( + vertices_mask, [[0, 0], [2, 0]], constant_values=1.)[:, None] + logits *= f_verts_mask + logits -= (1. 
- f_verts_mask) * 1e9 + logits /= temperature + logits = top_k_logits(logits, top_k) + logits = top_p_logits(logits, top_p) + return tfd.Categorical(logits=logits) + + def _build(self, batch, is_training=False): + """Pass batch through face model and get log probabilities. + + Args: + batch: Dictionary containing: + 'vertices_dequantized': Tensor of shape [batch_size, num_vertices, 3]. + 'faces': int32 tensor of shape [batch_size, seq_length] with flattened + faces. + 'vertices_mask': float32 tensor with shape + [batch_size, num_vertices] that masks padded elements in 'vertices'. + is_training: If True, use dropout. + + Returns: + pred_dist: tfd.Categorical predictive distribution with batch shape + [batch_size, seq_length]. + """ + vertex_embeddings, global_context, seq_context = self._prepare_context( + batch, is_training=is_training) + pred_dist = self._create_dist( + vertex_embeddings, + batch['vertices_mask'], + batch['faces'][:, :-1], + global_context_embedding=global_context, + sequential_context_embeddings=seq_context, + is_training=is_training) + return pred_dist + + def sample(self, + context, + max_sample_length=None, + temperature=1., + top_k=0, + top_p=1., + only_return_complete=True): + """Sample from face model using caching. + + Args: + context: Dictionary of context, including 'vertices' and 'vertices_mask'. + See _prepare_context for details. + max_sample_length: Maximum length of sampled vertex sequences. Sequences + that do not complete are truncated. + temperature: Scalar softmax temperature > 0. + top_k: Number of tokens to keep for top-k sampling. + top_p: Proportion of probability mass to keep for top-p sampling. + only_return_complete: If True, only return completed samples. Otherwise + return all samples along with completed indicator. + + Returns: + outputs: Output dictionary with fields: + 'completed': Boolean tensor of shape [num_samples]. If True then + corresponding sample completed within max_sample_length. + 'faces': Tensor of samples with shape [num_samples, num_verts, 3]. + 'num_face_indices': Tensor indicating number of vertices for each + example in padded vertex samples. 
+ """ + vertex_embeddings, global_context, seq_context = self._prepare_context( + context, is_training=False) + num_samples = tf.shape(vertex_embeddings)[0] + + def _loop_body(i, samples, cache): + """While-loop body for autoregression calculation.""" + pred_dist = self._create_dist( + vertex_embeddings, + context['vertices_mask'], + samples, + global_context_embedding=global_context, + sequential_context_embeddings=seq_context, + cache=cache, + temperature=temperature, + top_k=top_k, + top_p=top_p) + next_sample = pred_dist.sample()[:, -1:] + samples = tf.concat([samples, next_sample], axis=1) + return i + 1, samples, cache + + def _stopping_cond(i, samples, cache): + """Stopping conditions for autoregressive calculation.""" + del i, cache # Unused + return tf.reduce_any(tf.reduce_all(tf.not_equal(samples, 0), axis=-1)) + + # While loop sampling with caching + samples = tf.zeros([num_samples, 1], dtype=tf.int32) + max_sample_length = max_sample_length or self.max_seq_length + cache, cache_shape_invariants = self.decoder.create_init_cache(num_samples) + _, f, _ = tf.while_loop( + cond=_stopping_cond, + body=_loop_body, + loop_vars=(0, samples, cache), + shape_invariants=(tf.TensorShape([]), tf.TensorShape([None, None]), + cache_shape_invariants), + back_prop=False, + parallel_iterations=1, + maximum_iterations=max_sample_length) + + # Record completed samples + complete_samples = tf.reduce_any(tf.equal(f, 0), axis=-1) + + # Find number of faces + sample_length = tf.shape(f)[-1] + # Get largest new face (1) index as stopping point for incomplete samples. + max_one_ind = tf.reduce_max( + tf.range(sample_length)[None] * tf.cast(tf.equal(f, 1), tf.int32), + axis=-1) + zero_inds = tf.cast( + tf.argmax(tf.cast(tf.equal(f, 0), tf.int32), axis=-1), tf.int32) + num_face_indices = tf.where(complete_samples, zero_inds, max_one_ind) + 1 + + # Mask faces beyond stopping token with zeros + # This mask has a -1 in order to replace the last new face token with zero + faces_mask = tf.cast( + tf.range(sample_length)[None] < num_face_indices[:, None] - 1, tf.int32) + f *= faces_mask + # This is the real mask + faces_mask = tf.cast( + tf.range(sample_length)[None] < num_face_indices[:, None], tf.int32) + + # Pad to maximum size with zeros + pad_size = max_sample_length - sample_length + f = tf.pad(f, [[0, 0], [0, pad_size]]) + + if only_return_complete: + f = tf.boolean_mask(f, complete_samples) + num_face_indices = tf.boolean_mask(num_face_indices, complete_samples) + context = tf.nest.map_structure( + lambda x: tf.boolean_mask(x, complete_samples), context) + complete_samples = tf.boolean_mask(complete_samples, complete_samples) + + # outputs + outputs = { + 'context': context, + 'completed': complete_samples, + 'faces': f, + 'num_face_indices': num_face_indices, + } + return outputs + diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/requirements.txt b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/requirements.txt new file mode 100644 index 000000000..7bf69ab5e --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/requirements.txt @@ -0,0 +1,5 @@ +dm-sonnet==1.36 +numpy==1.18.0 +tensor2tensor==1.14 +tensorboard==1.15.0 +tensorflow==1.15.0 \ No newline at end of file diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/model_test.sh b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/model_test.sh new file mode 100644 index 000000000..59544670d --- /dev/null +++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/model_test.sh @@ -0,0 
+1,19 @@
+#!/bin/sh
+# Copyright 2020 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+python -m venv polygen
+source polygen/bin/activate
+python3 ../model_test.py
+deactivate
diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_full_1p.sh
new file mode 100644
index 000000000..b1e666a9b
--- /dev/null
+++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_full_1p.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Copyright 2020 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+#code path
+code_path=`pwd`/../
+
+#training steps (default 5000)
+training_steps=5000
+
+#dataset path
+data_path=`pwd`/../meshes
+
+#precision mode
+precision_mode=''
+
+#parameter parsing, no need to modify
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --training_steps* ]];then
+        training_steps=`echo ${para#*=}`
+    fi
+done
+
+
+
+#start training
+python3 $code_path/train.py \
+        --dataset="${data_path}" \
+        --training_steps=${training_steps} \
+        --precision_mode=${precision_mode}
+
+
diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/train.py b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/train.py
new file mode 100644
index 000000000..6227687d0
--- /dev/null
+++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/train.py
@@ -0,0 +1,223 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +from npu_bridge.npu_init import * +import os +import time +import numpy as np +import tensorflow.compat.v1 as tf + +tf.logging.set_verbosity(tf.logging.ERROR) # Hide TF deprecation messages +import matplotlib.pyplot as plt + +import modules +import data_utils + +flags = tf.flags +FLAGS = flags.FLAGS +flags.DEFINE_string("dataset", "dataset", "dataset path") +flags.DEFINE_integer("training_steps", 5000, "training steps") +flags.DEFINE_string("precision_mode", "mix", "precision mode") +flags.DEFINE_string("output_path", "./output", "output path") + + +def main(): + print("===>>>dataset:{}".format(FLAGS.dataset)) + # Prepare synthetic dataset + print("===>>>Prepare synthetic dataset") + ex_list = [] + + for k, mesh in enumerate(['cube', 'cylinder', 'cone', 'icosphere']): + mesh_dict, flag = data_utils.load_process_mesh( + os.path.join(FLAGS.dataset, '{}.obj'.format(mesh))) + if flag: + mesh_dict['class_label'] = k + ex_list.append(mesh_dict) + synthetic_dataset = tf.data.Dataset.from_generator( + lambda: ex_list, + output_types={ + 'vertices': tf.int32, 'faces': tf.int32, 'class_label': tf.int32}, + output_shapes={ + 'vertices': tf.TensorShape([None, 3]), 'faces': tf.TensorShape([None]), + 'class_label': tf.TensorShape(())} + ) + ex = synthetic_dataset.make_one_shot_iterator().get_next() + + # Inspect the first mesh + with tf.Session(config=npu_config_proto()) as sess: + ex_np = sess.run(ex) + print(ex_np) + + # Plot the meshes + mesh_list = [] + with tf.Session(config=npu_config_proto()) as sess: + for i in range(4): + ex_np = sess.run(ex) + mesh_list.append( + {'vertices': data_utils.dequantize_verts(ex_np['vertices']), + 'faces': data_utils.unflatten_faces(ex_np['faces'])}) + data_utils.plot_meshes(mesh_list, ax_lims=0.4) + + print("===>>>Prepare vertex model") + # Prepare the dataset for vertex model training + vertex_model_dataset = data_utils.make_vertex_model_dataset( + synthetic_dataset, apply_random_shift=False) + vertex_model_dataset = vertex_model_dataset.repeat() + vertex_model_dataset = vertex_model_dataset.padded_batch( + 4, padded_shapes=vertex_model_dataset.output_shapes) + vertex_model_dataset = vertex_model_dataset.prefetch(1) + vertex_model_batch = vertex_model_dataset.make_one_shot_iterator().get_next() + + # Create vertex model + vertex_model = modules.VertexModel( + decoder_config={ + 'hidden_size': 128, + 'fc_size': 512, + 'num_layers': 3, + 'dropout_rate': 0. 
+        },
+        class_conditional=True,
+        num_classes=4,
+        max_num_input_verts=250,
+        quantization_bits=8,
+    )
+    vertex_model_pred_dist = vertex_model(vertex_model_batch)
+    vertex_model_loss = -tf.reduce_sum(
+        vertex_model_pred_dist.log_prob(vertex_model_batch['vertices_flat']) *
+        vertex_model_batch['vertices_flat_mask'])
+    vertex_samples = vertex_model.sample(
+        4, context=vertex_model_batch, max_sample_length=200, top_p=0.95,
+        recenter_verts=False, only_return_complete=False)
+
+    print(vertex_model_batch)
+    print(vertex_model_pred_dist)
+    print(vertex_samples)
+
+    print("===>>>Prepare face model")
+    face_model_dataset = data_utils.make_face_model_dataset(
+        synthetic_dataset, apply_random_shift=False)
+    face_model_dataset = face_model_dataset.repeat()
+    face_model_dataset = face_model_dataset.padded_batch(
+        4, padded_shapes=face_model_dataset.output_shapes)
+    face_model_dataset = face_model_dataset.prefetch(1)
+    face_model_batch = face_model_dataset.make_one_shot_iterator().get_next()
+
+    # Create face model
+    face_model = modules.FaceModel(
+        encoder_config={
+            'hidden_size': 128,
+            'fc_size': 512,
+            'num_layers': 3,
+            'dropout_rate': 0.
+        },
+        decoder_config={
+            'hidden_size': 128,
+            'fc_size': 512,
+            'num_layers': 3,
+            'dropout_rate': 0.
+        },
+        class_conditional=False,
+        max_seq_length=500,
+        quantization_bits=8,
+        decoder_cross_attention=True,
+        use_discrete_vertex_embeddings=True,
+    )
+    face_model_pred_dist = face_model(face_model_batch)
+    face_model_loss = -tf.reduce_sum(
+        face_model_pred_dist.log_prob(face_model_batch['faces']) *
+        face_model_batch['faces_mask'])
+    face_samples = face_model.sample(
+        context=vertex_samples, max_sample_length=500, top_p=0.95,
+        only_return_complete=False)
+
+    print(face_model_batch)
+    print(face_model_pred_dist)
+    print(face_samples)
+
+    # Optimization settings
+    learning_rate = 5e-4
+    training_steps = FLAGS.training_steps
+    check_step = 5
+    plot_step = 100
+
+    # Create an optimizer and minimize the summed log probability of the mesh
+    # sequences
+    optimizer = tf.train.AdamOptimizer(learning_rate)
+    vertex_model_optim_op = optimizer.minimize(vertex_model_loss)
+    face_model_optim_op = optimizer.minimize(face_model_loss)
+
+    print("===>>>Training")
+    # Training start time
+    start_time = time.time()
+
+    # Mixed precision
+    config_proto = tf.ConfigProto()
+    if FLAGS.precision_mode == 'mix':
+        print("precision mode: mix")
+        custom_op = config_proto.graph_options.rewrite_options.custom_optimizers.add()
+        custom_op.name = 'NpuOptimizer'
+        custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
+
+    # Training loop
+    config = npu_config_proto(config_proto=config_proto)
+    with tf.Session(config=config) as sess:
+        sess.run(tf.global_variables_initializer())
+        for n in range(training_steps):
+            if n % check_step == 0:
+                v_loss, f_loss = sess.run((vertex_model_loss, face_model_loss))
+                print('Step {}'.format(n))
+                print('Loss (vertices) {}'.format(v_loss))
+                print('Loss (faces) {}'.format(f_loss))
+                v_samples_np, f_samples_np = sess.run(
+                    (vertex_samples, face_samples))
+                mesh_list = []
+                if n % plot_step == 0:
+                    for i in range(4):
+                        mesh_list.append(
+                            {
+                                'vertices': v_samples_np['vertices'][i][:v_samples_np['num_vertices'][i]],
+                                'faces': data_utils.unflatten_faces(
+                                    f_samples_np['faces'][i][:f_samples_np['num_face_indices'][i]])
+                            }
+                        )
+                    # data_utils.plot_meshes(mesh_list, ax_lims=0.5)
+            sess.run((vertex_model_optim_op, face_model_optim_op))
+        # Saving model
+        # saver = tf.train.Saver()
+        # saver.save(sess, os.path.join(FLAGS.ckpt_path, 'model.ckpt'))
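+        # Restore sketch (illustrative only): the commented saver lines above
+        # would have to be enabled first, and a checkpoint directory chosen
+        # (FLAGS.output_path, declared above, could serve as ckpt_path).
+        # saver.restore(sess, os.path.join(FLAGS.output_path, 'model.ckpt'))
+        # v_np, f_np = sess.run((vertex_samples, face_samples))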
+
+    # Training end time
+    end_time = time.time()
+    print('TimeToTrain: %4.4f  StepTime: %4.4f'
+          % ((end_time - start_time), (end_time - start_time) / training_steps))
+
+
+if __name__ == '__main__':
+    main()
-- 
Gitee

From f17f31d8b612f847053d474685482de63840b69c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E5=AD=90=E6=B5=A9?=
Date: Sun, 24 Jul 2022 07:41:48 +0000
Subject: [PATCH 2/2] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_performance_1p.sh | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_performance_1p.sh

diff --git a/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_performance_1p.sh
new file mode 100644
index 000000000..97a16cac5
--- /dev/null
+++ b/TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test/train_performance_1p.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Copyright 2020 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+#code path
+code_path=`pwd`/../
+
+#training steps (default 5000)
+training_steps=5000
+
+#dataset path
+data_path=`pwd`/../meshes
+
+#precision mode
+precision_mode='mix'
+
+#parameter parsing, no need to modify
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --training_steps* ]];then
+        training_steps=`echo ${para#*=}`
+    fi
+done
+
+
+
+#start training
+python3 $code_path/train.py \
+        --dataset="${data_path}" \
+        --training_steps=${training_steps} \
+        --precision_mode=${precision_mode}
+
+
-- 
Gitee
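For reference, the performance script added above can be exercised from the test directory roughly as follows (step count and paths are illustrative; without arguments it falls back to ../meshes, 5000 steps and the 'mix' precision mode):

    cd TensorFlow/contrib/cv/Polygen_ID2061_for_TensorFlow/test
    bash train_performance_1p.sh --data_path=../meshes --training_steps=1000 --precision_mode=mix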