diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.DS_Store b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..903925f4243e1957bb69edcbb07a63bcf3bfe7dc Binary files /dev/null and b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.DS_Store differ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.gitignore b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..894a44cc066a027465cd26d634948d56d13af9af --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.gitignore @@ -0,0 +1,104 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/.gitignore b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..eaf91e2ac647df635a09f01b8a2a254252aae8d7 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/BigGAN-tensorflow.iml b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/BigGAN-tensorflow.iml new file mode 100644 index 0000000000000000000000000000000000000000..1d426b97b2b08fbefe4ef33ed06ae522c3e65504 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/BigGAN-tensorflow.iml @@ -0,0 +1,15 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/inspectionProfiles/profiles_settings.xml b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/misc.xml 
b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/misc.xml new file mode 100644 index 0000000000000000000000000000000000000000..7e45480a8572c6d832d9cdf5eb92a77555fbeabb --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/modules.xml b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/modules.xml new file mode 100644 index 0000000000000000000000000000000000000000..0c178f724249a0840cfadb27e50e556187af101c --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/vcs.xml b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/vcs.xml new file mode 100644 index 0000000000000000000000000000000000000000..9661ac713428efbad557d3ba3a62216b5bb7d226 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/LICENSE b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..5135900a8875c8b79c245aaa94f915b25e33b8c9 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 MingtaoGuo + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/README.md b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..52b37eefc5cf6149f9f859d36545bb073ec6a0b6 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/README.md @@ -0,0 +1,220 @@ +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) +- [训练结果](#训练结果.md) +- [高级参考](#高级参考.md) +

## 基本信息

+ +**发布者(Publisher):Huawei** + +**应用领域(Application Domain):Computer Vision** + +**版本(Version):1.0** + +**修改时间(Modified) :2022.04.15** + +**大小(Size):2.57MB** + +**框架(Framework):TensorFlow 1.15.0** + +**模型格式(Model Format):ckpt** + +**精度(Precision):fp32** + +**处理器(Processor):昇腾910** + +**应用级别(Categories):Research** + +**描述(Description):基于TensorFlow框架进行条件图像生成的训练代码** + +

## 概述

+ +BigGAN 是一种用于条件图像生成的 GAN 网络,可以用于生成指定类别的高质量图像。BigGAN 继承了 SAGAN 的主要思想,使用了自注意力模块来增强网络捕捉全局特征的能力,同时使用 hinge loss、谱归一化以及 TTUR 来增强训练的稳定性和效率。在此之上,BigGAN 通过大量的实验探索了大规模训练 GAN 网络的技巧,并通过加大批大小以及网络的深度和广度,大幅提升了模型性能。为了更有效地完成条件图像生成的任务,BigGAN 利用 shared-embedding、skip-z 和条件批归一化来向 Generator 提供类别信息,用投影的方法向 Discriminator 提供类别信息,进一步提升了模型性能。此外,BigGAN 还提出了截断技巧以及增强截断技巧稳定性的正交正则化用于平衡图像生成质量与多样性。 + +- 参考论文: + + https://arxiv.org/abs/1809.11096 + +- 参考实现: + + https://github.com/MingtaoGuo/BigGAN-tensorflow + +- 适配昇腾 AI 处理器的实现: + + + https://gitee.com/ascend/ModelZoo-TensorFlow/tree/master/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow + + + +- 通过Git获取对应commit\_id的代码方法如下: + + ``` + git clone {repository_url} # 克隆仓库的代码 + cd {repository_name} # 切换到模型的代码仓目录 + git checkout {branch} # 切换到对应分支 + git reset --hard {commit_id} # 代码设置到对应的commit_id + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + ``` + +## 默认配置 + +- 训练数据集预处理(以 Cifar-10 训练集为例,仅作为用户参考示例): + + - 图像的输入尺寸为 $32\times32$ + - 图像输入格式:.mat + - 生成图像类别数:10 +- 训练超参 + + - Batch size:64 + - Train step: 100000 + - Train step: 1000 + - Generator lr:1e-4;Discriminator lr:4e-4;beta1:0.0;beta2:0.9 + - Discriminator train step:2 + - Orthogonal regularization strength:1e-4 + - Truncation threshold:2.0 +- 模型结构超参 + - Base channel:96 + - Latent space dimensionality:120 + - Shared embedding dimensionality:128 + + + +## 支持特性 + +| 特性列表 | 是否支持 | +| ---------- | -------- | +| 分布式训练 | 否 | +| 混合精度 | 否 | +| 并行数据 | 否 | + +
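
概述中提到的截断技巧(truncation trick)通过限制潜向量的采样范围来权衡生成质量与多样性。下面给出一段噪声采样的示意代码,接口与本仓库 utils.py 中的 truncated_noise_sample(batch_size, z_dim, truncation) 保持一致,具体实现请以仓库代码为准:

```python
# 示意代码(非仓库原实现):从截断正态分布中采样潜向量
import numpy as np
from scipy.stats import truncnorm


def truncated_noise_sample(batch_size, z_dim, truncation=2.0):
    # 只保留 [-truncation, truncation] 区间内的正态样本;
    # 阈值越小,生成图像质量越高、多样性越低
    return truncnorm.rvs(-truncation, truncation,
                         size=(batch_size, z_dim)).astype(np.float32)


z = truncated_noise_sample(batch_size=64, z_dim=120, truncation=2.0)  # 默认超参
```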

## 训练环境准备

+ +1. 硬件环境准备请参见各硬件产品文档"[驱动和固件安装升级指南]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)"。需要在硬件设备上安装与CANN版本配套的固件与驱动。 +2. 宿主机上需要安装Docker并登录[Ascend Hub中心](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm)获取镜像。 + + 当前模型支持的镜像列表如[表1](#zh-cn_topic_0000001074498056_table1519011227314)所示。 + + **表 1** 镜像列表 + + + + + + + + + + + + +

| 镜像名称 | 镜像版本 | 配套CANN版本 |
| --------------------- | -------- | ------------ |
| ascend-tensorflow-arm | 20.2.0   | 20.2         |

## 快速上手

+ +- 数据集准备 +1. 模型训练使用Cifar-10/ImageNet数据集,数据集请用户自行获取。 + +## 模型训练 + +- 单击“立即下载”,并选择合适的下载方式下载源码包。 + +- 启动训练之前,首先要配置程序运行相关环境变量。 + + 环境变量配置信息参见: + + [Ascend 910训练平台环境变量设置](https://gitee.com/ascend/modelzoo/wikis/Ascend%20910%E8%AE%AD%E7%BB%83%E5%B9%B3%E5%8F%B0%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E8%AE%BE%E7%BD%AE?sort_id=3148819) + +- 单卡训练 + + 1. 配置训练参数。 + + 首先在脚本test/train_full_1p.sh中,配置batch_size、data_path、output_path等参数,请用户根据实际路径配置data_path,或者在启动训练的命令行中以参数形式下发。 + + ``` + batch_size=64 + data_path="../dataset" + output_path="../output" + ``` + + 2. 启动训练。 + + 启动单卡训练 (脚本为AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p.sh) + + ``` + bash train_full_1p.sh --data_path=../dataset --output_path=../output + ``` + +
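
训练完成后,可参考下述示意代码评估生成图像的 IS 与 FID(流程与仓库内 calc_IS_FID.py 的 `__main__` 一致;其中 fake_img_path 假设已由 generate_fake_img.py 生成图片,各参数取脚本默认值,请按实际情况调整):

```python
# 示意代码:调用仓库内 calc_IS_FID 的接口计算 IS/FID
import argparse
import numpy as np
from utils import read_images
from calc_IS_FID import get_IS, get_FID

# 参数仿照 calc_IS_FID.py 的默认值,仅供参考
args = argparse.Namespace(
    chip="gpu", gpu="0", batch_size=100,
    fake_img_path="../output/test/fake/32",
    precalculated_path="./metrics/res/stats_tf/fid_stats_cifar10_train.npz")

images_list = read_images(args.fake_img_path)       # 读取生成图片(0~255)
images = np.array(images_list).astype(np.float32)   # (N, H, W, C)

is_mean, is_std = get_IS(images_list, args, splits=10)
fid = get_FID(images, args)
print("IS : (%f, %f)" % (is_mean, is_std))
print("FID : %f" % fid)
```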

## 训练结果

+ +- 精度结果比对 + +| 精度指标项 | 论文发布 | GPU实测 | NPU实测 | +| --------------- | -------- | ------- | ------- | +| Inception score | 9.22 | 6.66 | 6.98 | +| FID | 14.73 | 45.06 | 38.47 | + +- 性能结果比对 + +| 性能指标项 | GPU实测 | NPU实测 | +| ---------- | ------- | ------- | +| StepTime | 347ms | 732ms | + +*** + +- NPU 训练模型生成 $32\times32$ 图片 + + ![horse2car](assets/horse2car.gif) + + ![frog2dog](assets/frog2dog.gif) + + ![truck2bird](assets/truck2bird.gif) + + ![](assets/gen_image.jpg) + +

## 高级参考

+ +## 脚本和示例代码 + +``` +├── train.py //网络训练与测试代码 +├── README.md //代码说明文档 +├── pb_frozen.py //训练模型固化为pb模型代码 +├── test_pb.py //测试pb模型代码 +├── requirements.txt //训练python依赖列表 +├── utils.py //工具函数代码 +├── ops.py //BigGAN基础模块代码 +├── networks_32.py //用于训练32x32图像的网络结构代码 +├── networks_64.py //用于训练64x64图像的网络结构代码 +├── networks_128.py //用于训练128x128图像的网络结构代码 +├── help_modelarts.py //Modelarts训练工具代码 +├── boot_modelarts.py //Modelarts训练代码 +├── generate_fake_img.py //在线推理代码 +├── calc_IS_FID.py //计算IS、FID代码 +├── input2bin.py //将输入转化为.bin,用于离线推理 +├── test_om.py //测试离线推理精度 +├── test +│ ├──train_performance_1p.sh //单卡训练验证性能启动脚本 +│ ├──train_full_1p.sh //单卡全量训练启动脚本 +│ ├──train_full_1p_modelarts.sh //modelarts全量训练启动脚本 +├── scripts +│ ├──run_1p.sh //Modelarts训练脚本 +│ ├──run_cpu.sh //CPU训练脚本 +│ ├──run_gpu.sh //GPU训练脚本 +│ ├──run_msprof.sh //解析Profiling数据脚本 +├── metrics //计算IS、FID相关代码 +│ ├──... +``` + +## 训练过程 + +1. 通过“模型训练”中的训练指令启动单卡卡训练。 + +2. 参考脚本的模型存储路径为../output/model/xx/model.ckpt,其中"xx"为训练时的图片大小,取值为32/64/128。 \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/frog2dog.gif b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/frog2dog.gif new file mode 100644 index 0000000000000000000000000000000000000000..55a4d8e1a9f5a85588e708f0875d32e097fcde9e Binary files /dev/null and b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/frog2dog.gif differ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/gen_image.jpg b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/gen_image.jpg new file mode 100644 index 0000000000000000000000000000000000000000..447b710c54910c27fc12728f6cc35c58dfebec84 Binary files /dev/null and b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/gen_image.jpg differ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/horse2car.gif b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/horse2car.gif new file mode 100644 index 0000000000000000000000000000000000000000..f3ec334b6158962e44bcc45a77be5ce25495d340 Binary files /dev/null and b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/horse2car.gif differ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/truck2bird.gif b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/truck2bird.gif new file mode 100644 index 0000000000000000000000000000000000000000..1795e7db637bff623ebee4a87dd631ea2e23e6e5 Binary files /dev/null and b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/assets/truck2bird.gif differ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/boot_modelarts.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/boot_modelarts.py new file mode 100644 index 0000000000000000000000000000000000000000..237461a8bac70e64ff82a27a91bd9e71f39c0d55 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/boot_modelarts.py @@ -0,0 +1,57 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +This is the boot file for ModelArts platform. +Firstly, the train datasets are copyed from obs to ModelArts. +Then, the string of train shell command is concated and using 'os.system()' to execute +""" +import os +import argparse +from help_modelarts import obs_data2modelarts + +print(os.system('env')) + +if __name__ == '__main__': + # Note: the code dir is not the same as work dir on ModelArts Platform!!! + code_dir = os.path.dirname(__file__) + work_dir = os.getcwd() + print("===>>>code_dir:{}, work_dir:{}".format(code_dir, work_dir)) + + parser = argparse.ArgumentParser() + parser.add_argument("--train_url", type=str, default="./output", help="output path in OBS") + parser.add_argument("--data_url", type=str, default="./dataset", help="data path in OBS") + parser.add_argument("--modelarts_data_dir", type=str, default="/cache/dataset", + help="data path in ModelArts platform") + parser.add_argument("--modelarts_result_dir", type=str, default="/cache/result", + help="output path in ModelArts platform") + # parser.add_argument("--num_gpus", type=int, default=1, help="number of gpu") + config = parser.parse_args() + + print("--------config----------") + for k in list(vars(config).keys()): + print("key:{}: value:{}".format(k, vars(config)[k])) + print("--------config----------") + + # copy dataset from obs to modelarts + obs_data2modelarts(config) + + # start to train on Modelarts platform + if not os.path.exists(config.modelarts_result_dir): + os.makedirs(config.modelarts_result_dir) + bash_header = os.path.join(code_dir, 'test/train_full_1p_modelarts.sh') + arg_url = '--data_path=%s --output_path=%s --obs_url=%s' % (config.modelarts_data_dir, config.modelarts_result_dir, + config.train_url) + bash_command = 'bash %s %s' % (bash_header, arg_url) + print("bash command:", bash_command) + os.system(bash_command) \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/calc_IS_FID.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/calc_IS_FID.py new file mode 100644 index 0000000000000000000000000000000000000000..45e5dbd8b4369010efdf301a8c48702c9cf858e5 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/calc_IS_FID.py @@ -0,0 +1,77 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import numpy as np +import tensorflow as tf +import os +from metrics.inception_score_official_tf import get_inception_score +from metrics.fid_official_tf import calculate_activation_statistics, calculate_frechet_distance +from utils import read_images, session_config + + +def get_FID(images, args): + # load from precalculated + f = np.load(args.precalculated_path) + mu1, sigma1 = f['mu'][:], f['sigma'][:] + f.close() + + # session configuration + config = session_config(args) + + # calc from image ndarray + # images should be Numpy array of dimension (N, H, W, C). images should be in 0~255 + with tf.Session(config=config) as sess: + sess.run(tf.global_variables_initializer()) + mu2, sigma2 = calculate_activation_statistics(images, sess, batch_size=args.batch_size) + return calculate_frechet_distance(mu1, sigma1, mu2, sigma2) + + +def get_IS(images_list, args, splits=10): + return get_inception_score(images_list, splits=splits, sess_config=session_config(args)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--chip", type=str, default="gpu", help="run on which chip, cpu or gpu or npu") + parser.add_argument("--fake_img_path", type=str, default="../output/test/fake/32", help="fake image path") + parser.add_argument("--gpu", type=str, default="0", help="GPU to use (leave blank for CPU only)") + parser.add_argument("--batch_size", type=int, default=100, help="batch size") + parser.add_argument("--precalculated_path", type=str, default="./metrics/res/stats_tf/fid_stats_cifar10_train.npz", + help="precalculated statistics for datasets, used in FID") + args = parser.parse_args() + + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + + images_list = read_images(args.fake_img_path) + images = np.array(images_list).astype(np.float32) + + fid_score = get_FID(images, args) + is_mean, is_std = get_IS(images_list, args, splits=10) + + print("IS : (%f, %f)" % (is_mean, is_std)) + print("FID : %f" % fid_score) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/fusion_switch.cfg b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/fusion_switch.cfg new file mode 100644 index 0000000000000000000000000000000000000000..89d8736b8b86fa16ee319bce45a16cb5616a50fe --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/fusion_switch.cfg @@ -0,0 +1,10 @@ +{ + "Switch":{ + "GraphFusion":{ + "ALL":"off" + }, + "UBFusion":{ + "ALL":"off" + } + } +} \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/generate_fake_img.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/generate_fake_img.py new file mode 100644 index 0000000000000000000000000000000000000000..de620ba8c06813794de03d4c8f2be1467b7c692a --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/generate_fake_img.py @@ -0,0 +1,206 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf +import numpy as np +from PIL import Image +from utils import truncated_noise_sample, restore_img +import datetime +import os +import argparse +import math +import shutil +import imageio +import cv2 + + +def consecutive_category_morphing(args, fake_img_path, sess, fake_img_morphing_op, z, y, y_end, alpha, + class1=0, class2=1, fps=2): + if os.path.exists(fake_img_path): + shutil.rmtree(fake_img_path) # delete previous images + os.makedirs(fake_img_path) + + Z = truncated_noise_sample(args.batch_size, args.z_dim, args.truncation) + + count = 0 + img_paths = [] + for Alpha in [i / 10.0 for i in range(10, -1, -1)]: + Alpha = np.ones([args.batch_size, 1]) * Alpha + fake = sess.run(fake_img_morphing_op, feed_dict={z: Z, y: class1 * np.ones([args.batch_size]), + y_end: class2 * np.ones([args.batch_size]), + alpha: Alpha}) + # display a batch of images in a grid + grid_size = int(args.batch_size ** 0.5) + concat_img = np.zeros([grid_size * args.img_h, grid_size * args.img_w, 3]) + c = 0 + for i in range(grid_size): + for j in range(grid_size): + resized_img = cv2.resize(fake[c], dsize=(args.img_h, args.img_w), interpolation=cv2.INTER_LINEAR) + concat_img[i * args.img_h: i * args.img_h + args.img_h, j * args.img_w: j * args.img_w + args.img_w] = resized_img + c += 1 + img_path = os.path.join(fake_img_path, "%dto%d_%d.jpg" % (class1, class2, count)) + Image.fromarray(np.uint8(restore_img(concat_img))).save(img_path) + img_paths.append(img_path) + count += 1 + + # make gif + gif_images = [] + for path in img_paths: + gif_images.append(imageio.imread(path)) + gif_path = os.path.join(fake_img_path, "%dto%d.gif" % (class1, class2)) + imageio.mimsave(gif_path, gif_images, fps=fps) + + +def generate_img_of_one_class(args, class_labels, img_name, fake_img_path, sess, fake_img_op, z, y): + Z = truncated_noise_sample(args.batch_size, args.z_dim, args.truncation) + fake = sess.run(fake_img_op, feed_dict={z: Z, y: class_labels}) + + # display a batch of images in a grid + grid_size = int(args.batch_size ** 0.5) + concat_img = np.zeros([grid_size * args.img_h, grid_size * args.img_w, 3]) + c = 0 + for i in range(grid_size): + for j in range(grid_size): + resized_img = cv2.resize(fake[c], dsize=(args.img_h, args.img_w), interpolation=cv2.INTER_LINEAR) + concat_img[i * args.img_h: i * args.img_h + args.img_h, j * args.img_w: j * args.img_w + args.img_w] = resized_img + c += 1 + Image.fromarray(np.uint8(restore_img(concat_img))).save(os.path.join(fake_img_path, img_name)) + + +def generate_img_by_class(args, fake_img_path, sess, fake_img_op, z, y): + """For each class, generate some images and display them in a grid""" + if os.path.exists(fake_img_path): + shutil.rmtree(fake_img_path) # delete previous images + 
os.makedirs(fake_img_path) + + for nums_c in range(args.num_classes): + class_labels = nums_c * np.ones([args.batch_size]) + img_name = "%d.jpg" % nums_c + generate_img_of_one_class(args, class_labels, img_name, fake_img_path, sess, fake_img_op, z, y) + + +def generate_img(args, fake_img_path, sess, fake_img_op, z, y): + """generate fake images with random classes""" + if os.path.exists(fake_img_path): + shutil.rmtree(fake_img_path) # delete previous images + os.makedirs(fake_img_path) + + for b in range(math.ceil(args.gen_num // args.batch_size)): + Z = truncated_noise_sample(args.batch_size, args.z_dim, args.truncation) + fake = sess.run(fake_img_op, feed_dict={z: Z, y: np.random.randint(args.num_classes, size=args.batch_size)}) + + for i in range(args.batch_size): + img = cv2.resize(fake[i], dsize=(args.img_h, args.img_w), interpolation=cv2.INTER_LINEAR) + Image.fromarray(np.uint8(restore_img(img))).save(os.path.join(fake_img_path, "%d_fake.jpg" % (b * args.batch_size + i))) + + +# TODO: add NPU config +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # platform arguments (Huawei Ascend) + parser.add_argument("--chip", type=str, default="gpu", help="run on which chip, cpu or gpu or npu") + # data arguments + parser.add_argument("--gen_num", type=int, default=5000, help="number of generated images") + parser.add_argument("--output", type=str, default=os.path.join("..", "output"), help="output path") + parser.add_argument("-b", "--batch_size", type=int, default=64, help="batch size") + parser.add_argument("-c", "--num_classes", type=int, default=10, help="number of classes") + parser.add_argument("--img_h", type=int, default=32, help="image height") + parser.add_argument("--img_w", type=int, default=32, help="image width") + parser.add_argument("--train_img_size", type=int, default=32, + help="image will be resized to this size when training") + # model arguments + parser.add_argument("--base_channel", type=int, default=96, help="base channel number for G and D") + parser.add_argument("--z_dim", type=int, default=120, help="latent space dimensionality") + parser.add_argument("--truncation", type=float, default=2.0, help="truncation threshold") + parser.add_argument("--ema", type=bool, default=True, help="use exponential moving average for G") + parser.add_argument("--shared_dim", type=int, default=128, help="shared embedding dimensionality") + # function arguments + parser.add_argument("--function", type=str, default="fake", + help="generate fake images or do category morphing (fake / morphing)") + parser.add_argument("--morphing_class", type=str, default="0_1", + help="generate category morphing images between two classes") + args = parser.parse_args() + + # use different architectures for different image sizes + if args.train_img_size == 128: + from networks_128 import Generator, Discriminator + elif args.train_img_size == 64: + from networks_64 import Generator, Discriminator + elif args.train_img_size == 32: + from networks_32 import Generator, Discriminator + + # get current time + now = datetime.datetime.now() + now_str = now.strftime('%Y_%m_%d_%H_%M_%S') + # check output dir + test_path = os.path.join(args.output, "test") + fake_img_path = os.path.join(test_path, "fake", str(args.train_img_size)) + image_of_each_class_path = os.path.join(test_path, "image_of_each_class", str(args.train_img_size)) + category_morphing_path = os.path.join(test_path, "category_morphing", str(args.train_img_size)) + # get model path + model_path = os.path.join(args.output, "model", 
str(args.train_img_size), "model.ckpt") + ema_model_path = os.path.join(args.output, "model", str(args.train_img_size), "ema.ckpt") + resume_path = ema_model_path if args.ema else model_path + + if args.chip == "gpu": + config = tf.ConfigProto(allow_soft_placement=True) + config.gpu_options.allow_growth = True + elif args.chip == 'cpu': + config = tf.ConfigProto() + + train_phase = tf.Variable(tf.constant(False, dtype=tf.bool), name="train_phase") + # train_phase = tf.placeholder(tf.bool) # is training or not + z = tf.placeholder(tf.float32, [args.batch_size, args.z_dim]) # latent vector + y = tf.placeholder(tf.int32, [None]) # class info + y_end = tf.placeholder(tf.int32, [None]) # category morphing + alpha = tf.placeholder(tf.float32, [None, 1]) + + G = Generator("generator", args.base_channel) + with tf.variable_scope("generator", reuse=tf.AUTO_REUSE): + embed_w = tf.get_variable("embed_w", [args.num_classes, args.shared_dim], initializer=tf.orthogonal_initializer()) + + if args.function == "fake": + fake_img = G(z, train_phase, y, embed_w, args.num_classes) + elif args.function == "morphing": + fake_img_morphing = G(z, train_phase, y, embed_w, args.num_classes, y_end, alpha) + + with tf.Session(config=config) as sess: + sess.run(tf.global_variables_initializer()) + # load model + saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "generator")) + saver.restore(sess, resume_path) + + if args.function == "fake": + # generate fake images + generate_img(args, fake_img_path, sess, fake_img, z, y) + # generate fake images for each class + generate_img_by_class(args, image_of_each_class_path, sess, fake_img, z, y) + elif args.function == "morphing": + # category morphing + classes = args.morphing_class.split("_") + consecutive_category_morphing(args, category_morphing_path, sess, fake_img_morphing, z, y, y_end, alpha, + class1=int(classes[0]), class2=int(classes[1]), fps=2) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/help_modelarts.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/help_modelarts.py new file mode 100644 index 0000000000000000000000000000000000000000..c717183d6d7d215fb006dbec0d676c92c74474ed --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/help_modelarts.py @@ -0,0 +1,52 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import datetime +import moxing as mox + +def obs_data2modelarts(config): + """ + Copy train data from obs to modelarts by using moxing api. 
+ """ + start = datetime.datetime.now() + print("===>>>Copy files from obs:{} to modelarts dir:{}".format(config.data_url, config.modelarts_data_dir)) + mox.file.copy_parallel(src_url=config.data_url, dst_url=config.modelarts_data_dir) + end = datetime.datetime.now() + print("===>>>Copy from obs to modelarts, time use:{}(s)".format((end - start).seconds)) + files = os.listdir(config.modelarts_data_dir) + print("===>>>Files:", files) + + +def modelarts_result2obs(config): + """ + Copy debug data from modelarts to obs. + According to the swich flags, the debug data may contains auto tune repository, + dump data for precision comparision, even the computation graph and profiling data. + """ + ## copy result from modelarts to obs + obs_result_dir = os.path.join(config.obs_dir, 'result') + if not mox.file.exists(obs_result_dir): + mox.file.make_dirs(obs_result_dir) + mox.file.copy_parallel(src_url=config.output, dst_url=obs_result_dir) + print("===>>>Copy Event or Checkpoint from modelarts dir:{} to obs:{}".format(config.output, obs_result_dir)) + + ## Copy profiling data. Comment this snippets if npu_profiling is off. + if config.profiling: + modelarts_profiling_dir = config.profiling_dir + print("Profiling dir:", modelarts_profiling_dir) + obs_profiling_dir = os.path.join(config.obs_dir, 'npu_profiling') + if not mox.file.exists(obs_profiling_dir): + mox.file.make_dirs(obs_profiling_dir) + mox.file.copy_parallel(modelarts_profiling_dir, obs_profiling_dir) + print("===>>>Profiling data:{} on OBS dir:{}".format(mox.file.list_directory(obs_profiling_dir), obs_profiling_dir)) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/input2bin.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/input2bin.py new file mode 100644 index 0000000000000000000000000000000000000000..e8239af6c8736398c37b43e9e2114e91db55be26 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/input2bin.py @@ -0,0 +1,59 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +from utils import truncated_noise_sample, check_dir +import numpy as np +import os + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # data arguments + parser.add_argument("--gen_num", type=int, default=5000, help="number of generated images") + parser.add_argument("--output", type=str, default="../output", help="output path") + parser.add_argument("-c", "--num_classes", type=int, default=10, help="number of classes") + parser.add_argument("--img_h", type=int, default=32, help="image height") + parser.add_argument("--img_w", type=int, default=32, help="image width") + parser.add_argument("--train_img_size", type=int, default=32, + help="image will be resized to this size when training") + # model arguments + parser.add_argument("--z_dim", type=int, default=120, help="latent space dimensionality") + parser.add_argument("--truncation", type=float, default=2.0, help="truncation threshold") + args = parser.parse_args() + + bin_path = os.path.join(args.output, "input_bin", str(args.train_img_size)) + z_bin_path = os.path.join(bin_path, "z") + y_bin_path = os.path.join(bin_path, "y") + check_dir(z_bin_path) + check_dir(y_bin_path) + + for i in range(args.gen_num): + z = truncated_noise_sample(1, args.z_dim, args.truncation) + y = np.random.randint(args.num_classes, size=(1, 1)) + z.tofile(os.path.join(z_bin_path, f"{i}.bin")) + y.tofile(os.path.join(y_bin_path, f"{i}.bin")) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/.gitignore b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..45259a338b72a8010c3616b69994ad9d8eb45368 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/.gitignore @@ -0,0 +1,109 @@ +.idea/ +*.tgz +*.npz +!fid_stats_cifar10_train.npz + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/LICENSE b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..19a95744785f913edf324042e7db6dd676bdea27 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Tzu-Heng Lin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/README.md b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7a81e88f74d33aa4d5e9d98038b948be28ec1da --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/README.md @@ -0,0 +1,156 @@ +# metrics + +This repo contains information/implementation (PyTorch, Tensorflow) about IS and FID score. This is a handy toolbox that you can easily add to your projects. TF implementations are intended to compute the exact same output as the official ones for reporting in papers. Discussion/PR/Issues are very welcomed. + + + +## Usage + +Put this `metrics/` folder in your projects, and __see below (Pytorch), and each .py's head comment__ for usage. + +We also need to download some files in [res/](res/), see [res/README.md](res/README.md) for more details. 
+ + + +## TF implementations (almost the same as official, just changed the interface, can be reported in papers) + +- [x] [inception_score_official_tf.py](inception_score_official_tf.py): inception score +- [x] [fid_official_tf.py](fid_official_tf.py): FID score +- [x] [precalc_stats_official_tf.py](precalc_stats_official_tf.py): calculate stats (mu, sigma) + + + +## Pytorch Implementation (CANNOT report in papers, but can get an quick view) + +* Requirements + + * pytorch, torchvision, scipy, numpy, tqdm +* [is_fid_pytorch.py](is_fid_pytorch.py) + * [x] inception score, get around `mean=9.67278, std=0.14992` for CIFAR-10 train data when n_split=10 + * [x] FID score + * [x] calculate stats for custom images in a folder (mu, sigma) + * [x] multi-GPU support by `nn.DataParallel` + * e.g. `CUDA_VISIBLE_DEVICES=0,1,2,3` will use 4 GPU. +* command line usage + * calculate IS, FID + ```bash + # calc IS score on CIFAR10, will download CIFAR10 data to ../data/cifar10 + python is_fid_pytorch.py + + # calc IS score on custom images in a folder/ + python is_fid_pytorch.py --path foldername/ + + # calc IS, FID score on custom images in a folder/, compared to CIFAR10 (given precalculated stats) + python is_fid_pytorch.py --path foldername/ --fid res/stats_pytorch/fid_stats_cifar10_train.npz + + # calc FID on custom images in two folders/ + python is_fid_pytorch.py --path foldername1/ --fid foldername2/ + + # calc FID on two precalculated stats + python is_fid_pytorch.py --path res/stats_pytorch/fid_stats_cifar10_train.npz --fid res/stats_pytorch/fid_stats_cifar10_train.npz + ``` + + * precalculate stats + ```bash + # precalculate stats store as npz for CIFAR 10, will download CIFAR10 data to ../data/cifar10 + python is_fid_pytorch.py --save-stats-path res/stats_pytorch/fid_stats_cifar10_train.npz + + # precalculate stats store as npz for images in folder/ + python is_fid_pytorch.py --path foldername/ --save-stats-path res/stats_pytorch/fid_stats_folder.npz + ``` + + + +* in code usage + + * `mode=1`: image tensor has already normalized by `mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]` + * `mode=2`: image tensor has already normalized by `mean=[0.500, 0.500, 0.500], std=[0.500, 0.500, 0.500]` + ```python + from metrics import is_fid_pytorch + + # using precalculated stats (.npz) for FID calculation + is_fid_model = is_fid_pytorch.ScoreModel(mode=2, stats_file='res/stats_pytorch/fid_stats_cifar10_train.npz', cuda=cuda) + imgs_nchw = torch.Tensor(50000, C, H, W) # torch.Tensor in -1~1, normalized by mean=[0.500, 0.500, 0.500], std=[0.500, 0.500, 0.500] + is_mean, is_std, fid = is_fid_model.get_score_image_tensor(imgs_nchw) + + # we can also pass in mu, sigma for get_score_image_tensor() + is_fid_model = is_fid_pytorch.ScoreModel(mode=2, cuda=cuda) + mu, sigma = is_fid_pytorch.read_stats_file('res/stats_pytorch/fid_stats_cifar10_train.npz') + is_mean, is_std, fid = is_fid_model.get_score_image_tensor(imgs_nchw, mu1=mu, sigma1=sigma) + + # if no need FID + is_fid_model = is_fid_pytorch.ScoreModel(mode=2, cuda=cuda) + is_mean, is_std, _ = is_fid_model.get_score_image_tensor(imgs_nchw) + + # if want stats (mu, sigma) for imgs_nchw, send in return_stats=True + is_mean, is_std, _, mu, sigma = is_fid_model.get_score_image_tensor(imgs_nchw, return_stats=True) + + # from pytorch dataset, use get_score_dataset(), instead of get_score_image_tensor(), other usage is the same + cifar = dset.CIFAR10(root='../data/cifar10', download=True, + transform=transforms.Compose([ + transforms.Resize(32), + 
transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) + ) + IgnoreLabelDataset(cifar) + is_mean, is_std, _ = is_fid_model.get_score_dataset(IgnoreLabelDataset(cifar)) + ``` + + +## TODO + +- [ ] Refactor TF implementation of IS, FID Together +- [ ] MS-SSIM score - PyTorch +- [ ] MS-SSIM score - Tensorflow + + + +## Info + +### Inception Score (IS) + +* Assumption + * MEANINGFUL: The generated image should be clear, the output probability of a classifier network should be [0.9, 0.05, ...] (largely skewed to a class). $p(y|\mathbf{x})$ is of __low entropy__. + * DIVERSITY: If we have 10 classes, the generated image should be averagely distributed. So that the marginal distribution $p(y) = \frac{1}{N} \sum_{i=1}^{N} p(y|\mathbf{x}^{(i)})$ is of __high entropy__. + * Better models: KL Divergence of $p(y|\mathbf{x})$ and $p(y)$ should be high. +* Formulation + * $\mathbf{IS} = \exp (\mathbb{E}_{\mathbf{x} \sim p_g} D_{KL} [p(y|\mathbf{x}) || p(y)] )$ + * where + * $\mathbf{x}$ is sampled from generated data + * $p(y|\mathbf{x})​$ is the output probability of Inception v3 when input is $\mathbf{x}​$ + * $p(y) = \frac{1}{N} \sum_{i=1}^{N} p(y|\mathbf{x}^{(i)})$ is the average output probability of all generated data (from InceptionV3, 1000-dim vector) + * $D_{KL} (\mathbf{p}||\mathbf{q}) = \sum_{j} p_{j} \log \frac{p_j}{q_j}$, where $j$ is the dimension of the output probability. + +* Explanation + * $p(y)$ is a evenly distributed vector + * larger $\mathbf{IS}​$ score -> larger KL divergence -> larger diversity and clearness +* Reference + * Official TF implementation is in [openai/improved-gan](https://github.com/openai/improved-gan) + * Pytorch Implementation: [sbarratt/inception-score-pytorch](https://github.com/sbarratt/inception-score-pytorch) + * TF seemed to provide a [good implementation](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py) + * [scipy.stats.entropy](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html) + * [zhihu: Inception Score 的原理和局限性](https://zhuanlan.zhihu.com/p/54146307) + * [A Note on the Inception Score](https://arxiv.org/abs/1801.01973) + + + +### Fréchet Inception Distance (FID) + +* Formulation + * $\mathbf{FID} = ||\mu_r - \mu_g||^2 + Tr(\Sigma_{r} + \Sigma_{g} - 2(\Sigma_r \Sigma_g)^{1/2})​$ + * where + * $Tr$ is [trace of a matrix (wikipedia)](https://en.wikipedia.org/wiki/Trace_(linear_algebra)) + * $X_r \sim \mathcal{N}(\mu_r, \Sigma_r)$ and $X_g \sim \mathcal{N}(\mu_g, \Sigma_g)$ are the 2048-dim activations the InceptionV3 pool3 layer + * $\mu_r$ is the mean of real photo's feature + * $\mu_g$ is the mean of generated photo's feature + * $\Sigma_r$ is the covariance matrix of real photo's feature + * $\Sigma_g$ is the covariance matrix of generated photo's feature + +* Reference + * Official TF implementation: [bioinf-jku/TTUR](https://github.com/bioinf-jku/TTUR) + * Pytorch Implementation: [mseitzer/pytorch-fid](https://github.com/mseitzer/pytorch-fid) + * TF seemed to provide a [good implementation](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py) + * [zhihu: Frechet Inception Score (FID)](https://zhuanlan.zhihu.com/p/54213305) + * [Explanation from Neal Jean](https://nealjean.com/ml/frechet-inception-distance/) + diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/__init__.py 
b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/fid_official_tf.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/fid_official_tf.py new file mode 100644 index 0000000000000000000000000000000000000000..9380fa8fdf18e657954a58c81635e55653057896 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/fid_official_tf.py @@ -0,0 +1,402 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +@Brief: + Tensorflow implementation of FID score, should be the same as the official one + modified from official inception score implementation + [bioinf-jku/TTUR](https://github.com/bioinf-jku/TTUR) +@Author: lzhbrian (https://lzhbrian.me) +@Date: 2019.4.7 +@Usage: + # CMD + # from 2 precalculated stats + python fid_official_tf.py res/stats_tf/fid_stats_imagenet_valid.npz res/stats_tf/fid_stats_imagenet_train.npz --gpu 0 + + # from 1 precalculated stats, 1 image foldername/ + python fid_official_tf.py res/stats_tf/fid_stats_imagenet_valid.npz /path/to/image/foldername/ --gpu 0 + + # from 2 image foldername/ + python fid_official_tf.py /path/to/image/foldername1/ /path/to/image/foldername2/ --gpu 0 + + # used in code + ``` + import tensorflow as tf + + # load from precalculated + f = np.load('res/stats_tf/fid_stats_imagenet_train.npz') + mu1, sigma1 = f['mu'][:], f['sigma'][:] + f.close() + + # calc from image ndarray + # images should be Numpy array of dimension (N, H, W, C). 
images should be in 0~255 + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + mu2, sigma2 = fid_official_tf.calculate_activation_statistics(images, sess, batch_size=100) + fid_score = calculate_frechet_distance(mu1, sigma1, mu2, sigma2) + ``` + +@Note: + Need to first download stats_tf of datasets in stats_tf/, see README.md + + also, the same as inception_score_official_tf.py, the inception model used + contains resize and normalization layers + so the input of our images should be 0~255, and arbitrary HxW size + + For calculating mu and sigma for foldername/, see precalc_stats_official_tf.py +""" + +import numpy as np +import os +import tensorflow as tf +import imageio +from PIL import Image +# from scipy.misc import imread +from scipy import linalg +import pathlib +import urllib +import warnings +from tqdm import tqdm + + +def imread(filename): + x = imageio.imread(filename) + return np.array(Image.fromarray(x)) + +cur_dirname = os.path.dirname(os.path.abspath(__file__)) + +MODEL_DIR = '%s/res/' % cur_dirname + +class InvalidFIDException(Exception): + pass + + +def create_inception_graph(pth): + """Creates a graph from saved GraphDef file.""" + # Creates graph from saved graph_def.pb. + with tf.gfile.FastGFile(pth, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + _ = tf.import_graph_def(graph_def, name='') + + +# ------------------------------------------------------------------------------- + + +# code for handling inception net derived from +# https://github.com/openai/improved-gan/blob/master/inception_score/model.py +def _get_inception_layer(sess): + """Prepares inception net for batched usage and returns pool_3 layer. """ + layername = 'pool_3:0' + pool3 = sess.graph.get_tensor_by_name(layername) + # ops = pool3.graph.get_operations() + # for op_idx, op in enumerate(ops): + # for o in op.outputs: + # shape = o.get_shape() + # if shape._dims != []: + # shape = [s.value for s in shape] + # new_shape = [] + # for j, s in enumerate(shape): + # if s == 1 and j == 0: + # new_shape.append(None) + # else: + # new_shape.append(s) + # o.__dict__['_shape_val'] = tf.TensorShape(new_shape) + return pool3 + + +# ------------------------------------------------------------------------------- + + +def get_activations(images, sess, batch_size=50, verbose=False): + """Calculates the activations of the pool_3 layer for all images. + Params: + -- images : Numpy array of dimension (n_images, hi, wi, 3). The values + must lie between 0 and 256. + -- sess : current session + -- batch_size : the images numpy array is split into batches with batch size + batch_size. A reasonable batch size depends on the disposable hardware. + -- verbose : If set to True and parameter out_step is given, the number of calculated + batches is reported. + Returns: + -- A numpy array of dimension (num images, 2048) that contains the + activations of the given tensor when feeding inception with the query tensor. + """ + inception_layer = _get_inception_layer(sess) + d0 = images.shape[0] + if batch_size > d0: + print("warning: batch size is bigger than the data size. 
setting batch size to data size") + batch_size = d0 + n_batches = d0 // batch_size + n_used_imgs = n_batches * batch_size + pred_arr = np.empty((n_used_imgs, 2048)) + for i in tqdm(range(n_batches)): + if verbose: + print("\rPropagating batch %d/%d" % (i + 1, n_batches)) + start = i * batch_size + end = start + batch_size + batch = images[start:end] + pred = sess.run(inception_layer, {'InputTensor:0': batch}) + pred_arr[start:end] = pred.reshape(batch_size, -1) + if verbose: + print(" done") + return pred_arr + + +# ------------------------------------------------------------------------------- + + +def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6): + """Numpy implementation of the Frechet Distance. + The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) + and X_2 ~ N(mu_2, C_2) is + d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). + + Stable version by Dougal J. Sutherland. + Params: + -- mu1 : Numpy array containing the activations of the pool_3 layer of the + inception net ( like returned by the function 'get_predictions') + for generated samples. + -- mu2 : The sample mean over activations of the pool_3 layer, precalcualted + on an representive data set. + -- sigma1: The covariance matrix over activations of the pool_3 layer for + generated samples. + -- sigma2: The covariance matrix over activations of the pool_3 layer, + precalcualted on an representive data set. + Returns: + -- : The Frechet Distance. + """ + + mu1 = np.atleast_1d(mu1) + mu2 = np.atleast_1d(mu2) + + sigma1 = np.atleast_2d(sigma1) + sigma2 = np.atleast_2d(sigma2) + + assert mu1.shape == mu2.shape, "Training and test mean vectors have different lengths" + assert sigma1.shape == sigma2.shape, "Training and test covariances have different dimensions" + + diff = mu1 - mu2 + + # product might be almost singular + covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) + if not np.isfinite(covmean).all(): + msg = "fid calculation produces singular product; adding %s to diagonal of cov estimates" % eps + warnings.warn(msg) + offset = np.eye(sigma1.shape[0]) * eps + covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) + + # numerical error might give slight imaginary component + if np.iscomplexobj(covmean): + if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): + m = np.max(np.abs(covmean.imag)) + raise ValueError("Imaginary component {}".format(m)) + covmean = covmean.real + + tr_covmean = np.trace(covmean) + + return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean + + +# ------------------------------------------------------------------------------- + + +def calculate_activation_statistics(images, sess, batch_size=50, verbose=False): + """Calculation of the statistics used by the FID. + Params: + -- images : Numpy array of dimension (n_images, hi, wi, 3). The values + must lie between 0 and 255. + -- sess : current session + -- batch_size : the images numpy array is split into batches with batch size + batch_size. A reasonable batch size depends on the available hardware. + -- verbose : If set to True and parameter out_step is given, the number of calculated + batches is reported. + Returns: + -- mu : The mean over samples of the activations of the pool_3 layer of + the inception model. + -- sigma : The covariance matrix of the activations of the pool_3 layer of + the inception model. 
+ """ + act = get_activations(images, sess, batch_size, verbose) + mu = np.mean(act, axis=0) + sigma = np.cov(act, rowvar=False) + return mu, sigma + + +# ------------------ +# The following methods are implemented to obtain a batched version of the activations. +# This has the advantage to reduce memory requirements, at the cost of slightly reduced efficiency. +# - Pyrestone +# ------------------ + + +def load_image_batch(files): + """Convenience method for batch-loading images + Params: + -- files : list of paths to image files. Images need to have same dimensions for all files. + Returns: + -- A numpy array of dimensions (num_images,hi, wi, 3) representing the image pixel values. + """ + return np.array([imread(str(fn)).astype(np.float32) for fn in files]) + + +def get_activations_from_files(files, sess, batch_size=50, verbose=False): + """Calculates the activations of the pool_3 layer for all images. + Params: + -- files : list of paths to image files. Images need to have same dimensions for all files. + -- sess : current session + -- batch_size : the images numpy array is split into batches with batch size + batch_size. A reasonable batch size depends on the disposable hardware. + -- verbose : If set to True and parameter out_step is given, the number of calculated + batches is reported. + Returns: + -- A numpy array of dimension (num images, 2048) that contains the + activations of the given tensor when feeding inception with the query tensor. + """ + inception_layer = _get_inception_layer(sess) + d0 = len(files) + if batch_size > d0: + print("warning: batch size is bigger than the data size. setting batch size to data size") + batch_size = d0 + n_batches = d0 // batch_size + n_used_imgs = n_batches * batch_size + pred_arr = np.empty((n_used_imgs, 2048)) + for i in range(n_batches): + if verbose: + print("\rPropagating batch %d/%d" % (i + 1, n_batches)) + start = i * batch_size + end = start + batch_size + batch = load_image_batch(files[start:end]) + pred = sess.run(inception_layer, {'InputTensor:0': batch}) + pred_arr[start:end] = pred.reshape(batch_size, -1) + del batch # clean up memory + if verbose: + print(" done") + return pred_arr + + +def calculate_activation_statistics_from_files(files, sess, batch_size=50, verbose=False): + """Calculation of the statistics used by the FID. + Params: + -- files : list of paths to image files. Images need to have same dimensions for all files. + -- sess : current session + -- batch_size : the images numpy array is split into batches with batch size + batch_size. A reasonable batch size depends on the available hardware. + -- verbose : If set to True and parameter out_step is given, the number of calculated + batches is reported. + Returns: + -- mu : The mean over samples of the activations of the pool_3 layer of + the inception model. + -- sigma : The covariance matrix of the activations of the pool_3 layer of + the inception model. 
+ """ + act = get_activations_from_files(files, sess, batch_size, verbose) + mu = np.mean(act, axis=0) + sigma = np.cov(act, rowvar=False) + return mu, sigma + + +# ------------------------------------------------------------------------------- + + +# ------------------------------------------------------------------------------- +# The following functions aren't needed for calculating the FID +# they're just here to make this module work as a stand-alone script +# for calculating FID scores +# ------------------------------------------------------------------------------- +def check_or_download_inception(inception_path): + ''' Checks if the path to the inception file is valid, or downloads + the file if it is not present. ''' + INCEPTION_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' + if inception_path is None: + inception_path = MODEL_DIR + inception_path = pathlib.Path(inception_path) + model_file = inception_path / 'classify_image_graph_def.pb' + if not model_file.exists(): + print("Downloading Inception model") + from urllib import request + import tarfile + fn, _ = request.urlretrieve(INCEPTION_URL) + with tarfile.open(fn, mode='r') as f: + f.extract('classify_image_graph_def.pb', str(model_file.parent)) + return str(model_file) + + +def _handle_path(path, sess, low_profile=False): + if path.endswith('.npz'): + f = np.load(path) + m, s = f['mu'][:], f['sigma'][:] + f.close() + else: + path = pathlib.Path(path) + files = [] + for ext in ('*.png', '*.jpg', '*.jpeg', '.bmp'): + files.extend( list(path.glob(ext)) ) + + if low_profile: + m, s = calculate_activation_statistics_from_files(files, sess) + else: + # x = np.array([scipy.misc.imresize(imread(str(fn), mode='RGB'), (299, 299), interp='bilinear').astype(np.float32) for fn in files]) + x = np.array([imread(str(fn)).astype(np.float32) for fn in files]) + m, s = calculate_activation_statistics(x, sess) + del x # clean up memory + return m, s + + +def calculate_fid_given_paths(paths, inception_path, low_profile=False): + ''' Calculates the FID of two paths. ''' + inception_path = check_or_download_inception(inception_path) + + for p in paths: + if not os.path.exists(p): + raise RuntimeError("Invalid path: %s" % p) + + create_inception_graph(str(inception_path)) + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + m1, s1 = _handle_path(paths[0], sess, low_profile=low_profile) + m2, s2 = _handle_path(paths[1], sess, low_profile=low_profile) + fid_value = calculate_frechet_distance(m1, s1, m2, s2) + return fid_value + + +if __name__ == "__main__": + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + + parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) + parser.add_argument("path", type=str, nargs=2, + help='Path to the generated images or to .npz statistic files') + parser.add_argument("-i", "--inception", type=str, default=None, + help='Path to Inception model (will be downloaded if not provided)') + parser.add_argument("--gpu", default="", type=str, + help='GPU to use (leave blank for CPU only)') + parser.add_argument("--lowprofile", action="store_true", + help='Keep only one batch of images in memory at a time. 
This reduces memory footprint, but may decrease speed slightly.') + args = parser.parse_args() + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + fid_value = calculate_fid_given_paths(args.path, args.inception, low_profile=args.lowprofile) + print("FID: ", fid_value) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/inception_score_official_tf.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/inception_score_official_tf.py new file mode 100644 index 0000000000000000000000000000000000000000..304e68069bbf9052c11a2bcdaf4f3605b9dfdfe3 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/inception_score_official_tf.py @@ -0,0 +1,298 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +@Brief: + Tensorflow implementation of inception score, should be the same as the official one + modified from official inception score implementation + [openai/improved-gan](https://github.com/openai/improved-gan) +@Author: lzhbrian (https://lzhbrian.me) +@Date: 2019.4.5 +@Last Modified: 2019.4.7 +@Usage: + # CMD + # calculate IS on CIFAR10 train + python inception_score_official_tf.py + + # calculate IS on custom images in foldername/ + python inception_score_official_tf.py foldername/ + python inception_score_official_tf.py /data4/linziheng/datasets/fashionai-attributes-challenge-baseline/fashionAI_attributes_test/test/Images/coat_length_labels/ + + # use it in code + ``` + from metrics import inception_score_official_tf + is_mean, is_std = get_inception_score(img_list, splits=10) + ``` +@Note: + Updated 2019.4.7: + after checking out http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + I found that this model file has already contained: + + 1. The Bilinear Resize Layer + resizing arbitrary input to 299x299 + ``` + import tensorflow as tf + import os + with tf.gfile.FastGFile('classify_image_graph_def.pb', 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + _ = tf.import_graph_def(graph_def, name='') + sess = tf.Session() + resize_bilinear = sess.graph.get_tensor_by_name('ResizeBilinear:0') + sess.run(resize_bilinear, {'ExpandDims:0': np.ones((1, H, W, 3))}).shape # (1, 299, 299, 3) + ``` + so this code can fit arbitrary input image size + + 2. 
The Normalization Layer + input: 0~255 + normalization: subtracted by 128, then divided by 128 + output: -1~1 + ``` + import tensorflow as tf + import os + with tf.gfile.FastGFile(os.path.join('classify_image_graph_def.pb'), 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + _ = tf.import_graph_def(graph_def, name='') + sess = tf.Session() + + Sub = sess.graph.get_tensor_by_name('Sub:0') + sess.run(Sub, {'ExpandDims:0': 255 * np.zeros((1,299,299,3))}) + # output is all -128 + + Mul = sess.graph.get_tensor_by_name('Mul:0') + sess.run(Mul, {'ExpandDims:0': 255 * np.zeros((1,299,299,3))}) + # output is all -1 + ``` + so the input image range of this code shall be 0~255 + + + Results: + + On CIFAR-10 train, n_split=10, tf-1.10: + without random.shuffle, input is 32x32 + get mean=11.237364, std=0.11623184 (consistent with paper) + with random.shuffle, input is 32x32 + 1) get mean=11.242347, std=0.18466103 + 2) get mean=11.237335, std=0.10733857 + 3) get mean=11.234492, std=0.17140374 + + On Imagenet 64x64, n_split=10, tf-1.10: + with random.shuffle + get mean=63.40744, std=1.3286287 +""" + +import os +import os.path +import sys + +import numpy as np +from six.moves import urllib +import tensorflow as tf +import math + +from tqdm import tqdm + +cur_dirname = os.path.dirname(os.path.abspath(__file__)) + +MODEL_DIR = '%s/res/' % cur_dirname +DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' +softmax = None + + +# Call this function with list of images. Each of elements should be a +# numpy array with values ranging from 0 to 255. +# numpy array shape should be in H x W x C +def get_inception_score(images, splits=10, bs=256, sess_config=tf.ConfigProto()): + assert(type(images) == list) + assert(type(images[0]) == np.ndarray) + assert(len(images[0].shape) == 3) + assert(np.max(images[0]) > 10) + assert(np.min(images[0]) >= 0.0) + inps = [] + for img in images: + img = img.astype(np.float32) + inps.append(np.expand_dims(img, 0)) + with tf.Session(config=sess_config) as sess: + preds = [] + n_batches = int(math.ceil(float(len(inps)) / float(bs))) + + print('passing through inception network ...') + for i in tqdm(range(n_batches)): + # sys.stdout.write(".") + # sys.stdout.flush() + inp = inps[(i * bs):min((i + 1) * bs, len(inps))] + + inp = np.concatenate(inp, 0) + pred = sess.run(softmax, {'InputTensor:0': inp}) + preds.append(pred) + preds = np.concatenate(preds, 0) + scores = [] + for i in range(splits): + part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :] + kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) + kl = np.mean(np.sum(kl, 1)) + scores.append(np.exp(kl)) + return np.mean(scores), np.std(scores) + + +# This function is called automatically. 
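+# It downloads the Inception model on first use and rebuilds its softmax so that
+# arbitrary batch sizes can be fed through 'InputTensor:0'.
+#
+# (Illustrative aside, not part of the original script: the per-split score computed
+#  in get_inception_score() above is IS = exp(mean_x KL(p(y|x) || p(y))). For a single
+#  split `part` of softmax outputs with shape (n, 1000), a minimal numpy equivalent is
+#      py = np.mean(part, axis=0)                                # marginal p(y)
+#      kl = part * (np.log(part) - np.log(np.expand_dims(py, 0)))
+#      split_score = np.exp(np.mean(np.sum(kl, axis=1)))
+#  which matches the loop over `splits` above.)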
+def _init_inception(): + global softmax + if not os.path.exists(MODEL_DIR): + os.makedirs(MODEL_DIR) + filename = DATA_URL.split('/')[-1] + filepath = os.path.join(MODEL_DIR, filename) + if not os.path.exists(filepath): + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % ( + filename, float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) + print() + statinfo = os.stat(filepath) + print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') + + import tarfile + tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR) + with tf.gfile.FastGFile(os.path.join(MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + # Import model with a modification in the input tensor to accept arbitrary batch size. + input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='InputTensor') + _ = tf.import_graph_def(graph_def, name='', input_map={'ExpandDims:0': input_tensor}) + # Works with an arbitrary minibatch size. + with tf.Session() as sess: + pool3 = sess.graph.get_tensor_by_name('pool_3:0') + ops = pool3.graph.get_operations() + for op_idx, op in enumerate(ops): + for o in op.outputs: + shape = o.get_shape() + if shape._dims != []: + shape = [s.value for s in shape] + new_shape = [] + for j, s in enumerate(shape): + if s == 1 and j == 0: + new_shape.append(None) + else: + new_shape.append(s) + o.__dict__['_shape_val'] = tf.TensorShape(new_shape) + w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1] + logits = tf.matmul(tf.squeeze(pool3, [1, 2]), w) + softmax = tf.nn.softmax(logits) + + +if softmax is None: + _init_inception() + + +if __name__ == '__main__': + + import random + + def cal_on_cifar10_train(): + # CIFAR 10 utils + def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'): + if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')): + if not os.path.exists(data_dir): + os.makedirs(data_dir) + filename = url.split('/')[-1] + filepath = os.path.join(data_dir, filename) + if not os.path.exists(filepath): + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, + float(count * block_size) / float( + total_size) * 100.0)) + sys.stdout.flush() + + filepath, _ = urllib.request.urlretrieve(url, filepath, _progress) + print() + statinfo = os.stat(filepath) + print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') + tarfile.open(filepath, 'r:gz').extractall(data_dir) + + def unpickle(file): + import pickle + fo = open(file, 'rb') + d = pickle.load(fo, encoding='latin1') + fo.close() + # load as N x H x W x C + return {'x': d['data'].reshape((len(d['data']), 3, 32, 32)).transpose(0, 2, 3, 1), + 'y': np.array(d['labels']).astype(np.uint8)} + + # normalized to -1 ~ +1 + # return {'x': np.cast[np.float32]((-127.5 + d['data'].reshape((10000, 3, 32, 32))) / 128.).transpose(0, 2, 3, 1), + # 'y': np.array(d['labels']).astype(np.uint8)} + + def load(data_dir, subset='train'): + maybe_download_and_extract(data_dir) + if subset == 'train': + train_data = [unpickle(os.path.join(data_dir, 'cifar-10-batches-py/data_batch_' + str(i))) for i in + range(1, 6)] + trainx = np.concatenate([d['x'] for d in train_data], axis=0) + trainy = np.concatenate([d['y'] for d in train_data], axis=0) + return trainx, trainy + elif subset == 'test': + test_data = 
unpickle(os.path.join(data_dir, 'cifar-10-batches-py/test_batch')) + testx = test_data['x'] + testy = test_data['y'] + return testx, testy + else: + raise NotImplementedError('subset should be either train or test') + + train_x, train_y = load('%s/../data/cifar10' % cur_dirname, subset='train') + train_x = list(train_x) + random.shuffle(train_x) + + # train_x is list of images (shape = H x W x C, val = 0~255) + is_mean, is_std = get_inception_score(train_x, splits=10) + print(is_mean, is_std) + + # if no arg, calc cifar10 train IS score + if len(sys.argv) == 1: + cal_on_cifar10_train() + + # if argv have foldername, calc IS score of pictures in this folder + else: + import scipy.misc + + # read a folder + foldername = sys.argv[1] + + from glob import glob + files = [] + for ext in ('*.png', '*.jpg', '*.jpeg', '.bmp'): + files.extend(glob(os.path.join(foldername, ext))) + + img_list = [] + print('reading images ...') + for file in tqdm(files): + img = scipy.misc.imread(file, mode='RGB') + img_list.append(img) + random.shuffle(img_list) + is_mean, is_std = get_inception_score(img_list, splits=10) + print(is_mean, is_std) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/is_fid_pytorch.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/is_fid_pytorch.py new file mode 100644 index 0000000000000000000000000000000000000000..0f0615e323e19a14b40845ea81d9bbb31c5c0b02 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/is_fid_pytorch.py @@ -0,0 +1,506 @@ +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +""" +@Brief: IS, FID Implementation + Now support using + 1) NCHW Tensor + 2) Pytorch Dataset +@Modified from: + https://github.com/sbarratt/inception-score-pytorch/blob/master/inception_score.py + https://github.com/mseitzer/pytorch-fid +@Author: Tzu-Heng Lin (https://lzhbrian.me) +@Date: 2019.4.7 +@Usage: + # CMD + # calc IS score on CIFAR10 + python is_fid_pytorch.py + + # calc IS score on custom images in a foldername + python is_fid_pytorch.py --path foldername/ + + # calc IS, FID score on custom images, compared to CIFAR10 (given precalculated stats) + python is_fid_pytorch.py --path foldername/ --fid res/stats_pytorch/fid_stats_cifar10_train.npz + + # calc FID on two custom images foldername/ + python is_fid_pytorch.py --path foldername/ --fid foldername/ + + # calc FID on two precalculated stats + python is_fid_pytorch.py --path res/stats_pytorch/fid_stats_cifar10_train.npz --fid res/stats_pytorch/fid_stats_cifar10_train.npz + + # precalculate stats store as npz for CIFAR 10 + python is_fid_pytorch.py --save-stats-path res/stats_pytorch/fid_stats_cifar10_train.npz + + # precalculate stats store as npz for foldername/ + python is_fid_pytorch.py --path foldername/ --save-stats-path res/stats_pytorch/fid_stats_folder.npz + + # use it in code: + * `mode=1`: image tensor passed in is already normalized by `mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]` + * `mode=2`: image tensor passed in is already normalized by `mean=[0.500, 0.500, 0.500], std=[0.500, 0.500, 0.500]` + + ``` + from metrics import is_fid_pytorch + + # using precalculated stats (.npz) for FID calculation + is_fid_model = is_fid_pytorch.ScoreModel(mode=2, stats_file='res/stats_pytorch/fid_stats_cifar10_train.npz', cuda=cuda) + imgs_nchw = torch.Tensor(50000, C, H, W) # torch.Tensor in -1~1, normalized by mean=[0.500, 0.500, 0.500], std=[0.500, 0.500, 0.500] + is_mean, is_std, fid = is_fid_model.get_score_image_tensor(imgs_nchw) + + # we can also pass in mu, sigma for get_score_image_tensor() + is_fid_model = is_fid_pytorch.ScoreModel(mode=2, cuda=cuda) + mu, sigma = is_fid_pytorch.read_stats_file('res/stats_pytorch/fid_stats_cifar10_train.npz') + is_mean, is_std, fid = is_fid_model.get_score_image_tensor(imgs_nchw, mu1=mu, sigma1=sigma) + + # if no need FID + is_fid_model = is_fid_pytorch.ScoreModel(mode=2, cuda=cuda) + is_mean, is_std, _ = is_fid_model.get_score_image_tensor(imgs_nchw) + + # if want stats (mu, sigma) for imgs_nchw, send in return_stats=True + is_mean, is_std, _, mu, sigma = is_fid_model.get_score_image_tensor(imgs_nchw, return_stats=True) + + # from pytorch dataset, use get_score_dataset(), instead of get_score_image_tensor(), other usage is the same + cifar = dset.CIFAR10(root='../data/cifar10', download=True, + transform=transforms.Compose([ + transforms.Resize(32), + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) + ) + IgnoreLabelDataset(cifar) + is_mean, is_std, _ = is_fid_model.get_score_dataset(IgnoreLabelDataset(cifar)) + ``` + +@Note: + This Pytorch version is only for getting an overview + on pytorch=1.0.1, torchvision=0.2.2, n_split=10 + + result for CIFAR10 train data: + IS = 9.672782245310362, 0.14991606199684318 + + if we want to report value in papers, + we should use the official tensorflow implementation: + [openai/improved-gan](https://github.com/openai/improved-gan) + [bioinf-jku/TTUR](https://github.com/bioinf-jku/TTUR) + + see paperwithcode: + * 
https://paperswithcode.com/sota/image-generation-generative-models-of-ci + * https://paperswithcode.com/task/conditional-image-generation +""" + +import torch +import torch.nn as nn +from torch.autograd import Variable +from torch.nn import functional as F +import torch.utils.data +from torchvision.models.inception import inception_v3 + +from scipy.stats import entropy +import scipy.misc +from scipy import linalg +import numpy as np +from tqdm import tqdm +from glob import glob +import pathlib +import os +import sys +import random + +CUR_DIRNAME = os.path.dirname(os.path.abspath(__file__)) + + +def read_stats_file(filepath): + """read mu, sigma from .npz""" + if filepath.endswith('.npz'): + f = np.load(filepath) + m, s = f['mu'][:], f['sigma'][:] + f.close() + else: + raise Exception('ERROR! pls pass in correct npz file %s' % filepath) + return m, s + + +def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6): + """Numpy implementation of the Frechet Distance. + The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) + and X_2 ~ N(mu_2, C_2) is + d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). + Stable version by Dougal J. Sutherland. + Params: + -- mu1 : Numpy array containing the activations of a layer of the + inception net (like returned by the function 'get_predictions') + for generated samples. + -- mu2 : The sample mean over activations, precalculated on an + representative data set. + -- sigma1: The covariance matrix over activations for generated samples. + -- sigma2: The covariance matrix over activations, precalculated on an + representative data set. + Returns: + -- : The Frechet Distance. + """ + + mu1 = np.atleast_1d(mu1) + mu2 = np.atleast_1d(mu2) + + sigma1 = np.atleast_2d(sigma1) + sigma2 = np.atleast_2d(sigma2) + + assert mu1.shape == mu2.shape, \ + 'Training and test mean vectors have different lengths %s, %s' % (mu1.shape, mu2.shape) + assert sigma1.shape == sigma2.shape, \ + 'Training and test covariances have different dimensions %s, %s' % (sigma1.shape, sigma2.shape) + diff = mu1 - mu2 + # Product might be almost singular + covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) + if not np.isfinite(covmean).all(): + msg = ('fid calculation produces singular product; ' + 'adding %s to diagonal of cov estimates') % eps + print(msg) + offset = np.eye(sigma1.shape[0]) * eps + covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) + # Numerical error might give slight imaginary component + if np.iscomplexobj(covmean): + if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): + m = np.max(np.abs(covmean.imag)) + raise ValueError('Imaginary component {}'.format(m)) + covmean = covmean.real + tr_covmean = np.trace(covmean) + return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean + + +class ScoreModel: + def __init__(self, mode, cuda=True, + stats_file='', mu1=0, sigma1=0): + """ + Computes the inception score of the generated images + cuda -- whether or not to run on GPU + mode -- image passed in inceptionV3 is normalized by mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5] + and in range of [-1, 1] + 1: image passed in is normalized by mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + 2: image passed in is normalized by mean=[0.500, 0.500, 0.500], std=[0.500, 0.500, 0.500] + """ + # load mu, sigma for calc FID + self.calc_fid = False + if stats_file: + self.calc_fid = True + self.mu1, self.sigma1 = read_stats_file(stats_file) + elif type(mu1) == type(sigma1) == np.ndarray: + self.calc_fid = True + 
self.mu1, self.sigma1 = mu1, sigma1 + + # Set up dtype + if cuda: + self.dtype = torch.cuda.FloatTensor + else: + if torch.cuda.is_available(): + print("WARNING: You have a CUDA device, so you should probably set cuda=True") + self.dtype = torch.FloatTensor + + # setup image normalization mode + self.mode = mode + if self.mode == 1: + transform_input = True + elif self.mode == 2: + transform_input = False + else: + raise Exception("ERR: unknown input img type, pls specify norm method!") + self.inception_model = inception_v3(pretrained=True, transform_input=transform_input).type(self.dtype) + self.inception_model.eval() + # self.up = nn.Upsample(size=(299, 299), mode='bilinear', align_corners=False).type(self.dtype) + + # remove inception_model.fc to get pool3 output 2048 dim vector + self.fc = self.inception_model.fc + self.inception_model.fc = nn.Sequential() + + # wrap with nn.DataParallel + self.inception_model = nn.DataParallel(self.inception_model) + self.fc = nn.DataParallel(self.fc) + + def __forward(self, x): + """ + x should be N x 3 x 299 x 299 + and should be in range [-1, 1] + """ + x = F.interpolate(x, size=(299, 299), mode='bilinear', align_corners=False) + x = self.inception_model(x) + pool3_ft = x.data.cpu().numpy() + + x = self.fc(x) + preds = F.softmax(x, 1).data.cpu().numpy() + return pool3_ft, preds + + @staticmethod + def __calc_is(preds, n_split, return_each_score=False): + """ + regularly, return (is_mean, is_std) + if n_split==1 and return_each_score==True: + return (scores, 0) + # scores is a list with len(scores) = n_img = preds.shape[0] + """ + + n_img = preds.shape[0] + # Now compute the mean kl-div + split_scores = [] + for k in range(n_split): + part = preds[k * (n_img // n_split): (k + 1) * (n_img // n_split), :] + py = np.mean(part, axis=0) + scores = [] + for i in range(part.shape[0]): + pyx = part[i, :] + scores.append(entropy(pyx, py)) + split_scores.append(np.exp(np.mean(scores))) + if n_split == 1 and return_each_score: + return scores, 0 + return np.mean(split_scores), np.std(split_scores) + + @staticmethod + def __calc_stats(pool3_ft): + mu = np.mean(pool3_ft, axis=0) + sigma = np.cov(pool3_ft, rowvar=False) + return mu, sigma + + def get_score_image_tensor(self, imgs_nchw, mu1=0, sigma1=0, + n_split=10, batch_size=32, return_stats=False, + return_each_score=False): + """ + param: + imgs_nchw -- Pytorch Tensor, size=(N,C,H,W), in range of [-1, 1] + batch_size -- batch size for feeding into Inception v3 + n_splits -- number of splits + return: + is_mean, is_std, fid + mu, sigma of dataset + + regularly, return (is_mean, is_std) + if n_split==1 and return_each_score==True: + return (scores, 0) + # scores is a list with len(scores) = n_img = preds.shape[0] + """ + + n_img = imgs_nchw.shape[0] + + assert batch_size > 0 + assert n_img > batch_size + + pool3_ft = np.zeros((n_img, 2048)) + preds = np.zeros((n_img, 1000)) + for i in tqdm(range(np.int32(np.ceil(1.0 * n_img / batch_size)))): + batch_size_i = min((i+1) * batch_size, n_img) - i * batch_size + batchv = Variable(imgs_nchw[i * batch_size:i * batch_size + batch_size_i, ...].type(self.dtype)) + pool3_ft[i * batch_size:i * batch_size + batch_size_i], preds[i * batch_size:i * batch_size + batch_size_i] = self.__forward(batchv) + + # if want to return stats + # or want to calc fid + if return_stats or \ + type(mu1) == type(sigma1) == np.ndarray or self.calc_fid: + mu2, sigma2 = self.__calc_stats(pool3_ft) + + if self.calc_fid: + mu1 = self.mu1 + sigma1 = self.sigma1 + + is_mean, is_std = 
self.__calc_is(preds, n_split, return_each_score) + + fid = -1 + if type(mu1) == type(sigma1) == np.ndarray or self.calc_fid: + fid = calculate_frechet_distance(mu1, sigma1, mu2, sigma2) + + if return_stats: + return is_mean, is_std, fid, mu2, sigma2 + else: + return is_mean, is_std, fid + + def get_score_dataset(self, dataset, mu1=0, sigma1=0, + n_split=10, batch_size=32, return_stats=False, + return_each_score=False): + """ + get score from a dataset + param: + dataset -- pytorch dataset, img in range of [-1, 1] + batch_size -- batch size for feeding into Inception v3 + n_splits -- number of splits + return: + is_mean, is_std, fid + mu, sigma of dataset + + regularly, return (is_mean, is_std) + if n_split==1 and return_each_score==True: + return (scores, 0) + # scores is a list with len(scores) = n_img = preds.shape[0] + """ + + n_img = len(dataset) + dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size) + + pool3_ft = np.zeros((n_img, 2048)) + preds = np.zeros((n_img, 1000)) + for i, batch in tqdm(enumerate(dataloader, 0)): + batch = batch.type(self.dtype) + batchv = Variable(batch) + batch_size_i = batch.size()[0] + pool3_ft[i * batch_size:i * batch_size + batch_size_i], preds[i * batch_size:i * batch_size + batch_size_i] = self.__forward(batchv) + + # if want to return stats + # or want to calc fid + if return_stats or \ + type(mu1) == type(sigma1) == np.ndarray or self.calc_fid: + mu2, sigma2 = self.__calc_stats(pool3_ft) + + if self.calc_fid: + mu1 = self.mu1 + sigma1 = self.sigma1 + + is_mean, is_std = self.__calc_is(preds, n_split, return_each_score) + + fid = -1 + if type(mu1) == type(sigma1) == np.ndarray or self.calc_fid: + fid = calculate_frechet_distance(mu1, sigma1, mu2, sigma2) + + if return_stats: + return is_mean, is_std, fid, mu2, sigma2 + else: + return is_mean, is_std, fid + + +if __name__ == '__main__': + + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) + parser.add_argument('--path', type=str, default='', help='Path to the generated images or to .npz statistic files') + parser.add_argument('--fid', type=str, default='', help='Path to the generated images or to .npz statistic files') + parser.add_argument('--save-stats-path', type=str, default='', help='Path to save .npz statistic files') + args = parser.parse_args() + + # read folder, return torch Tensor in NCHW, normalized + def read_folder(foldername): + files = [] + for ext in ('*.png', '*.jpg', '*.jpeg', '*.bmp'): + files.extend(glob(os.path.join(foldername, ext))) + + img_list = [] + print('Reading Images from %s ...' % foldername) + for file in tqdm(files): + img = scipy.misc.imread(file, mode='RGB') + img = scipy.misc.imresize(img, (299, 299), interp='bilinear') + img = np.cast[np.float32]((-128 + img) / 128.) 
# 0~255 -> -1~1 + img = np.expand_dims(img, axis=0).transpose(0, 3, 1, 2) # NHWC -> NCHW + img_list.append(img) + random.shuffle(img_list) + img_list_tensor = torch.Tensor(np.concatenate(img_list, axis=0)) + return img_list_tensor + + # if no args.path, calc cifar10 train IS score + if not args.path: + class IgnoreLabelDataset(torch.utils.data.Dataset): + def __init__(self, orig): + self.orig = orig + + def __getitem__(self, index): + return self.orig[index][0] + + def __len__(self): + return len(self.orig) + + import torchvision.datasets as dset + import torchvision.transforms as transforms + + cifar = dset.CIFAR10(root='%s/../data/cifar10' % CUR_DIRNAME, download=True, + transform=transforms.Compose([ + transforms.Resize(32), + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) + ) + IgnoreLabelDataset(cifar) + + print ("Calculating IS score on CIFAR 10...") + is_fid_model = ScoreModel(mode=2, cuda=True) + # save calculated npz + if args.save_stats_path: + is_mean, is_std, _, mu, sigma = is_fid_model.get_score_dataset(IgnoreLabelDataset(cifar), + n_split=10, return_stats=True) + print(is_mean, is_std) + np.savez_compressed(args.save_stats_path, mu=mu, sigma=sigma) + print('Stats save to %s' % args.save_stats_path) + else: + is_mean, is_std, _ = is_fid_model.get_score_dataset(IgnoreLabelDataset(cifar), n_split=10) + print(is_mean, is_std) + + elif args.path.endswith('.npz') and args.fid.endswith('.npz'): + mu1, sigma1 = read_stats_file(args.path) + mu2, sigma2 = read_stats_file(args.fid) + fid = calculate_frechet_distance(mu1, sigma1, mu2, sigma2) + print('FID =', fid) + + # if argv have foldername/, calc IS score of pictures in this folder + elif args.path: + + if args.fid.endswith('.npz'): + is_fid_model = ScoreModel(mode=2, stats_file=args.fid, cuda=True) + img_list_tensor = read_folder(args.path) + is_mean, is_std, fid = is_fid_model.get_score_image_tensor(img_list_tensor, n_split=10) + print(is_mean, is_std) + print('FID =', fid) + + # args.fid == a foldername/ + elif args.fid: + is_fid_model = ScoreModel(mode=2, cuda=True) + + img_list_tensor1 = read_folder(args.path) + img_list_tensor2 = read_folder(args.fid) + + print('Calculating 1st stat ...') + is_mean1, is_std1, _, mu1, sigma1 = \ + is_fid_model.get_score_image_tensor(img_list_tensor1, n_split=10, return_stats=True) + + print('Calculating 2nd stat ...') + is_mean2, is_std2, fid = is_fid_model.get_score_image_tensor(img_list_tensor2, + mu1=mu1, sigma1=sigma1, + n_split=10) + + print('1st IS score =', is_mean1, ',', is_std1) + print('2nd IS score =', is_mean2, ',', is_std2) + print('FID =', fid) + + # no args.fid + else: + is_fid_model = ScoreModel(mode=2, cuda=True) + img_list_tensor = read_folder(args.path) + + # save calculated npz + if args.save_stats_path: + is_mean, is_std, _, mu, sigma = is_fid_model.get_score_image_tensor(img_list_tensor, + n_split=10, return_stats=True) + print(is_mean, is_std) + np.savez_compressed(args.save_stats_path, mu=mu, sigma=sigma) + print('Stats save to %s' % args.save_stats_path) + else: + is_mean, is_std, _ = is_fid_model.get_score_image_tensor(img_list_tensor, n_split=10) + print(is_mean, is_std) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/precalc_stats_official_tf.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/precalc_stats_official_tf.py new file mode 100644 index 0000000000000000000000000000000000000000..ba306691fcd4c92e07f30b6d11db3f791b463e91 --- /dev/null +++ 
b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/precalc_stats_official_tf.py @@ -0,0 +1,90 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +@Brief: + calc stats for a foldername/ + modified from official inception score implementation + [bioinf-jku/TTUR](https://github.com/bioinf-jku/TTUR) +@Author: lzhbrian (https://lzhbrian.me) +@Date: 2019.4.7 +@Usage: + python precalc_stats_official_tf.py foldername/ output_path/ + python precalc_stats_official_tf.py /data4/linziheng/datasets/imagenet/valid_64x64/ imagenet_valid_stats_test.npz +""" + +import sys +import os +from glob import glob +import numpy as np +import fid_official_tf +from scipy.misc import imread +import tensorflow as tf + +######## +# PATHS +######## +# data_path = 'data' # set path to training set images +# output_path = 'fid_stats.npz' # path for where to store the statistics + +data_path = sys.argv[1] +output_path = sys.argv[2] + +# if you have downloaded and extracted +# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz +# set this path to the directory where the extracted files are, otherwise +# just set it to None and the script will later download the files for you + +cur_dirname = os.path.dirname(os.path.abspath(__file__)) +MODEL_DIR = '%s/res/' % cur_dirname + +inception_path = '%s/' % MODEL_DIR +print("check for inception model..") +inception_path = fid_official_tf.check_or_download_inception(inception_path) # download inception if necessary +print("ok") + +# loads all images into memory (this might require a lot of RAM!) 
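+# (Illustrative alternative, not part of the original script: for folders that do not
+#  fit in memory, the batched helper from fid_official_tf.py can be used instead, once
+#  the inception graph has been created with fid_official_tf.create_inception_graph():
+#      with tf.Session() as sess:
+#          sess.run(tf.global_variables_initializer())
+#          mu, sigma = fid_official_tf.calculate_activation_statistics_from_files(
+#              image_list, sess, batch_size=100)
+#  It keeps only one batch of images in memory at a time. The default path below loads
+#  everything at once.)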
+print("load images..") +image_list = [] +for ext in ('*.png', '*.jpg', '*.jpeg', '.bmp'): + image_list.extend(glob(os.path.join(data_path, ext))) + +images = np.array([imread(str(fn)).astype(np.float32) for fn in image_list]) +print("%d images found and loaded" % len(images)) + + +print("create inception graph..") +fid_official_tf.create_inception_graph(inception_path) # load the graph into the current TF graph +print("ok") + + +print("calculate FID stats..") +with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + mu, sigma = fid_official_tf.calculate_activation_statistics(images, sess, batch_size=100) + np.savez_compressed(output_path, mu=mu, sigma=sigma) +print("finished") diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/res/README.md b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/res/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5cca112630d5dc8f78b9cfac9208219c840bb6e5 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/metrics/res/README.md @@ -0,0 +1,14 @@ +# res/ + +* in this [res/](./) dir, should contain: + * inception pretrained weights `inception-2015-12-05.tgz` from [link](http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz) (for using the TF implementation) + +* in [res/stats_tf/](stats_tf/) dir, should contain: + * precalculated statistics for datasets from [link](http://bioinf.jku.at/research/ttur/) + * [Cropped CelebA](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_celeba.npz), [LSUN bedroom](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_lsun_train.npz), [CIFAR 10](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_cifar10_train.npz), [SVHN](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_svhn_train.npz), [ImageNet Train](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_imagenet_train.npz), [ImageNet Valid](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_imagenet_valid.npz) + +* in [res/stats_pytorch](stats_pytorch/) dir + * store precalculated stats using [is_fid_pytorch.py](../is_fid_pytorch.py) + + + diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/modelzoo_level.txt b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/modelzoo_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..aac47fa5aa57194dbb4cb1d825da033987898f41 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/modelzoo_level.txt @@ -0,0 +1,6 @@ +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:NOK +PerfStatus:POK \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_128.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_128.py new file mode 100644 index 0000000000000000000000000000000000000000..a7cf5f85a5b3c4e016586ded9e9446ff58a4b3d9 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_128.py @@ -0,0 +1,113 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ops import * + + +class Generator: + def __init__(self, name, base_channel): + self.name = name + self.base_channel = base_channel + + def __call__(self, inputs, train_phase, y, embed_w, nums_class, y_end=None, alpha=1.0): + """ + Args: + inputs: a noise vector. shape: [batch_size, z_dim] + train_phase: is training or not + y: class info + embed_w: weight for shared embedding + nums_class: number of image classes + """ + # hierarchical latent space: split z into one chunk per resolution + z_dim = int(inputs.shape[-1]) + nums_layer = 6 + remain = z_dim % nums_layer + chunk_size = (z_dim - remain) // nums_layer + z_split = tf.split(inputs, [chunk_size] * (nums_layer - 1) + [chunk_size + remain], axis=1) + y = tf.one_hot(y, nums_class) + + if not y_end is None: + # category morphing + y_end = tf.one_hot(y_end, nums_class) + y = y * alpha + y_end * (1 - alpha) + + embed_y = tf.matmul(y, embed_w) # shared embedding + inputs = tf.concat([z_split[0], embed_y], axis=1) + + ch = self.base_channel # base channel number per layer + out_channels = [ch * i for i in [16, 8, 4, 2, 1]] + + with tf.variable_scope(name_or_scope=self.name, reuse=tf.AUTO_REUSE): + inputs = dense("dense", inputs, 4*4*out_channels[0]) + inputs = tf.reshape(inputs, [-1, 4, 4, out_channels[0]]) + inputs = G_Resblock("ResBlock1", inputs, out_channels[0], train_phase, z_split[1], embed_y, nums_class) + inputs = G_Resblock("ResBlock2", inputs, out_channels[1], train_phase, z_split[2], embed_y, nums_class) + inputs = G_Resblock("ResBlock3", inputs, out_channels[2], train_phase, z_split[3], embed_y, nums_class) + inputs = G_Resblock("ResBlock4", inputs, out_channels[3], train_phase, z_split[4], embed_y, nums_class) + inputs = non_local("Non-local", inputs, None, is_sn=True) + inputs = G_Resblock("ResBlock5", inputs, out_channels[4], train_phase, z_split[5], embed_y, nums_class) + inputs = tf.nn.relu(conditional_batchnorm(inputs, train_phase, "BN")) # batch normalization + inputs = conv("conv", inputs, k_size=3, nums_out=3, strides=1, is_sn=True) + return tf.nn.tanh(inputs) + + def var_list(self): + return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name) + + +class Discriminator: + def __init__(self, name, base_channel): + self.name = name + self.base_channel = base_channel + + def __call__(self, inputs, train_phase, y, nums_class, update_collection=None): + """ + Args: + inputs: an image. 
shape: [batch_size, 128, 128, 3] + y: class info (scalar) + nums_class: number of image classes + """ + ch = self.base_channel # base channel number per layer + out_channels = [ch * i for i in [1, 2, 4, 8, 16, 16]] + + with tf.variable_scope(name_or_scope=self.name, reuse=tf.AUTO_REUSE): + inputs = D_Resblock("ResBlock1", inputs, out_channels[0], train_phase, update_collection, is_down=True) # [N, 64, 64, ch] + inputs = non_local("Non-local", inputs, update_collection, True) + inputs = D_Resblock("ResBlock2", inputs, out_channels[1], train_phase, update_collection, is_down=True) # [N, 32, 32, 2*ch] + inputs = D_Resblock("ResBlock3", inputs, out_channels[2], train_phase, update_collection, is_down=True) # [N, 16, 16, 4*ch] + inputs = D_Resblock("ResBlock4", inputs, out_channels[3], train_phase, update_collection, is_down=True) # [N, 8, 8, 8*ch] + inputs = D_Resblock("ResBlock5", inputs, out_channels[4], train_phase, update_collection, is_down=True) # [N, 4, 4, 16*ch] + inputs = D_Resblock("ResBlock6", inputs, out_channels[5], train_phase, update_collection, is_down=False) + inputs = tf.nn.relu(inputs) + inputs = global_sum_pooling(inputs) # [N, 16*ch] + temp = d_projection(inputs, y, nums_class, update_collection) # [N, 1] + inputs = dense("dense", inputs, 1, update_collection, is_sn=True) # [N, 1] + inputs = temp + inputs + return inputs + + def var_list(self): + return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name) + diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_32.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_32.py new file mode 100644 index 0000000000000000000000000000000000000000..04009bfac65db09a4da4c670450b64930f88926c --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_32.py @@ -0,0 +1,109 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ops import * + + +class Generator: + def __init__(self, name, base_channel): + self.name = name + self.base_channel = base_channel + + def __call__(self, inputs, train_phase, y, embed_w, nums_class, y_end=None, alpha=1.0): + """ + Args: + inputs: a noise vector. 
shape: [batch_size, z_dim] + train_phase: is training or not + y: class info + embed_w: weight for shared embedding + nums_class: number of image classes + """ + # hierarchical latent space: split z into one chunk per resolution + z_dim = int(inputs.shape[-1]) + nums_layer = 4 + remain = z_dim % nums_layer + chunk_size = (z_dim - remain) // nums_layer + z_split = tf.split(inputs, [chunk_size] * (nums_layer - 1) + [chunk_size + remain], axis=1) + y = tf.one_hot(y, nums_class) + + if not y_end is None: + # category morphing + y_end = tf.one_hot(y_end, nums_class) + y = y * alpha + y_end * (1 - alpha) + + embed_y = tf.matmul(y, embed_w) # shared embedding + inputs = tf.concat([z_split[0], embed_y], axis=1) + + ch = self.base_channel # base channel number per layer + out_channels = [ch * i for i in [4, 4, 4]] + + with tf.variable_scope(name_or_scope=self.name, reuse=tf.AUTO_REUSE): + inputs = dense("dense", inputs, 4*4*out_channels[0]) + inputs = tf.reshape(inputs, [-1, 4, 4, out_channels[0]]) # [N, 4, 4, out_channels[0]] + inputs = G_Resblock("ResBlock1", inputs, out_channels[0], train_phase, z_split[1], embed_y, nums_class) # [N, 8, 8, out_channels[0]] + inputs = G_Resblock("ResBlock2", inputs, out_channels[1], train_phase, z_split[2], embed_y, nums_class) # [N, 16, 16, out_channels[1]] + inputs = non_local("Non-local", inputs, None, is_sn=True) + inputs = G_Resblock("ResBlock3", inputs, out_channels[2], train_phase, z_split[3], embed_y, nums_class) # [N, 32, 32, out_channels[2]] + inputs = tf.nn.relu(conditional_batchnorm(inputs, train_phase, "BN")) # batch normalization + inputs = conv("conv", inputs, k_size=3, nums_out=3, strides=1, is_sn=True) + return tf.nn.tanh(inputs) + + def var_list(self): + return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name) + + +class Discriminator: + def __init__(self, name, base_channel): + self.name = name + self.base_channel = base_channel + + def __call__(self, inputs, train_phase, y, nums_class, update_collection=None): + """ + Args: + inputs: an image. 
shape: [batch_size, 32, 32, 3] + y: class info (scalar) + nums_class: number of image classes + """ + ch = self.base_channel # base channel number per layer + out_channels = [ch * i for i in [4, 4, 4, 4]] + + with tf.variable_scope(name_or_scope=self.name, reuse=tf.AUTO_REUSE): + inputs = D_Resblock("ResBlock1", inputs, out_channels[0], train_phase, update_collection, is_down=True) + inputs = non_local("Non-local", inputs, update_collection, True) + inputs = D_Resblock("ResBlock2", inputs, out_channels[1], train_phase, update_collection, is_down=True) + inputs = D_Resblock("ResBlock3", inputs, out_channels[2], train_phase, update_collection, is_down=False) + inputs = D_Resblock("ResBlock4", inputs, out_channels[3], train_phase, update_collection, is_down=False) + inputs = tf.nn.relu(inputs) + inputs = global_sum_pooling(inputs) # [N, ch] + temp = d_projection(inputs, y, nums_class, update_collection) # [N, 1] + inputs = dense("dense", inputs, 1, update_collection, is_sn=True) # [N, 1] + inputs = temp + inputs + return inputs + + def var_list(self): + return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name) + diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_64.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_64.py new file mode 100644 index 0000000000000000000000000000000000000000..e5e69aa908a8b1b6e02a619ae190c79c76025d9a --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/networks_64.py @@ -0,0 +1,111 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ops import * + + +class Generator: + def __init__(self, name, base_channel): + self.name = name + self.base_channel = base_channel + + def __call__(self, inputs, train_phase, y, embed_w, nums_class, y_end=None, alpha=1.0): + """ + Args: + inputs: a noise vector. 
shape: [batch_size, z_dim] + train_phase: is training or not + y: class info + embed_w: weight for shared embedding + nums_class: number of image classes + """ + # hierarchical latent space: split z into one chunk per resolution + z_dim = int(inputs.shape[-1]) + nums_layer = 5 + remain = z_dim % nums_layer + chunk_size = (z_dim - remain) // nums_layer + z_split = tf.split(inputs, [chunk_size] * (nums_layer - 1) + [chunk_size + remain], axis=1) + y = tf.one_hot(y, nums_class) + + if not y_end is None: + # category morphing + y_end = tf.one_hot(y_end, nums_class) + y = y * alpha + y_end * (1 - alpha) + + embed_y = tf.matmul(y, embed_w) # shared embedding + inputs = tf.concat([z_split[0], embed_y], axis=1) + + ch = self.base_channel # base channel number per layer + out_channels = [ch * i for i in [16, 8, 4, 2]] + + with tf.variable_scope(name_or_scope=self.name, reuse=tf.AUTO_REUSE): + inputs = dense("dense", inputs, 4*4*out_channels[0]) + inputs = tf.reshape(inputs, [-1, 4, 4, out_channels[0]]) + inputs = G_Resblock("ResBlock1", inputs, out_channels[0], train_phase, z_split[1], embed_y, nums_class) + inputs = G_Resblock("ResBlock2", inputs, out_channels[1], train_phase, z_split[2], embed_y, nums_class) + inputs = G_Resblock("ResBlock3", inputs, out_channels[2], train_phase, z_split[3], embed_y, nums_class) + inputs = non_local("Non-local", inputs, None, is_sn=True) + inputs = G_Resblock("ResBlock4", inputs, out_channels[3], train_phase, z_split[4], embed_y, nums_class) + inputs = tf.nn.relu(conditional_batchnorm(inputs, train_phase, "BN")) # batch normalization + inputs = conv("conv", inputs, k_size=3, nums_out=3, strides=1, is_sn=True) + return tf.nn.tanh(inputs) + + def var_list(self): + return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name) + + +class Discriminator: + def __init__(self, name, base_channel): + self.name = name + self.base_channel = base_channel + + def __call__(self, inputs, train_phase, y, nums_class, update_collection=None): + """ + Args: + inputs: an image. 
shape: [batch_size, 64, 64, 3] + y: class info (scalar) + nums_class: number of image classes + """ + ch = self.base_channel # base channel number per layer + out_channels = [ch * i for i in [1, 2, 4, 8, 16]] + + with tf.variable_scope(name_or_scope=self.name, reuse=tf.AUTO_REUSE): + inputs = D_Resblock("ResBlock1", inputs, out_channels[0], train_phase, update_collection, is_down=True) + inputs = non_local("Non-local", inputs, update_collection, True) + inputs = D_Resblock("ResBlock2", inputs, out_channels[1], train_phase, update_collection, is_down=True) + inputs = D_Resblock("ResBlock3", inputs, out_channels[2], train_phase, update_collection, is_down=True) + inputs = D_Resblock("ResBlock4", inputs, out_channels[3], train_phase, update_collection, is_down=True) + inputs = D_Resblock("ResBlock5", inputs, out_channels[4], train_phase, update_collection, is_down=False) + inputs = tf.nn.relu(inputs) + inputs = global_sum_pooling(inputs) + temp = d_projection(inputs, y, nums_class, update_collection) # [N, 1] + inputs = dense("dense", inputs, 1, update_collection, is_sn=True) # [N, 1] + inputs = temp + inputs + return inputs + + def var_list(self): + return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name) + diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/ops.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/ops.py new file mode 100644 index 0000000000000000000000000000000000000000..bd69dfcd2d82d7743142afd9724b269b1c23a747 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/ops.py @@ -0,0 +1,303 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
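+# Building blocks shared by networks_32/64/128: spectrally normalized conv/dense
+# layers, conditional batch normalization with shared embedding and skip-z,
+# self-attention (non_local), the projection discriminator head, hinge loss and
+# orthogonal regularization.
+#
+# (Illustrative note, not part of the original file: spectral_normalization() below
+#  estimates the largest singular value of the reshaped weight matrix W with one step
+#  of power iteration and divides W by it. In plain numpy, with W of shape (m, n) and
+#  u a (1, n) row vector carried over from the previous step:
+#      v = l2_normalize(u @ W.T)          # left singular vector estimate
+#      u = l2_normalize(v @ W)            # right singular vector estimate
+#      sigma = float(v @ W @ u.T)         # spectral norm estimate
+#      W_sn = W / sigma
+#  where l2_normalize(x) = x / np.linalg.norm(x).)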
+import tensorflow as tf + + +def spectral_normalization(name, weight, n_itr=1, update_collection=None): + """ + Args: + weight: shape -> fc: [in_dim, out_dim] + conv: [h, w, c_in, c_out] + """ + w_shape = weight.shape.as_list() + weight = tf.reshape(weight, [-1, w_shape[-1]]) # treat conv weight as a 2-D matrix: [h*w*c_in, c_out] + + # power iteration method + u = tf.get_variable(name + 'u', [1, w_shape[-1]], initializer=tf.truncated_normal_initializer(), + trainable=False) + u_hat = u # right singular vector + v_hat = None # left singular vector + # Because the weights change slowly, we only need to perform a single power iteration + # on the current version of these vectors for each step of learning + for _ in range(n_itr): + v_hat = tf.nn.l2_normalize(tf.matmul(u_hat, tf.transpose(weight))) + u_hat = tf.nn.l2_normalize(tf.matmul(v_hat, weight)) + + # spectral normalization + sigma = tf.squeeze(tf.matmul(tf.matmul(v_hat, weight), tf.transpose(u_hat))) + weight /= sigma + + if update_collection is None: + with tf.control_dependencies([u.assign(u_hat)]): + w_norm = tf.reshape(weight, w_shape) # get original shape + else: + w_norm = tf.reshape(weight, w_shape) + if update_collection != 'NO_OPS': + tf.add_to_collection(update_collection, u.assign(u_hat)) + + return w_norm + + +def conv(name, inputs, nums_out, k_size, strides, update_collection=None, is_sn=False): + """convolution layer (with spectral normalization)""" + nums_in = inputs.shape[-1] # num of input channels + with tf.variable_scope(name): + w = tf.get_variable("w", [k_size, k_size, nums_in, nums_out], initializer=tf.orthogonal_initializer()) + b = tf.get_variable("b", [nums_out], initializer=tf.constant_initializer([0.0])) + if is_sn: + w = spectral_normalization("sn", w, update_collection=update_collection) + op = tf.nn.conv2d(inputs, w, strides=[1, strides, strides, 1], padding="SAME") + return tf.nn.bias_add(op, b) + + +def dense(name, inputs, nums_out, update_collection=None, is_sn=False): + """fully connected layer (with spectral normalization)""" + nums_in = inputs.shape[-1] + with tf.variable_scope(name): + w = tf.get_variable("w", [nums_in, nums_out], initializer=tf.orthogonal_initializer()) + b = tf.get_variable("b", [nums_out], initializer=tf.constant_initializer([0.0])) + if is_sn: + w = spectral_normalization("sn", w, update_collection=update_collection) + return tf.nn.bias_add(tf.matmul(inputs, w), b) + + +def conditional_batchnorm(x, train_phase, name, split_z=None, embed_y=None): + """implementation of shared embedding and skip-z in the BigGAN paper + + Args: + split_z: vector -> one chunk of the noise vector "z" + embed_y: class info (shared embedding) + """ + with tf.variable_scope(name): + epsilon = 1e-5 # variance epsilon for batch norm + decay = 0.9 # decay rate for exponential moving average in batch norm + + if embed_y is None: + # batch normalization + beta = tf.get_variable(name=name + 'beta', shape=[x.shape[-1]], + initializer=tf.constant_initializer([0.]), trainable=True) + gamma = tf.get_variable(name=name + 'gamma', shape=[x.shape[-1]], + initializer=tf.constant_initializer([1.]), trainable=True) + else: + # conditional batch normalization + z = tf.concat([split_z, embed_y], axis=1) # get conditional vector + # use conditional vector to get batchNorm gains and biases + gamma = dense("gamma", z, x.shape[-1], is_sn=True) # scale + beta = dense("beta", z, x.shape[-1], is_sn=True) # offset + gamma = tf.reshape(gamma, [-1, 1, 1, x.shape[-1]]) + beta = tf.reshape(beta, [-1, 1, 1, x.shape[-1]]) + + # 
calculate batch mean and variance + batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments', keep_dims=True) + + ema = tf.train.ExponentialMovingAverage(decay=decay) + + def mean_var_with_update(): + ema_apply_op = ema.apply([batch_mean, batch_var]) + with tf.control_dependencies([ema_apply_op]): + return tf.identity(batch_mean), tf.identity(batch_var) + + mean, var = tf.cond(train_phase, mean_var_with_update, + lambda: (ema.average(batch_mean), ema.average(batch_var))) + normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) + return normed + + +def down_sampling(inputs): + """down-sampling: avg pool with zero-padding (out_size = in_size / 2) + """ + return tf.nn.avg_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") + + +def up_sampling(inputs): + """nearest-neighbors up-sampling (out_size = in_size * 2) + """ + h, w = inputs.shape[1], inputs.shape[2] + return tf.image.resize_nearest_neighbor(inputs, [h * 2, w * 2]) + + +def non_local(name, inputs, update_collection, is_sn): + """attention module + + This implementation is different from the bigGAN paper. Please check this paper: Non-local Neural Networks. + It also uses down sampling to reduce computation. + """ + h, w, num_channels = inputs.shape[1], inputs.shape[2], inputs.shape[3] + location_num = h * w + down_sampled_num = location_num // 4 # after down sampling, feature map shrinks to a quarter of its size + + with tf.variable_scope(name): + # theta: [h*w, c//8] + theta = conv("f", inputs, num_channels // 8, 1, 1, update_collection, is_sn) + theta = tf.reshape(theta, [-1, location_num, num_channels // 8]) + # phi: [d_h*d_w, c//8] + phi = conv("h", inputs, num_channels // 8, 1, 1, update_collection, is_sn) + phi = down_sampling(phi) + phi = tf.reshape(phi, [-1, down_sampled_num, num_channels // 8]) + # attention map: [h*w, d_h*d_w] + attn = tf.matmul(theta, phi, transpose_b=True) + attn = tf.nn.softmax(attn) + # g: [d_h*d_w, c//2] + g = conv("g", inputs, num_channels // 2, 1, 1, update_collection, is_sn) + g = down_sampling(g) + g = tf.reshape(g, [-1, down_sampled_num, num_channels // 2]) + # attn_g: [h*w, c//2] + attn_g = tf.matmul(attn, g) + attn_g = tf.reshape(attn_g, [-1, h, w, num_channels // 2]) + # attn_g: [h*w, c] + attn_g = conv("attn", attn_g, num_channels, 1, 1, update_collection, is_sn) + + sigma = tf.get_variable("sigma_ratio", [], initializer=tf.constant_initializer(0.0)) + return inputs + sigma * attn_g + + +def non_local_bigGAN(name, inputs, update_collection, is_sn): + """attention module + + This implementation follows the bigGAN paper. 
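+    Shapes, for an NHWC input of size [N, H, W, C]: f ("key") and g ("query") project to C//8
+    channels and are reshaped to [N, C//8, H*W]; h ("value") keeps C channels as [N, C, H*W].
+    The attention map softmax(f^T g) has shape [N, H*W, H*W] and re-weights the values; the
+    result is scaled by a learnable scalar gamma (initialised to 0, so the block starts out as
+    an identity mapping) before being added back to the input.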
+ """ + H = inputs.shape[1] + W = inputs.shape[2] + C = inputs.shape[3] + C_ = C // 8 + inputs_ = tf.transpose(inputs, perm=[0, 3, 1, 2]) + inputs_ = tf.reshape(inputs_, [-1, C, H * W]) + with tf.variable_scope(name): + f = conv("f", inputs, C_, 1, 1, update_collection, is_sn) # key + g = conv("g", inputs, C_, 1, 1, update_collection, is_sn) # query + h = conv("h", inputs, C, 1, 1, update_collection, is_sn) # value + f = tf.transpose(f, [0, 3, 1, 2]) + f = tf.reshape(f, [-1, C_, H * W]) + g = tf.transpose(g, [0, 3, 1, 2]) + g = tf.reshape(g, [-1, C_, H * W]) + h = tf.transpose(h, [0, 3, 1, 2]) + h = tf.reshape(h, [-1, C, H * W]) + # attention map + s = tf.matmul(f, g, transpose_a=True) + beta = tf.nn.softmax(s, dim=0) + o = tf.matmul(h, beta) + gamma = tf.get_variable("gamma", [], initializer=tf.constant_initializer(0.)) + y = gamma * o + inputs_ + y = tf.reshape(y, [-1, C, H, W]) + y = tf.transpose(y, perm=[0, 2, 3, 1]) + return y + + +def global_sum_pooling(inputs): + """global sum pooling + + Args: + inputs -> shape: [N, H, W, C] + + Returns: + shape: [N, C] + """ + return tf.reduce_sum(inputs, axis=[1, 2], keep_dims=False) + + +def Hinge_loss(real_logits, fake_logits): + d_loss = -tf.reduce_mean(tf.minimum(0., -1.0 + real_logits)) - tf.reduce_mean(tf.minimum(0., -1.0 - fake_logits)) + g_loss = -tf.reduce_mean(fake_logits) + return d_loss, g_loss + + +def ortho_reg(vars_list): + """apply orthogonal regularization to convolutional layers + """ + s = 0 + for var in vars_list: + if "w" in var.name and var.shape.__len__() == 4: + # w shape: [k_size, k_size, in_channels, out_channels] + nums_kernel = int(var.shape[-1]) + w = tf.transpose(var, perm=[3, 0, 1, 2]) # [out_channels, k_size, k_size, in_channels] + w = tf.reshape(w, [nums_kernel, -1]) # [out_channels, k_size*k_size*in_channels] + ones = tf.ones([nums_kernel, nums_kernel]) + eyes = tf.eye(nums_kernel, nums_kernel) + y = tf.matmul(w, w, transpose_b=True) * (ones - eyes) + s += tf.nn.l2_loss(y) + return s + + +def d_projection(global_pooled, y, nums_class, update_collection=None): + """paper: cGANs with Projection Discriminator + + Args: + global_pooled: hidden layer after global sum pooling. shape -> [N, C] + y: class info (a scalar, not one-hot encoding!) + nums_class: number of classes + """ + w = global_pooled.shape[-1] + v = tf.get_variable("v", [nums_class, w], initializer=tf.orthogonal_initializer()) + v = tf.transpose(v) + # V^T acts like a fully connected layer, so we need to perform spectral norm on V^T instead of V + v = spectral_normalization("embed", v, update_collection=update_collection) + v = tf.transpose(v) + # Embed(y); same as V^Ty (, assuming y is a one-hot vector) + temp = tf.nn.embedding_lookup(v, y) + # Embed(y) . 
h + temp = tf.reduce_sum(temp * global_pooled, axis=1, keep_dims=True) + return temp + + +def G_Resblock(name, inputs, nums_out, train_phase, split_z, embed_y, nums_class, is_up=True): + """A residual block in BigGAN's generator""" + with tf.variable_scope(name): + temp = tf.identity(inputs) + inputs = conditional_batchnorm(inputs, train_phase, "bn1", split_z, embed_y) + inputs = tf.nn.relu(inputs) + if is_up: + inputs = up_sampling(inputs) + inputs = conv("conv1", inputs, nums_out, 3, 1, is_sn=True) + inputs = conditional_batchnorm(inputs, train_phase, "bn2", split_z, embed_y) + inputs = tf.nn.relu(inputs) + inputs = conv("conv2", inputs, nums_out, 3, 1, is_sn=True) + # skip connection + if is_up: + temp = up_sampling(temp) + temp = conv("identity", temp, nums_out, 1, 1, is_sn=True) + return inputs + temp + + +def D_Resblock(name, inputs, nums_out, train_phase, update_collection=None, is_down=True): + """A residual block in BigGAN's discriminator""" + with tf.variable_scope(name): + temp = tf.identity(inputs) + # inputs = conditional_batchnorm(inputs, train_phase, "BN1") + inputs = tf.nn.relu(inputs) + inputs = conv("conv1", inputs, nums_out, 3, 1, update_collection, is_sn=True) + # inputs = conditional_batchnorm(inputs, train_phase, "BN2") + inputs = tf.nn.relu(inputs) + inputs = conv("conv2", inputs, nums_out, 3, 1, update_collection, is_sn=True) + if is_down: + inputs = down_sampling(inputs) + # skip connection + temp = conv("identity", temp, nums_out, 1, 1, update_collection, is_sn=True) + temp = down_sampling(temp) + else: + temp = conv("identity", temp, nums_out, 1, 1, update_collection, is_sn=True) + return inputs + temp diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/pb_frozen.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/pb_frozen.py new file mode 100644 index 0000000000000000000000000000000000000000..5631a79494316398d26139c4dd8a3003a11921f6 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/pb_frozen.py @@ -0,0 +1,139 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
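+# Typical usage (a sketch based on the argument defaults below; adjust paths to your setup):
+#   python3 pb_frozen.py --output=../output --train_img_size=32
+# The script rebuilds the generator graph, writes it to <output>/pb_model/<size>/tmp_model.pb,
+# freezes the checkpoint from <output>/model/<size>/ into model.pb, rewrites RefSwitch / Assign*
+# nodes so the frozen graph can be converted further (e.g. to ONNX or an offline .om model), and
+# finally saves the cleaned graph as final_model.pb. Passing --ema=True selects ema.ckpt instead
+# of model.ckpt (argparse's type=bool treats any non-empty value, even "False", as True).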
+import tensorflow as tf +from tensorflow.python.tools import freeze_graph +from tensorflow.python.framework import graph_util +import os +import argparse + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # data arguments + parser.add_argument("--gen_num", type=int, default=5000, help="number of generated images") + parser.add_argument("--output", type=str, default="../output", help="output path") + parser.add_argument("-b", "--batch_size", type=int, default=64, help="batch size") + parser.add_argument("-c", "--num_classes", type=int, default=10, help="number of classes") + parser.add_argument("--img_h", type=int, default=32, help="image height") + parser.add_argument("--img_w", type=int, default=32, help="image width") + parser.add_argument("--train_img_size", type=int, default=32, + help="image will be resized to this size when training") + # model arguments + parser.add_argument("--base_channel", type=int, default=96, help="base channel number for G and D") + parser.add_argument("--z_dim", type=int, default=120, help="latent space dimensionality") + parser.add_argument("--ema", type=bool, default=False, help="use exponential moving average for G") + parser.add_argument("--shared_dim", type=int, default=128, help="shared embedding dimensionality") + args = parser.parse_args() + + # use different architectures for different image sizes + if args.train_img_size == 128: + from networks_128 import Generator, Discriminator + elif args.train_img_size == 64: + from networks_64 import Generator, Discriminator + elif args.train_img_size == 32: + from networks_32 import Generator, Discriminator + + # model path + base_path = os.path.join(args.output, "model", str(args.train_img_size)) + model_path = os.path.join(base_path, "model.ckpt") + ema_model_path = os.path.join(base_path, "ema.ckpt") + ckpt_path = ema_model_path if args.ema else model_path + + # pb path + pb_path = os.path.join(args.output, "pb_model", str(args.train_img_size)) + graph_pb_path = os.path.join(pb_path, "tmp_model.pb") + model_pb_path = os.path.join(pb_path, "model.pb") + final_pb_path = os.path.join(pb_path, "final_model.pb") + + tf.reset_default_graph() + train_phase = tf.Variable(tf.constant(False, dtype=tf.bool), name="train_phase") + # train_phase = tf.placeholder(tf.bool) # is training or not + z = tf.placeholder(tf.float32, [None, args.z_dim], name="z") # latent vector + y = tf.placeholder(tf.int32, [None, 1], name="y") # class info + y = tf.reshape(y, [-1]) + + G = Generator("generator", args.base_channel) + with tf.variable_scope("generator", reuse=tf.AUTO_REUSE): + embed_w = tf.get_variable("embed_w", [args.num_classes, args.shared_dim], initializer=tf.orthogonal_initializer()) + + fake_img = G(z, train_phase, y, embed_w, args.num_classes) + output = tf.identity(fake_img, name="output") + + with tf.Session() as sess: + tf.train.write_graph(sess.graph_def, pb_path, "tmp_model.pb") + # freeze model + freeze_graph.freeze_graph( + input_graph=graph_pb_path, + input_saver='', + input_binary=False, + input_checkpoint=ckpt_path, + output_node_names="output", + restore_op_name='save/restore_all', + filename_tensor_name='save/Const:0', + output_graph=model_pb_path, + clear_devices=False, + initializer_nodes='') + + """ + see https://blog.csdn.net/u011765925/article/details/103038349 and + https://github.com/onnx/tensorflow-onnx/issues/77 + """ + tf.reset_default_graph() + with tf.gfile.FastGFile(model_pb_path, "rb") as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + + for node in 
graph_def.node: + if node.op == 'RefSwitch': + node.op = 'Switch' + for index in range(len(node.input)): + if 'moving_' in node.input[index]: + node.input[index] = node.input[index] + '/read' + elif node.op == 'AssignSub': + node.op = 'Sub' + if 'use_locking' in node.attr: + del node.attr['use_locking'] + elif node.op == 'Assign': + node.op = 'Identity' + if 'use_locking' in node.attr: + del node.attr['use_locking'] + if 'validate_shape' in node.attr: + del node.attr['validate_shape'] + if len(node.input) == 2: + # input0: ref: Should be from a Variable node. May be uninitialized. + # input1: value: The value to be assigned to the variable. + node.input[0] = node.input[1] + del node.input[1] + elif node.op == 'AssignAdd': + node.op = 'Add' + if 'use_locking' in node.attr: + del node.attr['use_locking'] + with tf.Session() as sess: + converted_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, ['output']) + tf.train.write_graph(converted_graph_def, pb_path, "final_model.pb", as_text=False) + diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/requirements.txt b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4bdba62afb1db6af510bb2e5b435e5372037cfdd --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/requirements.txt @@ -0,0 +1,33 @@ +absl-py==0.11.0 +astor==0.8.1 +cached-property==1.5.2 +cycler==0.10.0 +gast==0.2.2 +google-pasta==0.2.0 +grpcio==1.35.0 +h5py==3.1.0 +imageio==2.16.2 +importlib-metadata==3.4.0 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.2 +kiwisolver==1.3.1 +Markdown==3.3.3 +matplotlib==3.3.4 +numpy==1.20.0 +opencv-python==4.5.5.64 +opt-einsum==3.3.0 +Pillow==9.1.0 +protobuf==3.14.0 +pyparsing==2.4.7 +python-dateutil==2.8.1 +scipy==1.7.3 +six==1.15.0 +tensorboard==1.15.0 +tensorflow-estimator==1.15.1 +tensorflow-gpu==1.15.0 +termcolor==1.1.0 +tqdm==4.56.0 +typing-extensions==3.7.4.3 +Werkzeug==1.0.1 +wrapt==1.12.1 +zipp==3.4.0 diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_1p.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..f63debacb18ef4be2bc46d6172b4cf796a0c2824 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_1p.sh @@ -0,0 +1,28 @@ +#!/bin/bash +### Do not need to Configure CANN Environment on Modelarts Platform, because it has been set already. 
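+### Expected invocation (a sketch; the argument order follows the positional parameters below):
+###   bash run_1p.sh <code_dir> <data_dir> <result_dir> <obs_url>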
+### Modelarts Platform command for train +export TF_CPP_MIN_LOG_LEVEL=2 ## Tensorflow api print Log Config +export ASCEND_SLOG_PRINT_TO_STDOUT=0 ## Print log on terminal on(1), off(0) + +code_dir=${1} +data_dir=${2} +result_dir=${3} +obs_url=${4} + +current_time=`date "+%Y-%m-%d-%H-%M-%S"` + +python3.7 ${code_dir}/train.py \ + --dataset=${data_dir} \ + --output=${result_dir} \ + --obs_dir=${obs_url} \ + --chip=npu \ + --platform=modelarts \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=100000 \ + --batch_size=64 \ +# --use_fp16 \ +# --profiling \ +# --load_model \ diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_cpu.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_cpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..ed67da96bb25c546085ac15ae44656cebabb0473 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_cpu.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +python train.py \ + --dataset=../dataset \ + --output=../output \ + --chip=cpu \ + --platform=linux \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --batch_size=64 \ + --train_itr=100000 \ + # --load_model \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_gpu.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..db6a3eb47ac45d2fcd5fa93e28670214805bfa73 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_gpu.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#set env +### GPU Platform command for train +# export CUDA_VISIBLE_DEVICES=0 +# export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} + +current_time=`date "+%Y-%m-%d-%H-%M-%S"` + +python train.py \ + --dataset=../dataset \ + --output=../output \ + --chip=gpu \ + --platform=linux \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --batch_size=64 \ + --train_itr=100000 \ + # --load_model diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_msprof.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_msprof.sh new file mode 100644 index 0000000000000000000000000000000000000000..4081bca18b12b332813a4631e1c7f684c654bfa6 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/scripts/run_msprof.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -e +### Before run this shell, make sure you have generated profiling data, and have installed CANN toolkit package +### refer to link: https://support.huaweicloud.com/Development-tg-cann202training1/atlasprofilingtrain_16_0015.html +### $1 is the absolute directory of profiling data. 
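+### In this repository profiling data can be generated by running train.py with --profiling
+### (it is collected under /tmp/profiling by default). The four msprof.py calls below run in
+### order: import (parse the raw data into sqlite), query, export timeline, export summary.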
+### start commands sample: sh scripts/run_msprof.sh /home/npu_profiling + +PROFILING_DIR=$1 + +## Be careful the $MSPROF_DIR, you may change it on different plateform +## arm architecture, `uname -a` +# MSPROF_DIR=/home/HwHiAiUser/Ascend/ascend-toolkit/latest/arm64-linux/toolkit/tools/profiler/profiler_tool/analysis/msprof +## x86 architecture, `uname -a` For Ai1S platform +MSPROF_DIR=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/toolkit/tools/profiler/profiler_tool/analysis/msprof + +python3.7 ${MSPROF_DIR}/msprof.py import -dir ${PROFILING_DIR} +echo "===>>>[OK] msprof sqlite.\n" + +python3.7 ${MSPROF_DIR}/msprof.py query -dir ${PROFILING_DIR} +echo "===>>>[OK] msprof query.\n" + +python3.7 ${MSPROF_DIR}/msprof.py export timeline -dir ${PROFILING_DIR} +echo "===>>>[OK] msprof timeline.\n" + +python3.7 ${MSPROF_DIR}/msprof.py export summary -dir ${PROFILING_DIR} +echo "===>>>[OK] msprof summary.\n" \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..edfe9cfc7d1aaf157d28dcd1750f1aa68a3a386b --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p.sh @@ -0,0 +1,212 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path="" +output_path="" +obs_url="" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --obs_url # output path in OBS + --train_steps # max_step for training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --obs_url* ]];then + obs_url=`echo ${para#*=}` + elif [[ $para == --train_steps* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running without etp..." 
+ print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 +batch_size=64 + +if [ x"${modelarts_flag}" != x ]; +then + python3.7 ${cur_path}/../train.py \ + --dataset=${data_path} \ + --output=${output_path} \ + --chip=npu \ + --platform=linux \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=100000 \ + --batch_size=${batch_size} \ +# --use_fp16 +else + python3.7 ${cur_path}/../train.py \ + --dataset=${data_path} \ + --output=${output_path} \ + --chip=npu \ + --platform=linux \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=100000 \ + --batch_size=${batch_size} \ +# --use_fp16 + 1>${print_log} 2>&1 +fi + +# 性能相关数据计算 +StepTime=`grep "Iteration" ${print_log} | tail -n 10 | awk '{print $8,$10,$NF}' | awk '{sum+=$1+$2+$3} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# 精度相关数据计算 +#train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}' +train_accuracy='No Acc' +# 提取所有loss打印信息 +grep "Iteration" ${print_log} | awk '{print $3,$4,$5,$6}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." 
+ echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p_modelarts.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p_modelarts.sh new file mode 100644 index 0000000000000000000000000000000000000000..54d896229a4c8e04f27d52af57d91e97017bbdb0 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_full_1p_modelarts.sh @@ -0,0 +1,214 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path="" +output_path="" +obs_url="" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --obs_url # output path in OBS + --train_steps # max_step for training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --obs_url* ]];then + obs_url=`echo ${para#*=}` + elif [[ $para == --train_steps* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + 
train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running without etp..." + print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 +batch_size=64 + +if [ x"${modelarts_flag}" != x ]; +then + python3.7 ${cur_path}/../train.py \ + --dataset=${data_path} \ + --output=${output_path} \ + --obs_dir=${obs_url} \ + --chip=npu \ + --platform=modelarts \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=100000 \ + --batch_size=${batch_size} \ +# --use_fp16 +else + python3.7 ${cur_path}/../train.py \ + --dataset=${data_path} \ + --output=${output_path} \ + --obs_dir=${obs_url} \ + --chip=npu \ + --platform=modelarts \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=100000 \ + --batch_size=${batch_size} \ +# --use_fp16 + 1>${print_log} 2>&1 +fi + +# 性能相关数据计算 +StepTime=`grep "Iteration" ${print_log} | tail -n 10 | awk '{print $8,$10,$NF}' | awk '{sum+=$1+$2+$3} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# 精度相关数据计算 +#train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}' +train_accuracy='No Acc' +# 提取所有loss打印信息 +grep "Iteration" ${print_log} | awk '{print $3,$4,$5,$6}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu 
Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..d3a8fff381c9154291dc1af8d51eed60c9efb94a --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test/train_performance_1p.sh @@ -0,0 +1,213 @@ +#!/bin/bash + +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path="" +output_path="" +obs_url="" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --obs_url # output path in OBS + --train_steps # max_step for training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo 
${para#*=}` + elif [[ $para == --obs_url* ]];then + obs_url=`echo ${para#*=}` + elif [[ $para == --train_steps* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running with modelarts..." + print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 +#train_epochs=2 +train_steps=100 +batch_size=64 + +if [ x"${modelarts_flag}" != x ]; +then + python3.7 ${cur_path}/../train.py \ + --dataset=${data_path} \ + --output=${output_path} \ + --chip=npu \ + --platform=linux \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=${train_steps} \ + --batch_size=${batch_size} \ +# --use_fp16 +else + python3.7 ${cur_path}/../train.py \ + --dataset=${data_path} \ + --output=${output_path} \ + --chip=npu \ + --platform=linux \ + --num_classes=10 \ + --img_h=32 \ + --img_w=32 \ + --train_img_size=32 \ + --train_itr=${train_steps} \ + --batch_size=${batch_size} \ +# --use_fp16 + 1>${print_log} 2>&1 +fi + +# 性能相关数据计算 +StepTime=`grep "Iteration" ${print_log} | tail -n 10 | awk '{print $8,$10,$NF}' | awk '{sum+=$1+$2+$3} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# 精度相关数据计算 +#train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'` +train_accuracy='No Acc' +# 提取所有loss打印信息 +grep "Iteration" ${print_log} | awk '{print $3,$4,$5,$6}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" 
${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test_om.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test_om.py new file mode 100644 index 0000000000000000000000000000000000000000..e194a9a4e7c0384fc0b984caeb04da06b9aaaed2 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test_om.py @@ -0,0 +1,72 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import argparse +import os +from PIL import Image +from utils import restore_img, check_dir, read_images +from calc_IS_FID import get_FID, get_IS +from tqdm import tqdm + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output", type=str, default="../output", help="output path") + parser.add_argument("--train_img_size", type=int, default=32, + help="image will be resized to this size when training") + parser.add_argument("--chip", type=str, default="gpu", help="run on which chip, cpu or gpu or npu") + parser.add_argument("--gpu", type=str, default="0", help="GPU to use (leave blank for CPU only)") + parser.add_argument("--batch_size", type=int, default=100, help="batch size") + parser.add_argument("--precalculated_path", type=str, default="./metrics/res/stats_tf/fid_stats_cifar10_train.npz", + help="precalculated statistics for datasets, used in FID") + args = parser.parse_args() + + bin_path = os.path.join(args.output, "inference", str(args.train_img_size), "bin") + image_path = os.path.join(args.output, "inference", str(args.train_img_size), "image") + check_dir(image_path) + + # recover image from bin + print("Recovering image from bin...") + files = os.listdir(bin_path) + output_num = 0 + for file in tqdm(files): + if file.endswith(".bin"): + output_num += 1 + file_bin_path = os.path.join(bin_path, file) + file_image_path = os.path.join(image_path, file.replace(".bin", ".jpg")) + image = np.fromfile(file_bin_path, dtype='float32').reshape(args.train_img_size, args.train_img_size, 3) + Image.fromarray(np.uint8(restore_img(image))).save(file_image_path) + + # calc FID and IS + print("Calculating FID and IS...") + images_list = read_images(image_path) + images = np.array(images_list).astype(np.float32) + fid_score = get_FID(images, args) + is_mean, is_std = get_IS(images_list, args, splits=10) + print("IS : (%f, %f)" % (is_mean, is_std)) + print("FID : %f" % fid_score) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test_pb.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test_pb.py new file mode 100644 index 0000000000000000000000000000000000000000..8d8501dff3366f120acb401a14174bfcc949a495 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/test_pb.py @@ -0,0 +1,84 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf +from tensorflow.python.framework import graph_util +from google.protobuf import text_format +import os +import argparse +from utils import session_config, check_dir +import numpy as np +from generate_fake_img import generate_img_of_one_class + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # platform arguments (Huawei Ascend) + parser.add_argument("--chip", type=str, default="gpu", help="run on which chip, cpu or gpu or npu") + # data arguments + parser.add_argument("--output", type=str, default="../output", help="output path") + parser.add_argument("-b", "--batch_size", type=int, default=64, help="batch size") + parser.add_argument("-c", "--num_classes", type=int, default=10, help="number of classes") + parser.add_argument("--img_h", type=int, default=32, help="image height") + parser.add_argument("--img_w", type=int, default=32, help="image width") + parser.add_argument("--train_img_size", type=int, default=32, + help="image will be resized to this size when training") + # model arguments + parser.add_argument("--base_channel", type=int, default=96, help="base channel number for G and D") + parser.add_argument("--z_dim", type=int, default=120, help="latent space dimensionality") + parser.add_argument("--truncation", type=float, default=2.0, help="truncation threshold") + parser.add_argument("--ema", type=bool, default=True, help="use exponential moving average for G") + parser.add_argument("--shared_dim", type=int, default=128, help="shared embedding dimensionality") + args = parser.parse_args() + + # get output dir + inference_path = os.path.join(args.output, "inference", str(args.train_img_size)) + check_dir(inference_path) + # pb path + pb_path = os.path.join(args.output, "pb_model", str(args.train_img_size)) + graph_pb_path = os.path.join(pb_path, "tmp_model.pb") + model_pb_path = os.path.join(pb_path, "model.pb") + final_pb_path = os.path.join(pb_path, "final_model.pb") + + tf.reset_default_graph() + with tf.gfile.FastGFile(final_pb_path, "rb") as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + # text_format.Merge(f.read(), graph_def) + + _ = tf.import_graph_def(graph_def, name="") + + config = session_config(args) + with tf.Session(config=config) as sess: + sess.run(tf.global_variables_initializer()) + + z = sess.graph.get_tensor_by_name("z:0") + y = sess.graph.get_tensor_by_name("y:0") + fake_img = sess.graph.get_tensor_by_name("output:0") + + class_labels = np.random.randint(0, 11, size=(args.batch_size, 1)) + generate_img_of_one_class(args, class_labels, "inference.jpg", inference_path, sess, fake_img, z, y) diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/train.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/train.py new file mode 100644 index 
0000000000000000000000000000000000000000..6dbf691e8530fcf78ff0cc328b038b715b0a6ade --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/train.py @@ -0,0 +1,342 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ops import Hinge_loss, ortho_reg +import tensorflow as tf +import numpy as np +from utils import truncated_noise_sample, get_one_batch, session_config, read_images, check_dir +import cv2 +import datetime +import scipy.io as sio +import argparse +import os +from generate_fake_img import generate_img, generate_img_by_class +from calc_IS_FID import get_IS, get_FID + +# TODO: larger batch size may have better performance +parser = argparse.ArgumentParser() +# platform arguments (Huawei Ascend) +parser.add_argument("--chip", type=str, default="gpu", help="run on which chip, cpu or gpu or npu") +parser.add_argument("--gpu", type=str, default="0", help="GPU to use (leave blank for CPU only)") +parser.add_argument("--platform", type=str, default="linux", help="Run on linux/apulis/modelarts platform. 
Modelarts " + "Platform has some extra data copy operations") +parser.add_argument("--obs_dir", type=str, default="obs://lianlio/log", help="obs result path, not need on gpu and apulis platform") +parser.add_argument("--profiling", action="store_true", help="profiling for performance or not") +# data arguments +parser.add_argument("--dataset", type=str, default="../dataset", help="dataset path") +parser.add_argument("--output", type=str, default="../output", help="output path") +parser.add_argument("-c", "--num_classes", type=int, default=10, help="number of classes") +parser.add_argument("--img_h", type=int, default=32, help="image height") +parser.add_argument("--img_w", type=int, default=32, help="image width") +parser.add_argument("--train_img_size", type=int, default=32, help="image will be resized to this size when training") +parser.add_argument("--data", type=str, default="cifar10", help="which dataset to use (cifar10 / imagenet64)") +# metrics arguments +parser.add_argument("--metrics", type=str, default="fid", help="use FID or IS as metrics (fid / is)") +parser.add_argument("--precalculated_path", type=str, default="./metrics/res/stats_tf/fid_stats_cifar10_train.npz", + help="precalculated statistics for datasets, used in FID") +parser.add_argument("--gen_num", type=int, default=5000, help="number of generated images to calc IS or FID " + "(at least 2048 for FID)") +# training arguments +parser.add_argument('--use_fp16', action="store_true", help='enable mixed precision training') +parser.add_argument("--load_model", action="store_true", help="load model and continue to train") +parser.add_argument("--save_freq", type=int, default=1000, help="frequency of saving model") +parser.add_argument("--log_freq", type=int, default=50, help="frequency of logging") +parser.add_argument("-b", "--batch_size", type=int, default=64, help="batch size") +parser.add_argument("-i", "--train_itr", type=int, default=100000, help="number of training iterations") +parser.add_argument("--d_lr", type=float, default=4e-4, help="learning rate for discriminator") +parser.add_argument("--g_lr", type=float, default=1e-4, help="learning rate for generator") +parser.add_argument("--d_train_step", type=int, default=2, help="number of D training steps per G training step") +parser.add_argument('--beta1', type=float, default=0.0, help='beta1 for Adam optimizer') +parser.add_argument('--beta2', type=float, default=0.9, help='beta2 for Adam optimizer') +# model arguments +parser.add_argument("--base_channel", type=int, default=96, help="base channel number for G and D") +parser.add_argument("--z_dim", type=int, default=120, help="latent space dimensionality") +parser.add_argument("--shared_dim", type=int, default=128, help="shared embedding dimensionality") +parser.add_argument("--beta", type=float, default=1e-4, help="orthogonal regularization strength") +parser.add_argument("--truncation", type=float, default=2.0, help="truncation threshold") +parser.add_argument("--ema_decay", type=float, default=0.9999, help="decay rate of exponential moving average for the weights of G") +# other arguments +parser.add_argument("--debug", action="store_true", help="debug or not") +args = parser.parse_args() + +if args.chip == "npu": + from npu_bridge.npu_init import * +if args.debug is True: + from tensorflow.python import debug as tf_dbg + +os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu +args.metrics = args.metrics.upper() + +# use different architectures for different image sizes +if args.train_img_size == 128: + from 
networks_128 import Generator, Discriminator +elif args.train_img_size == 64: + from networks_64 import Generator, Discriminator +elif args.train_img_size == 32: + from networks_32 import Generator, Discriminator + +# get current time +now = datetime.datetime.now() +now_str = now.strftime('%Y_%m_%d_%H_%M_%S') + +# check output dir +model_path = os.path.join(args.output, "model", str(args.train_img_size)) +resume_path = os.path.join(model_path, "model.ckpt") +ema_model_path = os.path.join(model_path, "ema.ckpt") +log_path = os.path.join(args.output, "log", str(args.train_img_size)) +test_path = os.path.join(args.output, "gen_img") +fake_img_path = os.path.join(test_path, "fake", str(args.train_img_size)) +image_of_each_class_path = os.path.join(test_path, "image_of_each_class", str(args.train_img_size)) +check_dir(model_path) +check_dir(log_path) +if args.profiling is True: + args.profiling_dir = "/tmp/profiling" + check_dir(args.profiling_dir) + + +def train(): + train_phase = tf.Variable(tf.constant(True, dtype=tf.bool), name="train_phase") + # train_phase = tf.placeholder(tf.bool) # is training or not + x = tf.placeholder(tf.float32, [None, args.train_img_size, args.train_img_size, 3]) # input image(, which will be resized to 128x128) + z = tf.placeholder(tf.float32, [None, args.z_dim]) # latent vector + y = tf.placeholder(tf.int32, [None]) # class info + + with tf.variable_scope("generator"): + embed_w = tf.get_variable("embed_w", [args.num_classes, args.shared_dim], initializer=tf.orthogonal_initializer()) # weight for shared embedding + + global_step = tf.Variable(0, trainable=False) # global training step + add_step = global_step.assign(global_step + 1) + + set_train_phase_true = tf.assign(train_phase, True) + set_train_phase_false = tf.assign(train_phase, False) + + G = Generator('generator', args.base_channel) + D = Discriminator('discriminator', args.base_channel) + fake_img = G(z, train_phase, y, embed_w, args.num_classes) # generate fake img + fake_logits = D(fake_img, train_phase, y, args.num_classes, None) # D(G(z), y) + real_logits = D(x, train_phase, y, args.num_classes, 'NO_OPS') # D(x, y) + + D_loss, G_loss = Hinge_loss(real_logits, fake_logits) + G_ortho = args.beta * ortho_reg(G.var_list()) # Orthogonal Regularization + G_loss += G_ortho # get total loss + + D_opt = tf.train.AdamOptimizer(args.d_lr, beta1=args.beta1, beta2=args.beta2).minimize(D_loss, var_list=D.var_list()) + G_opt = tf.train.AdamOptimizer(args.g_lr, beta1=args.beta1, beta2=args.beta2).minimize(G_loss, var_list=G.var_list()) + + # loss scale for mixed precision training + if args.use_fp16 is True and args.chip == "npu": + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, decr_ratio=0.5) + D_opt = NPULossScaleOptimizer(tf.train.AdamOptimizer(args.d_lr, beta1=args.beta1, beta2=args.beta2), loss_scale_manager).minimize(D_loss, var_list=D.var_list()) + G_opt = NPULossScaleOptimizer(tf.train.AdamOptimizer(args.g_lr, beta1=args.beta1, beta2=args.beta2), loss_scale_manager).minimize(G_loss, var_list=G.var_list()) + + # add exponential moving average for G's weights + with tf.variable_scope("ema_weights"): + var_ema = tf.train.ExponentialMovingAverage(args.ema_decay, global_step) + with tf.control_dependencies([G_opt]): + G_opt_ema = var_ema.apply(tf.trainable_variables(scope='generator')) + # assign ema weights + assign_vars = [] + for var in tf.trainable_variables(scope='generator'): + v = var_ema.average(var) + if v is not 
None: + assign_vars.append(tf.assign(var, v)) + + with tf.variable_scope("metrics", reuse=tf.AUTO_REUSE): + FID_now = tf.get_variable("FID_now", shape=[], initializer=tf.constant_initializer(1e3), trainable=False) + IS_now = tf.get_variable("IS_now", shape=[], initializer=tf.constant_initializer(0.0), trainable=False) + FID_best = tf.get_variable("FID_best", shape=[], initializer=tf.constant_initializer(1e3), trainable=False) + IS_best = tf.get_variable("IS_best", shape=[], initializer=tf.constant_initializer(0.0), trainable=False) + + # log loss, FID, IS + log_suffix = "_" + str(args.train_img_size) + "_bs_" + str(args.batch_size) + "_ch_" + str(args.base_channel) + tf.summary.scalar(now_str + '/d_loss' + log_suffix, D_loss) + tf.summary.scalar(now_str + '/g_loss' + log_suffix, G_loss) + # tf.summary.scalar(now_str + '/IS' + log_suffix, IS_now) + # tf.summary.scalar(now_str + '/FID' + log_suffix, FID_now) + summary_op = tf.summary.merge_all() + + config = session_config(args) + + print("Using", args.chip, "!") + + if args.data == "cifar10": + # get cifar-10 training data + data_path = os.path.join(args.dataset, "data_batch_") + test_data_path = os.path.join(args.dataset, "test_batch.mat") + raw_data = np.concatenate((sio.loadmat(data_path + "1.mat")["data"], + sio.loadmat(data_path + "2.mat")["data"], + sio.loadmat(data_path + "3.mat")["data"], + sio.loadmat(data_path + "4.mat")["data"], + sio.loadmat(data_path + "5.mat")["data"], + sio.loadmat(test_data_path)["data"] + ), + axis=0) + raw_data = np.reshape(raw_data, [-1, 3, args.img_h, args.img_w]) + raw_data = np.transpose(raw_data, axes=[0, 2, 3, 1]) # (N, H, W, C) + labels = np.concatenate((sio.loadmat(data_path + "1.mat")["labels"], + sio.loadmat(data_path + "2.mat")["labels"], + sio.loadmat(data_path + "3.mat")["labels"], + sio.loadmat(data_path + "4.mat")["labels"], + sio.loadmat(data_path + "5.mat")["labels"], + sio.loadmat(test_data_path)["labels"] + ), + axis=0)[:, 0] + elif args.data == "imagenet64": + # get imagenet64 training data + data_path = os.path.join(args.dataset, "imagenet64.mat") + data_and_label = sio.loadmat(data_path) + labels = data_and_label["labels"][0, :] + raw_data = data_and_label["data"] + else: + pass + + # resize images to training size + start = datetime.datetime.now() + data = np.zeros(shape=[raw_data.shape[0], args.train_img_size, args.train_img_size, 3], dtype=raw_data.dtype) + for i, img in enumerate(raw_data): + data[i] = cv2.resize(img, dsize=(args.train_img_size, args.train_img_size), interpolation=cv2.INTER_LINEAR) + end = datetime.datetime.now() + print("data preprocess time:", (end - start).total_seconds()) + + with tf.Session(config=config) as sess: + summary_writer = tf.summary.FileWriter(logdir=log_path, graph=sess.graph) + sess.run(tf.global_variables_initializer()) + + if args.debug is True: + sess = tf_dbg.LocalCLIDebugWrapperSession(sess) + + # load model + saver = tf.train.Saver() + if args.load_model is True: + print('Loading checkpoint from {}...'.format(resume_path)) + saver.restore(sess, save_path=resume_path) + + for itr in range(args.train_itr): + d_update_time = 0 # discriminator update time + g_update_time = 0 # generator update time + data_preprocess_time = 0 + + # Train Discriminator + for d in range(args.d_train_step): + # read one mini-batch + start = datetime.datetime.now() + batch, Y = get_one_batch(data, labels, args.batch_size) # get one batch + end = datetime.datetime.now() + data_preprocess_time += (end - start).total_seconds() + + # truncation trick + Z = 
truncated_noise_sample(args.batch_size, args.z_dim, args.truncation)
+
+                start = datetime.datetime.now()
+                sess.run(set_train_phase_true)
+                sess.run(D_opt, feed_dict={z: Z, x: batch, y: Y})
+                end = datetime.datetime.now()
+                d_update_time += (end - start).total_seconds()
+
+            # Train Generator
+            Z = truncated_noise_sample(args.batch_size, args.z_dim, args.truncation)
+            start = datetime.datetime.now()
+            sess.run(set_train_phase_true)
+            sess.run([G_opt_ema, add_step, global_step], feed_dict={z: Z, y: Y})
+            end = datetime.datetime.now()
+            g_update_time += (end - start).total_seconds()
+
+            if itr % args.log_freq == 0:
+                sess.run(set_train_phase_false)
+                summary, d_loss, g_loss, is_now, is_best, fid_now, fid_best = sess.run([summary_op, D_loss, G_loss, IS_now, IS_best, FID_now, FID_best],
+                                                                                       feed_dict={z: Z, x: batch, y: Y})
+                summary_writer.add_summary(summary, itr)
+                metrics_best = fid_best if args.metrics == "FID" else is_best
+                # print("Iteration: %d, D_loss: %f, G_loss: %f, IS: %f, FID: %f, best %s: %f, "
+                #       "D_update_time: %f(s), G_update_time: %f(s), data preprocess time: %f(s)"
+                #       % (itr, d_loss, g_loss, is_now, fid_now, args.metrics, metrics_best,
+                #          d_update_time, g_update_time, data_preprocess_time))
+                print("Iteration: %d, D_loss: %f, G_loss: %f, "
+                      "D_update_time: %f(s), G_update_time: %f(s), data preprocess time: %f(s)"
+                      % (itr, d_loss, g_loss, d_update_time, g_update_time, data_preprocess_time))
+                # generate fake images for each class
+                generate_img_by_class(args, image_of_each_class_path, sess, fake_img, z, y)
+
+                # print loss scale value
+                if args.use_fp16 is True and args.chip == "npu":
+                    lossScale = tf.get_default_graph().get_tensor_by_name("loss_scale:0")
+                    overflow_status_reduce_all = tf.get_default_graph().get_tensor_by_name(
+                        "overflow_status_reduce_all:0")
+                    l_s, overflow_status_reduce_all = sess.run([lossScale, overflow_status_reduce_all])
+                    print('loss_scale is: ', l_s)
+                    print("overflow_status_reduce_all:", overflow_status_reduce_all)
+            if itr % args.save_freq == 0:
+                saver.save(sess, save_path=resume_path)  # save current model
+                print("Model saved in", resume_path)
+                sess.run(set_train_phase_false)
+                sess.run(assign_vars, feed_dict={z: Z, y: Y})  # copy EMA weights into the generator
+
+                # calc FID and IS
+                # generate_img(args, fake_img_path, sess, fake_img, z, y)  # generate fake images
+                # images_list = read_images(fake_img_path)
+                # images = np.array(images_list).astype(np.float32)
+
+                # fid_now = get_FID(images, args)
+                # is_now, _ = get_IS(images_list, args, splits=10)
+                #
+                # if args.metrics == "FID":
+                #     fid_best = sess.run(FID_best)
+                #     if fid_now < fid_best:
+                #         fid_best = fid_now
+                #         saver.save(sess, save_path=ema_model_path)  # save ema model
+                #         print("New best model!\nBest FID:", fid_best)
+                # else:
+                #     is_best = sess.run(IS_best)
+                #     if is_now > is_best:
+                #         is_best = is_now
+                #         saver.save(sess, save_path=ema_model_path)  # save ema model
+                #         print("New best model!\nBest IS:", is_best)
+                saver.save(sess, save_path=ema_model_path)  # save ema model
+                print("EMA Model saved in", ema_model_path)
+                saver.restore(sess, save_path=resume_path)  # restore current model
+
+                # if args.metrics == "FID":
+                #     sess.run(tf.assign(FID_best, tf.cast(tf.constant(fid_best), tf.float32)))  # update best FID / IS
+                # else:
+                #     sess.run(tf.assign(IS_best, tf.cast(tf.constant(is_best), tf.float32)))
+                #
+                # sess.run(tf.assign(IS_now, tf.cast(tf.constant(is_now), tf.float32)))  # update FID and IS
+                # sess.run(tf.assign(FID_now, tf.cast(tf.constant(fid_now), tf.float32)))
+
+        summary_writer.close()
+
+    
if args.platform.lower() == 'modelarts': + from help_modelarts import modelarts_result2obs + modelarts_result2obs(args) + print("Data transferred to OBS!") + + print("Training finished!") + + +if __name__ == "__main__": + train() diff --git a/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/utils.py b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fa3d5572301f8fa2220583fa63860fce6d260ca6 --- /dev/null +++ b/TensorFlow/contrib/cv/AnimeFaceGAN_ID1062_for_Tensorflow/utils.py @@ -0,0 +1,117 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
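+
+# Shared helpers for train.py: truncated noise sampling (the truncation trick),
+# image reading / normalization, random mini-batch selection, and per-chip
+# (NPU / GPU / CPU) TensorFlow session configuration.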
+import numpy as np
+from scipy.stats import truncnorm
+import tensorflow as tf
+import imageio
+from PIL import Image
+import os
+from glob import glob
+
+
+def truncated_noise_sample(batch_size=1, dim_z=128, truncation=1., seed=None):
+    """Sample latent vectors with the truncation trick; truncation <= 0 falls back to N(0, 1)."""
+    state = None if seed is None else np.random.RandomState(seed)
+    if truncation <= 0:
+        return np.random.normal(size=(batch_size, dim_z)).astype(np.float32)  # no truncation; cast for consistency with the truncated branch
+    else:
+        return truncnorm.rvs(-truncation, truncation, size=(batch_size, dim_z), random_state=state).astype(np.float32)
+
+
+def read_image(filename):
+    x = imageio.imread(filename)
+    return np.array(Image.fromarray(x))
+
+
+def read_images(img_path):
+    filenames = glob(os.path.join(img_path, '*.*'))
+    images_list = [read_image(filename) for filename in filenames]
+    return images_list
+
+
+def normalize_img(img):
+    # map pixel values from [0, 255] to [-1, 1]
+    return img / 127.5 - 1
+
+
+def restore_img(img):
+    # map pixel values from [-1, 1] back to [0, 255]
+    return (img + 1) * 127.5
+
+
+def get_one_batch(data, labels, batch_size):
+    # sample a random mini-batch (with replacement) and normalize it
+    rand_select = np.random.randint(0, data.shape[0], batch_size)
+    batch_labels = labels[rand_select]
+    batch = data[rand_select]
+
+    return normalize_img(batch), batch_labels
+
+
+def session_config(args):
+    if args.chip == "npu":
+        from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+        config = tf.ConfigProto()
+        custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+        custom_op.name = "NpuOptimizer"
+        if args.use_fp16 is True:
+            custom_op.parameter_map['precision_mode'].s = tf.compat.as_bytes('allow_mix_precision')
+        config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+        config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
+        fusion_cfg_path = os.path.join(os.path.dirname(__file__), "fusion_switch.cfg")
+        custom_op.parameter_map["fusion_switch_file"].s = tf.compat.as_bytes(fusion_cfg_path)
+        # custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("RL,GA")
+        if args.profiling is True:
+            custom_op.parameter_map["use_off_line"].b = True
+            custom_op.parameter_map["profiling_mode"].b = True
+            custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(
+                '{"output":"/tmp/profiling","task_trace":"on","aicpu":"on"}')
+    elif args.chip == "gpu":
+        config = tf.ConfigProto(allow_soft_placement=True)
+        config.gpu_options.allow_growth = True
+    else:  # cpu (also the fallback, so config is always defined)
+        config = tf.ConfigProto()
+    return config
+
+
+def check_dir(path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+
+if __name__ == '__main__':
+    # quick visual sanity check: compare truncated samples against a standard normal
+    import matplotlib.pyplot as plt
+    from scipy import stats
+
+    truncation = 3.0
+    N = stats.norm(loc=0., scale=1.)
+
+    fig = plt.figure()
+    ax1 = fig.add_subplot(2, 1, 1)
+    ax1.hist(truncated_noise_sample(dim_z=10000, truncation=truncation).squeeze(), density=True, bins=30)  # histogram of the truncated normal distribution
+    ax2 = fig.add_subplot(2, 1, 2)
+    ax2.hist(N.rvs(10000), density=True, bins=30)  # histogram of a standard normal distribution
+    plt.show()