diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/.keep b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/Figure_1.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..be4f93287683b50573e88ded7596b308ce1957d9 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/Figure_1.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/cfg.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/cfg.py new file mode 100644 index 0000000000000000000000000000000000000000..2bb891047241bdbde9f4f073de9c66a82d266662 --- /dev/null +++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/cfg.py @@ -0,0 +1,111 @@ +""" +SRNet - Editing Text in the Wild +Some configurations. +Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd. +Licensed under the GPL License (see LICENSE for details) +Written by Yu Qian +""" + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import moxing as mox + +# pretrained vgg +vgg19_weights = r'/home/ma-user/modelarts/user-job-dir/code/model_logs/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.pb' + +# model parameters +lt = 1. +lt_alpha = 1. +lb = 1. +lb_beta = 10. +lf = 1. +lf_theta_1 = 10. +lf_theta_2 = 1. +lf_theta_3 = 500. 
+epsilon = 1e-8
+
+# train
+learning_rate = 1e-4 # default 1e-3
+decay_rate = 0.9
+decay_steps = 10000
+staircase = False
+beta1 = 0.9 # default 0.9
+beta2 = 0.999 # default 0.999
+max_iter = 500000
+show_loss_interval = 50
+write_log_interval = 50
+save_ckpt_interval = 10000
+gen_example_interval = 1000
+checkpoint_savedir = '/cache/out/model_logs/checkpoints'
+tensorboard_dir = '/cache/out/model_logs/train_logs'
+pretrained_ckpt_path = None
+train_name = None # used to name example outputs and tensorboard log dirs; set to None to use the current time
+
+# data
+batch_size = 8
+data_shape = [64, None]
+data_dir = '/cache/data'
+i_t_dir = 'i_t'
+i_s_dir = 'i_s'
+t_sk_dir = 't_sk'
+t_t_dir = 't_t'
+t_b_dir = 't_b'
+t_f_dir = 't_f'
+mask_t_dir = 'mask_t'
+example_data_dir = r'/home/ma-user/modelarts/user-job-dir/code/examples/labels'
+example_result_dir = '/cache/out/genLogs'
+
+# predict
+predict_ckpt_path = None
+predict_data_dir = None
+predict_result_dir = 'examples/result'
+
+#out_dir = ''
+
+def main():
+    code_dir = os.path.dirname(__file__)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--train_url", type=str)
+    parser.add_argument("--data_url", type=str)
+    parser.add_argument("--modelarts_data_dir", type=str, default="/cache/data")
+    parser.add_argument("--modelarts_output_dir", type=str, default="/cache/out")
+    paraConfig = parser.parse_args()
+
+    #out_dir = paraConfig.modelarts_output_dir
+    os.makedirs(paraConfig.modelarts_data_dir)
+    os.makedirs(paraConfig.modelarts_output_dir)
+    os.makedirs(r'/cache/out/genLogs')
+    mox.file.copy_parallel(src_url=paraConfig.data_url, dst_url=paraConfig.modelarts_data_dir)
+    os.system("python3" + ' ' + code_dir + '/train.py')
+
+if __name__ == "__main__":
+    main()
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/datagen.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/datagen.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d71eb94b5bb99dbbe2eb29c0d27e929b5013c61
--- /dev/null
+++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/datagen.py
@@ -0,0 +1,139 @@
+"""
+SRNet - Editing Text in the Wild
+Data generator.
+Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd.
+Licensed under the GPL License (see LICENSE for details)
+Written by Yu Qian
+"""
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from npu_bridge.npu_init import *
+
+import os
+import cv2
+import numpy as np
+import random
+import cfg
+
+def srnet_datagen():
+
+    # generate SRNet training data
+    name_list = os.listdir(os.path.join(cfg.data_dir, cfg.t_b_dir))
+    random.shuffle(name_list)
+    name_num = len(name_list)
+    idx = 0
+
+    while True:
+        i_t_batch, i_s_batch = [], []
+        t_sk_batch, t_t_batch, t_b_batch, t_f_batch = [], [], [], []
+        mask_t_batch = []
+
+        for _ in range(cfg.batch_size):
+            name = name_list[idx]
+
+            i_t = cv2.imread(os.path.join(cfg.data_dir, cfg.i_t_dir, name))
+            i_s = cv2.imread(os.path.join(cfg.data_dir, cfg.i_s_dir, name))
+            t_sk = cv2.imread(os.path.join(cfg.data_dir, cfg.t_sk_dir, name), cv2.IMREAD_GRAYSCALE)
+            t_t = cv2.imread(os.path.join(cfg.data_dir, cfg.t_t_dir, name))
+            t_b = cv2.imread(os.path.join(cfg.data_dir, cfg.t_b_dir, name))
+            t_f = cv2.imread(os.path.join(cfg.data_dir, cfg.t_f_dir, name))
+            mask_t = cv2.imread(os.path.join(cfg.data_dir, cfg.mask_t_dir, name), cv2.IMREAD_GRAYSCALE)
+
+            i_t_batch.append(i_t)
+            i_s_batch.append(i_s)
+            t_sk_batch.append(t_sk)
+            t_t_batch.append(t_t)
+            t_b_batch.append(t_b)
+            t_f_batch.append(t_f)
+            mask_t_batch.append(mask_t)
+            idx = (idx + 1) % name_num
+
+        w_sum = 0
+        for t_b in t_b_batch:
+            h, w = t_b.shape[:2]
+            scale_ratio = cfg.data_shape[0] / h
+            w_sum += int(w * scale_ratio)
+
+        to_h = cfg.data_shape[0]
+        to_w = 128 # fixed width; the dynamic per-batch width below is kept disabled, presumably to keep input shapes static
+        #to_w = w_sum // cfg.batch_size
+        #to_w = int(round(to_w / 8)) * 8
+        to_scale = (to_w, to_h) # w first for cv2
+        for i in range(cfg.batch_size):
+            i_t_batch[i] = cv2.resize(i_t_batch[i], to_scale)
+            i_s_batch[i] = cv2.resize(i_s_batch[i], to_scale)
+            t_sk_batch[i] = cv2.resize(t_sk_batch[i], to_scale, interpolation=cv2.INTER_NEAREST)
+            t_t_batch[i] = cv2.resize(t_t_batch[i], to_scale)
+            t_b_batch[i] = cv2.resize(t_b_batch[i], to_scale)
+            t_f_batch[i] = cv2.resize(t_f_batch[i], to_scale)
+            mask_t_batch[i] = cv2.resize(mask_t_batch[i], to_scale, interpolation=cv2.INTER_NEAREST)
+
+        i_t_batch = np.stack(i_t_batch)
+        i_s_batch = np.stack(i_s_batch)
+        t_sk_batch = np.expand_dims(np.stack(t_sk_batch), axis = -1)
+        t_t_batch = np.stack(t_t_batch)
+        t_b_batch = np.stack(t_b_batch)
+        t_f_batch = np.stack(t_f_batch)
+        mask_t_batch = np.expand_dims(np.stack(mask_t_batch), axis = -1)
+
+        i_t_batch = i_t_batch.astype(np.float32) / 127.5 - 1.
+        i_s_batch = i_s_batch.astype(np.float32) / 127.5 - 1.
+        t_sk_batch = t_sk_batch.astype(np.float32) / 255.
+        t_t_batch = t_t_batch.astype(np.float32) / 127.5 - 1.
+        t_b_batch = t_b_batch.astype(np.float32) / 127.5 - 1.
+        t_f_batch = t_f_batch.astype(np.float32) / 127.5 - 1.
+        mask_t_batch = mask_t_batch.astype(np.float32) / 255.
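+        # at this point every batch tensor is float32: RGB images are scaled
+        # to [-1, 1], skeleton and mask maps to [0, 1]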
+
+        yield [i_t_batch, i_s_batch, t_sk_batch, t_t_batch, t_b_batch, t_f_batch, mask_t_batch]
+
+# every so often, prediction is run with the current training result;
+# this function fetches the data used for those predictions
+def get_input_data(data_dir = cfg.example_data_dir):
+
+    # get input data from dir
+    data_list = os.listdir(data_dir)
+    data_list = [data_name.split('_')[0] + '_' for data_name in data_list]
+    data_list = list(set(data_list))
+    res_list = []
+    for data_name in data_list:
+        i_t = cv2.imread(os.path.join(data_dir, data_name + 'i_t.png'))
+        i_s = cv2.imread(os.path.join(data_dir, data_name + 'i_s.png'))
+        h, w = i_t.shape[:2]
+        scale_ratio = cfg.data_shape[0] / h
+        to_h = cfg.data_shape[0]
+        to_w = 128
+        #to_w = int(round(int(w * scale_ratio) / 8)) * 8
+        to_scale = (to_w, to_h) # w first for cv2
+        i_t = cv2.resize(i_t, to_scale).astype(np.float32) / 127.5 - 1.
+        i_s = cv2.resize(i_s, to_scale).astype(np.float32) / 127.5 - 1.
+        i_t = np.expand_dims(i_t, axis = 0)
+        i_s = np.expand_dims(i_s, axis = 0)
+        res_list.append([i_t, i_s, (w, h), data_name]) # w first for cv2
+    return res_list
+
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/example/data.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/example/data.png
new file mode 100644
index 0000000000000000000000000000000000000000..d12c358080d797566daed2f7c385bce17beb1541
Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/example/data.png differ
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/example/example.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/example/example.png
new file mode 100644
index 0000000000000000000000000000000000000000..fe529fc02e5fcf50352ed931e2e2cb6cbee8e25f
Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/example/example.png differ
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/gen_logs/.gitignore b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/gen_logs/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/001_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/001_i_s.png
new file mode 100644
index 0000000000000000000000000000000000000000..60d7fc57cf0e40bc73ddc5260760b3ae20e820ca
Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/001_i_s.png differ
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/001_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/001_i_t.png
new file mode 100644
index 0000000000000000000000000000000000000000..5cfa6404fb9d6a489edc5b854b20d50c20fa9165
Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/001_i_t.png differ
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/002_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/002_i_s.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3c02ef629fdfac88e2d60a9ab5deef80930a891
Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/002_i_s.png differ
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/002_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/002_i_t.png
new file mode
100644 index 0000000000000000000000000000000000000000..36a7472076f0888aaeb25ef6397d11dc5f9ae1d1 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/002_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/003_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/003_i_s.png new file mode 100644 index 0000000000000000000000000000000000000000..4ac500ed2466ef12c3315c88eba823f710d8b9e1 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/003_i_s.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/003_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/003_i_t.png new file mode 100644 index 0000000000000000000000000000000000000000..9e35c38e8d01f8cbaa0a8c1e33e1ded2ec68e006 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/003_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/004_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/004_i_s.png new file mode 100644 index 0000000000000000000000000000000000000000..c8368d5b258d94af32fdab42ca2fc10d8cea1676 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/004_i_s.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/004_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/004_i_t.png new file mode 100644 index 0000000000000000000000000000000000000000..14f3adb4dea83d4e7eb2f90221411cc95480c65d Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/004_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/005_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/005_i_s.png new file mode 100644 index 0000000000000000000000000000000000000000..db976a333872c66e04860521aa08c427af690ddc Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/005_i_s.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/005_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/005_i_t.png new file mode 100644 index 0000000000000000000000000000000000000000..0bf4afe2d6850c744e32860833f2462ca7c6e030 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/005_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/006_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/006_i_s.png new file mode 100644 index 0000000000000000000000000000000000000000..f9cbf20ab52a474f18b7c033098e9c7a783f60bd Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/006_i_s.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/006_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/006_i_t.png new file mode 100644 index 0000000000000000000000000000000000000000..5da774881d6b11b69cc596231e33bfc7e82634d0 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/006_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/007_i_s.png 
b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/007_i_s.png new file mode 100644 index 0000000000000000000000000000000000000000..e1cc3ccc6aad5cab3c2a977d3f4bd8e2aa4f3f71 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/007_i_s.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/007_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/007_i_t.png new file mode 100644 index 0000000000000000000000000000000000000000..8142bff0a54f78091479a12c11565ac4acedbf34 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/007_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/008_i_s.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/008_i_s.png new file mode 100644 index 0000000000000000000000000000000000000000..d412256afb079c196b58f8ad559b024530401376 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/008_i_s.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/008_i_t.png b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/008_i_t.png new file mode 100644 index 0000000000000000000000000000000000000000..ac34d93a02f7ab722c22b88d744f8ff512cceb43 Binary files /dev/null and b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/labels/008_i_t.png differ diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/results/.gitignore b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/examples/results/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/loss.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..fc9fd1fa32c78f7a1cac3a3ce27bfac647985a83 --- /dev/null +++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/loss.py @@ -0,0 +1,133 @@ +""" +SRNet - Editing Text in the Wild +Definition of loss functions. +Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd. +Licensed under the GPL License (see LICENSE for details) +Written by Yu Qian +""" + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +import tensorflow as tf +import cfg + +def build_discriminator_loss(x, name = 'd_loss'): + + x_true, x_pred = tf.split(x, 2, name = name + '_split') + d_loss = -tf.reduce_mean(tf.log(tf.clip_by_value(x_true, cfg.epsilon, 1.0)) \ + + tf.log(tf.clip_by_value(1.0 - x_pred, cfg.epsilon, 1.0))) + return d_loss + +def build_dice_loss(x_t, x_o, name = 'dice_loss'): + + intersection = tf.reduce_sum(x_t * x_o, axis = [1,2,3]) + union = tf.reduce_sum(x_t, axis = [1,2,3]) + tf.reduce_sum(x_o, axis = [1,2,3]) + return 1. - tf.reduce_mean((2. * intersection + cfg.epsilon)/(union + cfg.epsilon), axis = 0) + +def build_l1_loss(x_t, x_o, name = 'l1_loss'): + + return tf.reduce_mean(tf.abs(x_t - x_o)) + +def build_l1_loss_with_mask(x_t, x_o, mask, name = 'l1_loss'): + + mask_ratio = 1. - tf.reduce_sum(mask) / tf.cast(tf.size(mask), tf.float32) + l1 = tf.abs(x_t - x_o) + return mask_ratio * tf.reduce_mean(l1 * mask) + (1. - mask_ratio) * tf.reduce_mean(l1 * (1. - mask)) + +def build_perceptual_loss(x, name = 'per_loss'): + + l = [] + for i, f in enumerate(x): + l.append(build_l1_loss(f[0], f[1], name = name + '_l1_' + str(i + 1))) + l = tf.stack(l, axis = 0, name = name + '_stack') + l = tf.reduce_sum(l, name = name + '_sum') + return l + +def build_gram_matrix(x, name = 'gram_matrix'): + + x_shape = tf.shape(x) + h, w, c = x_shape[1], x_shape[2], x_shape[3] + matrix = tf.reshape(x, shape = [-1, h * w, c]) + gram = tf.matmul(matrix, matrix, transpose_a = True) / tf.cast(h * w * c, tf.float32) + return gram + +def build_style_loss(x, name = 'style_loss'): + + l = [] + for i, f in enumerate(x): + f_shape = tf.size(f[0]) + f_norm = 1. 
/ tf.cast(f_shape, tf.float32) + gram_true = build_gram_matrix(f[0], name = name + '_gram_true_' + str(i + 1)) + gram_pred = build_gram_matrix(f[1], name = name + '_gram_pred_' + str(i + 1)) + l.append(f_norm * (build_l1_loss(gram_true, gram_pred, name = name + '_l1_' + str(i + 1)))) + l = tf.stack(l, axis = 0, name = name + '_stack') + l = tf.reduce_sum(l, name = name + '_sum') + return l + +def build_vgg_loss(x, name = 'vgg_loss'): + + splited = [] + for i, f in enumerate(x): + splited.append(tf.split(f, 2, name = name + '_split_' + str(i + 1))) + l_per = build_perceptual_loss(splited, name = name + '_per') + l_style = build_style_loss(splited, name = name + '_style') + return l_per, l_style + +def build_gan_loss(x, name = 'gan_loss'): + + x_true, x_pred = tf.split(x, 2, name = name + '_split') + gan_loss = -tf.reduce_mean(tf.log(tf.clip_by_value(x_pred, cfg.epsilon, 1.0))) + return gan_loss + +def build_generator_loss(out_g, out_d, out_vgg, labels, name = 'g_loss'): + + o_sk, o_t, o_b, o_f, mask_t = out_g + o_db, o_df = out_d + o_vgg = out_vgg + t_sk, t_t, t_b, t_f = labels + + l_t_sk = cfg.lt_alpha * build_dice_loss(t_sk, o_sk, name = name + '_dice_loss') + l_t_l1 = build_l1_loss_with_mask(t_t, o_t, mask_t, name = name + '_lt_l1_loss') + l_t = l_t_l1 + l_t_sk + + l_b_gan = build_gan_loss(o_db, name = name + '_lb_gan_loss') + l_b_l1 = cfg.lb_beta * build_l1_loss(t_b, o_b, name = name + '_lb_l1_loss') + l_b = l_b_gan + l_b_l1 + + l_f_gan = build_gan_loss(o_df, name = name + '_lf_gan_loss') + l_f_l1 = cfg.lf_theta_1 * build_l1_loss(t_f, o_f, name = name + '_lf_l1_loss') + l_f_vgg_per, l_f_vgg_style = build_vgg_loss(o_vgg, name = name + '_lf_vgg_loss') + l_f_vgg_per = cfg.lf_theta_2 * l_f_vgg_per + l_f_vgg_style = cfg.lf_theta_3 * l_f_vgg_style + l_f = l_f_gan + l_f_l1 + l_f_vgg_per + l_f_vgg_style + + l = cfg.lt * l_t + cfg.lb * l_b + cfg.lf * l_f + return l, [l_t_sk, l_t_l1, l_b_gan, l_b_l1, l_f_gan, l_f_l1, l_f_vgg_per, l_f_vgg_style] + diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/model.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/model.py new file mode 100644 index 0000000000000000000000000000000000000000..80694c25d2bd402639aa843612729f983ebc5ba0 --- /dev/null +++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/model.py @@ -0,0 +1,346 @@ +""" +SRNet - Editing Text in the Wild +The main SRNet model implementation. +Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd. +Licensed under the GPL License (see LICENSE for details) +Written by Yu Qian +""" + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from npu_bridge.npu_init import * + +import os +import cv2 +import numpy as np +import tensorflow as tf +from loss import build_discriminator_loss, build_generator_loss +import cfg + +class SRNet(): + + def __init__(self, shape = [224, 224], name = ''): + + self.name = name + self.cnum = 32 + self.graph = tf.Graph() + with self.graph.as_default(): + self.i_t = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [3]) + self.i_s = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [3]) + self.t_sk = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [1]) + self.t_t = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [3]) + self.t_b = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [3]) + self.t_f = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [3]) + self.mask_t = tf.placeholder(dtype = tf.float32, shape = [None] + shape + [1]) + self.global_step = tf.Variable(tf.constant(0)) + self.build_whole_net_with_loss() + self.build_optimizer() + self.build_summary_op() + + def _res_block(self, x, activation = tf.nn.leaky_relu, padding = 'SAME', name = 'res_block'): + + cnum = x.get_shape().as_list()[-1] + xin = x + x = tf.layers.conv2d(x, cnum // 4, kernel_size = 1, strides = 1, activation = activation, padding = padding, name = name + '_conv1') + x = tf.layers.conv2d(x, cnum // 4, kernel_size = 3, strides = 1, activation = activation, padding = padding, name = name + '_conv2') + x = tf.layers.conv2d(x, cnum, kernel_size = 1, strides = 1, activation = None, padding = padding, name = name + '_conv3') + x = tf.add(xin, x, name = name + '_add') + x = tf.layers.batch_normalization(x, name = name + '_bn') + x = activation(x, name = name + '_out') + return x + + def _conv_bn_relu(self, x, cnum = None, activation = tf.nn.leaky_relu, padding = 'SAME', name = 'conv_bn_relu'): + + cnum = x.get_shape().as_list()[-1] if cnum is None else cnum + x = tf.layers.conv2d(x, cnum, kernel_size = 3, strides = 1, activation = None, padding = padding, name = name + '_conv') + x = tf.layers.batch_normalization(x, name = name + '_bn') + x = activation(x, name = name + '_out') + return x + + def build_res_net(self, x, activation = tf.nn.leaky_relu, padding = 'SAME', name = 'res_net'): + + x = self._res_block(x, activation = activation, padding = padding, name = name + '_block1') + x = self._res_block(x, activation = activation, padding = padding, name = name + '_block2') + x = self._res_block(x, activation = activation, padding = padding, name = name + '_block3') + x = self._res_block(x, activation = activation, padding = padding, name = name + '_block4') + return x + + def build_encoder_net(self, x, activation = tf.nn.leaky_relu, padding = 'SAME', name = 'encoder_net', get_feature_map = False): + + x = self._conv_bn_relu(x, self.cnum, name = name + '_conv1_1') + x = self._conv_bn_relu(x, self.cnum, name = name + '_conv1_2') + + x = tf.layers.conv2d(x, 2 * self.cnum, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_pool1') + x = self._conv_bn_relu(x, 2 * self.cnum, name = name + '_conv2_1') + x = 
self._conv_bn_relu(x, 2 * self.cnum, name = name + '_conv2_2')
+        f1 = x
+
+        x = tf.layers.conv2d(x, 4 * self.cnum, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_pool2')
+        x = self._conv_bn_relu(x, 4 * self.cnum, name = name + '_conv3_1')
+        x = self._conv_bn_relu(x, 4 * self.cnum, name = name + '_conv3_2')
+        f2 = x
+
+        x = tf.layers.conv2d(x, 8 * self.cnum, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_pool3')
+        x = self._conv_bn_relu(x, 8 * self.cnum, name = name + '_conv4_1')
+        x = self._conv_bn_relu(x, 8 * self.cnum, name = name + '_conv4_2')
+        if get_feature_map:
+            return x, [f2, f1]
+        else:
+            return x
+
+    def build_decoder_net(self, x, fuse = None, activation = tf.nn.leaky_relu, padding = 'SAME', name = 'decoder_net', get_feature_map = False):
+
+        if fuse and fuse[0] is not None:
+            x = tf.concat([x, fuse[0]], axis = -1, name = name + '_fuse1')
+        x = self._conv_bn_relu(x, 8 * self.cnum, name = name + '_conv1_1')
+        x = self._conv_bn_relu(x, 8 * self.cnum, name = name + '_conv1_2')
+        f1 = x
+
+        x = tf.layers.Conv2DTranspose(4 * self.cnum, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_deconv1')(x)
+        if fuse and fuse[1] is not None:
+            x = tf.concat([x, fuse[1]], axis = -1, name = name + '_fuse2')
+        x = self._conv_bn_relu(x, 4 * self.cnum, name = name + '_conv2_1')
+        x = self._conv_bn_relu(x, 4 * self.cnum, name = name + '_conv2_2')
+        f2 = x
+
+        x = tf.layers.Conv2DTranspose(2 * self.cnum, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_deconv2')(x)
+        if fuse and fuse[2] is not None:
+            x = tf.concat([x, fuse[2]], axis = -1, name = name + '_fuse3')
+        x = self._conv_bn_relu(x, 2 * self.cnum, name = name + '_conv3_1')
+        x = self._conv_bn_relu(x, 2 * self.cnum, name = name + '_conv3_2')
+        f3 = x
+
+        x = tf.layers.Conv2DTranspose(self.cnum, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_deconv3')(x)
+        x = self._conv_bn_relu(x, self.cnum, name = name + '_conv4_1')
+        x = self._conv_bn_relu(x, self.cnum, name = name + '_conv4_2')
+        if get_feature_map:
+            return x, [f1, f2, f3]
+        else:
+            return x
+
+    def build_text_conversion_net(self, x_t, x_s, padding = 'SAME', name = 'tcn'):
+
+        x_t = self.build_encoder_net(x_t, name = name + '_t_encoder')
+        x_t = self.build_res_net(x_t, name = name + '_t_res')
+
+        x_s = self.build_encoder_net(x_s, name = name + '_s_encoder')
+        x_s = self.build_res_net(x_s, name = name + '_s_res')
+
+        x = tf.concat([x_t, x_s], axis = -1, name = name + '_concat1')
+
+        y_sk = self.build_decoder_net(x, name = name + '_sk_decoder')
+        y_sk_out = tf.layers.conv2d(y_sk, 1, kernel_size = 3, strides = 1, activation = 'sigmoid', padding = padding, name = name + '_sk_out')
+
+        y_t = self.build_decoder_net(x, name = name + '_t_decoder')
+        y_t = tf.concat([y_sk, y_t], axis = -1, name = name + '_concat2')
+        y_t = self._conv_bn_relu(y_t, name = name + '_t_cbr')
+        y_t_out = tf.layers.conv2d(y_t, 3, kernel_size = 3, strides = 1, activation = 'tanh', padding = padding, name = name + '_t_out')
+        return y_sk_out, y_t_out
+
+    def build_background_inpainting_net(self, x, padding = 'SAME', name = 'bin'):
+
+        x, f_encoder = self.build_encoder_net(x, name = name + '_encoder', get_feature_map = True)
+        x = self.build_res_net(x, name = name + '_res')
+        x, fuse = self.build_decoder_net(x, fuse = [None] + f_encoder, name = name + '_decoder', get_feature_map = True)
+        x =
tf.layers.conv2d(x, 3, kernel_size = 3, strides = 1, activation = 'tanh', padding = padding, name = name + '_out') + return x, fuse + + def build_fusion_net(self, x, fuse, padding = 'SAME', name = 'fn'): + + x = self.build_encoder_net(x, name = name + '_encoder') + x = self.build_res_net(x, name = name + '_res') + x = self.build_decoder_net(x, fuse, name = name + '_decoder') + x = tf.layers.conv2d(x, 3, kernel_size = 3, strides = 1, activation = 'tanh', padding = padding, name = name + '_out') + return x + + def build_discriminator(self, x, activation = tf.nn.leaky_relu, padding = 'SAME', name = 'discriminator'): + + with tf.variable_scope('D'): + x = tf.layers.conv2d(x, 64, kernel_size = 3, strides = 2, activation = activation, padding = padding, name = name + '_conv1') + x = tf.layers.conv2d(x, 128, kernel_size = 3, strides = 2, activation = None, padding = padding, name = name + '_conv2') + x = tf.layers.batch_normalization(x, name = name + '_conv2_bn') + x = activation(x, name = name + '_conv2_activation') + x = tf.layers.conv2d(x, 256, kernel_size = 3, strides = 2, activation = None, padding = padding, name = name + '_conv3') + x = tf.layers.batch_normalization(x, name = name + '_conv3_bn') + x = activation(x, name = name + '_conv3_activation') + x = tf.layers.conv2d(x, 512, kernel_size = 3, strides = 2, activation = None, padding = padding, name = name + '_conv4') + x = tf.layers.batch_normalization(x, name = name + '_conv4_bn') + x = activation(x, name = name + '_conv4_activation') + x = tf.layers.conv2d(x, 1, kernel_size = 3, strides = 1, activation = None, padding = padding, name = name + '_conv5') + x = tf.layers.batch_normalization(x, name = name + '_conv5_bn') + x = tf.nn.sigmoid(x, name = '_out') + return x + + def build_generator(self, inputs, name = 'generator'): + + i_t, i_s = inputs + with tf.variable_scope('G'): + o_sk, o_t = self.build_text_conversion_net(i_t, i_s, name = name + '_tcn') + o_b, fuse = self.build_background_inpainting_net(i_s, name = name + '_bin') + o_f = self.build_fusion_net(o_t, fuse, name = name + '_fn') + return o_sk, o_t, o_b, o_f + + def build_whole_net_with_loss(self): + + i_t, i_s = self.i_t, self.i_s + t_sk, t_t, t_b, t_f, mask_t = self.t_sk, self.t_t, self.t_b, self.t_f, self.mask_t + inputs = [i_t, i_s] + labels = [t_sk, t_t, t_b, t_f] + + o_sk, o_t, o_b, o_f = self.build_generator(inputs) + self.o_sk = tf.identity(o_sk, name = 'o_sk') + self.o_t = tf.identity(o_t, name = 'o_t') + self.o_b = tf.identity(o_b, name = 'o_b') + self.o_f = tf.identity(o_f, name = 'o_f') + + i_db_true = tf.concat([t_b, i_s], axis = -1, name = 'db_true_concat') + i_db_pred = tf.concat([o_b, i_s], axis = -1, name = 'db_pred_concat') + i_db = tf.concat([i_db_true, i_db_pred], axis = 0, name = 'db_concat') + + i_df_true = tf.concat([t_f, i_t], axis = -1, name = 'df_true_concat') + i_df_pred = tf.concat([o_f, i_t], axis = -1, name = 'df_pred_concat') + i_df = tf.concat([i_df_true, i_df_pred], axis = 0, name = 'df_concat') + + o_db = self.build_discriminator(i_db, name = 'db') + o_df = self.build_discriminator(i_df, name = 'df') + + i_vgg = tf.concat([t_f, o_f], axis = 0, name = 'vgg_concat') + + vgg_graph_def = tf.GraphDef() + vgg_graph_path = cfg.vgg19_weights + with open(vgg_graph_path, 'rb') as f: + vgg_graph_def.ParseFromString(f.read()) + _ = tf.import_graph_def(vgg_graph_def, input_map = {"inputs:0": i_vgg}) + with tf.Session(config=npu_config_proto()) as sess: + o_vgg_1 = sess.graph.get_tensor_by_name("import/block1_conv1/Relu:0") + o_vgg_2 = 
sess.graph.get_tensor_by_name("import/block2_conv1/Relu:0")
+            o_vgg_3 = sess.graph.get_tensor_by_name("import/block3_conv1/Relu:0")
+            o_vgg_4 = sess.graph.get_tensor_by_name("import/block4_conv1/Relu:0")
+            o_vgg_5 = sess.graph.get_tensor_by_name("import/block5_conv1/Relu:0")
+
+        out_g = [o_sk, o_t, o_b, o_f, mask_t]
+        out_d = [o_db, o_df]
+        out_vgg = [o_vgg_1, o_vgg_2, o_vgg_3, o_vgg_4, o_vgg_5]
+
+        db_loss = build_discriminator_loss(o_db, name = 'db_loss')
+        df_loss = build_discriminator_loss(o_df, name = 'df_loss')
+        self.d_loss_detail = [db_loss, df_loss]
+        self.d_loss = tf.add(db_loss, df_loss, name = 'd_loss')
+        self.g_loss, self.g_loss_detail = build_generator_loss(out_g, out_d, out_vgg, labels, name = 'g_loss')
+
+    def build_optimizer(self):
+
+        self.learning_rate = tf.train.exponential_decay(learning_rate = cfg.learning_rate, global_step = self.global_step,
+            decay_steps = cfg.decay_steps, decay_rate = cfg.decay_rate, staircase = cfg.staircase)
+        d_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='D')
+        with tf.control_dependencies(d_update_ops):
+            self.d_train_step = tf.train.AdamOptimizer(self.learning_rate, cfg.beta1, cfg.beta2).minimize(self.d_loss,
+                var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'D'))
+        g_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='G')
+        with tf.control_dependencies(g_update_ops):
+            self.g_train_step = tf.train.AdamOptimizer(self.learning_rate, cfg.beta1, cfg.beta2).minimize(self.g_loss,
+                var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'G'))
+
+    def build_summary_op(self):
+
+        d_summary_loss = tf.summary.scalar("loss", self.d_loss)
+        d_summary_loss_db = tf.summary.scalar("l_db", self.d_loss_detail[0])
+        d_summary_loss_df = tf.summary.scalar("l_df", self.d_loss_detail[1])
+
+        g_summary_loss = tf.summary.scalar("loss", self.g_loss)
+        g_summary_loss_t_sk = tf.summary.scalar("l_t_sk", self.g_loss_detail[0])
+        g_summary_loss_t_l1 = tf.summary.scalar("l_t_l1", self.g_loss_detail[1])
+        g_summary_loss_b_gan = tf.summary.scalar("l_b_gan", self.g_loss_detail[2])
+        g_summary_loss_b_l1 = tf.summary.scalar("l_b_l1", self.g_loss_detail[3])
+        g_summary_loss_f_gan = tf.summary.scalar("l_f_gan", self.g_loss_detail[4])
+        g_summary_loss_f_l1 = tf.summary.scalar("l_f_l1", self.g_loss_detail[5])
+        g_summary_loss_f_vgg_per = tf.summary.scalar("l_f_vgg_per", self.g_loss_detail[6])
+        g_summary_loss_f_vgg_style = tf.summary.scalar("l_f_vgg_style", self.g_loss_detail[7])
+
+        self.d_summary_op = tf.summary.merge([d_summary_loss, d_summary_loss_db, d_summary_loss_df])
+        self.g_summary_op = tf.summary.merge([g_summary_loss, g_summary_loss_t_sk, g_summary_loss_t_l1,
+            g_summary_loss_b_gan, g_summary_loss_b_l1, g_summary_loss_f_gan,
+            g_summary_loss_f_l1, g_summary_loss_f_vgg_per, g_summary_loss_f_vgg_style])
+
+        self.d_writer = tf.summary.FileWriter(os.path.join(cfg.tensorboard_dir, self.name, 'discriminator'), self.graph)
+        self.g_writer = tf.summary.FileWriter(os.path.join(cfg.tensorboard_dir, self.name, 'generator'), self.graph)
+
+    def train_step(self, sess, global_step, i_t, i_s, t_sk, t_t, t_b, t_f, mask_t):
+
+        feed_dict = {
+            self.i_t: i_t,
+            self.i_s: i_s,
+            self.t_sk: t_sk,
+            self.t_t: t_t,
+            self.t_b: t_b,
+            self.t_f: t_f,
+            self.mask_t: mask_t,
+            self.global_step: global_step
+        }
+
+        with self.graph.as_default():
+            _, d_loss, d_log = sess.run([self.d_train_step, self.d_loss, self.d_summary_op], feed_dict = feed_dict)
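+            # run the generator update on the same batch right after the discriminator update (alternating GAN training)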
+            _, g_loss, g_log = sess.run([self.g_train_step, self.g_loss, self.g_summary_op], feed_dict = feed_dict)
+        return d_loss, g_loss, d_log, g_log
+
+    def predict(self, sess, i_t, i_s, to_shape = None):
+
+        assert i_t.shape == i_s.shape and i_t.dtype == i_s.dtype
+        assert len(i_t.shape) == 3 or (len(i_t.shape) == 4 and to_shape is not None \
+               and i_t.shape[1] == cfg.data_shape[0] \
+               and i_t.shape[2] % 8 == 0 \
+               and i_t.dtype == np.float32)
+        assert i_t.dtype == np.uint8 \
+               or (i_t.dtype == np.float32 and np.min(i_t) >= -1 and np.max(i_t) <= 1)
+
+        # process raw image, len(i_t.shape) == 3
+        if len(i_t.shape) == 3:
+            h, w = i_t.shape[:2] # original size
+            if not to_shape:
+                to_shape = (w, h) # w first for cv2
+            if i_t.shape[0] != cfg.data_shape[0]:
+                ratio = cfg.data_shape[0] / h
+                predict_h = cfg.data_shape[0]
+                predict_w = round(int(w * ratio) / 8) * 8
+                predict_scale = (predict_w, predict_h) # w first for cv2
+                i_t = cv2.resize(i_t, predict_scale)
+                i_s = cv2.resize(i_s, predict_scale)
+            if i_t.dtype == np.uint8:
+                i_t = i_t.astype(np.float32) / 127.5 - 1.
+                i_s = i_s.astype(np.float32) / 127.5 - 1.
+            i_t = np.expand_dims(i_t, axis = 0)
+            i_s = np.expand_dims(i_s, axis = 0)
+
+        result = sess.run([self.o_sk, self.o_t, self.o_b, self.o_f], feed_dict = {self.i_t: i_t, self.i_s: i_s})
+        o_sk, o_t, o_b, o_f = result
+        o_sk = cv2.resize((o_sk[0] * 255.).astype(np.uint8), to_shape, interpolation=cv2.INTER_NEAREST)
+        o_t = cv2.resize(((o_t[0] + 1.) * 127.5).astype(np.uint8), to_shape)
+        o_b = cv2.resize(((o_b[0] + 1.) * 127.5).astype(np.uint8), to_shape)
+        o_f = cv2.resize(((o_f[0] + 1.) * 127.5).astype(np.uint8), to_shape)
+        return [o_sk, o_t, o_b, o_f]
+
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/model_logs/vgg19/.keep b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/model_logs/vgg19/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2a8d1227b23f1e1a9664ab221f934ff27258b908
--- /dev/null
+++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,6 @@
+FuncStatus:OK
+GPUStatus:OK
+NPUMigrationStatus:OK
+PrecisionStatus:OK
+AutoTune:OK
+PerfStatus:POK
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/predict.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..995119cf791f68057432df8d8fa7c9253e594665
--- /dev/null
+++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/predict.py
@@ -0,0 +1,101 @@
+"""
+SRNet - Editing Text in the Wild
+Data prediction.
+Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd.
+Licensed under the GPL License (see LICENSE for details)
+Written by Yu Qian
+"""
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from npu_bridge.npu_init import *
+
+import tensorflow as tf
+from model import SRNet
+import numpy as np
+import os
+import cfg
+from utils import *
+from datagen import srnet_datagen, get_input_data
+import argparse
+
+def main():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', help = 'gpu id', default = 0)
+    parser.add_argument('--i_s', help = 'input original text patch')
+    parser.add_argument('--i_t', help = 'input standard text patch')
+    parser.add_argument('--input_dir', help = 'Directory containing xxx_i_s and xxx_i_t with same prefix',
+                        default = cfg.predict_data_dir)
+    parser.add_argument('--save_dir', help = 'Directory to save result', default = cfg.predict_result_dir)
+    parser.add_argument('--save_mode', help = '1 to save all and 0 to save only o_f', type = int, default = 0)
+    parser.add_argument('--checkpoint', help = 'tensorflow ckpt', default = cfg.predict_ckpt_path)
+    args = parser.parse_args()
+
+    assert (args.input_dir is not None and args.i_s is None and args.i_t is None) \
+           or (args.input_dir is None and args.i_s is not None and args.i_t is not None)
+    assert args.save_dir is not None
+    assert args.save_mode == 0 or args.save_mode == 1
+    assert args.checkpoint is not None
+
+    # gpu
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
+
+    # define model
+    print_log('model compiling start.', content_color = PrintColor['yellow'])
+    model = SRNet(shape = cfg.data_shape, name = 'predict')
+    print_log('model compiled.', content_color = PrintColor['yellow'])
+
+    with model.graph.as_default():
+        with tf.Session(config=npu_config_proto()) as sess:
+            saver = tf.train.Saver(tf.global_variables())
+
+            # load pretrained weights
+            print_log('weight loading start.', content_color = PrintColor['yellow'])
+            saver.restore(sess, args.checkpoint)
+            print_log('weight loaded.', content_color = PrintColor['yellow'])
+
+            # predict
+            print_log('predicting start.', content_color = PrintColor['yellow'])
+            if args.input_dir is None:
+                i_s = cv2.imread(args.i_s)
+                i_t = cv2.imread(args.i_t)
+                o_sk, o_t, o_b, o_f = model.predict(sess, i_t, i_s)
+
+                cv2.imwrite(os.path.join(args.save_dir, 'result.png'), o_f)
+                if args.save_mode == 1:
+                    cv2.imwrite(os.path.join(args.save_dir, 'result_sk.png'), o_sk)
+                    cv2.imwrite(os.path.join(args.save_dir, 'result_t.png'), o_t)
+                    cv2.imwrite(os.path.join(args.save_dir, 'result_b.png'), o_b)
+            else:
+                predict_data_list(model, sess, args.save_dir, get_input_data(args.input_dir), mode = args.save_mode)
+            print_log('predicting finished.', content_color = PrintColor['yellow'])
+
+if __name__ == '__main__':
+    main()
+
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/readme.md b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..b0ef0b2870a8f3af8e0acef9fede8f238dbc517f
--- /dev/null
+++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/readme.md
@@ -0,0 +1,94 @@
+# SRNet - the implementation of paper "Editing Text in the Wild"
+***
+
+## Basic Information
+#### Publisher: Huawei
+#### Application Domain: Image Generator
+#### Version: 1.0
+#### Modified: 2022.3.4
+#### Size: about 78.5M
+#### Framework: TensorFlow 1.15.0
+#### Model Format: pb
+#### Processor: Ascend 910
+#### Description: training code for a text-image style transfer network based on the TensorFlow framework
+***
+
+## Overview
+This model comes from "Editing Text in the Wild", a paper published at ACM Multimedia 2019.
+The paper designs a modular DNN that replaces the text content in a given image. During the replacement, details of the source text such as character size, font, color and orientation are preserved. Each module of the DNN completes one part of the overall task, and a final synthesis module fuses the outputs of the other modules to generate the target image.
+
+This implementation is the result of NPU adaptation on top of an [open-source implementation](https://github.com/youdao-ai/SRNet) of the paper by the [Youdao AI team](https://github.com/youdao-ai).
+
+Reference paper:
+* [Editing Text in the Wild](https://dl.acm.org/doi/pdf/10.1145/3343031.3350929)
+
+Main highlights of the paper:
+* Previous text-transfer work operated on single letters or characters; the method implemented here works on whole words
+* The network has a modular design, but the modules are trained end to end rather than one at a time
+***
+
+## Training
+### Obtaining the dataset
+Following the paper, the dataset needed to train this model has two parts: the input images i\_s and i\_t, and the label images t\_sk, t\_t, t\_b and t\_f.
+This version of the implementation adds one more label, mask\_t, the binary mask of the text region in the original image; it is included because it makes training converge faster.
+Since the labels required for training cannot be obtained for real-world images, the training dataset is synthetic.
+Synthesizing the dataset requires clean, text-free background images and some font files (.ttf); once these are prepared, [SRNet_datagenerator](https://github.com/youdao-ai/SRNet-Datagen) can be used to synthesize it.
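+
+As a sketch of the layout datagen.py expects (subdirectory names follow cfg.py; the file names are illustrative), the dataset directory holds one subdirectory per item, with identically named files across all of them:
+```
+data/
+├── i_t/      001.png 002.png ...   # rendered standard target text
+├── i_s/      001.png 002.png ...   # source images with the original text
+├── t_sk/     001.png 002.png ...   # text skeleton labels
+├── t_t/      001.png 002.png ...   # styled target text labels
+├── t_b/      001.png 002.png ...   # text-free background labels
+├── t_f/      001.png 002.png ...   # fused target image labels
+└── mask_t/   001.png 002.png ...   # binary text masks
+```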
+
+### Model training
+Because this implementation is adapted to Huawei's ModelArts platform, training is split into NPU training (on the ModelArts platform) and GPU training.
+The environment used for testing:
+* numpy 1.21.2
+* opencv-python 4.5.3
+* opencv-contrib-python 4.5.4.58
+* python 3.7
+* tensorflow 1.15
+
+Because of Gitee's limit on single-file size, the pretrained model used during training cannot be uploaded directly; please [download](https://cann-nju-srnet.obs.cn-north-4.myhuaweicloud.com:443/vgg19_weights_tf_dim_ordering_tf_kernels_notop.pb?AccessKeyId=DNOFMBDXF3DTPNGYLYN7&Expires=1679819348&Signature=xZTguiVqpKyGuGzko/AI8fu0ilM%3D) it from the OBS server.
+For training, save the pretrained model under the model_logs/vgg19 directory.
+
+Training relies on Huawei's ModelArts platform.
+First, download and install PyCharm and the [Huawei ModelArts plugin](https://modelarts-pycharm-plugin.obs.cn-north-1.myhuaweicloud.com/Pycharm-ToolKit-3.0.zip), and configure the access keys.
+Then upload the prepared dataset to the Huawei OBS server and configure the following in the ModelArts plugin:
+* Frequently-used
+  * AI Engine: Ascend-Powered-Engine, tensorflow_1.15-xxx
+  * Boot File Path: the local file path of npu_version/cfg.py in this repository
+  * Code Directory: the local folder path of npu-version in this repository
+  * OBS Path: the code is uploaded to the OBS server automatically during training; OBS Path is where it is uploaded
+  * Data Path in OBS: where the dataset is stored on the OBS server
+Training can then be started. The final training result has to be copied back to the OBS server; the target path is specified by changing the dst_url parameter on line 101 of train.py.
+
+## Testing
+### Testing procedure
+After the model has been trained, the trained result can be used for prediction.
+Prediction works as follows:
+```console
+$ python3 predict.py --i_s xxx --i_t xxx --save_dir xxx --checkpoint xxx
+```
+Or predict all data in a directory:
+```console
+$ python3 predict.py --input_dir xxx --save_dir xxx --checkpoint xxx
+```
+In this case, the i\_s and i\_t files in input\_dir must share the same prefix, with suffixes "\_i\_s" and "\_i\_t" respectively.
+Alternatively, modify the path information in cfg.py directly and then just call predict.py:
+```console
+$ python3 predict.py
+```
+
+### Comparison of training results
+The model was trained with the same dataset and parameters on both the GPU and NPU platforms.
+GPU used on the GPU platform: NVIDIA T4, 1 * 16G
+NPU used on the NPU platform: Ascend 910
+#### Time (performance) comparison
+On the GPU platform, every 50 training iterations take 30 s on average.
+On the NPU platform, every 50 training iterations take about 59 s on average.
+#### Quality comparison
+The training results on GPU and NPU are compared in the figure below:
+![training results](Figure_1.png)
+It is easy to see that the two are on par, and both reach [the results obtained by the reproducing team](https://github.com/youdao-ai/SRNet/issues/11).
+***
+
+## Dataset and OBS bucket
+The dataset used during training: [dataset](https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=F2eF/Z8D+mFBW+7kVK2hix+8Sp1M7bu7F8ghXFWNcp9uxF9jNxIB9bDWCokG6mGrFNRAGp+hhntj2rwzM21E37Ky0dZSgxFetwTBTUL+RhE+OB7BVR2HD5rZrDdeUaH6gejEiAArg7yvDQ087n9uWxH2URogbKlWQLmw7xBZYmTXEenFva32stWa2uyAxPrxmh9dLma7LSj5bqDQFgbvWsWsGVTWQi5fvMd/nqsi+oj3/Ex4KiCCNHYd0/rbOjb1Kk4mT7zm8C6sblxB24o9XnpTYMG+7j0HuClV2ecyM/YDN8FuE7ulj7rwbyMn48rUFg+KuTbpKHq9QDnGipDk9MViMuYvHip7Ngm+0oaapyQTFG1yZB5OjqziH5mnkkb7dihsYwY91uIZ0NOJz3b9AYZ/50f9gv/TPaiLmhTk90acdcp8VxXMdgqWM0oIPWHCGq8Y3C/gLthjsqABjGalW7YdHYl2RNVylcIbSWEa6hSzqf7gCb8/qAeC3urdxBSJM+Ww17WEIjAeuDosgbXll1ZMSwaj/jQuK81EAh38TJQAi3Aqyqn3+ljFIxjINB36)
+The dataset's access code: xx11yy
+The OBS bucket used for training: [OBS bucket](obs://cann-nju-srnet/)
+
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/train.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3ff3d842a2eb79ef8be576d1b905c00d8ff573c
--- /dev/null
+++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/train.py
@@ -0,0 +1,133 @@
+"""
+SRNet - Editing Text in the Wild
+Model training.
+Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd.
+Licensed under the GPL License (see LICENSE for details)
+Written by Yu Qian
+"""
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import moxing as mox
+
+from npu_bridge.npu_init import *
+
+import tensorflow as tf
+from model import SRNet
+import numpy as np
+import os
+import cfg
+from utils import *
+from datagen import srnet_datagen, get_input_data
+
+from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+
+def main():
+    # define train_name
+    if not cfg.train_name:
+        train_name = get_train_name()
+    else:
+        train_name = cfg.train_name
+
+    # define model
+    print_log('model compiling start.', content_color = PrintColor['yellow'])
+    model = SRNet(shape = cfg.data_shape, name = train_name)
+    print_log('model compiled.', content_color = PrintColor['yellow'])
+
+    # define data generator
+    # srnet_datagen() contains yield, so calling it does not execute the body; it returns a generator (iterator)
+    gen = srnet_datagen()
+
+    with model.graph.as_default():
+        init = tf.global_variables_initializer()
+        trainCfg = tf.ConfigProto()
+        custom_op = trainCfg.graph_options.rewrite_options.custom_optimizers.add()
+        custom_op.name = "NpuOptimizer"
+        trainCfg.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+
+        #trainCfg.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
+        #custom_op.parameter_map["dynamic_input"].b = True
+
+        #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("dynamic_execute")
+        #custom_op.parameter_map["dynamic_inputs_shape_range"].s = tf.compat.as_bytes("data:[-1, 224, 224, 3], [-1, 224, 224, 3], [-1, 224, 224, 1], [-1, 224, 224, 3], [-1, 224, 224, 3], [-1, 224, 224, 3], [-1, 224, 224, 1]")
+
+        with tf.Session(config=npu_config_proto()) as sess:
+            saver = tf.train.Saver(tf.global_variables(), max_to_keep = 100)
+
+            # load pretrained weights or initialize variables
+            if cfg.pretrained_ckpt_path:
+                print_log('weight loading start.', content_color = PrintColor['yellow'])
+                saver.restore(sess, cfg.pretrained_ckpt_path)
+                print_log('weight loaded.', content_color = PrintColor['yellow'])
+            else:
+                print_log('weight initialize start.', content_color = PrintColor['yellow'])
+                sess.run(init)
+                print_log('weight initialized.', content_color = PrintColor['yellow'])
+
+            # train
+            print_log('training start.', content_color = PrintColor['yellow'])
+            for step in range(cfg.max_iter):
+                global_step = step + 1
+
+                # train and get loss
+                d_loss, g_loss, d_log, g_log = model.train_step(sess, global_step, *next(gen))
+
+                # show loss
+                if global_step % cfg.show_loss_interval == 0 or step == 0:
+                    print_log ("step: {:>6d} d_loss: {:>3.5f} g_loss: {:>3.5f}".format(global_step, d_loss, g_loss))
+
+                # write tensorboard
+                if global_step % cfg.write_log_interval == 0:
+                    write_summary(model.d_writer, model.g_writer, d_log, g_log, global_step)
+
+                # gen example
+                if global_step % cfg.gen_example_interval == 0:
+                    savedir = os.path.join(cfg.example_result_dir, train_name, 'iter-' + str(global_step).zfill(len(str(cfg.max_iter))))
+                    predict_data_list(model, sess, savedir, get_input_data())
+                    print_log ("example generated in dir {}".format(savedir), content_color = PrintColor['green'])
+
+                # save checkpoint
+                if global_step % cfg.save_ckpt_interval == 0:
+                    savedir = os.path.join(cfg.checkpoint_savedir, train_name, 'iter')
+                    if not os.path.exists(savedir):
+                        os.makedirs(savedir)
+                    save_checkpoint(sess, saver, savedir, global_step)
+                    print_log ("checkpoint saved in dir {}".format(savedir), content_color = PrintColor['green'])
+
+            print_log('training finished.', content_color = PrintColor['yellow'])
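+            # freeze the trained graph (variables folded into constants) and export the four generator outputs as a single .pb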
+            pb_savepath = os.path.join(cfg.checkpoint_savedir, train_name, 'final.pb')
+            save_pb(sess, pb_savepath, ['o_sk', 'o_t', 'o_b', 'o_f'])
+            print_log('pb model saved in dir {}'.format(pb_savepath), content_color = PrintColor['green'])
+
+    mox.file.copy_parallel(src_url = '/cache/out', dst_url = 'obs://cann-nju-srnet/trainout')
+
+if __name__ == '__main__':
+    main()
+
diff --git a/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/utils.py b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..54e6fa7c4e44cf807d2169f76c75d46ffabfaad0
--- /dev/null
+++ b/TensorFlow/contrib/graph/SRNET_ID1089_for_TensorFlow/utils.py
@@ -0,0 +1,127 @@
+"""
+SRNet - Editing Text in the Wild
+Common utility functions and classes.
+Copyright (c) 2019 Netease Youdao Information Technology Co.,Ltd.
+Licensed under the GPL License (see LICENSE for details)
+Written by Yu Qian
+"""
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from npu_bridge.npu_init import * + +import tensorflow as tf +from tensorflow.python.framework import graph_util +import os +import cv2 +from datetime import datetime + +PrintColor = { + 'black': 30, + 'red': 31, + 'green': 32, + 'yellow': 33, + 'blue': 34, + 'amaranth': 35, + 'ultramarine': 36, + 'white': 37 +} + +PrintStyle = { + 'default': 0, + 'highlight': 1, + 'underline': 4, + 'flicker': 5, + 'inverse': 7, + 'invisible': 8 +} + +def get_train_name(): + + # get current time for train name + return datetime.now().strftime('%Y%m%d%H%M%S') + +def print_log(s, time_style = PrintStyle['default'], time_color = PrintColor['blue'], + content_style = PrintStyle['default'], content_color = PrintColor['white']): + + # colorful print s with time log + cur_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + log = '\033[{};{}m[{}]\033[0m \033[{};{}m{}\033[0m'.format \ + (time_style, time_color, cur_time, content_style, content_color, s) + print (log) + +def print_nodes(graph): + + # print all nodes of the graph + nodes = [n.name for n in graph.as_graph_def().node] + for node in nodes: + print (node) + +def write_summary(d_writer, g_writer, d_log, g_log, global_step): + + # write summaries for tensorboard + d_writer.add_summary(d_log, global_step) + g_writer.add_summary(g_log, global_step) + +def save_result(save_dir, result, name, mode): + + # save output images + o_sk, o_t, o_b, o_f = result + if not os.path.exists(save_dir): + os.makedirs(save_dir) + cv2.imwrite(os.path.join(save_dir, name + 'o_f.png'), o_f, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]) + if mode == 1: + cv2.imwrite(os.path.join(save_dir, name + 'o_sk.png'), o_sk, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]) + cv2.imwrite(os.path.join(save_dir, name + 'o_t.png'), o_t, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]) + cv2.imwrite(os.path.join(save_dir, name + 'o_b.png'), o_b, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]) + +def predict_data_list(model, sess, save_dir, input_data_list, mode = 1): + + # predict output images and save them + for data in input_data_list: + i_t, i_s, ori_shape, data_name = data + result = model.predict(sess, i_t, i_s, ori_shape) + save_result(save_dir, result, data_name, mode = mode) + +def save_checkpoint(sess, saver, save_dir, global_step): + + # save tensorflow ckpt files + saver.save(sess, save_dir, global_step = global_step) + +def save_pb(sess, save_path, outputs = ['o_sk', 'o_t', 'o_b', 'o_f']): + + # save tensorflow pb model + save_dir = os.path.split(save_path)[0] + if not os.path.exists(save_dir): + os.makedirs(save_dir) + constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, outputs) + with tf.gfile.FastGFile(save_path, mode='wb') as f: + f.write(constant_graph.SerializeToString()) + +