From a57fa25e04d2a271bbfce084d9dd4ac09ed6f36e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=92=92=E5=BE=AE=E7=AC=91?=
Date: Wed, 18 May 2022 10:32:13 +0000
Subject: [PATCH 1/3] update TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/README.md.

---
 .../contrib/cv/CascadeNet_ID2121_for_TensorFlow/README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/README.md b/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/README.md
index 6c6554da8..32b8c9a40 100644
--- a/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/README.md
+++ b/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/README.md
@@ -43,9 +43,8 @@ The Cascade model comes from the paper "A Deep Cascade of Convolutional Neural Networks for Dyn
 # Training performance
 |                | GPU | NPU |
 |----------------|------|--------|
-| Epoch | 2 min | 6 min |
-* A single training epoch takes about 3x longer on the NPU than on the GPU. An issue was filed; analysis shows the main cause is the large number of FFT and IFFT operations in the network, which are not supported on the NPU during training, so training speed cannot be improved.
-* Offline inference performance is not affected, however; offline inference speed: reconstructing one group of data takes 0.52 s.
+| Avg. time per step | ~7.0E-4 s | ~7.7E-4 s |
+* The average per-step training time on the NPU is slightly higher than on the GPU. An issue was filed; analysis shows the main cause is the large number of FFT and IFFT operations in the network, which the NPU does not yet support, so training speed cannot be improved.
 # Offline inference command reference
 * ./out/msame --model="cascade_om.om" --input="./feature/,./mask/" --output="./" --outfmt BIN
 # pb-to-om conversion command reference
--
Gitee

From 63a9b511492c58ac4eace69c5547179039812194 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=92=92=E5=BE=AE=E7=AC=91?=
Date: Wed, 18 May 2022 10:35:52 +0000
Subject: [PATCH 2/3] update TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/modelzoo_level.txt.

---
 .../cv/CascadeNet_ID2121_for_TensorFlow/modelzoo_level.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/modelzoo_level.txt
index bdba79da8..0c3c74019 100644
--- a/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/modelzoo_level.txt
+++ b/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/modelzoo_level.txt
@@ -3,4 +3,4 @@ PerfStatus:POK
 PrecisionStatus:OK
 GPUStatus:OK
 NPUMigrationStatus:POK
-AutoTune:POK
\ No newline at end of file
+AutoTune:OK
\ No newline at end of file
--
Gitee

From c69e9e324ec83d8ee62508e6738ccd3eca97fb37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=92=92=E5=BE=AE=E7=AC=91?=
Date: Wed, 18 May 2022 10:37:30 +0000
Subject: [PATCH 3/3] update TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/train.py.

--- .../CascadeNet_ID2121_for_TensorFlow/train.py | 77 +++++++++++-------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/train.py b/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/train.py index d60a51242..3347f3639 100644 --- a/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/train.py +++ b/TensorFlow/contrib/cv/CascadeNet_ID2121_for_TensorFlow/train.py @@ -38,37 +38,51 @@ import os import time from models.pre_input import get_right_images import models.model_tf as mm -#import moxing as mx -from npu_bridge.npu_init import RewriterConfig +import moxing as mx +from npu_bridge.npu_init import NPULossScaleOptimizer, npu_config_proto, RewriterConfig, \ + ExponentialUpdateLossScaleManager, FixedLossScaleManager + # if not work, please use import * # from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig - flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_integer('batch_size', 16, 'Number of samples per batch') flags.DEFINE_integer('image_size', 256, 'Image sample size in pixels') flags.DEFINE_integer('random_seed', 0, 'Seed used to initializer rng') -flags.DEFINE_integer('num_epoch', 500, 'number of epoch') +flags.DEFINE_integer('num_epoch', 5, 'number of epoch') flags.DEFINE_integer('checkpoint_period', 10, 'save the model every time') +flags.DEFINE_integer( + 'Dn', 11, ' the number of the convolution layers in one residual block') +flags.DEFINE_integer('Dc', 5, 'the number of the data consistency layers') +flags.DEFINE_string('model_name', 'dc', 'model name') +flags.DEFINE_string('data_url', 'obs://imagenet2012-lp/cascade_re/data/', + 'the path of train data in obs') +flags.DEFINE_string( + 'data_train_dir', '/home/ma-user/modelarts/inputs/data_url_0/chest_train_acc3.hdf5', + 'the path of train data') flags.DEFINE_float('learning_rate', 1e-3, 'initial learning rate') flags.DEFINE_bool('continue_training', False, 'continue training') -flags.DEFINE_string('data_url', 'obs://imagenet2012-lp/cascade_re/data/','the path of train data in obs') -flags.DEFINE_string('data_train_dir', '/home/ma-user/modelarts/inputs/data_url_0/chest_train_acc3.hdf5', - 'the path of train data') -flags.DEFINE_string('save_ckpt_Dir','./', 'the path of train data') -flags.DEFINE_integer('Dn', 10, ' the number of the convolution layers in one residual block') -flags.DEFINE_integer('Dc', 7, 'the number of the data consistency layers') +flags.DEFINE_string( + 'train_url', 'obs://imagenet2012-lp/cascade_log/', 'the path of train log in obs') +flags.DEFINE_string('last_checkpoint_dir', + 'obs://imagenet2012-lp/cascade_log/MA-cascade_modelarts-10-19-15-26/output/V0018', + 'the path of train data') +flags.DEFINE_string('last_checkpoint_dir_name', + '/D11-C5-25-19/', 'the path of train data') print('***************************************************') start_time = time.time() # creat checkpoint save path - -directory = FLAGS.save_ckpt_Dir +saveDir = '/cache/saveModels' +cwd = os.getcwd() +directory = saveDir + '/' + 'D' + \ + str(FLAGS.Dn) + '-C' + str(FLAGS.Dc) + \ + '-' + datetime.now().strftime("%d-%H") if not os.path.exists(directory): os.makedirs(directory) - +sessFileName = directory + '/model' image_size = FLAGS.image_size # net architecture K = FLAGS.Dc @@ -78,9 +92,10 @@ tf.reset_default_graph() config = tf.ConfigProto() custom_op = config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" +custom_op.parameter_map["use_off_line"].b = True # set precision mode allow_fp32_to_fp16 
allow_mix_precision custom_op.parameter_map['precision_mode'].s = tf.compat.as_bytes( - 'allow_fp32_to_fp16') + 'allow_mix_precision') # # dump path # custom_op.parameter_map['dump_path'].s = tf.compat.as_bytes(saveDir + '/') # # set dump debug @@ -102,14 +117,8 @@ feature = tf.placeholder(tf.float32, shape=( out = mm.makeModel(feature, mask, train=False, nLayers=numlayers, K=K) predTst = out['dc' + str(K)] predTst = tf.identity(predTst, name='predTst') +sessFileNameTst = directory + '/modelTst' -sessFileName = os.path.join(directory + '/model') -if not os.path.exists(sessFileName): - os.makedirs(sessFileName) -sessFileNameTst = os.path.join(directory + '/modelTst') -if not os.path.exists(sessFileNameTst): - os.makedirs(sessFileNameTst) - saver = tf.train.Saver() with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) @@ -121,9 +130,9 @@ print('testing model saved:' + saveFile) # mx.file.copy_parallel(FLAGS.data_url, '/cache/data/') # copy to modelarts path_train = FLAGS.data_train_dir feature_trn, label_trn, mask_trn = get_right_images(path_train) -#if FLAGS.continue_training: - #mx.file.copy_parallel(FLAGS.last_checkpoint_dir + FLAGS.last_checkpoint_dir_name, - #saveDir + FLAGS.last_checkpoint_dir_name) +if FLAGS.continue_training: + mx.file.copy_parallel(FLAGS.last_checkpoint_dir + FLAGS.last_checkpoint_dir_name, + saveDir + FLAGS.last_checkpoint_dir_name) tf.reset_default_graph() rows = image_size @@ -163,11 +172,13 @@ global_step = tf.Variable( decayed_lr = tf.train.exponential_decay( FLAGS.learning_rate, global_step, 1000, 0.98, staircase=True) # opti = tf.train.AdamOptimizer(learning_rate=decayed_lr, name='optimizer') -opti = tf.train.GradientDescentOptimizer(decayed_lr, - name='optimizer') +loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, incr_every_n_steps=100, + decr_every_n_nan_or_inf=2, decr_ratio=0.5) +opti_tmp = tf.train.GradientDescentOptimizer(decayed_lr, + name='optimizer') # loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2**32, # incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5) -# opti = NPULossScaleOptimizer(opt, loss_scale_manager) +opti = NPULossScaleOptimizer(opti_tmp, loss_scale_manager) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # with tf.control_dependencies(update_ops): @@ -213,14 +224,16 @@ with tf.Session(config=config) as sess: if np.remainder(step + 1, nBatch) == 0: ep += 1 avgTrnLoss = np.mean(totalLoss) / nTrn - start_time = time.time() + step_start_time = time.time() summary = sess.run(merged, feed_dict={lossT: avgTrnLoss}) writer.add_summary(summary, ep) saveLoss.append(avgTrnLoss) totalLoss = [] - step_time=time.time()-start_time + step_time = time.time() - step_start_time + scale_value = sess.run([loss_scale_manager.get_loss_scale()]) print(datetime.now().strftime("%H:%M"), - '---Epoch: ', ep, '---AvgLoss: ', avgTrnLoss, '---steptime:', step_time) + '---Epoch: ', ep, '---AvgLoss: ', avgTrnLoss, '---StepTime:', step_time, '---ScaleValue', + scale_value) # todo if np.remainder(ep, FLAGS.checkpoint_period) == 0: savedfile = saver.save( @@ -235,5 +248,5 @@ print('Training completed in minutes', ((end_time - start_time) / 60)) print('training completed at', datetime.now().strftime('%d-%b-%Y %I:%M%p')) print('****************************************************') # copy results to obs -#mx.file.copy_parallel('/cache/saveModels', FLAGS.train_url) -#print('copy saved model to obs.') +mx.file.copy_parallel('/cache/saveModels', FLAGS.train_url) 
+print('copy saved model to obs.')
--
Gitee
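Note: the train.py change in PATCH 3/3 follows the usual Ascend npu_bridge loss-scaling pattern: enable allow_mix_precision in the session config and wrap the base optimizer in NPULossScaleOptimizer driven by an ExponentialUpdateLossScaleManager, then log the current scale each epoch. The snippet below is a minimal, self-contained sketch of that pattern for reference only; it assumes TensorFlow 1.15 with the npu_bridge package on an Ascend device, and the tiny dense-layer graph, feed data, and learning rate are illustrative placeholders, not code from this repository (the loss-scale manager settings are the ones visible in the patch).

# Minimal sketch of the mixed-precision + loss-scaling setup used in PATCH 3/3.
# Assumes an Ascend training environment (e.g. ModelArts) with TF 1.15 and npu_bridge;
# the small regression graph stands in for the cascade network.
import numpy as np
import tensorflow as tf
from npu_bridge.npu_init import (NPULossScaleOptimizer,
                                 ExponentialUpdateLossScaleManager)

# NPU session config: run on device and let eligible ops be cast to fp16.
config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["use_off_line"].b = True
custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")

# Placeholder graph: one dense layer trained with an L2 loss.
x = tf.placeholder(tf.float32, shape=(None, 256))
y = tf.placeholder(tf.float32, shape=(None, 1))
pred = tf.layers.dense(x, 1)
loss = tf.reduce_mean(tf.square(pred - y))

# Dynamic loss scaling with the same manager settings as the patch:
# start at 2**32, grow every 100 clean steps, halve after 2 overflowing steps.
scale_manager = ExponentialUpdateLossScaleManager(
    init_loss_scale=2 ** 32, incr_every_n_steps=100,
    decr_every_n_nan_or_inf=2, decr_ratio=0.5)
base_opt = tf.train.GradientDescentOptimizer(1e-3)
opt = NPULossScaleOptimizer(base_opt, scale_manager)
train_op = opt.minimize(loss)

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    feats = np.random.rand(16, 256).astype(np.float32)
    labels = np.random.rand(16, 1).astype(np.float32)
    for step in range(10):
        # The wrapper scales the loss before the backward pass, skips the update
        # when gradients overflow, and adjusts the scale; log it as the patch does.
        _, cur_loss, cur_scale = sess.run(
            [train_op, loss, scale_manager.get_loss_scale()],
            feed_dict={x: feats, y: labels})
        print('step', step, 'loss', cur_loss, 'loss_scale', cur_scale)

On a machine without the Ascend software stack the npu_bridge import fails, so this sketch is only meaningful in the same environment the patch targets; it is not a substitute for the repository's training script.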