From 0971c47d6cb302788d769febe6e52a9ffcde9453 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 10:58:37 +0800 Subject: [PATCH 1/9] =?UTF-8?q?CTPN=5FID0054=5Ffor=5FTensorFlow=E9=80=82?= =?UTF-8?q?=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/train_npu_rt.py | 249 ------------------ .../test/train_RT2_performance_1p.sh | 2 +- 2 files changed, 1 insertion(+), 250 deletions(-) delete mode 100644 TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py deleted file mode 100644 index 2fa675326..000000000 --- a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py +++ /dev/null @@ -1,249 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import datetime -import os -import sys -import time - -import tensorflow as tf -import numpy as np -sys.path.append(os.getcwd()) - -cur_path = os.path.abspath(os.path.dirname(__file__)) -working_dir = os.path.join(cur_path, '../') -sys.path.append(working_dir) - -from tensorflow.contrib import slim - -tf.app.flags.DEFINE_float('learning_rate', 1e-5, '') -tf.app.flags.DEFINE_integer('max_steps', 50000, '') -tf.app.flags.DEFINE_integer('decay_steps', 30000, '') -tf.app.flags.DEFINE_float('decay_rate', 0.1, '') -tf.app.flags.DEFINE_float('moving_average_decay', 0.997, '') -tf.app.flags.DEFINE_integer('num_readers', 4, '') -tf.app.flags.DEFINE_string('gpu', '0', '') -tf.app.flags.DEFINE_string('checkpoint_path',"checkpoints_mlt/" , '') -tf.app.flags.DEFINE_string('logs_path', 'logs_mlt/', '') -tf.app.flags.DEFINE_string('pretrained_model_path', 'data/vgg_16.ckpt', '') -tf.app.flags.DEFINE_boolean('restore', False, '') -tf.app.flags.DEFINE_integer('save_checkpoint_steps', 2000, '') -tf.app.flags.DEFINE_string('dataset_dir', 'resized/', '') -tf.app.flags.DEFINE_integer('num_bbox', 256, '') -tf.app.flags.DEFINE_integer('loss_scale', 4096, '') -tf.app.flags.DEFINE_integer('inputs_height', 600, '') -tf.app.flags.DEFINE_integer('inputs_width', 900, '') -tf.app.flags.DEFINE_integer('device_id', 1, '') -tf.app.flags.DEFINE_integer('npu_nums', 1, '') -tf.app.flags.DEFINE_string('DEVICE_ID', '0', '') -#modify for NPU start -tf.app.flags.DEFINE_string('precision_mode', 'allow_fp32_to_fp16', '') -#modify for NPU end - -FLAGS = tf.app.flags.FLAGS - - -from nets import model_train as model -from utils.dataset import data_provider as data_provider -from hccl.split.api import set_split_strategy_by_size -# npu libs -from npu_bridge.estimator import npu_ops -from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig -from npu_bridge.estimator.npu.npu_estimator import NPUEstimator -from npu_bridge.estimator.npu.npu_optimizer import allreduce -from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer -from npu_bridge.hccl import hccl_ops -from npu_bridge.estimator.npu.npu_loss_scale_optimizer import NPULossScaleOptimizer -from npu_bridge.estimator.npu.npu_loss_scale_manager import FixedLossScaleManager - -from tensorflow.python.client import timeline - -# modify for NPU start -from npu_bridge.npu_init import * -# modify for NPU end - -def pad_input(inputs,target_shape=[1216,1216,3]): - - h,w = inputs.shape[:2] - out = np.zeros(target_shape).astype(np.uint8) - out[0:h,0:w,:] = inputs - - return out - - -def pad_bbox(inputs, count=256): - if len(inputs)>count: - return inputs[:count].copy() - - else: - out = inputs.copy() - num_inputs = len(out) - num_pad = count - num_inputs - - for i in range(num_pad): - out.append([0,0,0,0,1]) - return out - - -def broadcast_global_variables(root_rank, index): - op_list = [] - for var in tf.global_variables(): - if "float" in var.dtype.name: - inputs = [var] - outputs = hccl_ops.broadcast(tensor=inputs, root_rank=root_rank) - if outputs is not None: - op_list.append(outputs[0].op) - op_list.append(tf.assign(var, outputs[0])) - return tf.group(op_list) - -def main(argv=None): - os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu - now = datetime.datetime.now() - StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S") - os.makedirs(FLAGS.logs_path + FLAGS.DEVICE_ID) - if not os.path.exists(FLAGS.checkpoint_path): - os.makedirs(FLAGS.checkpoint_path) - - input_image = tf.placeholder(tf.float32, - shape=[1,FLAGS.inputs_height, FLAGS.inputs_width, 3], - 
name='input_image') - input_bbox = tf.placeholder(tf.float32, - shape=[FLAGS.num_bbox, 5], name='input_bbox') - - global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) - learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False) - tf.summary.scalar('learning_rate', learning_rate) - opt = tf.train.AdamOptimizer(learning_rate) - if FLAGS.npu_nums == 8: - opt = NPUDistributedOptimizer(opt) - - # opt = NPUDistributedOptimizer(opt) - # modify for NPU start - if FLAGS.precision_mode == "allow_mix_precision": - loss_scale_manager = ExponentialUpdateLossScaleManager( - init_loss_scale=2**32, - incr_every_n_steps=1000, - decr_every_n_nan_or_inf=2, - decr_ratio=0.5) - else: - loss_scale_manager = FixedLossScaleManager(loss_scale=FLAGS.loss_scale) - # modify for NPU end - - opt = NPULossScaleOptimizer(opt, loss_scale_manager) - - - with tf.name_scope('model' ) as scope: - bbox_pred, cls_pred, cls_prob = model.model(input_image) - - total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss_v2(bbox_pred, cls_pred, input_bbox) - - batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) - grads = opt.compute_gradients(total_loss) - - apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) - - summary_op = tf.summary.merge_all() - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, global_step) - variables_averages_op = variable_averages.apply(tf.trainable_variables()) - with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]): - train_op = tf.no_op(name='train_op') - - saver = tf.train.Saver(tf.global_variables(), max_to_keep=5) - summary_writer = tf.summary.FileWriter(FLAGS.logs_path + StyleTime, tf.get_default_graph()) - - init = tf.global_variables_initializer() - - if FLAGS.pretrained_model_path is not None: - variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, - slim.get_trainable_variables(), - ignore_missing_vars=True) - #for NPU - config = tf.ConfigProto(allow_soft_placement=True) - custom_op = config.graph_options.rewrite_options.custom_optimizers.add() - custom_op.name = "NpuOptimizer" - custom_op.parameter_map["use_off_line"].b = True - custom_op.parameter_map["hcom_parallel"].b = True - custom_op.parameter_map["jit_compile"].b = False - config.graph_options.rewrite_options.remapping = RewriterConfig.OFF - # modify for NPU start - if FLAGS.precision_mode == "allow_mix_precision": - custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") - # modify for NPU end - # for NPU - if FLAGS.npu_nums == 8: - bcast_op = broadcast_global_variables(0, 1) - with tf.Session(config=config) as sess: - if FLAGS.npu_nums == 8: - sess.run(bcast_op) - if FLAGS.restore: - ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path) - restore_step = int(ckpt.split('.')[0].split('_')[-1]) - print("continue training from previous checkpoint {}".format(restore_step)) - saver.restore(sess, ckpt) - else: - sess.run(init) - if FLAGS.npu_nums == 8: - set_split_strategy_by_size([80, 20]) - restore_step = 0 - if FLAGS.pretrained_model_path is not None: - variable_restore_op(sess) - data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers) - start = time.time() - - for step in range(restore_step, FLAGS.max_steps): - data = next(data_generator) - inputs_padded = data[0] - bbox_padded = pad_bbox(data[1],FLAGS.num_bbox) - input_image_np = inputs_padded - input_bbox_np = 
bbox_padded - - ml, tl,ce_loss, bbox_loss, _, summary_str = sess.run([model_loss, total_loss, - rpn_cross_entropy, - rpn_loss_box, - train_op, summary_op], - feed_dict={input_image: input_image_np, - input_bbox: input_bbox_np}) - summary_writer.add_summary(summary_str, global_step=step) - print('model loss :', ml, 'ce_loss: ', ce_loss, 'box_loss:',bbox_loss) - if step != 0 and step % FLAGS.decay_steps == 0: - sess.run(tf.assign(learning_rate, learning_rate.eval() * FLAGS.decay_rate)) - - if step % 10 == 0: - avg_time_per_step = (time.time() - start) / 10 - start = time.time() - print('Step {:06d}, ce_loss {:.6f}, bbox_loss {:.6f} model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'.format( - step, ce_loss, bbox_loss, ml, tl, avg_time_per_step, learning_rate.eval())) - - if (step + 1) % FLAGS.save_checkpoint_steps == 0: - filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt') - filename = os.path.join(FLAGS.checkpoint_path, filename) - saver.save(sess, filename) - print('Write model to: {:s}'.format(filename)) - -if __name__ == '__main__': - tf.app.run() diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh index 101e3c01e..b567d6169 100644 --- a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh +++ b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -113,7 +113,7 @@ do fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune - nohup python3 main/train_npu_rt.py \ + nohup python3 main/train_npu.py \ --precision_mode=$precision_mode \ --pretrained_model_path=$data_path/vgg_16.ckpt \ --dataset_dir=$data_path \ -- Gitee From 98d89398bc19b19422354dc2692989dddad08bdc Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 11:00:34 +0800 Subject: [PATCH 2/9] =?UTF-8?q?CTPN=5FID0054=5Ffor=5FTensorFlow=E9=80=82?= =?UTF-8?q?=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh index cfd98d12c..98a7658a4 100644 --- a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh +++ b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh @@ -127,7 +127,7 @@ do #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 cd $cur_path/../ - nohup python3 main/train_npu_rt.py \ + nohup python3 main/train_npu.py \ --precision_mode=$precision_mode \ --pretrained_model_path=$data_path/vgg_16.ckpt \ --dataset_dir=$data_path \ -- Gitee From bb8a96ecec2737b0239a3983b692d74c61cdb1da Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 11:09:49 +0800 Subject: [PATCH 3/9] =?UTF-8?q?SSD-Resnet50V1-FPN=5FID1463=5Ffor=5FTensorF?= =?UTF-8?q?low=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models/research/object_detection/model_lib_rt.py | 1 - 1 file changed, 1 deletion(-) diff 
--git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py
index bf6d3834c..a40980745 100644
--- a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py
@@ -332,7 +332,6 @@ def create_estimator_and_inputs(run_config, hparams, pipeline_config_path, eval_
   # custom_op.parameter_map["dynamic_input"].b = True
   # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
   custom_op.parameter_map["hcom_parallel"].b = True
-  custom_op.parameter_map["jit_compile"].b = False
   run_config = tf.estimator.RunConfig(model_dir=run_config.model_dir,
                                       session_config=run_config.session_config,
                                       save_checkpoints_steps=train_steps // eval_count)
--
Gitee
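
Patch 3 above is the code-side counterpart of the RT2.0 switch: once the launcher scripts export ENABLE_RUNTIME_V2=1, the per-session jit_compile knob is dropped. A minimal sketch of the resulting NPU session config, assembled only from config calls that already appear in this series (the helper name is illustrative, not part of the repository):

    import os
    import tensorflow as tf
    from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig

    def build_npu_session_config(enable_rt2=True):
        # Runtime v2 is selected through an environment variable set before
        # session creation; the launcher scripts export ENABLE_RUNTIME_V2=1.
        if enable_rt2:
            os.environ["ENABLE_RUNTIME_V2"] = "1"
        config = tf.ConfigProto(allow_soft_placement=True)
        custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
        custom_op.name = "NpuOptimizer"
        custom_op.parameter_map["use_off_line"].b = True
        custom_op.parameter_map["hcom_parallel"].b = True
        # Pre-RT2 variants of these files additionally set
        # custom_op.parameter_map["jit_compile"].b = False; this series removes it.
        config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
        return config
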
From 5a260c13b8c36aecd09d3a630b3414ffaa46db3c Mon Sep 17 00:00:00 2001
From: ykxia
Date: Fri, 11 Nov 2022 11:16:40 +0800
Subject: [PATCH 4/9] =?UTF-8?q?SSD-Resnet50V1-FPN=5FID1463=5Ffor=5FTensorF?=
 =?UTF-8?q?low=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_RT2_performance_1p.sh          | 194 +++++++++++++++++
 ...rain_RT2_performance_1p_mobilenetv1_fpn.sh | 174 +++++++++++++++
 .../test/train_RT2_performance_8p.sh          | 205 ++++++++++++++++++
 ...rain_RT2_performance_8p_mobilenetv1_fpn.sh | 150 +++++++++++++
 4 files changed, 723 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh

diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..01abaf5d8
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+cur_path=`pwd`
+export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH
+#集合通信
+export RANK_SIZE=1
+export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p_${ASCEND_DEVICE_ID}.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#数据集参数
+data_path="/data"
+use_conda=0
+
+#训练参数,需要根据模型修改
+Network="SSD-Resnet50V1-FPN_ID1463_for_TensorFlow"
+num_train_steps=500
+batch_size=32
+ckpt_path=/checkpoints
+pipeline_config=$cur_path/../models/research/configs/ssd320_full_1gpus.config
+
+#维测参数
+overflow_dump=False
+overflow_dump_path=$cur_path/output/overflow_dump
+step_dump=False
+step_dump_path=$cur_path/output/step_dump
+check_loss_scale=False
+
+#帮助提示,需要根据网络修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_1p.sh "
+
+    echo ""
+    echo "parameter explain:
+    --num_train_steps      training steps
+    --data_path            source data of training
+    --ckpt_path            pre-checkpoint path
+    --pipeline_config      pipeline config path
+    --overflow_dump        overflow detection,default is False
+    --overflow_dump_path   overflow dump path
+    --check_loss_scale     check whether loss scale is valid, default is False
+    --step_dump            Dump step data, default is False, can only set when overflow_dump is False
+    --step_dump_path       step_dump_path
+    --skip_eval            whether to skip eval
+    -h/--help              Show help message
+    "
+    exit 1
fi

#入参设置,需要根据网络修改
for para in $*
do
    if [[ $para == --num_train_steps* ]];then
        num_train_steps=`echo ${para#*=}`
    elif [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    elif [[ $para == --ckpt_path* ]];then
        ckpt_path=`echo ${para#*=}`
    elif [[ $para == --pipeline_config* ]];then
        pipeline_config=`echo ${para#*=}`
    elif [[ $para == --overflow_dump* ]];then
        overflow_dump=`echo ${para#*=}`
        if [ -d ${overflow_dump_path} ];then
            echo "overflow dump path: ${overflow_dump_path}"
        else
            mkdir -p ${overflow_dump_path}
        fi
    elif [[ $para == --check_loss_scale* ]];then
        check_loss_scale=`echo ${para#*=}`
    elif [[ $para == --step_dump* ]];then
        step_dump=`echo ${para#*=}`
        if [ -d ${step_dump_path} ];then
            echo "step dump path: ${step_dump_path}"
        else
            mkdir -p ${step_dump_path}
        fi
    elif [[ $para == --skip_eval* ]];then
        skip_eval=`echo ${para#*=}`
    elif [[ $para == --use_conda* ]];then
        use_conda=`echo ${para#*=}`
    fi
done

if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi



##########################执行训练#########################
start_time=$(date +%s)
cd $cur_path/../models/research
if [ -f ${pipeline_config}.bak ];then
    cp ${pipeline_config}.bak ${pipeline_config}
else
    cp ${pipeline_config} ${pipeline_config}.bak
fi

sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config}
sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config}

for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
    do
    echo "Device ID: $ASCEND_DEVICE_ID"
    export RANK_ID=$RANK_ID
    if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
        rm -rf $cur_path/output/${ASCEND_DEVICE_ID}
        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
    else
        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
    fi

#训练执行脚本,需要根据网络修改
    nohup python3 -u ./object_detection/model_main_rt.py \
        --pipeline_config_path=${pipeline_config} \
        --model_dir=$cur_path/output/${ASCEND_DEVICE_ID} \
        --data_path=${data_path} \
        --overflow_dump_path=${overflow_dump_path} \
        --step_dump_path=${step_dump_path} \
        --alsologtostderr \
        --amp \
        --num_train_steps=${num_train_steps} \
        "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
done
wait

end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

##########################业务日志#########################
grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/$ASCEND_DEVICE_ID/plog_err.log

################################性能结果处理#########################
echo "-----------------------Final result------------------------"
#性能FPS计算,需要根据网络修改
#FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'`
FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'NR>2{print line}{line=$0}'|awk '{print $2}'|awk '{sum+=$1} END {print sum/NR}'`

FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'}'`
echo "Final Performance images/sec : $FPS"
################################精度结果处理#########################
#精度计算,需要根据网络修改
train_accuracy=`grep Precision $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep Average |awk 'NR==1 {print $13}'`

#echo 'Final Training Accuracy mAP: $train_accuracy'
################################E2E训练时长##########################
echo "Final Training Duration sec : $e2e_time"

################################性能看护#############################
DeviceType=`uname -m`
CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf'
ActualFPS=${FPS}
TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'`

#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改
grep INFO:tensorflow:loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt

ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log

#eval版本需求开发中,精度结果临时看护最终的loss
echo "Final Training Accuracy loss: $ActualLoss"

if [[ $skip_eval == "" ]];then
##获取错误信息
#系统错误消息
#error_msg="CanonicalizeShape failed, node:Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/non_max_suppression/NonMaxSuppressionV3"
error_msg="E19999: Inner Error"
#判断错误信息是否和历史版本一致
Status=`grep "${error_msg}" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | wc -l`

#失败阶段
ModelStatus="图执行FAIL"

#DTS单号
#DTS_Number="DTS202105130LVO7FP0J00,DTS202105130O6E1SP1400"
DTS_Number="DTS202105200RLRJ1P1300"
echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "Status = ${Status}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "error_msg = ${error_msg}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
fi

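A note on the FPS block in the script above: the pipeline `awk 'NR>2{print line}{line=$0}'` prints every sample one line late, which silently discards both the first `global_step/sec` sample (graph build and warm-up) and the last, possibly truncated one before averaging. A rough Python equivalent, assuming the standard `INFO:tensorflow:global_step/sec: N` log lines (function and variable names are illustrative):

    import re

    def fps_from_log(log_path, batch_size=32):
        """Average global_step/sec samples, dropping the first and the last."""
        pat = re.compile(r"INFO:tensorflow:global_step/sec: ([0-9.]+)")
        with open(log_path) as f:
            samples = [float(m.group(1)) for m in map(pat.search, f) if m]
        kept = samples[1:-1]  # mirrors awk 'NR>2{print line}{line=$0}'
        steps_per_sec = sum(kept) / len(kept)
        fps = batch_size * steps_per_sec              # images/sec, "ActualFPS"
        train_time_ms = batch_size * 1000.0 / fps     # ms/step, "TrainingTime"
        return fps, train_time_ms
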
"%.2f\n",'${batch_size}'*'${FPS}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +#eval版本需求开发中,精度结果临时看护最终的loss +echo "Final Training Accuracy loss: $ActualLoss" + +if [[ $skip_eval == "" ]];then +##获取错误信息 +#系统错误消息 +#error_msg="CanonicalizeShape failed, node:Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/non_max_suppression/NonMaxSuppressionV3" +error_msg="E19999: Inner Error" +#判断错误信息是否和历史版本一致 +Status=`grep "${error_msg}" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | wc -l` + +#失败阶段 +ModelStatus="图执行FAIL" + +#DTS单号 +#DTS_Number="DTS202105130LVO7FP0J00,DTS202105130O6E1SP1400" +DTS_Number="DTS202105200RLRJ1P1300" +echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "Status = ${Status}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "error_msg = ${error_msg}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +fi + diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh new file mode 100644 index 000000000..a690a02b4 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh @@ -0,0 +1,174 @@ +#!bin/bash +cur_path=`pwd` +#临时补丁,需要根据网络修改 +#cp $ASCEND_OPP_PATH/op_impl/built-in/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json $cur_path/aic-ascend910-ops-info.json.bak -f +#python3 ops_info_patch.py + +#环境设置,需要根据网络修改 +export 
PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH + +#集合通信 +export RANK_SIZE=1 +export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p_${ASCEND_DEVICE_ID}.json +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#数据集参数 +data_path="/data" +use_conda=0 + +#训练参数,需要根据模型修改 +Network="SSD-MobilenetV1-FPN_ID1459_for_TensorFlow" +num_train_steps=1000 +batch_size=16 +ckpt_path=/checkpoints +pipeline_config=$cur_path/../models/research/configs/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_1gpus.config + +#维测参数 +overflow_dump=False +overflow_dump_path=$cur_path/output/overflow_dump +step_dump=False +step_dump_path=$cur_path/output/step_dump +check_loss_scale=Flase + +#帮助提示,需要根据网络修改 +if [[ $1 == --help || $1 == -h ]];then + echo "usage: ./train_performance_1p.sh " + + echo "" + echo "parameter explain: + --num_train_steps training steps + --data_path source data of training + --ckpt_path pre-checkpoint path + --pipeline_config pipeline config path + --overflow_dump overflow detection,default is False + --overflow_dump_path overflow dump path + --check_loss_scale check whether loss scale is valid, default is False + --step_dump Dump step data, default is False, can only set when overflow_dump is False + --step_dump_path step_dump_path + -h/--help Show help message + " + exit 1 +fi + +#入参设置,需要根据网络修改 +for para in $* +do + if [[ $para == --num_train_steps* ]];then + num_train_steps=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --pipeline_config* ]];then + pipeline_config=`echo ${para#*=}` + elif [[ $para == --overflow_dump* ]];then + overflow_dump=`echo ${para#*=}` + if [ -d ${overflow_dump_path} ];then + echo "overflow dump path: ${overflow_dump_path}" + else + mkdir -p ${overflow_dump_path} + fi + elif [[ $para == --check_loss_scale* ]];then + check_loss_scale=`echo ${para#*=}` + elif [[ $para == --step_dump* ]];then + step_dump=`echo ${para#*=}` + if [ -d ${step_dump_path} ];then + echo "step dump path: ${step_dump_path}" + else + mkdir -p ${step_dump_path} + fi + elif [[ $para == --use_conda* ]];then + use_conda=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +if [[ $use_conda == "1" ]];then + echo "======start use_conda======" + source activate python3.7.5 +fi + +##########################执行训练######################### +start_time=$(date +%s) +cd $cur_path/../models/research +if [ -f ${pipeline_config}.bak ];then + cp ${pipeline_config}.bak ${pipeline_config} +else + cp ${pipeline_config} ${pipeline_config}.bak +fi + +#sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config} +sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config} + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); + do + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/${ASCEND_DEVICE_ID} + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + else + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + fi + +#训练执行脚本,需要根据网络修改 + nohup python3 -u ./object_detection/model_main_rt.py \ + --pipeline_config_path=${pipeline_config} \ + --model_dir=$cur_path/output/${ASCEND_DEVICE_ID}/npu_ckpt_mobilenetv1_fpn_${RANK_SIZE}p \ + --data_path=${data_path} \ + --overflow_dump_path=${overflow_dump_path} \ + 
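The 8p launcher that follows binds each rank to a contiguous slice of host CPU cores via `taskset`; its `let` arithmetic is easier to audit in this form (a sketch; `ranks=8` matches the hard-coded divisor in the script):

    def core_range(rank_id, corenum, ranks=8):
        """Cores for one rank: mirrors 'let a=RANK_ID*corenum/8; let b=RANK_ID+1;
        let c=b*corenum/8-1' in the 8p launcher below."""
        start = rank_id * corenum // ranks
        end = (rank_id + 1) * corenum // ranks - 1
        return start, end

    # 96 host cores, 8 ranks -> rank 0: (0, 11) ... rank 7: (84, 95)
    print([core_range(r, 96) for r in range(8)])
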
diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 000000000..07a8a8984
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+cur_path=`pwd`
+export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH
+#集合通信
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p.json
+export JOB_ID=10087
+RANK_ID_START=0
+ASCEND_DEVICE_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#数据集参数
+data_path=""
+use_conda=0
+#训练参数,需要根据模型修改
+Network="SSD-Resnet50V1-FPN_ID1463_for_TensorFlow"
+num_train_steps=1000
+batch_size=32
+ckpt_path=/checkpoints
+pipeline_config=$cur_path/../models/research/configs/ssd320_full_8gpus.config
+
+#维测参数
+overflow_dump=False
+overflow_dump_path=$cur_path/output/overflow_dump
+step_dump=False
+step_dump_path=$cur_path/output/step_dump
+check_loss_scale=False
+
+#帮助提示,需要根据网络修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_8p.sh "
+
+    echo ""
+    echo "parameter explain:
+    --num_train_steps      training steps
+    --data_path            source data of training
+    --ckpt_path            pre-checkpoint path
+    --pipeline_config      pipeline config path
+    --overflow_dump        overflow detection,default is False
+    --overflow_dump_path   overflow dump path
+    --check_loss_scale     check whether loss scale is valid, default is False
+    --step_dump            Dump step data, default is False, can only set when overflow_dump is False
+    --step_dump_path       step_dump_path
+    -h/--help              Show help message
+    "
+    exit 1
+fi
+
+#入参设置,需要根据网络修改
+for para in $*
+do
+    if [[ $para == --num_train_steps* ]];then
+        num_train_steps=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --bind_core* ]]; then
+        bind_core=`echo ${para#*=}`
+        name_bind="_bindcore"
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+    elif [[ $para == --pipeline_config* ]];then
+        pipeline_config=`echo ${para#*=}`
+    elif [[ $para == --overflow_dump* ]];then
+        overflow_dump=`echo ${para#*=}`
+        if [ -d ${overflow_dump_path} ];then
+            echo "overflow dump path: ${overflow_dump_path}"
+        else
+            mkdir -p ${overflow_dump_path}
+        fi
+    elif [[ $para == --check_loss_scale* ]];then
+        check_loss_scale=`echo ${para#*=}`
+    elif [[ $para == --step_dump* ]];then
+        step_dump=`echo ${para#*=}`
+        if [ -d ${step_dump_path} ];then
+            echo "step dump path: ${step_dump_path}"
+        else
+            mkdir -p ${step_dump_path}
+        fi
+    elif [[ $para == --use_conda* ]];then
+        use_conda=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+
+
+##########################执行训练#########################
+start_time=$(date +%s)
+cd $cur_path/../models/research
+if [ -f ${pipeline_config}.bak ];then
+    cp ${pipeline_config}.bak ${pipeline_config}
+else
+    cp ${pipeline_config} ${pipeline_config}.bak
+fi
+
+sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config}
+sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config}
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+    do
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$((ASCEND_DEVICE_ID_START+RANK_ID))
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf $cur_path/output/${ASCEND_DEVICE_ID}
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    else
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    fi
+
+#训练执行脚本,需要根据网络修改
+    corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+    let a=RANK_ID*${corenum}/8
+    let b=RANK_ID+1
+    let c=b*${corenum}/8-1
+    if [ "x${bind_core}" != x ];then
+        bind_core="taskset -c $a-$c"
+    fi
+    nohup ${bind_core} python3 -u ./object_detection/model_main_rt.py \
+        --pipeline_config_path=${pipeline_config} \
+        --model_dir=$cur_path/output/${ASCEND_DEVICE_ID_START} \
+        --data_path=${data_path} \
+        --overflow_dump_path=${overflow_dump_path} \
+        --step_dump_path=${step_dump_path} \
+        --alsologtostderr \
+        --amp \
+        --skip_eval=True \
+        --num_train_steps=${num_train_steps} \
+        "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait

+end_time=$(date +%s)
+e2e_time=$(( $end_time - 
$start_time )) + +ASCEND_DEVICE_ID=0 + + +##########################业务日志######################### +grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/${ASCEND_DEVICE_ID}/plog_err.log + +################################性能结果处理######################### +echo "-----------------------Final result------------------------" +#性能FPS计算,需要根据网络修改 +#FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $2}'` + +FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR>2{print line}{line=$0}'|awk '{print $2}'|awk '{sum+=$1} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'*'${RANK_SIZE}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}${name_bind}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log + +#eval版本需求开发中,精度结果临时看护最终的loss +echo "Final Training Accuracy loss: $ActualLoss" + +##获取错误信息 +#系统错误消息 +#error_msg="CanonicalizeShape failed, node:Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/non_max_suppression/NonMaxSuppressionV3" +error_msg="9999: Inner Error" + +#判断错误信息是否和历史版本一致 +Status=`grep "${error_msg}" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | wc -l` + +#失败阶段 +ModelStatus="图执行FAIL" + +#DTS单号 +#DTS_Number="DTS202105130LVO7FP0J00,DTS202105130O6E1SP1400" +DTS_Number="DTS202105200RLRJ1P1300" + +echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "Status = ${Status}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "error_msg = ${error_msg}" >> 
$cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log
diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh
new file mode 100644
index 000000000..e914e7250
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+cur_path=`pwd`
+export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH
+export HCCL_CONNECT_TIMEOUT=300
+#集合通信
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p.json
+export JOB_ID=10087
+RANK_ID_START=0
+ASCEND_DEVICE_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#数据集参数
+data_path="/data"
+use_conda=0
+
+#训练参数,需要根据模型修改
+Network="SSD-MobilenetV1-FPN_ID1459_for_TensorFlow"
+num_train_steps=800
+batch_size=16
+ckpt_path=/checkpoints
+pipeline_config=$cur_path/../models/research/configs/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_8gpus.config
+
+#帮助提示,需要根据网络修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_8p.sh "
+
+    echo ""
+    echo "parameter explain:
+    --num_train_steps      training steps
+    --data_path            source data of training
+    --ckpt_path            pre-checkpoint path
+    --pipeline_config      pipeline config path
+    --skip_eval            whether to skip eval
+    -h/--help              Show help message
+    "
+    exit 1
+fi
+
+#入参设置,需要根据网络修改
+for para in $*
+do
+    if [[ $para == --num_train_steps* ]];then
+        num_train_steps=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+    elif [[ $para == --pipeline_config* ]];then
+        pipeline_config=`echo ${para#*=}`
+    elif [[ $para == --use_conda* ]];then
+        use_conda=`echo ${para#*=}`
+    elif [[ $para == --skip_eval* ]];then
+        skip_eval=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+if [[ $use_conda == "1" ]];then
+    echo "======start use_conda======"
+    source activate python3.7.5
+fi
+
+
+##########################执行训练#########################
+start_time=$(date +%s)
+cd $cur_path/../models/research
+if [ -f ${pipeline_config}.bak ];then
+    cp ${pipeline_config}.bak ${pipeline_config}
+else
+    cp ${pipeline_config} ${pipeline_config}.bak
+fi
+
+#sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config}
+sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config}
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+    do
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$((ASCEND_DEVICE_ID_START+RANK_ID))
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf $cur_path/output/${ASCEND_DEVICE_ID}
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    else
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    fi
+
+#训练执行脚本,需要根据网络修改
+    nohup python3 -u ./object_detection/model_main_rt.py \
+        --pipeline_config_path=${pipeline_config} \
+        --model_dir=$cur_path/output/${ASCEND_DEVICE_ID}/npu_ckpt_mobilenetv1_fpn_${RANK_SIZE}p \
+        --data_path=${data_path} \
+        --alsologtostderr \
+        --amp \
+        --num_train_steps=${num_train_steps} \
+        --skip_eval=True \
+        "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+done
+wait
+
+end_time=$(date +%s)
+e2e_time=$(( $end_time - 
$start_time )) +ASCEND_DEVICE_ID=0 + + +##########################业务日志######################### +grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/${ASCEND_DEVICE_ID}/plog_err.log + +################################性能结果处理######################### +echo "-----------------------Final result------------------------" +#性能FPS计算,需要根据网络修改 +FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $2}'|tail -2|head -1` + +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'*'${RANK_SIZE}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/7/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/7/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/7/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/7/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/7/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/7/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/7/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/7/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/7/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/7/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/7/${CaseName}.log + +conda deactivate -- Gitee From d08db262d68ed81fd05334396a4c5f82ab96a23d Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 11:43:25 +0800 Subject: [PATCH 5/9] =?UTF-8?q?2D=5FUnet=5FID2337=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2D_Unet_ID2337_for_TensorFlow/main_rt.py | 74 +++++++++++ .../test/train_RT2_performance_1p.sh | 115 ++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py create mode 100644 TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py new file mode 100644 index 000000000..0ec68d05f --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py @@ -0,0 +1,74 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from npu_bridge.npu_init import * +from model import * +from data import * +import argparse + +#os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +parser = argparse.ArgumentParser() +parser.add_argument('--epochs', default=5, type=int) +parser.add_argument('--train_data_path', default='./data/membrane/train') +parser.add_argument('--test_data_path', default='./data/membrane/test') +parser.add_argument('--predict_data_path', default='./data/membrane/predict') +args = parser.parse_args() + +def main(): + data_gen_args = dict(rotation_range=0.2, + width_shift_range=0.05, + height_shift_range=0.05, + shear_range=0.05, + zoom_range=0.05, + horizontal_flip=True, + fill_mode='nearest') + myGene = trainGenerator(2,args.train_data_path,'image','label',data_gen_args,save_to_dir = None) + + model = unet() + model_checkpoint = ModelCheckpoint('unet_membrane.hdf5', monitor='loss',verbose=2, save_best_only=True) + model.fit_generator(myGene,steps_per_epoch=300,epochs=args.epochs,callbacks=[model_checkpoint]) + + testGene = testGenerator(args.test_data_path) + results = model.predict_generator(testGene,30,verbose=2) + saveResult(args.predict_data_path,results) + +if __name__ == '__main__': + # ***** npu modify begin ***** + global_config = tf.ConfigProto(log_device_placement=False) + custom_op = global_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + # custom_op.parameter_map["dynamic_input"].b = 1 + # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + npu_keras_sess = set_keras_session_npu_config(config=global_config) + # ***** npu modify end ***** + main() + close_session(npu_keras_sess) + diff --git a/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..73c514e23 --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,115 @@ +#!/bin/bash +cur_path=`pwd`/../ + +#基础参数,需要模型审视修改 +#Batch Size +batch_size=2 +#网络名称,同目录名称 
+Network="2D_Unet_ID2337_for_TensorFlow" +#Device数量,单卡默认为1 +RANK_SIZE=1 +export RANK_SIZE=1 +#训练epoch,可选 +train_epochs=5 +#训练step +train_steps= +#学习率 +learning_rate= + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#参数配置 +data_path="" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=./datasets" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path \" must be config" + exit 1 +fi + +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + +mkdir -p ${cur_path}/test/predict +start=$(date +%s) +python3 main_rt.py --epochs=${train_epochs} \ + --train_data_path=${data_path}/data/membrane/train \ + --test_data_path=${data_path}/data/membrane/test \ + --predict_data_path=${cur_path}/test/predict > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +end=$(date +%s) +e2e_time=$(( $end - $start )) + +#echo "Final Performance ms/step : $average_perf" +echo "Final Training Duration sec : $e2e_time" + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $5}'|cut -d 's' -f -1` +if echo "${TrainingTime}" | grep -q -E 'm$' +then + TrainingTime=`grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $5}' |cut -d 'm' -f -1` + FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'` +else + FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'}'` +fi +#FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $11}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $8}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> 
$cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From d113ff4f3bb6d60d0067c5fcd38459333acd49a1 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 13:00:16 +0800 Subject: [PATCH 6/9] =?UTF-8?q?InceptionV2=5FID0670=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_RT2_performance_1p.sh | 91 +++++++++ .../test/train_RT2_performance_8p.sh | 192 ++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..d56cff5f4 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,91 @@ +#!/bin/bash +cur_path=`pwd`/../ +export JOB_ID=10087 +export RANK_SIZE=1 +#基础参数,需要模型审视修改 +#Batch Size +batch_size=128 +#网络名称,同目录名称 +Network="InceptionV2_ID0670_for_TensorFlow" +#Device数量,单卡默认为1 +RankSize=1 +#参数配置 +data_path="../VCTK-Corpus" + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=../VCTK-Corpus" + exit 1 +fi +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + + +start=$(date +%s) +nohup python3.7 $cur_path/train.py --rank_size=1 \ + --mode=train \ + --max_epochs=1 \ + --iterations_per_loop=10 \ + --data_dir=${data_path} \ + --batch_size=${batch_size} \ + --lr=0.045 \ + --display_every=100 \ + --log_dir=$cur_path/test/output/model \ + --eval_dir=$cur_path/test/output/model \ + --log_name=inception_v2.log > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait +end=$(date +%s) +e2e_time=$(( $end - $start )) + + +#输出性能FPS,需要模型审视修改 +FPS=`grep "epoch" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|sed "1d" |awk -F 'FPS: ' '{print $2}'|awk -F " " '{print $1}'|awk '{sum+=$1} END {print "AVG",sum/NR}'|awk -F " " '{print $2}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" +echo "E2E Training Duration sec : $e2e_time" +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' +train_accuracy="None" +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN {printf "%.2f\n",'${batch_size}'*1000/'${ActualFPS}'}'` 
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep "epoch" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss: " '{print $2}'|awk -F " " '{print $1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 000000000..07db92a79
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+source env.sh
+#当前路径,不需要修改
+cur_path=`pwd`
+
+#集合通信参数,不需要修改
+#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+# 数据集路径,保持为空,不需要修改
+data_path="/npu/traindata/imagenet_TF"
+
+#设置默认日志级别,不需要修改
+export ASCEND_GLOBAL_LOG_LEVEL=3
+
+#基础参数 需要模型审视修改
+#网络名称,同目录名称
+Network="InceptionV2_ID0670_for_TensorFlow"
+#训练epoch
+train_epochs=1
+#训练batch_size
+batch_size=256
+#训练step
+train_steps=`expr 1281167 / ${batch_size}`
+#学习率
+learning_rate=""
+
+#TF2.X独有,不需要修改
+export NPU_LOOP_SIZE=${train_steps}
+
+#维测参数,precision_mode需要模型审视修改
+precision_mode="allow_mix_precision"
+#维持参数,以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_8p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode     precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump          if or not over detection, default is False
+    --data_dump_flag     data dump flag, default is 0
+    --data_dump_step     data dump step, default is 10
+    --profiling          if or not profiling for performance debug, default is False
+    --autotune           whether to enable autotune, default is False
+    --data_path          source data of training
+    -h/--help            show help message
+    "
+    exit 1
+fi
+
+#参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        
data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ + cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#autotune时,先开启autotune执行单P训练,不需要修改 +if [[ $autotune == True ]]; then + train_full_1p.sh --autotune=$autotune --data_path=$data_path + wait + autotune=False +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $RANK_ID" + export RANK_ID=$RANK_ID + export ASCEND_DEVICE_ID=$RANK_ID + ASCEND_DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3.7 $cur_path/../train.py --rank_size=8 \ + --mode=train_and_evaluate \ + --max_epochs=100 \ + --T_max=100 \ + --iterations_per_loop=10 \ + --batch_size=64 \ + --display_every=100 \ + --data_dir=${data_path} \ + --lr=0.045 \ + --log_dir=./model \ + --eval_dir=./model \ + --log_name=inception_v2.log > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $6}'| awk -F "." 
'{print $1}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`expr ${batch_size} \* ${RANK_SIZE} \* 1000 \/ ${FPS}` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v top1 | awk -F " " '{print $(NF-3)}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From c85277dc3d9656a1c5046b4a7dd1ddd9ffeae2c9 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 13:08:29 +0800 Subject: [PATCH 7/9] =?UTF-8?q?CNN-CTC=5FID0683=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py b/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py index 2e81c1ac1..9f92b8446 100644 --- a/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py +++ b/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py @@ -79,7 +79,6 @@ def train(train_dir=None, val_dir=None, mode='train'): custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") #custom_op.parameter_map["dynamic_input"].b = 1 #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") - custom_op.parameter_map["jit_compile"].b = False global_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF global_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF # ***** npu modify end ****** -- Gitee From 4498957f399605951771f92832b9b4999b22aa79 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 14:20:15 +0800 Subject: [PATCH 8/9] =?UTF-8?q?Siamese=5FID0506=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../siamese_network_rt.py | 138 ++++++++ .../test/train_RT2_performance_1p.sh | 119 
+++++++ .../test/train_RT2_performance_8p.sh | 243 ++++++++++++++ .../Siamese_ID0506_for_TensorFlow/train_rt.py | 307 ++++++++++++++++++ 4 files changed, 807 insertions(+) create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py new file mode 100644 index 000000000..4587cee9a --- /dev/null +++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py @@ -0,0 +1,138 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from npu_bridge.npu_init import * +import tensorflow as tf +import numpy as np +from npu_bridge.estimator.npu.npu_dynamic_rnn import DynamicRNN + + +class SiameseLSTM(object): + """ + A LSTM based deep Siamese network for text similarity. + Uses an character embedding layer, followed by a biLSTM and Energy Loss layer. 
+ """ + + def BiRNN(self, x, dropout, scope, embedding_size, sequence_length, hidden_units): + n_hidden = hidden_units + n_layers = 3 + # Prepare data shape to match `static_rnn` function requirements + x = tf.unstack(tf.transpose(x, perm=[1, 0, 2])) + print(x) + # Define lstm cells with tensorflow + # Forward direction cell + with tf.name_scope("fw" + scope), tf.variable_scope("fw" + scope): + stacked_rnn_fw = [] + for _ in range(n_layers): + fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True) + lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(fw_cell, output_keep_prob=dropout) + stacked_rnn_fw.append(lstm_fw_cell) + lstm_fw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_fw, state_is_tuple=True) + + with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope): + stacked_rnn_bw = [] + for _ in range(n_layers): + bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True) + lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=dropout) + stacked_rnn_bw.append(lstm_bw_cell) + lstm_bw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_bw, state_is_tuple=True) + # Get lstm cell output + + with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope): + outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell_m, lstm_bw_cell_m, x, dtype=tf.float32) + return outputs[-1] + + def BiRNN_npu(self, x, dropout, scope, embedding_size, sequence_length, hidden_units): + n_hidden = hidden_units + # n_layers=3 + # Prepare data shape to match `static_rnn` function requirements + x = tf.transpose(x, perm=[1, 0, 2], name="transpose_inputdata") + print(x) + + # dropout_rate is 1, so not add dropout + with tf.name_scope(scope), tf.variable_scope(scope): + fw_cell1 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + fw_y1, output_h, output_c, i, j, f, o, tanhc = fw_cell1(x) + bw_cell1 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + bw_y1, output_h, output_c, i, j, f, o, tanhc = bw_cell1(tf.reverse(x, axis=[0])) + output_rnn1 = tf.concat((fw_y1, tf.reverse(bw_y1, axis=[0])), axis=2) + + fw_cell2 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + fw_y2, output_h, output_c, i, j, f, o, tanhc = fw_cell2(output_rnn1) + bw_cell2 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + bw_y2, output_h, output_c, i, j, f, o, tanhc = bw_cell2(tf.reverse(output_rnn1, axis=[0])) + output_rnn2 = tf.concat((fw_y2, tf.reverse(bw_y2, axis=[0])), axis=2) + + fw_cell3 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + fw_y3, output_h, output_c, i, j, f, o, tanhc = fw_cell3(output_rnn2) + bw_cell3 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + bw_y3, output_h, output_c, i, j, f, o, tanhc = bw_cell3(tf.reverse(output_rnn2, axis=[0])) + output_rnn3 = tf.concat((fw_y3, tf.reverse(bw_y3, axis=[0])), axis=2) + + outputs = tf.transpose(output_rnn3, perm=[1, 0, 2], name="transpose_outdata") + print(outputs) + + return outputs[:, -1, :] + + def contrastive_loss(self, y, d, batch_size): + tmp = y * tf.square(d) + # tmp= tf.mul(y,tf.square(d)) + tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0)) + return tf.reduce_sum(tmp + tmp2) / batch_size / 2 + + def __init__( + self, sequence_length, vocab_size, embedding_size, hidden_units, l2_reg_lambda, batch_size): + + # Placeholders for input, output and dropout + self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1") + self.input_x2 = 
tf.placeholder(tf.int32, [None, sequence_length], name="input_x2") + self.input_y = tf.placeholder(tf.float32, [None], name="input_y") + self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") + + # Keeping track of l2 regularization loss (optional) + l2_loss = tf.constant(0.0, name="l2_loss") + + # Embedding layer + with tf.name_scope("embedding"): + self.W = tf.Variable( + tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), + trainable=True, name="W") + self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1) + # self.embedded_chars_expanded1 = tf.expand_dims(self.embedded_chars1, -1) + self.embedded_chars2 = tf.nn.embedding_lookup(self.W, self.input_x2) + # self.embedded_chars_expanded2 = tf.expand_dims(self.embedded_chars2, -1) + + # Create a convolution + maxpool layer for each filter size + with tf.name_scope("output"): + self.out1 = self.BiRNN_npu(self.embedded_chars1, self.dropout_keep_prob, "side1", embedding_size, + sequence_length, hidden_units) + self.out2 = self.BiRNN_npu(self.embedded_chars2, self.dropout_keep_prob, "side2", embedding_size, + sequence_length, hidden_units) + self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1, keep_dims=True)) + self.distance = tf.div(self.distance, + tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.out1), 1, keep_dims=True)), + tf.sqrt(tf.reduce_sum(tf.square(self.out2), 1, keep_dims=True)))) + self.distance = tf.reshape(self.distance, [-1], name="distance") + #self.distance = util.set_graph_exec_config(self.distance, dynamic_input=True, + #dynamic_graph_execute_mode='dynamic_execute', + #dynamic_inputs_shape_range='data:[64,15],[64,15],[64]') + with tf.name_scope("loss"): + self.loss = self.contrastive_loss(self.input_y, self.distance, batch_size) + #### Accuracy computation is outside of this class. 
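+        # The block below thresholds the normalized distance at 0.5: tf.rint
+        # rounds each distance to the nearest integer (0 or 1), so
+        # temp_sim = 1 - rint(distance) predicts "similar" (1) when
+        # distance < 0.5 and "dissimilar" (0) otherwise. Illustrative values
+        # (assumed, not from a run): distance = [0.2, 0.8] -> rint = [0., 1.]
+        # -> temp_sim = [1., 0.], compared elementwise against input_y.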
+        with tf.name_scope("accuracy"):
+            self.temp_sim = tf.subtract(tf.ones_like(self.distance), tf.rint(self.distance),
+                                        name="temp_sim")  # auto threshold 0.5
+            correct_predictions = tf.equal(self.temp_sim, self.input_y)
+            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..2a5cb1c2b
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+cur_path=`pwd`/../
+
+
+#设置默认日志级别,不需要修改
+# export ASCEND_GLOBAL_LOG_LEVEL=3
+
+#基础参数,需要模型审视修改
+#Batch Size
+batch_size=64
+#网络名称,同目录名称
+Network="Siamese_ID0506_for_TensorFlow"
+#Device数量,单卡默认为1
+RANK_SIZE=1
+#训练epoch,可选
+train_epochs=10 # init 1
+#训练step
+train_steps=
+#学习率
+learning_rate=5e-5
+#ASCEND_DEVICE_ID=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#参数配置
+data_path=""
+
+if [[ $1 == --help || $1 == --h ]];then
+    echo "usage:./train_RT2_performance_1p.sh"
+    exit 1
+fi
+
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+##############执行训练##########
+cd $cur_path
+
+wait
+
+if [ -d $cur_path/test/output ];then
+    rm -rf $cur_path/test/output/*
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+fi
+wait
+
+start=$(date +%s)
+nohup python3 train_rt.py \
+    --num_epochs $train_epochs \
+    --training_files=$data_path/person_match.train2 \
+    --hidden_units=64 \
+    --embedding_dim=304 \
+    --device_size=$RANK_SIZE \
+    --device_id=$ASCEND_DEVICE_ID \
+    --evaluate_every=10000 \
+    --checkpoint_every=10000 > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+wait
+
+end=$(date +%s)
+e2e_time=$(( $end - $start ))
+
+#echo "Final Performance ms/step : $average_perf"
+echo "Final Training Duration sec : $e2e_time"
+
+#结果打印,不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS,需要模型审视修改
+TrainingTime=`grep "TRAIN " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $6}'`
+wait
+FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'*1000}'`
+#打印,不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep "TRAIN " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $10}'`
+#打印,不需要修改
+#echo "Final Train Accuracy : ${train_accuracy}"
+
+
+#性能看护结果汇总
+#训练用例信息,不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+##获取性能数据,不需要修改
+#吞吐量
+ActualFPS=${FPS}
+#单迭代训练时长
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'`
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep "TRAIN " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $8}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log

diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 000000000..1d56056d7
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+
+#当前路径,不需要修改
+cur_path=`pwd`/../
+
+#集合通信参数,不需要修改
+
+export RANK_SIZE=8
+export JOB_ID=10087
+export RANK_ID=8p
+RANK_ID_START=0
+export RANK_TABLE_FILE=${cur_path}/test/8p.json
+export HCCL_CONNECT_TIMEOUT=600
+RANK_SIZE=8
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+#基础参数,需要模型审视修改
+#网络名称,同目录名称
+Network="Siamese_ID0506_for_TensorFlow"
+#训练epoch
+train_epochs=10 #init1
+#训练batch_size
+batch_size=64
+# 训练step
+train_steps=
+# 学习率
+learning_rate=4e-4
+
+
+#维测参数,precision_mode需要模型审视修改
+#precision_mode="allow_mix_precision"
+#维持参数,以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_8p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode           precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump                if or not over detection, default is False
+    --data_dump_flag           data dump flag, default is False
+    --data_dump_step           data dump step, default is 10
+    --profiling                if or not profiling for performance debug, default is False
+    --data_path                source data of training
+    -h/--help                  show help message
+    "
+    exit 1
+fi
+
+#参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#训练开始时间,不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录,需要模型审视修改
+cd $cur_path
+#sed -i "s|/scratch/shiyichu/dataset/FaceDatabases/CASIA-Webface/casia_mtcnncaffe_aligned|${data_path}|g" ./data/list_casia_mtcnncaffe_aligned_nooverlap.txt
+#for i in 0 1 2 3 4 5 6 7
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量,不需要修改
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export DEVICE_INDEX=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+    ASCEND_DEVICE_ID=$RANK_ID
+    export DEVICE_ID=$ASCEND_DEVICE_ID
+    echo "Device ID: $ASCEND_DEVICE_ID"
+
+
+    #创建DeviceID输出目录,不需要修改
+    if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # corenum=`cat /proc/cpuinfo |grep 'processor' | wc -l`
+    # let a=RANK_ID*${corenum}/8
+    # let b=RANK_ID+1
+    # let c=b*${corenum}/8-1
+    # if [ "x${bind_core}" != x];then
+    #     bind_core="taskset -c $a-$c"
+    # fi
+
+    # sed -i "s|ind_start = 0 * part_int|ind_start = ${i} * part_int|g" $cur_path/../train.py
+
+    #执行训练脚本,以下传参不需要修改,其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    # timeout -s SIGINT 3600 nohup python3 $cur_path/../train_8p.py \
+    nohup python3 ${cur_path}/train_rt.py \
+        --num_epochs $train_epochs \
+        --training_files=$data_path/person_match.train2 \
+        --hidden_units=64 \
+        --embedding_dim=304 \
+        --device_size=8 \
+        --device_id=$RANK_ID \
+        --evaluate_every=10000 \
+        --checkpoint_every=10000 > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+
+    # sed -i "s|ind_start = ${i} * part_int|ind_start = 0 * part_int|g" $cur_path/../train.py
+
+
+    # sleep 60
+    # num=`grep 'ERROR' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "out of bounds"| wc -l`
+    # while [ $num -eq 0 ]
+    # do
+    #     num=`grep 'ERROR' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "out of bounds"| wc -l`
+    #     echo "${num}"
+    #     sleep 5
+    # done
+    # ps -ef | grep python3 |grep max_epoch |grep config |awk '{system("kill -9 "$2)}'
+    # echo "killed Yolov4"
+
+done
+wait
+#sed -i "s|${data_path}|/scratch/shiyichu/dataset/FaceDatabases/CASIA-Webface/casia_mtcnncaffe_aligned|g" ./data/list_casia_mtcnncaffe_aligned_nooverlap.txt
+
+#训练结束时间,不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+echo "E2E Training Duration sec : $e2e_time"
+
+# 结果打印,不需要修改
+TrainingTime=`grep "TRAIN " ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${TrainingTime}'*1000*'${RANK_SIZE}'}'`
+# 打印,不需要修改
+echo "Final Performance images/sec: $FPS"
+
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep "TRAIN " ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $10}'`
+
+# 训练用例信息,不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+#吞吐量
+ActualFPS=${FPS}
+#单迭代训练时长
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${BatchSize}'/'${FPS}' }'`
+
+grep "TRAIN " ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $8}' > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt
+
+# 最后一个loss值
+ActualLoss=`awk 'END {print $1}' ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt`
+
+
+
+#关键信息打印到CaseName.log中,此处无需修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# ###下面字段用于冒烟看护
+# BatchSize=${batch_size}
+# #设备类型,自动获取
+# DeviceType=`uname -m`
+# # #用例名称,自动获取
+# # CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'

+# ##获取错误信息
+# #系统错误信息
+# error_msg="of dimension 1 out of bounds"
+# #判断错误信息是否和历史状态一致,此处无需修改
+# Status=`grep "${error_msg}" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | wc -l`
+# #失败阶段,枚举值图准备FAIL/图拆分FAIL/图优化FAIL/图编译FAIL/图执行FAIL/流程OK
+# ModelStatus="图执行FAIL"
+# #DTS单号或者issue链接
+# DTS_Number="DTS20211112715497"

+# #关键信息打印到CaseName.log中,此处无需修改
+# echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "Status = ${Status}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "error_msg = ${error_msg}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log

diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py
new file mode 100644
index 000000000..82f1a942e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py
@@ -0,0 +1,307 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+#! /usr/bin/env python
+
+from npu_bridge.npu_init import *
+import tensorflow as tf
+import numpy as np
+import re
+import os
+import time
+import datetime
+import gc
+from input_helpers import InputHelper
+from siamese_network_rt import SiameseLSTM
+from siamese_network_semantic import SiameseLSTMw2v
+from tensorflow.contrib import learn
+import gzip
+from random import random
+# Parameters
+# ==================================================
+
+tf.flags.DEFINE_boolean("is_char_based", True, "is character based syntactic similarity. "
+                        "if false then word embedding based semantic similarity is used."
+                        "(default: True)")
+
+tf.flags.DEFINE_string("word2vec_model", "wiki.simple.vec", "word2vec pre-trained embeddings file (default: None)")
+tf.flags.DEFINE_string("word2vec_format", "text", "word2vec pre-trained embeddings file format (bin/text/textgz)(default: None)")
+
+tf.flags.DEFINE_integer("embedding_dim", 300, "Dimensionality of character embedding (default: 300)")
+tf.flags.DEFINE_float("dropout_keep_prob", 1.0, "Dropout keep probability (default: 1.0)")
+tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda (default: 0.0)")
+tf.flags.DEFINE_string("training_files", "person_match.train2", "training file (default: None)") #for sentence semantic similarity use "train_snli.txt"
+tf.flags.DEFINE_integer("hidden_units", 50, "Number of hidden units (default:50)")
+
+# Training parameters
+tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
+tf.flags.DEFINE_integer("num_epochs", 300, "Number of training epochs (default: 200)")
+tf.flags.DEFINE_integer("evaluate_every", 1000, "Evaluate model on dev set after this many steps (default: 100)")
+tf.flags.DEFINE_integer("checkpoint_every", 1000, "Save model after this many steps (default: 100)")
+# Misc Parameters
+tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
+tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
+
+
+# rankid
+tf.flags.DEFINE_integer("device_size", 1, "device_size: 1p/8p")
+tf.flags.DEFINE_integer("device_id", 0, "device_id")
+
+FLAGS = tf.flags.FLAGS
+
+print("\nParameters:")
+for attr, value in sorted(FLAGS.__flags.items()):
+    print("{}={}".format(attr.upper(), value))
+print("")
+
+if FLAGS.training_files==None:
+    print("Input Files List is empty. Use --training_files argument.")
+    exit()
+
+
+max_document_length=15
+inpH = InputHelper()
+train_set, dev_set, vocab_processor,sum_no_of_batches = inpH.getDataSets(FLAGS.training_files,max_document_length, 10,
+                                                                         FLAGS.batch_size, FLAGS.is_char_based)
+trainableEmbeddings=False
+if FLAGS.is_char_based==True:
+    FLAGS.word2vec_model = False
+else:
+    if FLAGS.word2vec_model==None:
+        trainableEmbeddings=True
+        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
+              "You are using word embedding based semantic similarity but "
+              "word2vec model path is empty. It is recommended to use --word2vec_model argument. "
+              "Otherwise now the code is automatically trying to learn embedding values (may not help in accuracy)"
+              "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
+    else:
+        inpH.loadW2V(FLAGS.word2vec_model, FLAGS.word2vec_format)
+
+# Training
+# ==================================================
+print("starting graph def")
+with tf.Graph().as_default():
+    session_conf = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement,log_device_placement=FLAGS.log_device_placement)
+    custom_op = session_conf.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = 'NpuOptimizer'
+    custom_op.parameter_map['precision_mode'].s = tf.compat.as_bytes('allow_mix_precision')
+    custom_op.parameter_map['use_off_line'].b = True
+    #custom_op.parameter_map['dynamic_input'].b = True
+    session_conf.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+    sess = tf.Session(config=npu_config_proto(config_proto=session_conf))
+
+    print("started session")
+    with sess.as_default():
+        if FLAGS.is_char_based:
+            siameseModel = SiameseLSTM(
+                sequence_length=max_document_length,
+                vocab_size=len(vocab_processor.vocabulary_),
+                embedding_size=FLAGS.embedding_dim,
+                hidden_units=FLAGS.hidden_units,
+                l2_reg_lambda=FLAGS.l2_reg_lambda,
+                batch_size=FLAGS.batch_size
+            )
+        else:
+            siameseModel = SiameseLSTMw2v(
+                sequence_length=max_document_length,
+                vocab_size=len(vocab_processor.vocabulary_),
+                embedding_size=FLAGS.embedding_dim,
+                hidden_units=FLAGS.hidden_units,
+                l2_reg_lambda=FLAGS.l2_reg_lambda,
+                batch_size=FLAGS.batch_size,
+                trainableEmbeddings=trainableEmbeddings
+            )
+        # Define Training procedure
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        #############self add############
+        if FLAGS.device_size>1:
+            optimizer = npu_tf_optimizer(tf.train.AdamOptimizer(8e-3))
+        else:
+            optimizer = npu_tf_optimizer(tf.train.AdamOptimizer(1e-3))
+        #################################
+        print("initialized siameseModel object")
+
+        grads_and_vars=optimizer.compute_gradients(siameseModel.loss)
+        tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
+        print("defined training_ops")
+        # Keep track of gradient values and sparsity (optional)
+        grad_summaries = []
+        for g, v in grads_and_vars:
+            if g is not None:
+                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
+                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
+                grad_summaries.append(grad_hist_summary)
+                grad_summaries.append(sparsity_summary)
+        grad_summaries_merged = tf.summary.merge(grad_summaries)
+        print("defined gradient summaries")
+        # Output directory for models and summaries
+        timestamp = str(int(time.time()))
+        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
+        print("Writing to {}\n".format(out_dir))
+
+        # Summaries for loss and accuracy
+        loss_summary = tf.summary.scalar("loss", siameseModel.loss)
+        acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy)
+
+        # Train Summaries
+        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
+        train_summary_dir = os.path.join(out_dir, "summaries", "train")
+        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
+
+        # Dev summaries
+        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
+        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
+        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)
+
+        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
+        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
+        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
+        if not os.path.exists(checkpoint_dir):
+            os.makedirs(checkpoint_dir)
+        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
+
+        # Write vocabulary
+        vocab_processor.save(os.path.join(checkpoint_dir, "vocab"))
+
+        # Initialize all variables
+        sess.run(tf.global_variables_initializer())
+
+        print("init all variables")
+        graph_def = tf.get_default_graph().as_graph_def()
+        graphpb_txt = str(graph_def)
+        with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
+            f.write(graphpb_txt)
+
+        if FLAGS.word2vec_model :
+            # initial matrix with random uniform
+            initW = np.random.uniform(-0.25,0.25,(len(vocab_processor.vocabulary_), FLAGS.embedding_dim))
+            #initW = np.zeros(shape=(len(vocab_processor.vocabulary_), FLAGS.embedding_dim))
+            # load any vectors from the word2vec
+            print("initializing initW with pre-trained word2vec embeddings")
+            for w in vocab_processor.vocabulary_._mapping:
+                arr=[]
+                s = re.sub('[^0-9a-zA-Z]+', '', w)
+                if w in inpH.pre_emb:
+                    arr=inpH.pre_emb[w]
+                elif w.lower() in inpH.pre_emb:
+                    arr=inpH.pre_emb[w.lower()]
+                elif s in inpH.pre_emb:
+                    arr=inpH.pre_emb[s]
+                elif s.isdigit():
+                    arr=inpH.pre_emb["zero"]
+                if len(arr)>0:
+                    idx = vocab_processor.vocabulary_.get(w)
+                    initW[idx]=np.asarray(arr).astype(np.float32)
+            print("Done assigning intiW. len="+str(len(initW)))
+            inpH.deletePreEmb()
+            gc.collect()
+            sess.run(siameseModel.W.assign(initW))
+
+        def train_step(x1_batch, x2_batch, y_batch):
+            """
+            A single training step
+            """
+            if random()>0.5:
+                feed_dict = {
+                    siameseModel.input_x1: x1_batch,
+                    siameseModel.input_x2: x2_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: FLAGS.dropout_keep_prob,
+                }
+            else:
+                feed_dict = {
+                    siameseModel.input_x1: x2_batch,
+                    siameseModel.input_x2: x1_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: FLAGS.dropout_keep_prob,
+                }
+            import time
+            begin = time.time()
+            _, step, loss, accuracy, dist, sim, summaries = sess.run([tr_op_set, global_step, siameseModel.loss, siameseModel.accuracy, siameseModel.distance, siameseModel.temp_sim, train_summary_op], feed_dict)
+            end = time.time()
+            costtime = (end - begin) * 1000
+            time_str = datetime.datetime.now().isoformat()
+            print("TRAIN {}: step {} time(ms) {:g} loss {:g} acc {:g}".format(time_str, step, costtime, loss, accuracy))
+            train_summary_writer.add_summary(summaries, step)
+            #print(y_batch, dist, sim)
+
+        def dev_step(x1_batch, x2_batch, y_batch):
+            """
+            A single evaluation step
+            """
+            if random()>0.5:
+                feed_dict = {
+                    siameseModel.input_x1: x1_batch,
+                    siameseModel.input_x2: x2_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: 1.0,
+                }
+            else:
+                feed_dict = {
+                    siameseModel.input_x1: x2_batch,
+                    siameseModel.input_x2: x1_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: 1.0,
+                }
+            import time
+            begin = time.time()
+            step, loss, accuracy, sim, summaries = sess.run([global_step, siameseModel.loss, siameseModel.accuracy, siameseModel.temp_sim, dev_summary_op], feed_dict)
+            end = time.time()
+            costtime = (end - begin) * 1000
+            time_str = datetime.datetime.now().isoformat()
+            print("DEV {}: step {} time(ms) {:g} loss {:g} acc {:g}".format(time_str, step, costtime, loss, accuracy))
+            dev_summary_writer.add_summary(summaries, step)
+            #print (y_batch, sim)
+            return accuracy
+
+        # Generate batches
+        batches=inpH.batch_iter(
+            list(zip(train_set[0], train_set[1], train_set[2])), FLAGS.batch_size, FLAGS.num_epochs,device_size=FLAGS.device_size,device_id=FLAGS.device_id)
+        ##############8p#################
+        if FLAGS.device_size > 1:
+            sum_no_of_batches = sum_no_of_batches//8
+        #################################
+
+        ptr=0
+        max_validation_acc=0.0
+        for nn in range(sum_no_of_batches*FLAGS.num_epochs):
+            batch = next(batches)
+            if len(batch)<1:
+                continue
+            x1_batch,x2_batch, y_batch = zip(*batch)
+            if len(y_batch)<1:
+                continue
+            train_step(x1_batch, x2_batch, y_batch)
+            current_step = tf.train.global_step(sess, global_step)
+            sum_acc=0.0
+            if current_step % FLAGS.evaluate_every == 0:
+                print("\nEvaluation:")
+                dev_batches = inpH.batch_iter(list(zip(dev_set[0],dev_set[1],dev_set[2])), FLAGS.batch_size, 1)
+                for db in dev_batches:
+                    if len(db)<1:
+                        continue
+                    x1_dev_b,x2_dev_b,y_dev_b = zip(*db)
+                    if len(y_dev_b)<1:
+                        continue
+                    acc = dev_step(x1_dev_b, x2_dev_b, y_dev_b)
+                    sum_acc = sum_acc + acc
+                print("")
+            if current_step % FLAGS.checkpoint_every == 0:
+                if sum_acc >= max_validation_acc:
+                    max_validation_acc = sum_acc
+                    saver.save(sess, checkpoint_prefix, global_step=current_step)
+                    tf.train.write_graph(sess.graph.as_graph_def(), checkpoint_prefix, "graph"+str(nn)+".pb", as_text=False)
+                    print("Saved model {} with sum_accuracy={} checkpoint to {}\n".format(nn, max_validation_acc, checkpoint_prefix))
--
Gitee

From b4071d7bc0afd686cd62c55972fb1202771bbafc Mon Sep 17 00:00:00 2001
From: ykxia
Date: Fri, 11 Nov 2022 14:31:02 +0800
Subject: [PATCH 9/9] =?UTF-8?q?PixelLink=5FID3056=5Ffor=5FTensorFlow?=
 =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_RT2_performance_1p.sh          | 171 ++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh

diff --git a/TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..e6ec771ef
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+set -x
+#当前路径,不需要修改
+cur_path=`pwd`
+export PYTHONPATH=${cur_path}/../pylib/src:$PYTHONPATH
+#集合通信参数,不需要修改
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+RankSize=1
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+
+#基础参数,需要模型审视修改
+#网络名称,同目录名称
+Network="PixelLink_ID3056_for_TensorFlow"
+#训练epoch
+train_epochs=
+#训练batch_size
+batch_size=24
+#训练step
+train_steps=200
+#学习率
+learning_rate=
+
+#维测参数,precision_mode需要模型审视修改
+precision_mode="allow_fp32_to_fp16"
+#维持参数,以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode           precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump                if or not over detection, default is False
+    --data_dump_flag           data dump flag, default is False
+    --data_dump_step           data dump step, default is 10
+    --profiling                if or not profiling for performance debug, default is False
+    --data_path                source data of training
+    -h/--help                  show help message
+    "
+    exit 1
+fi
+
+#参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#训练开始时间,不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录,需要模型审视修改
+cd $cur_path/..
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量,不需要修改
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    #创建DeviceID输出目录,不需要修改
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #执行训练脚本,以下传参不需要修改,其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    nohup python3 train_pixel_link.py \
+        --train_dir=./models/pixel_link \
+        --num_gpus=1 \
+        --learning_rate=1e-3 \
+        --gpu_memory_fraction=-1 \
+        --train_image_width=512 \
+        --train_image_height=512 \
+        --batch_size=${batch_size} \
+        --dataset_dir=${data_path} \
+        --dataset_name=icdar2015 \
+        --dataset_split_name=train \
+        --max_number_of_steps=${train_steps} \
+        --checkpoint_path=${CKPT_PATH} \
+        --using_moving_average=1 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#训练结束时间,不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印,不需要修改
+echo "------------------ Final result ------------------"
+
+#单迭代训练时长
+TrainingTime=`grep 'loss =' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "(" '{print $2}' |awk -F " " '{print $1}' |tail -10|awk '{sum+=$1}END {print"",sum/NR}'|sed s/[[:space:]]//g`
+# #输出性能FPS,需要模型审视修改
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${TrainingTime}'}'`
+#打印,不需要修改
+echo "Final Performance item/sec : $FPS"
+
+
+# #输出训练精度,需要模型审视修改
+#train_accuracy=`grep "test AUC" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'`
+# #打印,不需要修改
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息,不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+#吞吐量
+ActualFPS=${FPS}
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep 'loss =' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F " " '{print $6}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
--
Gitee