diff --git a/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2__performance_8p.sh b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2__performance_8p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..85d4d145f7d5b48b9225672fa3af0a4171f62996
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2__performance_8p.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+
+export JOB_ID=10000
+export ENABLE_RUNTIME_V2=1
+
+exec_mode='train' # or 'train_and_eval'
+eval_after_training=False
+
+backbone='resnet50'
+backbone_ckpt_path='/npu/traindata/resnet50_ckpt'
+data_path='/npu/traindata/coco_official_2017'
+
+batch_size=2
+steps=1000
+
+learning_rate_type='cosine' # or 'step'
+learning_rate=0.02
+warmup_learning_rate=0.0067
+warmup_steps=500
+learning_rate_levels='[0.002, 0.0002]'
+learning_rate_steps='[60000, 80000]'
+
+precision_mode='allow_mix_precision'
+loss_scale_flag=0
+loss_scale_value=256
+overflow_dump=False
+
+########## params from command line ##########
+
+for arg in $* ; do
+    if [ ${arg:0:2} == '--' ]; then
+        arg=${arg:2}
+        pos=`expr index "$arg" =`
+        if [ $pos -gt 0 ]; then
+            var_name=${arg:0:$pos-1}
+            var_value=${arg:$pos}
+            eval $var_name=$var_value
+        fi
+    fi
+done
+
+for para in $*
+do
+    if [[ $para == --bind_core* ]];then
+        bind_core=`echo ${para#*=}`
+        name_bind="_bindcore"
+    fi
+done
+
+if [ ! $output_dir ]; then
+    output_dir="`pwd`/output/"
+fi
+echo output_dir=$output_dir
+
+training_file_pattern=${training_file_pattern:-$data_path'/tfrecord/train*'}
+validation_file_pattern=${validation_file_pattern:-$data_path'/tfrecord/val*'}
+val_json_file=${val_json_file:-$data_path'/annotations/instances_val2017.json'}
+
+########## build params_override ##########
+
+unset params_override
+params_override=${params_override}backbone=$backbone,
+params_override=${params_override}checkpoint="'$backbone_ckpt_path'",
+params_override=${params_override}training_file_pattern="'$training_file_pattern'",
+params_override=${params_override}validation_file_pattern="'$validation_file_pattern'",
+params_override=${params_override}val_json_file="'$val_json_file'",
+params_override=${params_override}train_batch_size=$batch_size,
+params_override=${params_override}total_steps=$steps,
+params_override=${params_override}learning_rate_type=$learning_rate_type,
+params_override=${params_override}init_learning_rate=$learning_rate,
+params_override=${params_override}warmup_learning_rate=$warmup_learning_rate,
+params_override=${params_override}warmup_steps=$warmup_steps,
+params_override=${params_override}learning_rate_levels="'$learning_rate_levels'",
+params_override=${params_override}learning_rate_steps="'$learning_rate_steps'",
+params_override=${params_override}npu_precision_mode=$precision_mode,
+params_override=${params_override}npu_loss_scale_flag=$loss_scale_flag,
+params_override=${params_override}npu_loss_scale=$loss_scale_value,
+params_override=${params_override}npu_overflow_dump=$overflow_dump,
+
+echo [params_override] "$params_override"
+
+########## prepare environment ##########
+
+export RANK_SIZE=8
+export RANK_ID_START=0
+
+BASE_PATH=`cd $(dirname $0); pwd`/../FasterRcnn
+echo "BASE_PATH="$BASE_PATH
+
+export RANK_TABLE_FILE=$BASE_PATH/npu_config/8p.json
+
+rm -rf /root/ascend/log
+
+########## run ##########
+
+start_time=$(date +%s)
+
+pids=
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    echo
+    # msnpureport ships with the Ascend driver; raise the device-side log level to 'error' before launch
+    /usr/local/Ascend/driver/tools/msnpureport -d $RANK_ID -g error
+
+    TMP_PATH=$output_dir/$RANK_ID
+    mkdir -p $TMP_PATH
+    cd $TMP_PATH
+
+    rm -f configs
+    ln -s $BASE_PATH/configs configs
+
+    export RANK_ID
+    export DEVICE_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+    export DEVICE_INDEX=$RANK_ID
+
+    # bind each rank to its own 1/8 slice of the host cores
+    corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+    let a=RANK_ID*${corenum}/8
+    let b=RANK_ID+1
+    let c=b*${corenum}/8-1
+    bind_core="taskset -c $a-$c"
+
+    ${bind_core} python3 $BASE_PATH/mask_rcnn_main.py --mode=$exec_mode \
+        --eval_after_training=$eval_after_training \
+        --model_dir=$TMP_PATH/result \
+        --num_gpus=$RANK_SIZE \
+        --params_override="$params_override" \
+        "$@" 2>&1 | tee $TMP_PATH/train_${RANK_ID}.log &
+
+    pids[$RANK_ID-$RANK_ID_START]="$RANK_ID $!"
+    cd -
+done
+
+sleep 1
+echo "########## Waiting for pids: "${pids[*]}
+
+for pid in "${pids[@]}"; do
+    pid=($pid)
+    RANK_ID=${pid[0]}
+    pid=${pid[1]}
+
+    wait $pid
+    ret=$?
+    echo "******************** train finished ******************** $RANK_ID - $pid - ret : $ret"
+
+    ############################## E2E training duration ##############################
+    end_time=$(date +%s)
+    e2e_time=$(( $end_time - $start_time ))
+    echo "Final Training Duration sec : $e2e_time"
+
+    ############################## service logs ##############################
+    grep ERROR /root/ascend/log/plog/plog-${pid}_*.log > $output_dir/$RANK_ID/plog_err.log
+
+    log_file=$output_dir/$RANK_ID/train_${RANK_ID}.log
+
+    ############################## performance results ##############################
+    echo "-------------------- Final result --------------------"
+    # FPS calculation; modify according to the network
+    FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $log_file|awk 'END {print $2}'`
+    FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'*8}'`
+    echo "Final Performance images/sec : $FPS"
+
+    ############################## accuracy results ##############################
+    # accuracy calculation; modify according to the network
+    train_accuracy=`grep "Average Precision" $log_file | awk 'NR==1 {print $NF}'`
+    if [ $train_accuracy ]; then
+        echo "Final Training Accuracy mAP: $train_accuracy"
+    fi
+
+    ############################## performance monitoring ##############################
+
+    Network=FasterRcnn_resnet50_ID0010_for_TensorFlow
+
+    DeviceType=`uname -m`
+    CaseName=${Network}${name_bind}_${backbone}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+    ActualFPS=${FPS}
+    TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'`
+
+    # extract loss values into train_${CaseName}_loss.txt; modify according to the model
+    grep "INFO:tensorflow:loss" $log_file|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $output_dir/$RANK_ID/train_${CaseName}_loss.txt
+
+    # the aggregated case log is kept in the last rank's directory (7)
+    ActualLoss=`awk 'END {print}' $output_dir/7/train_${CaseName}_loss.txt`
+    echo "Network = ${Network}" > $output_dir/7/${CaseName}.log
+    echo "RankSize = ${RANK_SIZE}" >> $output_dir/7/${CaseName}.log
+    echo "BatchSize = ${batch_size}" >> $output_dir/7/${CaseName}.log
+    echo "DeviceType = ${DeviceType}" >> $output_dir/7/${CaseName}.log
+    echo "CaseName = ${CaseName}" >> $output_dir/7/${CaseName}.log
+    echo "ActualFPS = ${ActualFPS}" >> $output_dir/7/${CaseName}.log
+    echo "TrainingTime = ${TrainingTime}" >> $output_dir/7/${CaseName}.log
+    echo "ActualLoss = ${ActualLoss}" >> $output_dir/7/${CaseName}.log
+    echo "E2ETrainingTime = ${e2e_time}" >> $output_dir/7/${CaseName}.log
+    if [ $train_accuracy ]; then
+        echo "TrainAccuracy = ${train_accuracy}" >> $output_dir/7/${CaseName}.log
+    fi
+
+    # eval support is still under development; temporarily monitor the final loss as the accuracy result
+    echo "Final Training Accuracy loss: $ActualLoss"
+done
+
+echo "########## copying slog ##########"
+cp -r /root/ascend/log/ $output_dir/slog
+echo "########## DONE copying slog ##########" diff --git a/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..4c8193f82dc16974ac4f16282a8143264eadab15 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +export JOB_ID=10000 +export ENABLE_RUNTIME_V2=1 + +exec_mode='train' # or 'train_and_eval' +eval_after_training=False + +backbone='resnet50' +backbone_ckpt_path='/npu/traindata/resnet50_ckpt' +data_path='/npu/traindata/coco_official_2017' + +batch_size=2 +steps=1000 + +learning_rate_type='cosine' # or 'step' +learning_rate=0.003 +warmup_learning_rate=0.00025 +warmup_steps=16000 +learning_rate_levels='[0.0003, 0.00003]' +learning_rate_steps='[480000, 640000]' + +precision_mode='allow_mix_precision' +loss_scale_flag=0 +loss_scale_value=256 +overflow_dump=False + +########## params from command line ########## + +for arg in $* ; do + if [ ${arg:0:2} == '--' ]; then + arg=${arg:2} + pos=`expr index "$arg" =` + if [ $pos > 0 ]; then + var_name=${arg:0:$pos-1} + var_value=${arg:$pos} + eval $var_name=$var_value + fi + fi +done + +if [ ! $output_dir ]; then + output_dir="`pwd`/output/" +fi +echo output_dir=$output_dir + +training_file_pattern=${training_file_pattern:-$data_path'/tfrecord/train*'} +validation_file_pattern=${validation_file_pattern:-$data_path'/tfrecord/val*'} +val_json_file=${val_json_file:-$data_path'/annotations/instances_val2017.json'} + +########## build params_override ########## + +unset params_override +params_override=${params_override}backbone=$backbone, +params_override=${params_override}checkpoint="'$backbone_ckpt_path'", +params_override=${params_override}training_file_pattern="'$training_file_pattern'", +params_override=${params_override}validation_file_pattern="'$validation_file_pattern'", +params_override=${params_override}val_json_file="'$val_json_file'", +params_override=${params_override}train_batch_size=$batch_size, +params_override=${params_override}total_steps=$steps, +params_override=${params_override}learning_rate_type=$learning_rate_type, +params_override=${params_override}init_learning_rate=$learning_rate, +params_override=${params_override}warmup_learning_rate=$warmup_learning_rate, +params_override=${params_override}warmup_steps=$warmup_steps, +params_override=${params_override}learning_rate_levels="'$learning_rate_levels'", +params_override=${params_override}learning_rate_steps="'$learning_rate_steps'", +params_override=${params_override}npu_precision_mode=$precision_mode, +params_override=${params_override}npu_loss_scale_flag=$loss_scale_flag, +params_override=${params_override}npu_loss_scale=$loss_scale_value, +params_override=${params_override}npu_overflow_dump=$overflow_dump, + +echo [params_override] "$params_override" + +########## prepare environment ########## + +export RANK_SIZE=1 + +if [ ! 
$RANK_ID_START ]; then + if [ $ASCEND_DEVICE_ID ]; then + RANK_ID_START=$ASCEND_DEVICE_ID + elif [ $DEVICE_ID ]; then + RANK_ID_START=$DEVICE_ID + else + RANK_ID_START=0 + fi +fi +export RANK_ID_START +echo "RANK_ID_START="$RANK_ID_START + +BASE_PATH=`cd $(dirname $0); pwd`/../FasterRcnn +echo "BASE_PATH="$BASE_PATH + +########## run ########## + +start_time=$(date +%s) + +pids= +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + echo + /usr/local/Ascend/driver/tools/msnpureport -d $RANK_ID -g error + + TMP_PATH=$output_dir/$RANK_ID + mkdir -p $TMP_PATH + cd $TMP_PATH + + rm -f configs + ln -s $BASE_PATH/configs configs + + export RANK_ID + export DEVICE_ID=$RANK_ID + export ASCEND_DEVICE_ID=$RANK_ID + export DEVICE_INDEX=$RANK_ID + + python3 $BASE_PATH/mask_rcnn_main.py --mode=$exec_mode \ + --eval_after_training=$eval_after_training \ + --model_dir=$TMP_PATH/result \ + --num_gpus=$RANK_SIZE \ + --params_override="$params_override" \ + $@ 2>&1 | tee $TMP_PATH/train_${RANK_ID}.log & + + pids[$RANK_ID-$RANK_ID_START]="$RANK_ID $!" + cd - +done + +sleep 1 +echo "########## Waiting for pids: "${pids[*]} + +for pid in "${pids[@]}"; do + pid=($pid) + RANK_ID=${pid[0]} + pid=${pid[1]} + + wait $pid + ret=$? + echo "******************** train finished ******************** $RANK_ID - $pid - ret : $ret" + + ############################## E2E训练时长 ############################## + end_time=$(date +%s) + e2e_time=$(( $end_time - $start_time )) + echo "Final Training Duration sec : $e2e_time" + + ############################## 业务日志 ############################## + grep ERROR /root/ascend/log/plog/plog-${pid}_*.log > $output_dir/$RANK_ID/plog_err.log + + log_file=$output_dir/$RANK_ID/train_${RANK_ID}.log + + ############################## 性能结果处理 ############################## + echo "-------------------- Final result --------------------" + #性能FPS计算,需要根据网络修改 + FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $log_file|awk 'END {print $2}'` + FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'}'` + echo "Final Performance images/sec : $FPS" + + ############################## 精度结果处理 ############################## + #精度计算,需要根据网络修改 + train_accuracy=`grep "Average Precision" $log_file | awk 'NR==1 {print $NF}'` + if [ $train_accuracy ]; then + echo "Final Training Accuracy mAP: $train_accuracy" + fi + + ############################## 性能看护 ############################## + + Network=FasterRcnn_resnet50_ID0010_for_TensorFlow + + DeviceType=`uname -m` + CaseName=${Network}_${backbone}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + ActualFPS=${FPS} + TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + + # 提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 + grep "INFO:tensorflow:loss" $log_file|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $output_dir/$RANK_ID/train_${CaseName}_loss.txt + + ActualLoss=`awk 'END {print}' $output_dir/$RANK_ID/train_${CaseName}_loss.txt` + echo "Network = ${Network}" > $output_dir/$RANK_ID/${CaseName}.log + echo "RankSize = ${RANK_SIZE}" >> $output_dir/$RANK_ID/${CaseName}.log + echo "BatchSize = ${batch_size}" >> $output_dir/$RANK_ID/${CaseName}.log + echo "DeviceType = ${DeviceType}" >> $output_dir/$RANK_ID/${CaseName}.log + echo "CaseName = ${CaseName}" >> $output_dir/$RANK_ID/${CaseName}.log + echo "ActualFPS = ${ActualFPS}" >> $output_dir/$RANK_ID/${CaseName}.log + echo "TrainingTime = ${TrainingTime}" >> $output_dir/$RANK_ID/${CaseName}.log + echo "ActualLoss = ${ActualLoss}" >> $output_dir/$RANK_ID/${CaseName}.log + echo 
"E2ETrainingTime = ${e2e_time}" >> $output_dir/$RANK_ID/${CaseName}.log + if [ $train_accuracy ]; then + echo "TrainAccuracy = ${train_accuracy}" >> $output_dir/$RANK_ID/${CaseName}.log + fi + + #eval版本需求开发中,精度结果临时看护最终的loss + echo "Final Training Accuracy loss: $ActualLoss" +done \ No newline at end of file diff --git a/TensorFlow/built-in/cv/detection/MaskRcnn_ID0011_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/MaskRcnn_ID0011_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..3f0ce9cc497e67ba3663906d0c8efcae721d8d11 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/MaskRcnn_ID0011_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,224 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' + +#集合通信参数,不需要修改 +#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 +export JOB_ID=9999001 +export RANK_SIZE=1 +export ENABLE_RUNTIME_V2=1 +#export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数 需要模型审视修改 +#网络名称,同目录名称 +Network="MaskRcnn_ID0011_for_TensorFlow" + +batch_size=2 +total_steps=20 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_8p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is 0 + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ + cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" + fi 
+done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#autotune时,先开启autotune执行单P训练,不需要修改 +if [[ $autotune == True ]]; then + train_full_1p.sh --autotune=$autotune --data_path=$data_path + wait + autotune=False +fi + +#修改save ckpt,print +sed -i "s|save_checkpoints_steps=90000|save_checkpoints_steps=${total_steps}|g" $cur_path/../distributed_executer.py +sed -i "s|log_step_count_steps=100|log_step_count_steps=1|g" $cur_path/../distributed_executer.py + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $RANK_ID" + export RANK_ID=$RANK_ID + + # 自行添加环境变量 + + export DEVICE_ID=$RANK_ID + DEVICE_INDEX=$DEVICE_ID + export DEVICE_INDEX=${DEVICE_INDEX} + export FUSION_TENSOR_SIZE=1000000000 + # for producible results + export TF_DETERMINISTIC_OPS=1 + export TF_CUDNN_DETERMINISM=1 + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` + let a=RANK_ID*${corenum}/8 + let b=RANK_ID+1 + let c=b*${corenum}/8-1 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + ${bind_core} python3 mask_rcnn_main.py --mode=train \ + --rank=$RANK_ID \ + --total_steps=$total_steps \ + --Data_path=$data_path \ + --train_batch_size=2 \ + --training_file_pattern=${data_path}/train* \ + --validation_file_pattern=${data_path}/val* \ + --val_json_file=${data_path}/instances_val2017.json \ + --eval_batch_size=2 \ + --model_dir=result_npu\ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + #--data_dump_flag=${data_dump_flag} \ + #--data_dump_step=${data_dump_step} \ + #--data_dump_path=${data_dump_path} \ + #--profiling=${profiling} \ + #--profiling_dump_path=${profiling_dump_path} \ + #--autotune=${autotune} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数回改 +sed -i "s|save_checkpoints_steps=${total_steps}|save_checkpoints_steps=90000|g" $cur_path/../distributed_executer.py +sed -i "s|log_step_count_steps=1|log_step_count_steps=100|g" $cur_path/../distributed_executer.py + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPSper=`grep "] global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $6}'|tail -n +3|awk '{sum+=$1} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%f\n",'${batch_size}'*'${RANK_SIZE}'*'${FPSper}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "Average Precision" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|head -1|awk '{print $13}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` 
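+# Throughput math used by this section (a sketch with assumed numbers, not measured values):
+# FPSper is the mean of the logged "global_step/sec" samples, with the first two
+# dropped as warm-up (tail -n +3). With batch_size=2, RANK_SIZE=1 and an average of,
+# say, 5 steps/sec, FPS = 2 * 1 * 5 = 10 images/sec, and TrainingTime below is
+# 1 / FPSper = 0.20 seconds per step.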
+CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_RT2_perf'
+
+## collect performance data
+# throughput; no need to modify
+ActualFPS=${FPS}
+# per-iteration training time; no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",1/'${FPSper}'}'`
+
+# extract loss values from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "] loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $7}'|cut -d , -f 1 > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# loss of the last iteration; no need to modify
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# print key info into ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/built-in/cv/detection/MaskRcnn_ID0011_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/detection/MaskRcnn_ID0011_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b6fcf9836eb37f49bb77e7d92583602e6db6d4cb
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/MaskRcnn_ID0011_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,228 @@
+#!/bin/bash
+
+# current path; no need to modify
+cur_path=`pwd`
+
+export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning'
+
+# collective communication parameters; no need to modify
+# make sure the rank table file rank_table_8p.json is placed in the configs directory at the same level as test
+export JOB_ID=9999001
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
+export ENABLE_RUNTIME_V2=1
+
+RANK_ID_START=0
+
+
+# dataset path; keep empty, no need to modify
+data_path=""
+
+# set the default log level; no need to modify
+export ASCEND_GLOBAL_LOG_LEVEL=3
+
+# basic parameters; review and modify per model
+# network name, same as the directory name
+Network="MaskRcnn_ID0011_for_TensorFlow"
+
+batch_size=2
+total_steps=20
+
+# TF2.X only; no need to modify
+#export NPU_LOOP_SIZE=${train_steps}
+
+# debug parameters; precision_mode needs per-model review
+precision_mode="allow_mix_precision"
+# maintenance parameters; no need to modify the following
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# help message; no need to modify
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_8p.sh "
+    echo " "
+    echo "parameter explanation:
+    --precision_mode           precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump                whether to enable overflow detection, default is False
+    --data_dump_flag           data dump flag, default is False
+    --data_dump_step           data dump step, default is 10
+    --profiling                whether to enable profiling for performance debugging, default is False
+    --autotune                 whether to enable autotune, default is False
+    --data_path                source data of training
+    -h/--help                  show help message
+    "
+    exit 1
+fi
+
+# parameter check; no need to modify
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --autotune* ]];then
+        autotune=`echo ${para#*=}`
+        mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+        mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+        autotune_dump_path=${cur_path}/output/autotune_dump
+        mkdir -p ${autotune_dump_path}/GA
+        mkdir -p ${autotune_dump_path}/RL
+        cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+        cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --bind_core* ]]; then
+        bind_core=`echo ${para#*=}`
+        name_bind="_bindcore"
+    fi
+done
+
+# verify that data_path was passed in; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+# when autotune is enabled, first run single-P training with autotune on; no need to modify
+if [[ $autotune == True ]]; then
+    train_full_1p.sh --autotune=$autotune --data_path=$data_path
+    wait
+    autotune=False
+fi
+
+# adjust save-ckpt and print frequency
+sed -i "s|save_checkpoints_steps=90000|save_checkpoints_steps=${total_steps}|g" $cur_path/../distributed_executer.py
+sed -i "s|log_step_count_steps=100|log_step_count_steps=1|g" $cur_path/../distributed_executer.py
+
+# training start time; no need to modify
+start_time=$(date +%s)
+
+# enter the training script directory; review per model
+cd $cur_path/../
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # set environment variables; no need to modify
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+    ASCEND_DEVICE_ID=$RANK_ID
+
+    # add your own environment variables here
+
+    export DEVICE_ID=$RANK_ID
+    DEVICE_INDEX=$DEVICE_ID
+    export DEVICE_INDEX=${DEVICE_INDEX}
+    export FUSION_TENSOR_SIZE=1000000000
+    # for reproducible results
+    export TF_DETERMINISTIC_OPS=1
+    export TF_CUDNN_DETERMINISM=1
+
+    # create the DeviceID output directory; no need to modify
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+
+
+    # run the training script; the arguments below do not need changes, others require per-model review
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+    let a=RANK_ID*${corenum}/8
+    let b=RANK_ID+1
+    let c=b*${corenum}/8-1
+    if [ "x${bind_core}" != x ];then
+        bind_core="taskset -c $a-$c"
+    fi
+    ${bind_core} python3 mask_rcnn_rt_main.py --mode=train \
+        --rank=$RANK_ID \
+        --total_steps=$total_steps \
+        --Data_path=$data_path \
+        --train_batch_size=2 \
+        --training_file_pattern=${data_path}/train* \
+        --validation_file_pattern=${data_path}/val* \
+        --val_json_file=${data_path}/instances_val2017.json \
+        --eval_batch_size=2 \
+        --model_dir=result_npu \
+        --over_dump=${over_dump} \
+        --over_dump_path=${over_dump_path} \
+        > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+        #--data_dump_flag=${data_dump_flag} \
+        #--data_dump_step=${data_dump_step} \
+        #--data_dump_path=${data_dump_path} \
+        #--profiling=${profiling} \
+        #--profiling_dump_path=${profiling_dump_path} \
+        #--autotune=${autotune} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# training end time; no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# revert the parameter changes
+sed -i "s|save_checkpoints_steps=${total_steps}|save_checkpoints_steps=90000|g" $cur_path/../distributed_executer.py
+sed -i "s|log_step_count_steps=1|log_step_count_steps=100|g" $cur_path/../distributed_executer.py
+
+# print results; no need to modify
+echo "------------------ Final result ------------------"
+# output performance FPS; review per model
+#FPSper=`grep "] global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'`
+FPSper=`grep "] global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $6}'|tail -n 10|awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%f\n",'${batch_size}'*'${RANK_SIZE}'*'${FPSper}'}'`
+# print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# output training accuracy; review per model
+train_accuracy=`grep "Average Precision" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|head -1|awk '{print $13}'`
+# print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# stability/accuracy monitoring summary
+# training case info; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_RT2_perf'
+
+## collect performance data
+# throughput; no need to modify
+ActualFPS=${FPS}
+# per-iteration training time; no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",1/'${FPSper}'}'`
+
+# extract loss values from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "] loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $7}'|cut -d , -f 1 > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# loss of the last iteration; no need to modify
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# print key info into ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/built-in/cv/image_classification/Face-ResNet50_ID1372_for_TensorFlow/TrainResNet_rt.py b/TensorFlow/built-in/cv/image_classification/Face-ResNet50_ID1372_for_TensorFlow/TrainResNet_rt.py
new file mode 100644
index 0000000000000000000000000000000000000000..976408b01f27d5afffe97fa7fd9efd6e58721398
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/Face-ResNet50_ID1372_for_TensorFlow/TrainResNet_rt.py
@@ -0,0 +1,264 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright 2019 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Used to train ResNet-50 +Author: Kaihua Tang +""" +#npu modify begin +from npu_bridge.npu_init import * +#npu modify end +import argparse +import math +import time +import tensorflow as tf +import ResNet as resnet +import numpy as np +import scipy.io as scio +from scipy import misc +from utils import * + +def parse_args(): + desc = "MAIN" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--label_path', type=str, default='./label/label_1200.npy', help='Path of Label.npy') + parser.add_argument('--image_name_path', type=str, default='./label/name_1200.npy', help='Path of image file names') + ############################add train_data_path################################## + parser.add_argument('--train_data_path', type=str, default='./train_data/1200_data.npy', help='Path of train data') + ############################add train_data_path################################## + parser.add_argument('--parentPath', type=str, default='./CACD2000_Crop/', help='image path') + parser.add_argument('--epochs', type=int, default=100, help='NUM_EPOCHS') + return parser.parse_args() +args = parse_args() + +# image size +WIDTH = 224 +HEIGHT = 224 +CHANNELS = 3 +#"Mini batch size" +MINI_BATCH_SIZE = 32 +#"Path of Label.npy" +label_path = args.label_path +#"Path of image file names" +image_name_path = args.image_name_path +# image path +parentPath = args.parentPath +# train data Path: n * 224 * 224 * 3 numpy matrix +data_path = args.train_data_path + +def dataset_generator(image, label): + for i in range(image.shape[0]): + yield image[i], label[i]-1 + +def make_dataset(allImageData, trainLabelList, batch_size, epoch): + ds = tf.data.Dataset.from_generator(lambda: dataset_generator(allImageData, trainLabelList), + (tf.float32, tf.int32), + (tf.TensorShape([WIDTH, HEIGHT, CHANNELS]), tf.TensorShape([])) + ) + ds = 
ds.shuffle(buffer_size=100971) + ds = ds.batch(batch_size) + ds = ds.repeat(epoch+1) + ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) + return ds + +def Train(epochs=100): + """ + HyperParameters of the Net + model_path: path of pretrained model, set None if there is no such a model. + LABELSNUM: Number of output labels + learning_rate_orig : original learning rate + NUM_EPOCHS: number of epochs + save_frequency: frequency of saving model (number of epoches) + """ + model_path = None + LABELSNUM = 1200 + learning_rate_orig = 1e-06 + NUM_EPOCHS = epochs + save_frequency = 2 + """ + Classification Layer + final_layer_type: softmax or sigmoid + is_sparse: when final layer is softmax, is it sparse + """ + final_layer_type ="softmax" + is_sparse = True + """ + Tensorboard Setting + tensorboard_on: Turn on Tensorboard or not + TensorBoard_refresh: refresh rate (number of batches) + monitoring_rate: Print output rate + """ + tensorboard_on = False + TensorBoard_refresh = 50 + monitoring_rate = 50 + + #Lists that store name of image and its label + trainNameList = np.load(image_name_path) + trainLabelList = np.load(label_path) + if(data_path is None): + allImageData = load_all_image(trainNameList, HEIGHT, WIDTH, CHANNELS, parentPath, create_npy=True) + else: + allImageData = np.load(data_path) + + #num of total training image + num_train_image = trainLabelList.shape[0] + + #############npu modify start############### + global_config = tf.ConfigProto() + custom_op = global_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + #custom_op.parameter_map["dynamic_input"].b = 1 + #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + global_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + global_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + #with tf.Session() as sess: + with tf.Session(config=global_config) as sess: + train_dataset = make_dataset(allImageData, trainLabelList, MINI_BATCH_SIZE, NUM_EPOCHS) + iterator = train_dataset.make_initializable_iterator() + next_element = iterator.get_next() + #############npu modify end############### + images = tf.placeholder(tf.float32, shape = [None, WIDTH, HEIGHT, CHANNELS]) + if(is_sparse): + labels = tf.placeholder(tf.int64, shape = [None]) + else: + labels = tf.placeholder(tf.float32, shape = [None, LABELSNUM]) + + # build resnet model + resnet_model = resnet.ResNet(ResNet_npy_path = model_path) + resnet_model.build(images, LABELSNUM, final_layer_type) + # number of batches per epoch + # num_minibatches = int(num_train_image / MINI_BATCH_SIZE) + num_minibatches = math.ceil(num_train_image / MINI_BATCH_SIZE) + + # cost function + # learning_rate = learning_rate_orig + with tf.name_scope("cost"): + if(final_layer_type == "sigmoid"): + print("Using weighted sigmoid loss") + loss = tf.nn.weighted_cross_entropy_with_logits(logits = resnet_model.fc1, targets = labels, pos_weight = 5.0) + elif(final_layer_type == "softmax" and is_sparse): + print("Using sparse softmax loss") + loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = resnet_model.fc1, labels = labels) + elif(final_layer_type == "softmax" and (not is_sparse)): + print("Using softmax loss") + loss = tf.nn.softmax_cross_entropy_with_logits(logits = resnet_model.fc1, labels = labels) + cost = tf.reduce_sum(loss) + with tf.name_scope("train"): + global_steps = tf.Variable(0, name='global_step', trainable=False) + learning_rate = 
tf.train.exponential_decay(learning_rate_orig, global_steps, num_minibatches * 40, 0.1, staircase = True) + #train = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) + #train = tf.train.AdamOptimizer(learning_rate).minimize(cost) + #npu modify begin + train = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost, global_step=global_steps) + # train = npu_tf_optimizer(tf.train.MomentumOptimizer(learning_rate, 0.9)).minimize(cost, global_step=global_steps) + #npu modify end + + sess.run(tf.global_variables_initializer()) + sess.run(iterator.initializer) + print(resnet_model.get_var_count()) + + if(tensorboard_on): + merged_summary = tf.summary.merge_all() + writer = tf.summary.FileWriter("./TensorBoard/Result") + writer.add_graph(sess.graph) + # used in tensorboard to count record times + summary_times = 0 + + for epoch in range(NUM_EPOCHS): + print("Start Epoch %i" % (epoch + 1)) + start_time = time.time() + minibatch_cost = 0.0 + # count the number of batch + # batch_index = 0 + # get index for all mini batches + # minibatches = random_mini_batches(num_train_image, MINI_BATCH_SIZE, random = True) + + # for minibatch in minibatches: + for batch_index in range(num_minibatches): + # get train examples from each mini batch + # (minibatch_X, minibatch_Y) = get_minibatch(minibatch, trainLabelList, HEIGHT, WIDTH, CHANNELS, LABELSNUM, allImageData, is_sparse) + (minibatch_X, minibatch_Y) = sess.run(next_element) + # change learning rate + print('======================',(sess.run(global_steps))) + #sess.run(global_steps.assign(epoch * num_minibatches + batch_index)) + + # record examples to monitoring the training process + if((batch_index % monitoring_rate == 0)): + resnet_model.set_is_training(False) + fc1, prob = sess.run([resnet_model.fc1, resnet_model.prob], feed_dict={images: minibatch_X}) + countMax = np.sum(np.argmax(prob,1) == minibatch_Y) + print("Epoch %i Batch %i Before Optimization Count %i" %(epoch + 1,batch_index, countMax)) + + # Training and calculating cost + resnet_model.set_is_training(True) + temp_cost, _ = sess.run([cost, train], feed_dict={images: minibatch_X, labels: minibatch_Y}) + minibatch_cost += np.sum(temp_cost) + + # tensorboard + if(tensorboard_on) and (batch_index % TensorBoard_refresh == 0): + s = sess.run(merged_summary, feed_dict={images: minibatch_X, labels: minibatch_Y}) + writer.add_summary(s, summary_times) + summary_times = summary_times + 1 + # record cost in tensorflow + tf.summary.scalar('cost', temp_cost) + + # record examples to monitoring the training process + if((batch_index % monitoring_rate == 0)): + resnet_model.set_is_training(False) + fc1, prob = sess.run([resnet_model.fc1, resnet_model.prob], feed_dict={images: minibatch_X}) + countMax = np.sum(np.argmax(prob,1) == minibatch_Y) + print("Epoch %i Batch %i After Optimization Count %i" %(epoch + 1,batch_index, countMax)) + # Temp Cost & learning rate + print("Epoch %i Batch %i Batch Cost %f Learning_rate %f" %(epoch + 1,batch_index, np.sum(temp_cost), sess.run(learning_rate) * 1e10)) + + # batch_index += 1 + + end_time = time.time() + print("steps_per_s: ", str(num_train_image/(end_time - start_time)/MINI_BATCH_SIZE)) + # print total cost of this epoch + print("End Epoch %i" % (epoch + 1)) + print("Total cost of Epoch %f" % minibatch_cost) + + # save model + if((epoch + 1) % save_frequency == 0): + resnet_model.save_npy(sess, './model/temp-model%i.npy' % (epoch + 1)) + +if __name__ == '__main__': + Train(args.epochs) diff --git 
a/TensorFlow/built-in/cv/image_classification/Face-ResNet50_ID1372_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/Face-ResNet50_ID1372_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..31cd52203451e8d1919c8e2e52e30db591839255
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/Face-ResNet50_ID1372_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+cur_path=`pwd`/../
+# print logs to the screen for failed cases
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+export ENABLE_RUNTIME_V2=1
+
+# basic parameters; review and modify per model
+# Batch Size
+batch_size=32
+# network name, same as the directory name
+Network="Face-ResNet50_ID1372_for_TensorFlow"
+# number of devices; defaults to 1 for a single card
+RANK_SIZE=1
+# training epochs (optional)
+train_epochs=2
+# training steps
+train_steps=
+# learning rate
+learning_rate=
+
+# parameter configuration
+data_path=""
+#work_dir="$cur_path/estimator_working_dir"
+#export_path="$cur_path/outputs/models/000001-first_generation"
+
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_performance_1p.sh"
+    exit 1
+fi
+
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+########## run training ##########
+cd $cur_path
+if [ -d $cur_path/test/output ];then
+    rm -rf $cur_path/test/output/*
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+fi
+wait
+
+#sed -i "s|./CACD2000_Crop/|${data_path}/|g" TrainResNet.py
+#sed -i "s|./label|${data_path}/label|g" TrainResNet.py
+
+start=$(date +%s)
+nohup python3 TrainResNet_rt.py \
+    --label_path ${data_path}/label/label_1200.npy \
+    --image_name_path ${data_path}/label/name_1200.npy \
+    --train_data_path ${data_path}/train_data/1200_data.npy \
+    --parentPath ${data_path}/CACD2000_Crop/ \
+    --epochs 2 > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+wait
+end=$(date +%s)
+e2e_time=$(( $end - $start ))
+
+# print results; no need to modify
+echo "------------------ Final result ------------------"
+
+
+# output performance FPS; review per model
+steps_per_s=`grep steps_per_s ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END{print $2}'`
+FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${steps_per_s}'}'`
+# print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+
+# output training accuracy; review per model
+train_accuracy="None"
+# print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# performance monitoring summary
+# training case info; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_RT2_perf'
+
+
+## collect performance data; no need to modify
+# throughput
+ActualFPS=${FPS}
+
+
+# per-iteration training time
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'`
+
+
+# extract loss values from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep Cost $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $7}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+
+# loss of the last iteration (read-only)
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# print key info into ${CaseName}.log (read-only)
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net_rt.py b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net_rt.py
new file mode 100644
index 0000000000000000000000000000000000000000..90b8f4228b3c89b46a0ce3c3eb21ec24324b4500
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net_rt.py
@@ -0,0 +1,850 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Copyright 2018 Google LLC
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+The policy and value networks share a majority of their architecture.
+This helps the intermediate layers extract concepts that are relevant to both
+move prediction and score estimation.
+""" +from npu_bridge.npu_init import * + +from absl import flags +import functools +import json +import logging +import os.path +import struct +import tempfile +import time +import numpy as np +import random + +import tensorflow as tf +from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver +from tensorflow.contrib import quantize as contrib_quantize +from tensorflow.contrib import summary as contrib_summary +from tensorflow.contrib import tpu as contrib_tpu +from tensorflow.contrib.tpu.python.tpu import tpu_config as contrib_tpu_python_tpu_tpu_config +from tensorflow.contrib.tpu.python.tpu import tpu_estimator as contrib_tpu_python_tpu_tpu_estimator +from tensorflow.contrib.tpu.python.tpu import tpu_optimizer as contrib_tpu_python_tpu_tpu_optimizer + +import features as features_lib +import go +import symmetries +import minigo_model + + +flags.DEFINE_integer('train_batch_size', 256, + 'Batch size to use for train/eval evaluation. For GPU ' + 'this is batch size as expected. If \"use_tpu\" is set,' + 'final batch size will be = train_batch_size * num_tpu_cores') + +flags.DEFINE_integer('conv_width', 256 if go.N == 19 else 32, + 'The width of each conv layer in the shared trunk.') + +flags.DEFINE_integer('policy_conv_width', 2, + 'The width of the policy conv layer.') + +flags.DEFINE_integer('value_conv_width', 1, + 'The width of the value conv layer.') + +flags.DEFINE_integer('fc_width', 256 if go.N == 19 else 64, + 'The width of the fully connected layer in value head.') + +flags.DEFINE_integer('trunk_layers', go.N, + 'The number of resnet layers in the shared trunk.') + +flags.DEFINE_multi_integer('lr_boundaries', [400000, 600000], + 'The number of steps at which the learning rate will decay') + +flags.DEFINE_multi_float('lr_rates', [0.01, 0.001, 0.0001], + 'The different learning rates') + +flags.DEFINE_integer('training_seed', 0, + 'Random seed to use for training and validation') + +flags.register_multi_flags_validator( + ['lr_boundaries', 'lr_rates'], + lambda flags: len(flags['lr_boundaries']) == len(flags['lr_rates']) - 1, + 'Number of learning rates must be exactly one greater than the number of boundaries') + +flags.DEFINE_float('l2_strength', 1e-4, + 'The L2 regularization parameter applied to weights.') + +flags.DEFINE_float('value_cost_weight', 1.0, + 'Scalar for value_cost, AGZ paper suggests 1/100 for ' + 'supervised learning') + +flags.DEFINE_float('sgd_momentum', 0.9, + 'Momentum parameter for learning rate.') + +flags.DEFINE_string('work_dir', None, + 'The Estimator working directory. Used to dump: ' + 'checkpoints, tensorboard logs, etc..') + +flags.DEFINE_bool('use_tpu', False, 'Whether to use TPU for training.') + +flags.DEFINE_string( + 'tpu_name', None, + 'The Cloud TPU to use for training. This should be either the name used' + 'when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.') + +flags.DEFINE_integer( + 'num_tpu_cores', default=8, + help=('Number of TPU cores. For a single TPU device, this is 8 because each' + ' TPU has 4 chips each with 2 cores.')) + +flags.DEFINE_string('gpu_device_list', None, + 'Comma-separated list of GPU device IDs to use.') + +flags.DEFINE_bool('quantize', False, + 'Whether create a quantized model. 
When loading a model for ' + 'inference, this must match how the model was trained.') + +flags.DEFINE_integer('quant_delay', 700 * 1024, + 'Number of training steps after which weights and ' + 'activations are quantized.') + +flags.DEFINE_integer( + 'iterations_per_loop', 128, + help=('Number of steps to run on TPU before outfeeding metrics to the CPU.' + ' If the number of iterations in the loop would exceed the number of' + ' train steps, the loop will exit before reaching' + ' --iterations_per_loop. The larger this value is, the higher the' + ' utilization on the TPU.')) + +flags.DEFINE_integer( + 'summary_steps', default=256, + help='Number of steps between logging summary scalars.') + +flags.DEFINE_integer( + 'keep_checkpoint_max', default=5, help='Number of checkpoints to keep.') + +flags.DEFINE_bool( + 'use_random_symmetry', True, + help='If true random symmetries be used when doing inference.') + +flags.DEFINE_bool( + 'use_SE', False, + help='Use Squeeze and Excitation.') + +flags.DEFINE_bool( + 'use_SE_bias', False, + help='Use Squeeze and Excitation with bias.') + +flags.DEFINE_integer( + 'SE_ratio', 2, + help='Squeeze and Excitation ratio.') + +flags.DEFINE_bool( + 'use_swish', False, + help=('Use Swish activation function inplace of ReLu. ' + 'https://arxiv.org/pdf/1710.05941.pdf')) + +flags.DEFINE_bool( + 'bool_features', False, + help='Use bool input features instead of float') + +flags.DEFINE_string( + 'input_features', 'agz', + help='Type of input features: "agz" or "mlperf07"') + +flags.DEFINE_string( + 'input_layout', 'nhwc', + help='Layout of input features: "nhwc" or "nchw"') + +flags.DEFINE_string( + 'dynamic_input', '1', + help='--dynamic_input=1 Use fuzzy compilation. --dynamic_input=lazy_recompile Compile using lazy static graph') + +# TODO(seth): Verify if this is still required. 
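+# The validator below ties the two TPU flags together: metrics are only
+# outfed to the CPU at loop boundaries, so summary_steps must be a whole
+# multiple of iterations_per_loop when --use_tpu is set. For example (assumed
+# values), iterations_per_loop=128 with summary_steps=256 passes, while
+# summary_steps=200 would be rejected at startup.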
+flags.register_multi_flags_validator( + ['use_tpu', 'iterations_per_loop', 'summary_steps'], + lambda flags: (not flags['use_tpu'] or + flags['summary_steps'] % flags['iterations_per_loop'] == 0), + 'If use_tpu, summary_steps must be a multiple of iterations_per_loop') + +FLAGS = flags.FLAGS + + +class DualNetwork(): + def __init__(self, save_file): + ############################ set dynamic_input = True start########################################### + #set dynamic_input = True + global_config = tf.ConfigProto() + custom_op = global_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + #custom_op.parameter_map["dynamic_input"].b = True + print('========= DualNetwork DYNAMIC INPUT = %s =========' % FLAGS.dynamic_input) + #if FLAGS.dynamic_input == "lazy_recompile": + # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + #if FLAGS.dynamic_input == "1": + # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("dynamic_execute") + #else: + # print("Enter correct compilation parameters.") + global_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + global_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + + self.save_file = save_file + self.inference_input = None + self.inference_output = None + config = npu_config_proto(config_proto=global_config) + ############################ set dynamic_input = True end########################################### + + config.gpu_options.allow_growth = True + if FLAGS.gpu_device_list is not None: + config.gpu_options.visible_device_list = FLAGS.gpu_device_list + self.sess = tf.Session(config=npu_config_proto(config_proto=config), graph=tf.Graph()) + self.initialize_graph() + + def initialize_graph(self): + with self.sess.graph.as_default(): + features, labels = get_inference_input() + params = FLAGS.flag_values_dict() + logging.info('TPU inference is supported on C++ only. ' + 'DualNetwork will ignore use_tpu=True') + params['use_tpu'] = False + estimator_spec = model_fn(features, labels, + tf.estimator.ModeKeys.PREDICT, + params=params) + self.inference_input = features + self.inference_output = estimator_spec.predictions + if self.save_file is not None: + self.initialize_weights(self.save_file) + else: + self.sess.run(tf.global_variables_initializer()) + + def initialize_weights(self, save_file): + """Initialize the weights from the given save_file. + Assumes that the graph has been constructed, and the + save_file contains weights that match the graph. 
Used + to set the weights to a different version of the player + without redifining the entire graph.""" + tf.train.Saver().restore(self.sess, save_file) + + def run(self, position): + probs, values = self.run_many([position]) + return probs[0], values[0] + + def run_many(self, positions): + f = get_features() + processed = [features_lib.extract_features(p, f) for p in positions] + if FLAGS.use_random_symmetry: + syms_used, processed = symmetries.randomize_symmetries_feat( + processed) + outputs = self.sess.run(self.inference_output, + feed_dict={self.inference_input: processed}) + probabilities, value = outputs['policy_output'], outputs['value_output'] + if FLAGS.use_random_symmetry: + probabilities = symmetries.invert_symmetries_pi( + syms_used, probabilities) + return probabilities, value + + +def get_features_planes(): + if FLAGS.input_features == 'agz': + return features_lib.AGZ_FEATURES_PLANES + elif FLAGS.input_features == 'mlperf07': + return features_lib.MLPERF07_FEATURES_PLANES + else: + raise ValueError('unrecognized input features "%s"' % + FLAGS.input_features) + + +def get_features(): + if FLAGS.input_features == 'agz': + return features_lib.AGZ_FEATURES + elif FLAGS.input_features == 'mlperf07': + return features_lib.MLPERF07_FEATURES + else: + raise ValueError('unrecognized input features "%s"' % + FLAGS.input_features) + + +def get_inference_input(): + """Set up placeholders for input features/labels. + + Returns the feature, output tensors that get passed into model_fn.""" + feature_type = tf.bool if FLAGS.bool_features else tf.float32 + if FLAGS.input_layout == 'nhwc': + feature_shape = [None, go.N, go.N, get_features_planes()] + elif FLAGS.input_layout == 'nchw': + feature_shape = [None, get_features_planes(), go.N, go.N] + else: + raise ValueError('invalid input_layout "%s"' % FLAGS.input_layout) + return (tf.placeholder(feature_type, feature_shape, name='pos_tensor'), + {'pi_tensor': tf.placeholder(tf.float32, [None, go.N * go.N + 1]), + 'value_tensor': tf.placeholder(tf.float32, [None])}) + + +def model_fn(features, labels, mode, params): + """ + Create the model for estimator api + + Args: + features: if input_layout == 'nhwc', a tensor with shape: + [BATCH_SIZE, go.N, go.N, get_features_planes()] + else, a tensor with shape: + [BATCH_SIZE, get_features_planes(), go.N, go.N] + labels: dict from string to tensor with shape + 'pi_tensor': [BATCH_SIZE, go.N * go.N + 1] + 'value_tensor': [BATCH_SIZE] + mode: a tf.estimator.ModeKeys (batchnorm params update for TRAIN only) + params: A dictionary (Typically derived from the FLAGS object.) 
+ Returns: tf.estimator.EstimatorSpec with props + mode: same as mode arg + predictions: dict of tensors + 'policy': [BATCH_SIZE, go.N * go.N + 1] + 'value': [BATCH_SIZE] + loss: a single value tensor + train_op: train op + eval_metric_ops + return dict of tensors + logits: [BATCH_SIZE, go.N * go.N + 1] + """ + + policy_output, value_output, logits = model_inference_fn( + features, mode == tf.estimator.ModeKeys.TRAIN, params) + + # train ops + policy_cost = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits_v2( + logits=logits, labels=tf.stop_gradient(labels['pi_tensor']))) + + value_cost = params['value_cost_weight'] * tf.reduce_mean( + tf.square(value_output - labels['value_tensor'])) + + reg_vars = [v for v in tf.trainable_variables() + if 'bias' not in v.name and 'beta' not in v.name] + l2_cost = params['l2_strength'] * \ + tf.add_n([tf.nn.l2_loss(v) for v in reg_vars]) + + combined_cost = policy_cost + value_cost + l2_cost + + global_step = tf.train.get_or_create_global_step() + learning_rate = tf.train.piecewise_constant( + global_step, params['lr_boundaries'], params['lr_rates']) + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + + # Insert quantization ops if requested + if params['quantize']: + if mode == tf.estimator.ModeKeys.TRAIN: + contrib_quantize.create_training_graph( + quant_delay=params['quant_delay']) + else: + contrib_quantize.create_eval_graph() + + ######################################## NPU_8p start ##################################### + #optimizer = npu_tf_optimizer(tf.train.MomentumOptimizer( + #learning_rate, params['sgd_momentum'])) + optimizer = NPUDistributedOptimizer(tf.train.MomentumOptimizer( + learning_rate, params['sgd_momentum'])) + ######################################## NPU_8p end ##################################### + + if params['use_tpu']: + optimizer = contrib_tpu_python_tpu_tpu_optimizer.CrossShardOptimizer( + optimizer) + with tf.control_dependencies(update_ops): + train_op = optimizer.minimize(combined_cost, global_step=global_step) + + # Computations to be executed on CPU, outside of the main TPU queues. + def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, + value_tensor, policy_cost, value_cost, + l2_cost, combined_cost, step, + est_mode=tf.estimator.ModeKeys.TRAIN): + policy_entropy = -tf.reduce_mean(tf.reduce_sum( + policy_output * tf.log(policy_output), axis=1)) + # pi_tensor is one_hot when generated from sgfs (for supervised learning) + # and soft-max when using self-play records. argmax normalizes the two. 
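+        # Added hedged illustration (not upstream Minigo code): both label
+        # encodings produce the same argmax target, e.g. a one-hot label
+        # [0., 1., 0.] and a soft-max label [0.2, 0.7, 0.1] both select
+        # index 1, so the top-k accuracy metrics below treat them alike.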
+ policy_target_top_1 = tf.argmax(pi_tensor, axis=1) + + policy_output_in_top1 = tf.to_float( + tf.nn.in_top_k(policy_output, policy_target_top_1, k=1)) + policy_output_in_top3 = tf.to_float( + tf.nn.in_top_k(policy_output, policy_target_top_1, k=3)) + + policy_top_1_confidence = tf.reduce_max(policy_output, axis=1) + policy_target_top_1_confidence = tf.boolean_mask( + policy_output, + tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1])) + + value_cost_normalized = value_cost / params['value_cost_weight'] + avg_value_observed = tf.reduce_mean(value_tensor) + + with tf.variable_scope('metrics'): + metric_ops = { + 'policy_cost': tf.metrics.mean(policy_cost), + 'value_cost': tf.metrics.mean(value_cost), + 'value_cost_normalized': tf.metrics.mean(value_cost_normalized), + 'l2_cost': tf.metrics.mean(l2_cost), + 'policy_entropy': tf.metrics.mean(policy_entropy), + 'combined_cost': tf.metrics.mean(combined_cost), + 'avg_value_observed': tf.metrics.mean(avg_value_observed), + 'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1), + 'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3), + 'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence), + 'policy_target_top_1_confidence': tf.metrics.mean( + policy_target_top_1_confidence), + 'value_confidence': tf.metrics.mean(tf.abs(value_output)), + } + + if est_mode == tf.estimator.ModeKeys.EVAL: + return metric_ops + + # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps. + eval_step = tf.reduce_min(step) + + # Create summary ops so that they show up in SUMMARIES collection + # That way, they get logged automatically during training + + ######################################## host_call_fn: start ##################################### + #summary_writer = contrib_summary.create_file_writer(FLAGS.work_dir) + #with summary_writer.as_default(), contrib_summary.record_summaries_every_n_global_steps(params['summary_steps'], eval_step): + #for metric_name, metric_op in metric_ops.items(): + #contrib_summary.scalar(metric_name, metric_op[1], step=eval_step) + def host_call_fn(work_dir, metric_ops, eval_step): + with contrib_summary.create_file_writer(work_dir, max_queue=params['iterations_per_loop']).as_default(): + with contrib_summary.record_summaries_every_n_global_steps(params['summary_steps'], eval_step): + for metric_name, metric_op in metric_ops.items(): + contrib_summary.scalar(metric_name, metric_op[1], step=eval_step) + return contrib_summary.all_summary_ops() + ######################################## host_call_fn: end ##################################### + + # Reset metrics occasionally so that they are mean of recent batches. 
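+        # Added hedged note: the tf.metrics.* ops above keep their running
+        # total/count accumulators in local variables under the 'metrics'
+        # scope, so re-running their initializer (reset_op below) zeroes
+        # them; each logged value then averages only the batches seen since
+        # the previous reset.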
+ reset_op = tf.variables_initializer(tf.local_variables('metrics')) + cond_reset_op = tf.cond( + tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)), + lambda: reset_op, + lambda: tf.no_op()) + + ######################################## host_call_fn: start ##################################### + #return contrib_summary.all_summary_ops() + [cond_reset_op] + return host_call_fn(FLAGS.work_dir, metric_ops, eval_step) + [cond_reset_op] + ######################################## host_call_fn: end ##################################### + + metric_args = [ + policy_output, + value_output, + labels['pi_tensor'], + labels['value_tensor'], + tf.reshape(policy_cost, [1]), + tf.reshape(value_cost, [1]), + tf.reshape(l2_cost, [1]), + tf.reshape(combined_cost, [1]), + tf.reshape(global_step, [1]), + ] + + predictions = { + 'policy_output': policy_output, + 'value_output': value_output, + } + + eval_metrics_only_fn = functools.partial( + eval_metrics_host_call_fn, est_mode=tf.estimator.ModeKeys.EVAL) + host_call_fn = functools.partial( + eval_metrics_host_call_fn, est_mode=tf.estimator.ModeKeys.TRAIN) + + ######################################## host_call_fn: start ##################################### + #tpu_estimator_spec = contrib_tpu_python_tpu_tpu_estimator.TPUEstimatorSpec( + tpu_estimator_spec = NPUEstimatorSpec( + mode=mode, + predictions=predictions, + loss=combined_cost, + train_op=train_op, + #eval_metrics=(eval_metrics_only_fn, metric_args), + host_call=(host_call_fn, metric_args) + ) + return tpu_estimator_spec + #if params['use_tpu']: + # return tpu_estimator_spec + #else: + # return tpu_estimator_spec.as_estimator_spec() + ######################################## host_call_fn: end ##################################### + + +def model_inference_fn(features, training, params): + """Builds just the inference part of the model graph. + + Args: + features: input features tensor. + training: True if the model is training. + params: A dictionary + + Returns: + (policy_output, value_output, logits) tuple of tensors. + """ + + if FLAGS.bool_features: + features = tf.dtypes.cast(features, dtype=tf.float32) + + if FLAGS.input_layout == 'nhwc': + bn_axis = -1 + data_format = 'channels_last' + else: + bn_axis = 1 + data_format = 'channels_first' + + mg_batchn = functools.partial( + tf.layers.batch_normalization, + axis=bn_axis, + momentum=.95, + epsilon=1e-5, + center=True, + scale=True, + fused=True, + training=training) + + mg_conv2d = functools.partial( + tf.layers.conv2d, + filters=params['conv_width'], + kernel_size=3, + padding='same', + use_bias=False, + data_format=data_format) + + mg_global_avgpool2d = functools.partial( + tf.layers.average_pooling2d, + pool_size=go.N, + strides=1, + padding='valid', + data_format=data_format) + + def mg_activation(inputs): + if FLAGS.use_swish: + return tf.nn.swish(inputs) + + return tf.nn.relu(inputs) + + def residual_inner(inputs): + conv_layer1 = mg_batchn(mg_conv2d(inputs)) + initial_output = mg_activation(conv_layer1) + conv_layer2 = mg_batchn(mg_conv2d(initial_output)) + return conv_layer2 + + def mg_res_layer(inputs): + residual = residual_inner(inputs) + output = mg_activation(inputs + residual) + return output + + def mg_squeeze_excitation_layer(inputs): + # Hu, J., Shen, L., & Sun, G. (2018). Squeeze-and-Excitation Networks. + # 2018 IEEE/CVF Conference on Computer Vision, 7132-7141. 
+ # arXiv:1709.01507 [cs.CV] + + channels = params['conv_width'] + ratio = FLAGS.SE_ratio + assert channels % ratio == 0 + + residual = residual_inner(inputs) + pool = mg_global_avgpool2d(residual) + fc1 = tf.layers.dense(pool, units=channels // ratio) + squeeze = mg_activation(fc1) + + if FLAGS.use_SE_bias: + fc2 = tf.layers.dense(squeeze, units=2*channels) + # Channels_last so axis = 3 = -1 + gamma, bias = tf.split(fc2, 2, axis=3) + else: + gamma = tf.layers.dense(squeeze, units=channels) + bias = 0 + + sig = tf.nn.sigmoid(gamma) + # Explicitly signal the broadcast. + scale = tf.reshape(sig, [-1, 1, 1, channels]) + + excitation = tf.multiply(scale, residual) + bias + return mg_activation(inputs + excitation) + + initial_block = mg_activation(mg_batchn(mg_conv2d(features))) + + # the shared stack + shared_output = initial_block + for _ in range(params['trunk_layers']): + if FLAGS.use_SE or FLAGS.use_SE_bias: + shared_output = mg_squeeze_excitation_layer(shared_output) + else: + shared_output = mg_res_layer(shared_output) + + # Policy head + policy_conv = mg_conv2d( + shared_output, filters=params['policy_conv_width'], kernel_size=1) + policy_conv = mg_activation( + mg_batchn(policy_conv, center=False, scale=False)) + logits = tf.layers.dense( + tf.reshape( + policy_conv, [-1, params['policy_conv_width'] * go.N * go.N]), + go.N * go.N + 1) + + policy_output = tf.nn.softmax(logits, name='policy_output') + + # Value head + value_conv = mg_conv2d( + shared_output, filters=params['value_conv_width'], kernel_size=1) + value_conv = mg_activation( + mg_batchn(value_conv, center=False, scale=False)) + + value_fc_hidden = mg_activation(tf.layers.dense( + tf.reshape(value_conv, [-1, params['value_conv_width'] * go.N * go.N]), + params['fc_width'])) + value_output = tf.nn.tanh( + tf.reshape(tf.layers.dense(value_fc_hidden, 1), [-1]), + name='value_output') + + return policy_output, value_output, logits + + +def tpu_model_inference_fn(features): + """Builds the model graph suitable for running on TPU. + + It does two things: + 1) Mark all weights as constant, which improves TPU inference performance + because it prevents the weights being transferred to the TPU every call + to Session.run(). + 2) Adds constant to the graph with a unique value and marks it as a + dependency on the rest of the model. This works around a TensorFlow bug + that prevents multiple models being run on a single TPU. + + Returns: + (policy_output, value_output, logits) tuple of tensors. + """ + def custom_getter(getter, name, *args, **kwargs): + with tf.control_dependencies(None): + return tf.guarantee_const( + getter(name, *args, **kwargs), name=name + '/GuaranteeConst') + with tf.variable_scope('', custom_getter=custom_getter): + # TODO(tommadams): remove the tf.control_dependencies context manager + # when a fixed version of TensorFlow is released. 
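+        # Added hedged note: the wall-clock value below makes the constant's
+        # name unique per freeze (e.g. 'epoch_time_1600000000'), so no two
+        # frozen graphs are byte-identical, which is the workaround for the
+        # TensorFlow bug described in the docstring above.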
+ t = int(time.time()) + epoch_time = tf.constant(t, name='epoch_time_%d' % t) + with tf.control_dependencies([epoch_time]): + if FLAGS.input_layout == 'nhwc': + feature_shape = [-1, go.N, go.N, get_features_planes()] + else: + feature_shape = [-1, get_features_planes(), go.N, go.N] + features = tf.reshape(features, feature_shape) + return model_inference_fn(features, False, FLAGS.flag_values_dict()) + + +def maybe_set_seed(): + if FLAGS.training_seed != 0: + random.seed(FLAGS.training_seed) + tf.set_random_seed(FLAGS.training_seed) + np.random.seed(FLAGS.training_seed) + + +def get_estimator(): + if FLAGS.use_tpu: + return _get_tpu_estimator() + else: + return _get_nontpu_estimator() + + +def _get_nontpu_estimator(): + session_config = tf.ConfigProto(allow_soft_placement=True) + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + #session_config = npu_config_proto(config_proto=tf.ConfigProto()) + session_config.gpu_options.allow_growth = True + + run_config = tf.estimator.RunConfig( + save_summary_steps=FLAGS.summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + session_config=session_config) + return tf.estimator.Estimator( + model_fn, + model_dir=FLAGS.work_dir, + config=npu_run_config_init(run_config=run_config), + params=FLAGS.flag_values_dict()) + + +def _get_tpu_estimator(): + tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=None, project=None) + tpu_grpc_url = tpu_cluster_resolver.get_master() + + run_config = contrib_tpu_python_tpu_tpu_config.RunConfig( + master=tpu_grpc_url, + evaluation_master=tpu_grpc_url, + model_dir=FLAGS.work_dir, + save_checkpoints_steps=max(1000, FLAGS.iterations_per_loop), + save_summary_steps=FLAGS.summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + session_config=npu_config_proto( + config_proto=tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=True,) + ), + tpu_config=contrib_tpu_python_tpu_tpu_config.TPUConfig( + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=contrib_tpu_python_tpu_tpu_config.InputPipelineConfig.PER_HOST_V2)) + + ######################################## host_call_fn: start ##################################### + #return contrib_tpu_python_tpu_tpu_estimator.TPUEstimator( + return NPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=npu_run_config_init(run_config=run_config), + train_batch_size=FLAGS.train_batch_size * FLAGS.num_tpu_cores, + eval_batch_size=FLAGS.train_batch_size * FLAGS.num_tpu_cores, + params=FLAGS.flag_values_dict()) + ######################################## host_call_fn: end ##################################### + +def bootstrap(): + """Initialize a tf.Estimator run with random initial weights.""" + # a bit hacky - forge an initial checkpoint with the name that subsequent + # Estimator runs will expect to find. + # + # Estimator will do this automatically when you call train(), but calling + # train() requires data, and I didn't feel like creating training data in + # order to run the full train pipeline for 1 step. 
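+    # Added hedged note: 'model.ckpt-1' mimics the checkpoint name Estimator
+    # itself would write after one training step, so later runs pick up the
+    # forged file as the latest checkpoint without any real training.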
+ maybe_set_seed() + initial_checkpoint_name = 'model.ckpt-1' + save_file = os.path.join(FLAGS.work_dir, initial_checkpoint_name) + sess = tf.Session(config=npu_config_proto(), graph=tf.Graph()) + with sess.graph.as_default(): + features, labels = get_inference_input() + model_fn(features, labels, tf.estimator.ModeKeys.PREDICT, + params=FLAGS.flag_values_dict()) + sess.run(tf.global_variables_initializer()) + tf.train.Saver().save(sess, save_file) + + +def export_model(model_path): + """Take the latest checkpoint and copy it to model_path. + + Assumes that all relevant model files are prefixed by the same name. + (For example, foo.index, foo.meta and foo.data-00000-of-00001). + + Args: + model_path: The path (can be a gs:// path) to export model + """ + estimator = tf.estimator.Estimator(model_fn, model_dir=FLAGS.work_dir, + params=FLAGS.flag_values_dict(), config=npu_run_config_init()) + latest_checkpoint = estimator.latest_checkpoint() + all_checkpoint_files = tf.gfile.Glob(latest_checkpoint + '*') + for filename in all_checkpoint_files: + suffix = filename.partition(latest_checkpoint)[2] + destination_path = model_path + suffix + print('Copying {} to {}'.format(filename, destination_path)) + tf.gfile.Copy(filename, destination_path) + + +def freeze_graph(model_path, use_trt=False, trt_max_batch_size=8, + trt_precision='fp32'): + output_names = ['policy_output', 'value_output'] + + n = DualNetwork(model_path) + out_graph = tf.graph_util.convert_variables_to_constants( + n.sess, n.sess.graph.as_graph_def(), output_names) + + if use_trt: + import tensorflow.contrib.tensorrt as trt + out_graph = trt.create_inference_graph( + input_graph_def=out_graph, + outputs=output_names, + max_batch_size=trt_max_batch_size, + max_workspace_size_bytes=1 << 29, + precision_mode=trt_precision) + + metadata = make_model_metadata({ + 'engine': 'tf', + 'use_trt': bool(use_trt), + }) + + minigo_model.write_graph_def(out_graph, metadata, model_path + '.minigo') + + +def freeze_graph_tpu(model_path): + """Custom freeze_graph implementation for Cloud TPU.""" + + assert model_path + assert FLAGS.tpu_name + if FLAGS.tpu_name.startswith('grpc://'): + tpu_grpc_url = FLAGS.tpu_name + else: + tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=None, project=None) + tpu_grpc_url = tpu_cluster_resolver.get_master() + sess = tf.Session(tpu_grpc_url, config=npu_config_proto()) + + output_names = [] + with sess.graph.as_default(): + # Replicate the inference function for each TPU core. + replicated_features = [] + feature_type = tf.bool if FLAGS.bool_features else tf.float32 + for i in range(FLAGS.num_tpu_cores): + name = 'pos_tensor_%d' % i + features = tf.placeholder( + feature_type, [None], name=name) + replicated_features.append((features,)) + outputs = contrib_tpu.replicate( + tpu_model_inference_fn, replicated_features) + + # The replicate op assigns names like output_0_shard_0 to the output + # names. Give them human readable names. 
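+        # Added hedged illustration: e.g. with num_tpu_cores=8 the identity
+        # ops below export 'policy_output_0'/'value_output_0' through
+        # 'policy_output_7'/'value_output_7', one pair per replica.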
+    for i, (policy_output, value_output, _) in enumerate(outputs):
+        policy_name = 'policy_output_%d' % i
+        value_name = 'value_output_%d' % i
+        output_names.extend([policy_name, value_name])
+        tf.identity(policy_output, policy_name)
+        tf.identity(value_output, value_name)
+
+    tf.train.Saver().restore(sess, model_path)
+
+    out_graph = tf.graph_util.convert_variables_to_constants(
+        sess, sess.graph.as_graph_def(), output_names)
+
+    metadata = make_model_metadata({
+        'engine': 'tpu',
+        'num_replicas': FLAGS.num_tpu_cores,
+    })
+
+    minigo_model.write_graph_def(out_graph, metadata, model_path + '.minigo')
+
+
+def make_model_metadata(metadata):
+    for f in ['conv_width', 'fc_width', 'trunk_layers', 'use_SE', 'use_SE_bias',
+              'use_swish', 'input_features', 'input_layout']:
+        metadata[f] = getattr(FLAGS, f)
+    metadata['input_type'] = 'bool' if FLAGS.bool_features else 'float'
+    metadata['board_size'] = go.N
+    return metadata
diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..61dffddbd77fdbd0e1ea2f5d8e98ed5467928867
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+cur_path=`pwd`/../
+rm -f $cur_path/outputs/models/*
+rm -f $cur_path/estimator_working_dir/*
+
+export ENABLE_RUNTIME_V2=1
+#基础参数,需要模型审视修改
+#Batch Size
+batch_size=128
+#网络名称,同目录名称
+Network="MiniGo_ID0629_for_TensorFlow"
+#Device数量,单卡默认为1
+RankSize=1
+#训练epoch,可选
+train_epochs=
+#训练step
+train_steps=500
+#学习率
+learning_rate=
+#动态输入模式,不需要修改
+dynamic_input=""
+
+#参数配置 npu param
+precision_mode="allow_fp32_to_fp16"
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+data_path="./outputs/data/selfplay"
+
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_RT2_performance_1p.sh $data_path --work_dir="$cur_path/estimator_working_dir" --export_path="$cur_path/outputs/models/000001-first_generation""
+    exit 1
+fi
+
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --dynamic_input* ]];then
+        dynamic_input=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+##############执行训练##########
+cd $cur_path
+
+if [ -d $cur_path/test/output ];then
+    rm -rf $cur_path/test/output/*
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+fi
+wait
+
+#(Step1)初始化 一定要先运行这一步
+python3 bootstrap.py --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000000-bootstrap
+wait
+
+start=$(date +%s)
+#(Step3)训练
+#python3 train.py --training_data_path=$data_path --steps_to_train=$train_steps --train_batch_size=$batch_size --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000001-first_generation > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+python3 train_rt.py \
+    --training_data_path=$data_path \
+    --steps_to_train=$train_steps \
+    --train_batch_size=$batch_size \
+    --work_dir=$cur_path/estimator_working_dir \
+    --export_path=$cur_path/outputs/models/000001-first_generation \
+    --dynamic_input=${dynamic_input} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+wait
+end=$(date +%s)
+e2etime=$(( $end - $start ))
+
+#echo "Final Performance ms/step : $average_perf"
+echo "Final Training Duration sec : $e2etime"
+
+
+###下面字段用于冒烟看护
+BatchSize=${batch_size}
+#设备类型,自动获取
+DeviceType=`uname -m`
+#用例名称,自动获取
+CaseName=${Network}_bs${BatchSize}_${RankSize}'p_RT2_perf'
+
+#获取性能
+TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'`
+wait
+ActualFPS=`awk 'BEGIN{printf "%.2f\n", '${BatchSize}'*'${TrainingTime}'}'`
+
+#从train_*.log中提取Loss到${CaseName}_loss.txt中
+grep "] loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $7}' |cut -d , -f 1 >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt
+ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`
+
+#关键信息打印到CaseName.log中,此处无需修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RankSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fdefa5091c42dbb6e74de5b1dae751ad40883299
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+cur_path=`pwd`/../
+rm -f $cur_path/outputs/models/*
+rm -f $cur_path/estimator_working_dir/*
+
+export ENABLE_RUNTIME_V2=1
+#基础参数,需要模型审视修改
+#Batch Size
+batch_size=128
+#网络名称,同目录名称
+Network="MiniGo_ID0629_for_TensorFlow"
+#Device数量,单卡默认为1
+RankSize=8
+#训练epoch,可选
+train_epochs=
+#训练step
+train_steps=500
+#学习率
+learning_rate=
+#动态输入模式,不需要修改
+dynamic_input=""
+
+#参数配置 npu param
+precision_mode="allow_fp32_to_fp16"
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+data_path="./outputs/data/selfplay"
+
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_RT2_performance_8p.sh $data_path --work_dir="$cur_path/estimator_working_dir" --export_path="$cur_path/outputs/models/000001-first_generation""
+    exit 1
+fi
+
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --bind_core* ]]; then
+        bind_core=`echo ${para#*=}`
+        name_bind="_bindcore"
+    elif [[ $para == --dynamic_input* ]];then
+        dynamic_input=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+##############执行训练########## +cd $cur_path + +#(Step1)初始化 一定要先运行这一步 +python3 bootstrap.py --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000000-bootstrap +wait + +export ASCEND_DEVICE_ID=0 +export RANK_SIZE=8 +export RANK_TABLE_FILE="${cur_path}/test/8p.json" +export JOB_ID=10086 + +start=$(date +%s) + +# 8P训练模式 +for i in 0 1 2 3 4 5 6 7 +do + #设置环境变量 + export RANK_ID=$i + export ASCEND_DEVICE_ID=$i + ASCEND_DEVICE_ID=$i + echo "Device ID: $ASCEND_DEVICE_ID" + + if [ -d $cur_path/test/output/$ASCEND_DEVICE_ID ];then + rm -rf $cur_path/test/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID + else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID + fi + echo $ASCEND_DEVICE_ID + #(Step3)训练 + corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` + let a=RANK_ID*${corenum}/8 + let b=RANK_ID+1 + let c=b*${corenum}/8-1 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + #${bind_core} python3 train.py --training_data_path=$data_path --steps_to_train=$train_steps --train_batch_size=$batch_size --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000001-first_generation > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + ${bind_core} python3 train_rt.py \ + --training_data_path=$data_path \ + --steps_to_train=$train_steps \ + --train_batch_size=$batch_size \ + --work_dir=$cur_path/estimator_working_dir \ + --export_path=$cur_path/outputs/models/000001-first_generation \ + --dynamic_input=${dynamic_input} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +end=$(date +%s) +e2etime=$(( $end - $start )) + +#echo "Final Performance ms/step : $average_perf" +echo "Final Training Duration sec : $e2etime" + + +###下面字段用于冒烟看护 +BatchSize=${batch_size} +#设备类型,自动获取 +DeviceType=`uname -m` +#用例名称,自动获取 +CaseName=${Network}_bs${BatchSize}_${RankSize}'p_RT2_perf' + +#获取性能 +TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` +wait +ActualFPS=`awk 'BEGIN{printf "%.2f\n", '${BatchSize}'*'${RankSize}'*'${TrainingTime}'}'` + +#从train_*.log中提取Loss到${CaseName}_loss.txt中 +grep "] loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $7}' |cut -d , -f 1 >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt +ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt` + +#关键信息打印到CaseName.log中,此处无需修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RankSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DynamicInput = ${dynamic_input}" >> 
$cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/train_rt.py b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/train_rt.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cafcdafe0e8c667da91d61f73aa26c495c764f6
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/train_rt.py
@@ -0,0 +1,315 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Copyright 2018 Google LLC
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Train a network.
+
+Usage:
+    BOARD_SIZE=19 python train_rt.py tfrecord1 tfrecord2 tfrecord3
+"""
+from npu_bridge.npu_init import *
+
+#########################Solve Argument list too long : start #########################
+import os
+#########################Solve Argument list too long : end #########################
+
+import logging
+import math
+
+from absl import app, flags
+import numpy as np
+import tensorflow as tf
+
+import bigtable_input
+import dual_net_rt
+import preprocessing
+import utils
+
+# See www.moderndescartes.com/essays/shuffle_viz for discussion on sizing
+flags.DEFINE_integer('shuffle_buffer_size', 2000,
+                     'Size of buffer used to shuffle train examples.')
+
+flags.DEFINE_boolean('shuffle_examples', True,
+                     'Whether to shuffle training examples.')
+
+flags.DEFINE_integer('steps_to_train', None,
+                     'Number of training steps to take. If not set, iterates '
+                     'once over training data.')
+
+flags.DEFINE_integer('num_examples', None,
+                     'Total number of input examples. This is only used if '
+                     'steps_to_train is not set. 
Requires that filter_amount ' + 'is 1.0.') + +flags.DEFINE_integer('window_size', 500000, + 'Number of games to include in the window') + +flags.DEFINE_float('filter_amount', 1.0, + 'Fraction of positions to filter from golden chunks,' + 'default, 1.0 (no filter)') + +flags.DEFINE_string('export_path', None, + 'Where to export the model after training.') + +################## Solve Argument List Too long: start ################## +flags.DEFINE_string('training_data_path', None, + 'training data path.') +################## Solve Argument List Too long: end ################## + +flags.DEFINE_bool('use_bt', False, + 'Whether to use Bigtable as input. ' + '(Only supported with --use_tpu, currently.)') + +flags.DEFINE_bool('freeze', False, + 'Whether to freeze the graph at the end of training.') + +flags.DEFINE_boolean( + 'use_trt', False, 'True to write a GraphDef that uses the TRT runtime') +flags.DEFINE_integer('trt_max_batch_size', None, + 'Maximum TRT batch size') +flags.DEFINE_string('trt_precision', 'fp32', + 'Precision for TRT runtime: fp16, fp32 or int8') +flags.register_multi_flags_validator( + ['use_trt', 'trt_max_batch_size'], + lambda flags: not flags['use_trt'] or flags['trt_max_batch_size'], + 'trt_max_batch_size must be set if use_trt is true') + + +flags.register_multi_flags_validator( + ['use_bt', 'use_tpu'], + lambda flags: flags['use_tpu'] if flags['use_bt'] else True, + '`use_bt` flag only valid with `use_tpu` as well') + +@flags.multi_flags_validator( + ['num_examples', 'steps_to_train', 'filter_amount'], + '`num_examples` requires `steps_to_train==0` and `filter_amount==1.0`') +def _example_flags_validator(flags_dict): + if not flags_dict['num_examples']: + return True + return not flags_dict['steps_to_train'] and flags_dict['filter_amount'] == 1.0 + +@flags.multi_flags_validator( + ['use_bt', 'cbt_project', 'cbt_instance', 'cbt_table'], + message='Cloud Bigtable configuration flags not correct') +def _bt_checker(flags_dict): + if not flags_dict['use_bt']: + return True + return (flags_dict['cbt_project'] + and flags_dict['cbt_instance'] + and flags_dict['cbt_table']) + + +# From dual_net.py +flags.declare_key_flag('work_dir') +flags.declare_key_flag('train_batch_size') +flags.declare_key_flag('num_tpu_cores') +flags.declare_key_flag('use_tpu') +flags.declare_key_flag('dynamic_input') + +FLAGS = flags.FLAGS + + +class EchoStepCounterHook(tf.train.StepCounterHook): + """A hook that logs steps per second.""" + + def _log_and_record(self, elapsed_steps, elapsed_time, global_step): + s_per_sec = elapsed_steps / elapsed_time + logging.info("{}: {:.3f} steps per second".format(global_step, s_per_sec)) + super()._log_and_record(elapsed_steps, elapsed_time, global_step) + + +def compute_update_ratio(weight_tensors, before_weights, after_weights): + """Compute the ratio of gradient norm to weight norm.""" + deltas = [after - before for after, + before in zip(after_weights, before_weights)] + delta_norms = [np.linalg.norm(d.ravel()) for d in deltas] + weight_norms = [np.linalg.norm(w.ravel()) for w in before_weights] + ratios = [d / w for d, w in zip(delta_norms, weight_norms)] + all_summaries = [ + tf.Summary.Value(tag='update_ratios/' + + tensor.name, simple_value=ratio) + for tensor, ratio in zip(weight_tensors, ratios)] + return tf.Summary(value=all_summaries) + + +class UpdateRatioSessionHook(tf.train.SessionRunHook): + """A hook that computes ||grad|| / ||weights|| (using frobenius norm).""" + + def __init__(self, output_dir, every_n_steps=1000): + self.output_dir = 
output_dir + self.every_n_steps = every_n_steps + self.before_weights = None + self.file_writer = None + self.weight_tensors = None + self.global_step = None + + def begin(self): + """Called once before using the session""" + # These calls only works because the SessionRunHook api guarantees this + # will get called within a graph context containing our model graph. + + self.file_writer = tf.summary.FileWriterCache.get(self.output_dir) + self.weight_tensors = tf.trainable_variables() + self.global_step = tf.train.get_or_create_global_step() + + def before_run(self, run_context): + """Called before each call to run().""" + global_step = run_context.session.run(self.global_step) + if global_step % self.every_n_steps == 0: + self.before_weights = run_context.session.run(self.weight_tensors) + + def after_run(self, run_context, unused_run_values): + """Called after each call to run().""" + global_step = run_context.session.run(self.global_step) + if self.before_weights is not None: + after_weights = run_context.session.run(self.weight_tensors) + weight_update_summaries = compute_update_ratio(self.weight_tensors, self.before_weights, after_weights) + self.file_writer.add_summary(weight_update_summaries, global_step) + self.before_weights = None + + +def train(*tf_records: "Records to train on"): + """Train on examples.""" + tf.logging.set_verbosity(tf.logging.INFO) + estimator = dual_net_rt.get_estimator() + + effective_batch_size = FLAGS.train_batch_size + if FLAGS.use_tpu: + effective_batch_size *= FLAGS.num_tpu_cores + + if FLAGS.use_tpu: + if FLAGS.use_bt: + def _input_fn(params): + games = bigtable_input.GameQueue( + FLAGS.cbt_project, FLAGS.cbt_instance, FLAGS.cbt_table) + games_nr = bigtable_input.GameQueue( + FLAGS.cbt_project, FLAGS.cbt_instance, FLAGS.cbt_table + '-nr') + return preprocessing.get_tpu_bt_input_tensors( + games, + games_nr, + params['batch_size'], + params['input_layout'], + number_of_games=FLAGS.window_size, + random_rotation=True) + else: + def _input_fn(params): + return preprocessing.get_tpu_input_tensors( + params['batch_size'], + params['input_layout'], + tf_records, + filter_amount=FLAGS.filter_amount, + shuffle_examples=FLAGS.shuffle_examples, + shuffle_buffer_size=FLAGS.shuffle_buffer_size, + random_rotation=True) + # Hooks are broken with TPUestimator at the moment. 
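+        # Added hedged note: on the non-TPU path below, UpdateRatioSessionHook
+        # and EchoStepCounterHook are attached instead; here the list is empty.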
+ hooks = [] + else: + def _input_fn(): + return preprocessing.get_input_tensors( + FLAGS.train_batch_size, + FLAGS.input_layout, + tf_records, + filter_amount=FLAGS.filter_amount, + shuffle_examples=FLAGS.shuffle_examples, + shuffle_buffer_size=FLAGS.shuffle_buffer_size, + random_rotation=True) + + hooks = [UpdateRatioSessionHook(FLAGS.work_dir), + EchoStepCounterHook(output_dir=FLAGS.work_dir)] + + steps = FLAGS.steps_to_train + if not steps and FLAGS.num_examples: + batch_size = FLAGS.train_batch_size + if FLAGS.use_tpu: + batch_size *= FLAGS.num_tpu_cores + steps = math.floor(FLAGS.num_examples / batch_size) + + logging.info("Training, steps = %s, batch = %s -> %s examples", + steps or '?', effective_batch_size, + (steps * effective_batch_size) if steps else '?') + + if FLAGS.use_bt: + games = bigtable_input.GameQueue( + FLAGS.cbt_project, FLAGS.cbt_instance, FLAGS.cbt_table) + if not games.read_wait_cell(): + games.require_fresh_games(20000) + latest_game = games.latest_game_number + index_from = max(latest_game, games.read_wait_cell()) + print("== Last game before training:", latest_game, flush=True) + print("== Wait cell:", games.read_wait_cell(), flush=True) + + try: + estimator.train(_input_fn, steps=steps, hooks=npu_hooks_append(hooks_list=hooks)) + if FLAGS.use_bt: + bigtable_input.set_fresh_watermark(games, index_from, + FLAGS.window_size) + except: + if FLAGS.use_bt: + games.require_fresh_games(0) + raise + + +def main(argv): + """Train on examples and export the updated model weights.""" + ################## Solve Argument List Too long: start ################## + # tf_records = argv[1:] + tf_records = [] + for presentdir, dirnames, filenames in os.walk(FLAGS.training_data_path): + for filename in filenames: + # files with path + file_with_path = os.path.join(presentdir, filename) + tf_records.append(file_with_path) + ################## Solve Argument List Too long: end ################## + + logging.info("Training on %s records: %s to %s", + len(tf_records), tf_records[0], tf_records[-1]) + with utils.logged_timer("Training"): + train(*tf_records) + if FLAGS.export_path: + dual_net_rt.export_model(FLAGS.export_path) + if FLAGS.freeze: + if FLAGS.use_tpu: + dual_net_rt.freeze_graph_tpu(FLAGS.export_path) + else: + dual_net_rt.freeze_graph(FLAGS.export_path, FLAGS.use_trt, + FLAGS.trt_max_batch_size, FLAGS.trt_precision) + + +if __name__ == "__main__": + app.run(main) diff --git a/TensorFlow/built-in/cv/image_segmentation/2Dattentionunet_ID0120_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_segmentation/2Dattentionunet_ID0120_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..2b733e78a08c3eaea7a3cf120fb9fa0202bc26a7 --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/2Dattentionunet_ID0120_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,197 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +export ENABLE_RUNTIME_V2=1 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="2Dattentionunet_ID0120_for_TensorFlow" +#训练参数 +model="aunet" +mode="train" +batch_size=32 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ 
$1 == --help || $1 == -h ]];then
    echo "usage: ./train_RT2_performance_1p.sh "
    echo " "
    echo "parameter explain:
    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
    --over_dump              if or not over detection, default is False
    --data_dump_flag         data dump flag, default is False
    --data_dump_step         data dump step, default is 10
    --profiling              if or not profiling for performance debug, default is False
    --autotune               whether to enable autotune, default is False
    --data_path              source data of training
    -h/--help                show help message
    "
    exit 1
fi

#参数校验,不需要修改
for para in $*
do
    if [[ $para == --precision_mode* ]];then
        precision_mode=`echo ${para#*=}`
    elif [[ $para == --over_dump* ]];then
        over_dump=`echo ${para#*=}`
        over_dump_path=${cur_path}/output/overflow_dump
        mkdir -p ${over_dump_path}
    elif [[ $para == --data_dump_flag* ]];then
        data_dump_flag=`echo ${para#*=}`
        data_dump_path=${cur_path}/output/data_dump
        mkdir -p ${data_dump_path}
    elif [[ $para == --data_dump_step* ]];then
        data_dump_step=`echo ${para#*=}`
    elif [[ $para == --profiling* ]];then
        profiling=`echo ${para#*=}`
        profiling_dump_path=${cur_path}/output/profiling
        mkdir -p ${profiling_dump_path}
    elif [[ $para == --autotune* ]];then
        autotune=`echo ${para#*=}`
        autotune_dump_path=${cur_path}/output/autotune_dump
        mkdir -p ${autotune_dump_path}/GA
        mkdir -p ${autotune_dump_path}/rl
        cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
        cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/rl/
        mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
        mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
    elif [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    fi
done

#校验是否传入data_path,不需要修改
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi

#训练开始时间,不需要修改
start_time=$(date +%s)

#进入训练脚本目录,需要模型审视修改
cd $cur_path/..
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + # let a=RANK_ID*12 + # let b=RANK_ID+1 + # let c=b*12-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune + nohup python3 mainNPU_v2.py \ + --model=$model \ + --mode=$mode \ + --act=true \ + --crop_height=112 \ + --crop_width=112 \ + --batch_size=${batch_size} \ + --num_epoch=2 \ + --data_dir=${data_path} \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} \ + --autotune=${autotune} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a "Final performance FPS" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F" " '{print $4}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +TrainAccuracy=`grep -a "Final accuracy" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F" " '{print $4}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${TrainAccuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_RT2_perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -a "Current_Loss" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $11}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${TrainAccuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git 
a/TensorFlow/built-in/nlp/Textcnn_ID0123_For_Tensorflow/run_cnn_rt.py b/TensorFlow/built-in/nlp/Textcnn_ID0123_For_Tensorflow/run_cnn_rt.py new file mode 100644 index 0000000000000000000000000000000000000000..f92e36462001d6b7421efc0b59c94c356d9f4b6a --- /dev/null +++ b/TensorFlow/built-in/nlp/Textcnn_ID0123_For_Tensorflow/run_cnn_rt.py @@ -0,0 +1,300 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from npu_bridge.npu_init import * +#from npu_bridge import * +import os +import sys +import time +from datetime import timedelta +import pickle +import numpy as np +import tensorflow as tf +from sklearn import metrics +from cnn_model import TCNNConfig, TextCNN +from data.cnews_loader import read_vocab, read_category, batch_iter, process_file, build_vocab +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--save_dir', dest='save_dir', default='checkpoints/textcnn') +parser.add_argument('--data_path', dest='data_path', default='./data/cnews', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_fp32_to_fp16', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--autotune', dest='autotune', default='False', help='whether to enable autotune, default is False') +parser.add_argument('--npu_loss_scale', dest='npu_loss_scale', type=int, default=1) +parser.add_argument('--mode', dest='mode', default='train', choices=('train', 'test', 'train_and_eval')) +parser.add_argument('--batch_size', dest='batch_size', 
type=int, default=64)
+parser.add_argument('--learning_rate', dest='learning_rate', type=float, default=0.001)
+parser.add_argument('--num_epochs', dest='num_epochs', type=int, default=10)
+args = parser.parse_args()
+
+base_dir = args.data_path
+train_dir = os.path.join(base_dir, 'cnews.train.txt')
+test_dir = os.path.join(base_dir, 'cnews.test.txt')
+val_dir = os.path.join(base_dir, 'cnews.val.txt')
+vocab_dir = os.path.join(base_dir, 'cnews.vocab.txt')
+save_dir = args.save_dir
+save_path = os.path.join(save_dir, 'best_validation')
+
+def get_time_dif(start_time):
+    """Return the elapsed time since start_time."""
+    end_time = time.time()
+    time_dif = (end_time - start_time)
+    return timedelta(seconds=int(round(time_dif))), time_dif
+
+def feed_data(x_batch, y_batch, keep_prob):
+    feed_dict = {
+        model.input_x: x_batch,
+        model.input_y: y_batch,
+        model.keep_prob: keep_prob
+    }
+    return feed_dict
+
+
+def evaluate(sess, x, y):
+    """Evaluate accuracy and loss on the given data."""
+    total_loss = 0.0
+    total_acc = 0.0
+    data_len = len(x)
+    batch_train = batch_iter_(x, y, 256)
+    for x_batch, y_batch in batch_train:
+        batch_len = len(x_batch)
+        feed_dict = feed_data(x_batch, y_batch, 1.0)
+        (loss, acc) = sess.run([model.loss, model.acc], feed_dict=feed_dict)
+        total_loss += (loss * batch_len)
+        total_acc += (acc * batch_len)
+    return ((total_loss / data_len), (total_acc / data_len))
+
+class data_load(object):
+    def __init__(self, sess, x, y, is_train=True):
+
+        with tf.device('/cpu:0'):
+            self.x = x
+            self.y = y
+            self.x_ = tf.placeholder(self.x.dtype, self.x.shape)
+            self.y_ = tf.placeholder(self.y.dtype, self.y.shape)
+            self.sess = sess
+            dataset = tf.data.Dataset.from_tensor_slices((self.x_, self.y_))
+
+            if is_train:
+                dataset = dataset.shuffle(len(self.x))
+                dataset = dataset.repeat()
+                dataset = dataset.batch(len(self.x))
+            else:
+                dataset = dataset.batch(len(self.x))
+
+            dataset = dataset.prefetch(2)
+            self.iterator = dataset.make_initializable_iterator()
+            self.next = self.iterator.get_next()
+            self.sess.run(self.iterator.initializer, feed_dict={self.x_: self.x, self.y_: self.y})
+
+    def replay(self):
+        self.sess.run(self.iterator.initializer, feed_dict={self.x_: self.x, self.y_: self.y})
+
+
+def batch_iter_(x, y, batch_size=64):
+    data_len = len(x)
+
+    num_batch = int((data_len - 1) / batch_size) + 1
+    for i in range(num_batch):
+        start_id = i * batch_size
+        end_id = min((i + 1) * batch_size, data_len)
+        yield x[start_id:end_id], y[start_id:end_id]
+
+def train():
+    print('Configuring TensorBoard and Saver...')
+    tensorboard_dir = 'tensorboard/textcnn'
+    if (not os.path.exists(tensorboard_dir)):
+        os.makedirs(tensorboard_dir)
+    tf.summary.scalar('loss', model.loss)
+    tf.summary.scalar('accuracy', model.acc)
+    merged_summary = tf.summary.merge_all()
+    writer = tf.summary.FileWriter(tensorboard_dir)
+    saver = tf.train.Saver()
+    if (not os.path.exists(save_dir)):
+        os.makedirs(save_dir)
+    print('Loading training and validation data...')
+    start_time = time.time()
+    (x_train, y_train) = process_file(train_dir, word_to_id, cat_to_id, config.seq_length)
+    (x_val, y_val) = process_file(val_dir, word_to_id, cat_to_id, config.seq_length)
+    time_dif = get_time_dif(start_time)
+    print('Time usage:', time_dif)
+
+    ############################ modify for run on npu ###############################
+    from npu_bridge.estimator import npu_ops
+    from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+    sess_config = tf.ConfigProto()
+    custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = "NpuOptimizer"
+    custom_op.parameter_map["use_off_line"].b = True  # must be explicitly enabled to run training on the Ascend AI processor
+    sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # remapping must be explicitly disabled
+    #custom_op.parameter_map["dynamic_input"].b = True
+    custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
+    #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
+    #custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode)
+    if args.data_dump_flag.strip() == "True":
+        custom_op.parameter_map["enable_dump"].b = True
+        custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.data_dump_path)
+        custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(args.data_dump_step)
+        custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all")
+    if args.over_dump.strip() == "True":
+        # dump_path: where dump data is written; the directory must already exist
+        # on the training host (container or host side) and be writable by the
+        # configured run user
+        custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.over_dump_path)
+        # enable_dump_debug: whether to enable overflow detection
+        custom_op.parameter_map["enable_dump_debug"].b = True
+        # dump_debug_mode: overflow detection mode, one of all/aicore_overflow/atomic_overflow
+        custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all")
+    if args.profiling.strip() == "True":
+        # profiling_mode must be enabled for the profiling options below to take effect
+        custom_op.parameter_map["profiling_mode"].b = True
+        profilingvalue = (
+            '{"output":"%s","training_trace":"on","task_trace":"on","aicpu":"on","fp_point":"","bp_point":""}' % (
+                args.profiling_dump_path))
+        custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profilingvalue)
+    ############################ modify for run on npu ###############################
+    print("NPU session configuration finished")
+    session = tf.Session(config=sess_config)
+    session.run(tf.global_variables_initializer())
+    writer.add_graph(session.graph)
+    train_len = len(x_train)
+    val_len = len(x_val)
+    train_data = data_load(session, x_train, y_train)
+    val = data_load(session, x_val, y_val, False)
+    x_v, y_v = session.run(val.next)
+    tf.io.write_graph(session.graph_def, 'checkpoints', 'train.pbtxt')
+    print('Training and evaluating...')
+    start_time = time.time()
+    data_time = 0
+    total_batch = 0
+    best_acc_val = 0.0
+    last_improved = 0
+    require_improvement = 10000
+    total_feed = 0
+    total_summary = 0
+    total_val = 0
+    total_save = 0
+    total_train = 0
+    flag = False
+    for epoch in range(config.num_epochs):
+        print('Epoch:', (epoch + 1))
+        x, y = session.run(train_data.next)
+        batch_train = batch_iter_(x, y, config.batch_size)
+        for (x_batch, y_batch) in batch_train:
+            feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob)
+            #if total_batch % config.save_per_batch == 0:
+                # write training results to a tensorboard scalar every few rounds
+                #s = session.run(merged_summary, feed_dict=feed_dict)
+                #writer.add_summary(s, total_batch)
+            if ((total_batch % config.print_per_batch) == 0):
+                feed_dict[model.keep_prob] = 1.0
+                (loss_train, acc_train) = session.run([model.loss, model.acc], feed_dict=feed_dict)
+                (loss_val, acc_val) = evaluate(session, x_v, y_v)
+                if (acc_val > best_acc_val):
+                    best_acc_val = acc_val
+                    last_improved = total_batch
+                    saver.save(sess=session, save_path=save_path)
+                    improved_str = '*'
+                else:
+                    improved_str = ''
+                time_dif, time_sec = get_time_dif(start_time)
+                msg = ('Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},'
+                       ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6} ({7})')
+                print(msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str, time_sec))
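+            # Added note: keep_prob was forced to 1.0 above for the eval pass;
+            # restore the configured dropout rate before the optimizer step below.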
+            feed_dict[model.keep_prob] = config.dropout_keep_prob
+            session.run(model.optim, feed_dict=feed_dict)
+            #time_dif = get_time_dif(start_time)
+            #print("step:%d, time:%s"%(total_batch, time_dif))
+            total_batch += 1
+            if ((total_batch - last_improved) > require_improvement):
+                # validation accuracy has not improved for a long time; stop training early
+                print('No optimization for a long time, auto-stopping...')
+                flag = True
+                break  # exit the batch loop
+        if flag:
+            break
+
+def test():
+    print('Loading test data...')
+
+    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)
+    # RewriterConfig is imported locally in train(), so import it here as well for a test-only run
+    from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+    sess_config = tf.ConfigProto()
+    custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = "NpuOptimizer"
+    sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # remapping must be explicitly disabled
+    #custom_op.parameter_map["dynamic_input"].b = True
+    custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
+    #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
+    session = tf.Session(config=sess_config)
+    session.run(tf.global_variables_initializer())
+    saver = tf.train.Saver()
+    saver.restore(sess=session, save_path=save_path)
+    start_time = time.time()
+    print('Testing...')
+    (loss_test, acc_test) = evaluate(session, x_test, y_test)
+    msg = 'Test Loss: {0:>6.2}, Test Acc: {1:>7.2%}'
+    print(msg.format(loss_test, acc_test))
+    batch_size = 256
+    data_len = len(x_test)
+    num_batch = (int(((data_len - 1) / batch_size)) + 1)
+    y_test_cls = np.argmax(y_test, 1)
+    y_pred_cls = np.zeros(shape=len(x_test), dtype=np.int32)
+    for i in range(num_batch):
+        start_id = (i * batch_size)
+        end_id = min(((i + 1) * batch_size), data_len)
+        feed_dict = {model.input_x: x_test[start_id:end_id], model.keep_prob: 1.0}
+        y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls, feed_dict=feed_dict)
+    print('Precision, Recall and F1-Score...')
+    print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories))
+    print('Confusion Matrix...')
+    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
+    print(cm)
+    time_dif = get_time_dif(start_time)
+    print('Time usage:', time_dif)
+
+if (__name__ == '__main__'):
+    print('Configuring CNN model...')
+    config = TCNNConfig()
+    config.learning_rate = args.learning_rate
+    config.batch_size = args.batch_size
+    config.num_epochs = args.num_epochs
+    config.npu_loss_scale = args.npu_loss_scale
+    if (not os.path.exists(vocab_dir)):
+        build_vocab(train_dir, vocab_dir, config.vocab_size)
+    (categories, cat_to_id) = read_category()
+    (words, word_to_id) = read_vocab(vocab_dir)
+    config.vocab_size = len(words)
+    model = TextCNN(config)
+    if (args.mode == 'train'):
+        train()
+    elif (args.mode == 'test'):
+        test()
+    else:
+        train()
+        test()
\ No newline at end of file
diff --git a/TensorFlow/built-in/nlp/Textcnn_ID0123_For_Tensorflow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/nlp/Textcnn_ID0123_For_Tensorflow/test/train_RT2_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a4e4401a59d591557ec88f7b1467067ab20dd110
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Textcnn_ID0123_For_Tensorflow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+
+#Current path; no modification needed
+cur_path=`pwd`
+
+#Collective communication parameters; no modification needed
+
+export RANK_SIZE=1
+export JOB_ID=10087
+export ENABLE_RUNTIME_V2=1
+RANK_ID_START=0
+
+
+# Dataset path; keep empty here, it is passed in via --data_path
+data_path=""
+
+#Basic parameters; review and modify per model
+#Network name, same as the directory name
+Network="Textcnn_ID0123_For_Tensorflow"
+#Training epochs
+train_epochs=10
+#Training batch size
+batch_size=512
+#Learning rate
+learning_rate=0.001
+
+#Training mode
+mode="train"
+npu_loss_scale=1
+
+#Debug parameters; precision_mode should be reviewed per model
+precision_mode="allow_fp32_to_fp16"
+#Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+#Parse command-line parameters; no modification needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --learning_rate* ]];then
+        learning_rate=`echo ${para#*=}`
+    elif [[ $para == --mode* ]];then
+        mode=`echo ${para#*=}`
+    elif [[ $para == --npu_loss_scale* ]];then
+        npu_loss_scale=`echo ${para#*=}`
+    fi
+done
+
+#Verify that data_path was provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+#Training start time; no modification needed
+start_time=$(date +%s)
+
+#Enter the training script directory; review per model
+cd $cur_path/..
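+
+#RANK_SIZE=1 here, so the loop below launches exactly one training process;
+#the loop form is kept for symmetry with the multi-device test scripts.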
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no modification needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    #Create the DeviceID output directory; no modification needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #Run the training script; the arguments below need no modification, others should be reviewed per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    nohup python3 run_cnn_rt.py \
+        --mode=${mode} \
+        --data_path=${data_path} \
+        --num_epochs=${train_epochs} \
+        --save_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \
+        --learning_rate=${learning_rate} \
+        --precision_mode=${precision_mode} \
+        --over_dump=${over_dump} \
+        --over_dump_path=${over_dump_path} \
+        --data_dump_flag=${data_dump_flag} \
+        --data_dump_step=${data_dump_step} \
+        --data_dump_path=${data_dump_path} \
+        --batch_size=${batch_size} \
+        --profiling=${profiling} \
+        --npu_loss_scale=${npu_loss_scale} \
+        --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#Print results; no modification needed
+echo "------------------ Final result ------------------"
+#Performance FPS, derived from the cumulative "( seconds )" timing values in the log; review per model
+time=(`grep -r "Time: " $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F '(' '{print $NF}' | cut -d ')' -f 1`)
+i=${#time[*]}
+train_time=`echo "${time[i-1]} ${time[1]} $i"|awk '{print ($1-$2)*10/($3-2)}'`
+FPS=`echo "$batch_size $train_time"|awk '{print $1*1000/$2}'`
+#Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+#Output training accuracy; review and modify per model
+train_accuracy=""
+#Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#Performance monitoring result summary
+#Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_RT2_perf'
+
+##Collect performance data; no modification needed
+#Throughput
+ActualFPS=${FPS}
+#Training time per iteration
+TrainingTime=$train_time
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "Train Loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $5}'|cut -d ',' -f 1 >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/contrib/cv/senet_ID0145_for_TensorFlow/run_npu_rt.py b/TensorFlow/contrib/cv/senet_ID0145_for_TensorFlow/run_npu_rt.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cfcc594807c7add49ac396c35e4b536a0602190
--- /dev/null
+++ b/TensorFlow/contrib/cv/senet_ID0145_for_TensorFlow/run_npu_rt.py
@@ -0,0 +1,213 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='MAIN')
+    parser.add_argument('--total_epochs', type=int, default=160, help='total_epochs')
+    parser.add_argument('--test_iteration', type=int, default=10, help='test_iteration')
+    parser.add_argument('--iteration', type=int, default=391, help='iteration')
+    parser.add_argument('--data_path', type=str, help='data_path')
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+
+weight_decay = 0.0001
+momentum = 0.9
+init_learning_rate = 0.01
+batch_size = 128
+iteration = args.iteration
+# 128 * 391 ~ 50,000
+total_epochs = args.total_epochs
+test_iteration = args.test_iteration
+data_dir = args.data_path
+
+
+
+if __name__ == '__main__':
+    def Evaluate(sess):
+        test_acc = 0.0
+        test_loss = 0.0
+        test_pre_index = 0
+        add = 1000
+
+        for it in range(test_iteration):
+            test_batch_x = test_x[test_pre_index: test_pre_index + add]
+            test_batch_y = test_y[test_pre_index: test_pre_index + add]
+            test_pre_index = test_pre_index + add
+
+            test_feed_dict = {
+                x: test_batch_x,
+                label: test_batch_y,
+                learning_rate: epoch_learning_rate,
+                training_flag: False
+            }
+
+            loss_, acc_ = sess.run([cost, accuracy], feed_dict=test_feed_dict)
+
+            test_loss += loss_
+            test_acc += acc_
+
+        test_loss /= test_iteration  # average loss
+        test_acc /= test_iteration  # average accuracy
+
+        summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
+                                    tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
+
+        return test_acc, test_loss, summary
+
+    import os
+    from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+    from seresnetv2 import seresnet_v2
+    from cifar10 import *
+    import tensorflow as tf
+    from npu_bridge.npu_init import *
+    config = tf.ConfigProto()
+    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = "NpuOptimizer"
+    custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
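+    # allow_mix_precision lets the Ascend graph compiler automatically run
+    # selected FP32 ops in FP16 while keeping precision-sensitive ops in FP32.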
+    #custom_op.parameter_map["dynamic_input"].b = True
+    #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
+    custom_op.parameter_map["use_off_line"].b = True  # must be enabled explicitly to run training on the Ascend AI processor
+    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF  # remapping must be explicitly disabled
+    config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
+
+    # Overflow detection (disabled by default)
+    #custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes("/home/HwHiAiUser/output")
+    #custom_op.parameter_map["enable_dump_debug"].b = True
+    #custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all")
+
+    train_x, train_y, test_x, test_y = prepare_data()
+    train_x, test_x = color_preprocessing(train_x, test_x)
+
+    # image_size = 32, img_channels = 3, class_num = 10 in cifar10
+    x = tf.compat.v1.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels])
+    label = tf.compat.v1.placeholder(tf.float32, shape=[None, class_num])
+    training_flag = tf.placeholder(tf.bool)
+    learning_rate = tf.placeholder(tf.float32, name='learning_rate')
+
+    # Model
+    logits = seresnet_v2(x, 110)
+
+    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits))
+    l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
+
+    # Loss scale
+    loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2**32, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+    opt_tmp = npu_tf_optimizer(
+        tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True))
+    optimizer = NPULossScaleOptimizer(opt_tmp, loss_scale_manager)
+
+    #optimizer = npu_tf_optimizer(
+    #    tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True))
+
+    train = optimizer.minimize(cost + l2_loss * weight_decay)
+    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
+    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+    saver = tf.train.Saver(tf.global_variables())
+
+
+    with tf.Session(config=config) as sess:
+        ckpt = tf.train.get_checkpoint_state('./model')
+        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
+            saver.restore(sess, ckpt.model_checkpoint_path)
+        else:
+            sess.run(tf.global_variables_initializer())
+
+        summary_writer = tf.summary.FileWriter('./logs', sess.graph)
+
+        epoch_learning_rate = init_learning_rate
+        for epoch in range(1, total_epochs + 1):
+            # Step schedule: ramp to 0.1 after the first epoch, then decay at epochs 80 and 120
+            if epoch == 2:
+                epoch_learning_rate = 0.1
+            if epoch == 80:
+                epoch_learning_rate = 0.01
+            if epoch == 120:
+                epoch_learning_rate = 0.001
+
+            pre_index = 0
+            train_acc = 0.0
+            train_loss = 0.0
+
+            for step in range(1, iteration + 1):
+                start = time.time()
+                if pre_index + batch_size < 50000:
+                    batch_x = train_x[pre_index: pre_index + batch_size]
+                    batch_y = train_y[pre_index: pre_index + batch_size]
+                else:
+                    batch_x = train_x[pre_index:]
+                    batch_y = train_y[pre_index:]
+
+                batch_x = data_augmentation(batch_x)
+
+                train_feed_dict = {
+                    x: batch_x,
+                    label: batch_y,
+                    learning_rate: epoch_learning_rate,
+                    training_flag: True
+                }
+
+                _, batch_loss = sess.run([train, cost], feed_dict=train_feed_dict)
+                batch_acc = accuracy.eval(feed_dict=train_feed_dict)
+                step_time = time.time() - start
+                train_loss += batch_loss
+                train_acc += batch_acc
+                pre_index += batch_size
+                # Each raw step time goes to stdout; the perf shell script greps these values
+                print(step_time)
+            train_loss /= iteration  # average loss
+            train_acc /= iteration  # average accuracy
+
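+            # Log the epoch-averaged loss/accuracy to TensorBoard via the summary writer below.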
+            train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss),
+                                              tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)])
+
+            #test_acc, test_loss, test_summary = Evaluate(sess)
+
+            summary_writer.add_summary(summary=train_summary, global_step=epoch)
+            #summary_writer.add_summary(summary=test_summary, global_step=epoch)
+            summary_writer.flush()
+
+            line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f" % (
+                epoch, total_epochs, train_loss, train_acc)
+            print(line)
+
+            with open('logs.txt', 'a') as f:
+                f.write(line + '\n')
+
+            saver.save(sess=sess, save_path='model/senet110.ckpt')
+
+
+
diff --git a/TensorFlow/contrib/cv/senet_ID0145_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/contrib/cv/senet_ID0145_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d9b49b4d682f0d3a81bc9fe44a1d616d7fea5274
--- /dev/null
+++ b/TensorFlow/contrib/cv/senet_ID0145_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+
+export RANK_SIZE=1
+export JOB_ID=10087
+export RANK_ID_START=0
+export ENABLE_RUNTIME_V2=1
+
+cur_path=`pwd`
+data_path=''
+ckpt_path=''
+Network='senet_ID0145_for_TensorFlow'
+batch_size=128
+total_epochs=1
+test_iteration=1
+iteration=10
+# train_performance_1p.sh perf
+# train_full_1p.sh acc
+CaseName="${Network}_bs${batch_size}_${RANK_SIZE}p_RT2_perf"
+
+
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+        echo "${data_path}"
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+        echo "${ckpt_path}"
+    elif [[ $para == --total_epochs* ]];then
+        total_epochs=`echo ${para#*=}`
+        echo "${total_epochs}"
+    elif [[ $para == --test_iteration* ]];then
+        test_iteration=`echo ${para#*=}`
+        echo "${test_iteration}"
+    fi
+done
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+cd $cur_path/../
+# START
+start_time=$(date +%s)
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt
+    else
+        mkdir -p ${cur_path}/output/${ASCEND_DEVICE_ID}/ckpt
+    fi
+    nohup python3 run_npu_rt.py \
+        --data_path=${data_path}/cifar-10-batches-py \
+        --iteration=${iteration} \
+        --total_epochs=${total_epochs} \
+        --test_iteration=${test_iteration} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+    # wait
+    # nohup python3 eval.py \
+    # --data_path=${data_path}/cifar-10-batches-py >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+end_time=$(date +%s)
+e2e_time=$(( ${end_time} - ${start_time} ))
+
+
+echo "------------------ Final result ------------------"
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+# getFPS
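+#run_npu_rt.py prints one raw step time per training step; the last line starting
+#with "0." is taken as seconds/step (this assumes each step takes under one second).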
+sec_per_step=`grep '^0.' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | tail -n 1`
+FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${sec_per_step}'}'`
+ActualFPS=${FPS}
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'`
+# getAcc
+# train_accuracy=`grep 'test_acc' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F'test_acc: ' 'END{print $2}'`
+train_accuracy='None'
+# getLoss
+grep train_loss ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F'train_loss: ' '{print $2}' | awk -F',' '{print $1}' > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt
+ActualLoss=`awk 'END {print}' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt`
+echo "Final Performance images/sec : ${FPS}"
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : ${e2e_time}"
+
+
+echo "Network = ${Network}" > ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
diff --git a/TensorFlow2/built-in/cv/detection/Swin-Transformer_ID2374_for_TensorFlow2.X/test/train_RT2_performance_1p.sh b/TensorFlow2/built-in/cv/detection/Swin-Transformer_ID2374_for_TensorFlow2.X/test/train_RT2_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..73abdafd3b9db6f989b7afd29f682d93f6d25112
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/Swin-Transformer_ID2374_for_TensorFlow2.X/test/train_RT2_performance_1p.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+export ENABLE_RUNTIME_V2=1
+cur_path=`pwd`/../
+#Print failing-case logs to stdout
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+# export HYBRID_PROFILING_LEVEL=1
+#Basic parameters; review and modify per model
+#Batch Size
+batch_size=128
+#Network name, same as the directory name
+Network="Swin-Transformer_ID2374_for_TensorFlow2.X"
+#Number of devices; 1 by default for single-card
+RankSize=1
+#Training epochs (optional)
+train_epochs=3
+#Training steps
+train_steps=
+#Learning rate
+learning_rate=0.01
+
+############Debug parameters##############
+precision_mode="allow_mix_precision"
+#Fixed parameters; no modification needed below
+over_dump=False
+if [[ $over_dump == True ]];then
+    over_dump_path=$cur_path/test/overflow_dump #cur_path here is the code root directory
+    mkdir -p ${over_dump_path}
+fi
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+use_mixlist=False
+mixlist_file="./configs/ops_info.json"
+fusion_off_flag=False
+fusion_off_file="./configs/fusion_switch.cfg"
+############Debug parameters##############
+
+#Parameter configuration
+data_path=""
+
+if [[ $1 == --help || $1 == --h ]];then
+    echo "usage:./train_RT2_performance_1p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
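+
+#Parse command-line overrides of the form --name=value.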
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --use_mixlist* ]];then
+        use_mixlist=`echo ${para#*=}`
+    elif [[ $para == --mixlist_file* ]];then
+        mixlist_file=`echo ${para#*=}`
+    elif [[ $para == --fusion_off_flag* ]];then
+        fusion_off_flag=`echo ${para#*=}`
+    elif [[ $para == --fusion_off_file* ]];then
+        fusion_off_file=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+##############Run training##############
+cd $cur_path
+if [ -d $cur_path/test/output ];then
+    rm -rf $cur_path/test/output/*
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+fi
+wait
+
+#Copy the dataset to the default Keras datasets directory
+cp -r ${data_path}/cifar-100-python /root/.keras/datasets/
+
+start=$(date +%s)
+nohup python3 swin_transformers.py --epochs=${train_epochs} \
+    --precision_mode=${precision_mode} \
+    --over_dump=${over_dump} \
+    --over_dump_path=${over_dump_path} \
+    --data_dump_flag=${data_dump_flag} \
+    --data_dump_step=${data_dump_step} \
+    --data_dump_path=${data_dump_path} \
+    --profiling=${profiling} \
+    --use_mixlist=${use_mixlist} \
+    --fusion_off_flag=${fusion_off_flag} \
+    --mixlist_file=${mixlist_file} \
+    --fusion_off_file=${fusion_off_file} \
+    --profiling_dump_path=${profiling_dump_path} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+wait
+end=$(date +%s)
+e2etime=$(( $end - $start ))
+
+#echo "Final Performance ms/step : $average_perf"
+echo "Final Training Duration sec : $e2etime"
+
+#Print results; no modification needed
+echo "------------------ Final result ------------------"
+#Time per step in ms, parsed from the Keras "352/352" progress lines; review per model
+TrainingTime=`grep "352/352" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F '352/352' '{print $2}'|grep -v 'ETA'|grep 'loss:'|awk 'END {print $4}'|cut -d 'm' -f -1`
+
+
+#Performance monitoring result summary
+#Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RankSize}'p_RT2_perf'
+
+##Collect performance data; no modification needed
+#Throughput
+ActualFPS=`awk 'BEGIN{printf "%.2f\n", '1000'*'${batch_size}'/'${TrainingTime}'}'`
+
+#Get model accuracy
+train_accuracy=`grep "352/352" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F '352/352' '{print $2}'|grep 'loss:'|awk 'END {print $10}'`
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "352/352" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F '352/352' '{print $2}'|grep -v 'ETA'|grep 'loss:'|awk '{print $7}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RankSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/Swin-Transformer_ID2374_for_TensorFlow2.X/test/train_RT2_performance_8p.sh b/TensorFlow2/built-in/cv/detection/Swin-Transformer_ID2374_for_TensorFlow2.X/test/train_RT2_performance_8p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..558a4a602b4eb2e4e5d8f5d9ac8158f77f6a97be
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/Swin-Transformer_ID2374_for_TensorFlow2.X/test/train_RT2_performance_8p.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+export ENABLE_RUNTIME_V2=1
+cur_path=`pwd`/..
+#Print failing-case logs to stdout
+
+#export DUMP_GRAPH_PATH=/home/dump_graph
+#export DUMP_GE_GRAPH=2
+#export DUMP_GRAPH_LEVEL=3
+
+export ASCEND_GLOBAL_LOG_LEVEL=3
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 0
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 1
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 2
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 3
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 4
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 5
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 6
+/usr/local/Ascend/driver/tools/msnpureport -g ERROR -d 7
+
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/test/rank_table_8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+#export ASCEND_DEVICE_ID=1
+#export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning'
+#Basic parameters; review and modify per model
+#Batch Size
+batch_size=1024
+#Network name, same as the directory name
+Network="Swin-Transformer_ID2374_for_TensorFlow2.X"
+#Number of devices; 1 by default for single-card
+#RankSize=1
+#Training epochs (optional)
+train_epochs=5
+#Training steps
+train_steps=
+#Learning rate
+learning_rate=0.01
+
+#Parameter configuration
+data_path="1"
+
+############Debug parameters##############
+precision_mode="allow_mix_precision"
+#Fixed parameters; no modification needed below
+over_dump=False
+if [[ $over_dump == True ]];then
+    over_dump_path=$cur_path/test/overflow_dump #cur_path here is the code root directory
+    mkdir -p ${over_dump_path}
+fi
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+use_mixlist=False
+mixlist_file="./configs/ops_info.json"
+fusion_off_flag=False
+fusion_off_file="./configs/fusion_switch.cfg"
+############Debug parameters##############
+
+if [[ $1 == --help || $1 == --h ]];then
+    echo "usage:./train_RT2_performance_8p.sh <args>"
+    exit 1
+fi
+
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --use_mixlist* ]];then
+        use_mixlist=`echo ${para#*=}`
+    elif [[ $para == --mixlist_file* ]];then
+        mixlist_file=`echo ${para#*=}`
+    elif [[ $para == --fusion_off_flag* ]];then
+        fusion_off_flag=`echo ${para#*=}`
+    elif [[ $para == --fusion_off_file* ]];then
+        fusion_off_file=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+##############Run training##############
+cd $cur_path
+
+#Copy the dataset to the default Keras datasets directory
+cp -r ${data_path}/cifar-100-python /root/.keras/datasets/
+
+start=$(date +%s)
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #Set environment variables; no modification needed
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+    ASCEND_DEVICE_ID=$RANK_ID
+
+    if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt_${learning_rate}
+    else
+        mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt_${learning_rate}
+    fi
+
+#    export DUMP_GRAPH_PATH=test/output/${RANK_ID}/dump_graph_${RANK_ID}
+
+    nohup python3 swin_transformers.py --epochs=${train_epochs} --batch_size=${batch_size} \
+        --rank_size=${RANK_SIZE} \
+        --device_id=${RANK_ID} \
+        --precision_mode=${precision_mode} \
+        --over_dump=${over_dump} \
+        --over_dump_path=${over_dump_path} \
+        --data_dump_flag=${data_dump_flag} \
+        --data_dump_step=${data_dump_step} \
+        --data_dump_path=${data_dump_path} \
+        --profiling=${profiling} \
+        --use_mixlist=${use_mixlist} \
+        --fusion_off_flag=${fusion_off_flag} \
+        --mixlist_file=${mixlist_file} \
+        --fusion_off_file=${fusion_off_file} \
+        --profiling_dump_path=${profiling_dump_path} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+done
+wait
+end=$(date +%s)
+e2etime=$(( $end - $start ))
+
+#echo "Final Performance ms/step : $average_perf"
+echo "Final Training Duration sec : $e2etime"
+
+#Print results; no modification needed
+echo "------------------ Final result ------------------"
+#Time per step in ms, parsed from the Keras "44/44" progress lines; review per model
+TrainingTime=`grep "44/44" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F '44/44' '{print $2}'|grep -v 'ETA'|grep 'loss:'|awk 'END {print $4}'|cut -d 'm' -f -1`
+
+###The fields below are used for smoke-test monitoring
+BatchSize=${batch_size}
+#Device type, obtained automatically
+DeviceType=`uname -m`
+#Case name, generated automatically
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_RT2_perf'
+
+#Throughput
+ActualFPS=`awk 'BEGIN{printf "%.2f\n", '1000'*'${batch_size}'/'${TrainingTime}'}'`
+
+#Get model accuracy
+train_accuracy=`grep "44/44" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F '44/44' '{print $2}'|grep 'loss:'|awk 'END {print $10}'`
+
+#Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "44/44" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F '44/44' '{print $2}'|grep -v 'ETA'|grep 'loss:'|awk '{print $7}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#Loss of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#Print key information into ${CaseName}.log; no modification needed
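+#The first field is written with '>' to create/truncate ${CaseName}.log; the rest append with '>>'.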
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log