From 8e653e9f4ab3730857580d0e7b0a149385710b08 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 17:23:20 +0800 Subject: [PATCH 1/5] =?UTF-8?q?OpenPose=5FID0117=5Ffor=5FTensorFlow?= =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2.0=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Action/training/train_rt.py | 268 ++++++++++++++++++ .../test/train_RT2_performance_1p.sh | 205 ++++++++++++++ 2 files changed, 473 insertions(+) create mode 100644 TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py create mode 100644 TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py new file mode 100644 index 000000000..ecb86722b --- /dev/null +++ b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py @@ -0,0 +1,268 @@ +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
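+#
+# train_rt.py (RT2.0 variant): trains the frame-wise action-recognition classifier
+# for OpenPose skeleton keypoints on an Ascend NPU. The script loads 36-dimensional
+# keypoint features from data_with_scene.csv, builds a small Dense/BatchNormalization
+# Keras network with four softmax outputs (stand/walk/operate/fall_down), binds a
+# tf.Session configured with the NpuOptimizer to the Keras backend, and saves the
+# trained weights under --modeldir.
+import os  # os.makedirs() is used when saving checkpoints; imported explicitly here
+           # rather than relying on names re-exported by the npu_bridge wildcard import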
+from npu_bridge.npu_init import * +import pandas as pd +from enum import Enum +import numpy as np +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from keras.utils import np_utils +from keras.models import Sequential +from keras.layers import Dense, Dropout +from keras.layers.normalization import BatchNormalization +from keras.optimizers import Adam +from keras.models import load_model + +import matplotlib.pyplot as plt +from keras.callbacks import Callback +import itertools +from sklearn.metrics import confusion_matrix +#*****npu modify begin***** +import tensorflow as tf +from keras import backend as K +from npu_bridge.npu_init import * + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('--data_path', dest='data_path', default='./data/', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_mix_precision', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--autotune', dest='autotune', default='False', help='whether to enable autotune, default is False') + +parser.add_argument('--train_epoch', dest='train_epoch', type=int, default=2000, help='# of step for training') +parser.add_argument('--modeldir', dest='modeldir', default='./ckpt', help='ckpt dir') +parser.add_argument('--learning_rate', dest='learning_rate', type=float, default=0.0001, help='learning rate') +parser.add_argument('--batch_size', dest='batch_size', type=int, default=64, help='# images in batch') + +parser.add_argument("--dynamic_input", type=str, default='1', help="--dynamic_input=1 Use fuzzy compilation. 
--dynamic_input=lazy_recompile Compile using lazy static graph") +args = parser.parse_args() + + +def npu_keras_optimizer(opt): + npu_opt = KerasDistributeOptimizer(opt) + return npu_opt +#*****npu modify end***** + +class Actions(Enum): + # framewise_recognition.h5 + # squat = 0 + # stand = 1 + # walk = 2 + # wave = 3 + + # framewise_recognition_under_scene.h5 + stand = 0 + walk = 1 + operate = 2 + fall_down = 3 + # run = 4 + + +# Callback class to visialize training progress +class LossHistory(Callback): + def on_train_begin(self, logs={}): + self.losses = {'batch':[], 'epoch':[]} + self.accuracy = {'batch':[], 'epoch':[]} + self.val_loss = {'batch':[], 'epoch':[]} + self.val_acc = {'batch':[], 'epoch':[]} + + def on_batch_end(self, batch, logs={}): + self.losses['batch'].append(logs.get('loss')) + self.accuracy['batch'].append(logs.get('acc')) + self.val_loss['batch'].append(logs.get('val_loss')) + self.val_acc['batch'].append(logs.get('val_acc')) + + def on_epoch_end(self, batch, logs={}): + self.losses['epoch'].append(logs.get('loss')) + self.accuracy['epoch'].append(logs.get('acc')) + self.val_loss['epoch'].append(logs.get('val_loss')) + self.val_acc['epoch'].append(logs.get('val_acc')) + + def loss_plot(self, loss_type): + iters = range(len(self.losses[loss_type])) + plt.figure() + # acc + plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc') + # loss + plt.plot(iters, self.losses[loss_type], 'g', label='train loss') + if loss_type == 'epoch': + # val_acc + plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc') + # val_loss + plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss') + plt.grid(True) + plt.xlabel(loss_type) + plt.ylabel('acc-loss') + plt.legend(loc="upper right") + plt.show() + + +def plot_confusion_matrix(cm, classes, + normalize=False, + title='Confusion matrix', + cmap=plt.cm.Blues): + """ + This function prints and plots the confusion matrix. + Normalization can be applied by setting `normalize=True`. + """ + if normalize: + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + print("Normalized confusion matrix") + else: + print('Confusion matrix, without normalization') + + print(cm) + + plt.imshow(cm, interpolation='nearest', cmap=cmap) + plt.title(title) + plt.colorbar() + tick_marks = np.arange(len(classes)) + plt.xticks(tick_marks, classes, rotation=45) + plt.yticks(tick_marks, classes) + + fmt = '.2f' if normalize else 'd' + thresh = cm.max() / 2. 
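+    # Annotate each cell of the confusion matrix with its value; use white text on
+    # cells darker than half of the maximum so the labels stay readable.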
+ for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): + plt.text(j, i, format(cm[i, j], fmt), + horizontalalignment="center", + color="white" if cm[i, j] > thresh else "black") + + plt.tight_layout() + plt.ylabel('True label') + plt.xlabel('Predicted label') + + +# load data +datapath = ("%s/data_with_scene.csv" %(args.data_path)) +raw_data = pd.read_csv(datapath, header=0) +dataset = raw_data.values +# X = dataset[:, 0:36].astype(float) +# Y = dataset[:, 36] +X = dataset[0:3289, 0:36].astype(float) # 忽略run数据 +Y = dataset[0:3289, 36] + +# 将类别编码为数字 +# encoder = LabelEncoder() +# encoder_Y = encoder.fit_transform(Y) +# print(encoder_Y[0], encoder_Y[900], encoder_Y[1800], encoder_Y[2700]) +# encoder_Y = [0]*744 + [1]*722 + [2]*815 + [3]*1008 + [4]*811 +encoder_Y = [0]*744 + [1]*722 + [2]*815 + [3]*1008 +# one hot 编码 +dummy_Y = np_utils.to_categorical(encoder_Y) + +# train test split +X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_Y, test_size=0.1, random_state=9) + +# build keras model +model = Sequential() +model.add(Dense(units=128, activation='relu')) +model.add(BatchNormalization()) +model.add(Dense(units=64, activation='relu')) +model.add(BatchNormalization()) +model.add(Dense(units=16, activation='relu')) +model.add(BatchNormalization()) +model.add(Dense(units=4, activation='softmax')) # units = nums of classes + +# training +#*****npu modify begin***** +sess_config = tf.ConfigProto() +custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() +custom_op.name = "NpuOptimizer" +#custom_op.parameter_map["dynamic_input"].b = True +#if args.dynamic_input == "lazy_recompile": +# custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +#elif args.dynamic_input == "1": +# custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("dynamic_execute") +#else: +# print("Enter correct compilation parameters.") +#custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode) +if args.data_dump_flag.strip() == "True": + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.data_dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(args.data_dump_step) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") +if args.over_dump.strip() == "True": + # dump_path:dump数据存放路径,该参数指定的目录需要在启动训练的环境上(容器或Host侧)提前创建且确保安装时配置的运行用户具有读写权限 + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.over_dump_path) + # enable_dump_debug:是否开启溢出检测功能 + custom_op.parameter_map["enable_dump_debug"].b = True + # dump_debug_mode:溢出检测模式,取值:all/aicore_overflow/atomic_overflow + custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all") +if args.profiling.strip() == "True": + custom_op.parameter_map["profiling_mode"].b = False + profilingvalue = ( + '{"output":"%s","training_trace":"on","task_trace":"on","aicpu":"on","fp_point":"","bp_point":""}' % ( + args.profiling_dump_path)) + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profilingvalue) +sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF +sess = tf.Session(config=sess_config) +K.set_session(sess) +#*****npu modify end***** + +his = LossHistory() +model.compile(loss='categorical_crossentropy', optimizer=Adam(args.learning_rate), metrics=['accuracy']) +model.fit(X_train, Y_train, 
batch_size=args.batch_size, epochs=args.train_epoch, verbose=1, validation_data=(X_test, Y_test), callbacks=[his]) +model.summary() +his.loss_plot('epoch') + +#*****npu modify begin***** +print('====save model====') +os.makedirs(args.modeldir, exist_ok=True) +ckptparams = ("%s/model_weights.h5" %(args.modeldir)) +ckptall = ("%s/model.h5" %(args.modeldir)) +model.save_weights(ckptparams) +model.save(ckptall) +sess.close() +#*****npu modify end***** + +# model.save('framewise_recognition.h5') + +# # evaluate and draw confusion matrix +# print('Test:') +# score, accuracy = model.evaluate(X_test,Y_test,batch_size=32) +# print('Test Score:{:.3}'.format(score)) +# print('Test accuracy:{:.3}'.format(accuracy)) +# # confusion matrix +# Y_pred = model.predict(X_test) +# cfm = confusion_matrix(np.argmax(Y_test,axis=1), np.argmax(Y_pred, axis=1)) +# np.set_printoptions(precision=2) +# +# plt.figure() +# class_names = ['squat', 'stand', 'walk', 'wave'] +# plot_confusion_matrix(cfm, classes=class_names, title='Confusion Matrix') +# plt.show() + +# # test +# model = load_model('framewise_recognition.h5') +# +# test_input = [0.43, 0.46, 0.43, 0.52, 0.4, 0.52, 0.39, 0.61, 0.4, +# 0.67, 0.46, 0.52, 0.46, 0.61, 0.46, 0.67, 0.42, 0.67, +# 0.42, 0.81, 0.43, 0.91, 0.45, 0.67, 0.45, 0.81, 0.45, +# 0.91, 0.42, 0.44, 0.43, 0.44, 0.42, 0.46, 0.44, 0.46] +# test_np = np.array(test_input) +# test_np = test_np.reshape(-1, 36) +# +# test_np = np.array(X[1033]).reshape(-1, 36) +# if test_np.size > 0: +# pred = np.argmax(model.predict(test_np)) +# init_label = Actions(pred).name +# print(init_label) diff --git a/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..b9c791e81 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="OpenPose_ID0117_for_TensorFlow" +#训练step +train_epoch=20 +#训练batch_size +batch_size=32 +#学习率 +learning_rate=0.0001 +#动态输入模式,不需要修改 +dynamic_input="" + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + --train_epoch # of epoch for training + --learning_rate learning rate + --batch batch size + --modeldir model dir + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == 
--data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --train_epoch* ]];then + train_epoch=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --batch* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --modeldir* ]];then + modeldir=`echo ${para#*=}` + elif [[ $para == --dynamic_input* ]];then + dynamic_input=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + + +#############执行训练######################### +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_path, --model_dir, --precision_mode, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune + nohup python3 ${cur_path}/../Action/training/train_rt.py \ + --train_epoch=${train_epoch} \ + --learning_rate=${learning_rate} \ + --data_path=${data_path} \ + --modeldir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} \ + --dynamic_input=${dynamic_input} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +if [ $? 
-ne 0 ];then + exit 1 +fi +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +stepvalue=(`grep -r "/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F - 'END {print $2}' | awk -F / '{print $1}'`) + +function strindex() { + x="${1%%$2*}" + if [[ $x = $1 ]];then + echo -1 + else + echo ${#x} + return ${#x} + fi +} +index=$(strindex "${stepvalue[0]}" "s") +second=${stepvalue[0]:0:$index} +uindex=$(strindex "${stepvalue[1]}" "us") +usecond=${stepvalue[1]:0:$uindex} +step_sec=$(awk 'BEGIN{printf "%.4f\n",('$usecond'/'1000')}') +FPS=`awk 'BEGIN {printf "%.2f\n", '1000'*'${batch_size}'/'${step_sec}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=(`grep -r "/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F : 'END {print $3}' | awk '{print $1}'`) +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=${step_sec} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F : '{print $2}' | awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DynamicInput = ${dynamic_input}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 121221c55a9d94d61e88cde80e89a68320a06093 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:04:06 +0800 Subject: [PATCH 2/5] =?UTF-8?q?FastText=5FID0135=5Ffor=5FTensorFlow?= =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2.0=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../FastText_ID0135_for_TensorFlow/main_rt.py | 114 ++++++ .../test/train_RT2_performance_1p.sh | 186 +++++++++ .../word_embeddings_rt.py | 352 ++++++++++++++++++ 3 files changed, 652 insertions(+) create mode 100644 TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py create mode 100644 TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py diff --git a/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py 
b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py new file mode 100644 index 000000000..66213b733 --- /dev/null +++ b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py @@ -0,0 +1,114 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import numpy as np +from sklearn.model_selection import train_test_split +from word_embeddings_rt import load_data,prepare_data_for_word_vectors,building_word_vector_model,\ +classification_model,padding_input,prepare_data_for_word_vectors_imdb,ELMoEmbedding,data_prep_ELMo,Classification_model_with_ELMo +import argparse + +parser = argparse.ArgumentParser(description='') +parser.add_argument('--data_path', dest='data_path', default='/root/.keras/datasets/', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_mix_precision', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--lr', dest='lr', type=float, default=0.01, help='initial learning rate for adam') +parser.add_argument('--loss_scale', dest='loss_scale', default='True', help='enable loss scale ,default is True') +parser.add_argument('--epoch', dest='epoch', type=int, default=200, help='# of epoch') +parser.add_argument('--batch_size', dest='batch_size', type=int, default=1, help='# images in batch') +args = parser.parse_args() +def json_to_dict(json_set): + for k,v in json_set.items(): + if v == "True": + json_set[k]= True + elif v == "False": + json_set[k]=False + else: + json_set[k]=v + return json_set + +with 
open("config.json","r") as f: + params_set = json.load(f) +params_set = json_to_dict(params_set) + + +with open("model_params.json", "r") as f: + model_params = json.load(f) +model_params = json_to_dict(model_params) + +''' + load_data function works on imdb data. In order to load your data, comment line 27 and pass your data in the form of X,y + X = text data column + y = label column(0,1 etc) + +''' +# for imdb data +if params_set["option"]in [0,1,2]: + x_train,x_test,y_train,y_test = load_data(args.data_path, params_set["vocab_size"],params_set["max_len"]) + sentences,word_ix = prepare_data_for_word_vectors_imdb(args.data_path, x_train) + model_wv = building_word_vector_model(params_set["option"],sentences,params_set["embed_dim"], + params_set["workers"],params_set["window"],y_train) + + # for other data: + # put your data in the form of X,y + ''' + X = ["this is a sentence","this is another sentence by me","yet another sentence for training","one more again"] + y=np.array([0,1,1,0]) + + sentences_as_words,sentences,word_ix = prepare_data_for_word_vectors(X) + print("sentences loaded") + model_wv = building_word_vector_model(params_set["option"],sentences,params_set["embed_dim"], + params_set["workers"],params_set["window"],y) + + + print("word vector model built") + x_train, x_test, y_train, y_test = train_test_split(sentences, y, test_size=params_set["split_ratio"], random_state=42) + print("Data split done") + ''' + x_train_pad,x_test_pad = padding_input(x_train,x_test,params_set["max_len"]) + + model = classification_model(args,params_set["embed_dim"],x_train_pad,x_test_pad,y_train,y_test, + params_set["vocab_size"],word_ix,model_wv, + params_set["trainable_param"], + params_set["option"]) + print(model.summary()) + +else: + x_train,x_test,y_train,y_test = load_data(args.data_path, params_set["vocab_size"],params_set["max_len"]) + + train_text,train_label,test_text,test_label = data_prep_ELMo(x_train,y_train,x_test,y_test,params_set["max_len"]) + + model = Classification_model_with_ELMo(train_text,train_label,test_text,test_label) + print(model.summary()) diff --git a/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..c85d1c4fb --- /dev/null +++ b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FastText_ID0135_for_TensorFlow" +#训练epoch +train_epochs=10 +#训练step +train_steps=25000 +#训练batch_size +batch_size=1024 +#学习率 +learning_rate=0.01 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +loss_scale=True + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + --max_step # 
of step for training + --learning_rate learning rate + --batch batch size + --modeldir model dir + --save_interval save interval for ckpt + --loss_scale enable loss scale ,default is False + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --max_step* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --batch* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --modeldir* ]];then + modeldir=`echo ${para#*=}` + elif [[ $para == --save_interval* ]];then + save_interval=`echo ${para#*=}` + elif [[ $para == --loss_scale* ]];then + loss_scale=`echo ${para#*=}` + elif [[ $para == --epoch* ]];then + train_epochs=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +#############执行训练######################### +#训练开始时间,不需要修改 +start_time=$(date +%s) +cd $cur_path/../ +#进入训练脚本目录,需要模型审视修改 +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_path, --model_dir, --precision_mode, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune + nohup python3 main_rt.py \ + --epoch=${train_epochs} \ + --data_path=${data_path} \ + --precision_mode=${precision_mode} \ + --loss_scale=${loss_scale} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch_size=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +if [ $? 
-ne 0 ];then + exit 1 +fi +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep "us/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "/" 'END {print $2}' | awk -F " " '{print $5}' | awk -F "us" '{print $1}'` +FPS=`awk 'BEGIN {printf "%.2f\n", '1000'*'1000'*'${batch_size}'/'${TrainingTime}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "Accuracy:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F " " '{print $2}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss:" '{print $2}' | awk -F " " '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py new file mode 100644 index 000000000..0b87e8a0a --- /dev/null +++ b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py @@ -0,0 +1,352 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from npu_bridge.npu_init import * +import json +import fasttext +import numpy as np +import pandas as pd +import tensorflow as tf +import tensorflow_hub as hub +import keras.layers as layers +from keras.models import Model +from keras.datasets import imdb +from gensim.models import Word2Vec +from gensim.models import FastText +from keras.preprocessing.text import Tokenizer +from keras.preprocessing.sequence import pad_sequences +from keras.layers import Input,Embedding,Dense,Flatten +from sklearn.metrics import accuracy_score,classification_report + +#import tensorflow as tf +import tensorflow.python.keras as keras +#from tensorflow.python.keras import backend as K +from keras import backend as K +from npu_bridge.npu_init import * + +def npu_keras_optimizer(opt): + npu_opt = KerasDistributeOptimizer(opt) + return npu_opt + +def json_to_dict(json_set): + for k,v in json_set.items(): + if v == "True": + json_set[k]= True + elif v == "False": + json_set[k]=False + else: + json_set[k]=v + return json_set + + +with open("model_params.json", "r") as f: + model_params = json.load(f) +model_params = json_to_dict(model_params) + + +def load_data(datapath,vocab_size,max_len): + """ + Loads the keras imdb dataset + + Args: + vocab_size = {int} the size of the vocabulary + max_len = {int} the maximum length of input considered for padding + + Returns: + X_train = tokenized train data + X_test = tokenized test data + + """ + INDEX_FROM = 3 + data_path = ('%s/keras/datasets/imdb.npz' % (datapath)) + (X_train,y_train),(X_test,y_test) = imdb.load_data(path = data_path, num_words = vocab_size,index_from = INDEX_FROM) + + return X_train,X_test,y_train,y_test + + +def prepare_data_for_word_vectors_imdb(data_path, X_train): + """ + Prepares the input + + Args: + X_train = tokenized train data + + Returns: + sentences = {list} sentences containing words as tokens + word_index = {dict} word and its indexes in whole of imdb corpus + + """ + INDEX_FROM = 3 + wordpath = ('%s/keras/datasets/imdb_word_index.json' % (data_path)) + word_to_index = imdb.get_word_index(path = wordpath) + word_to_index = {k:(v+INDEX_FROM) for k,v in word_to_index.items()} + + word_to_index[""] =1 + word_to_index[""]=2 + + index_to_word = {v:k for k,v in word_to_index.items()} + + sentences = [] + for i in range(len(X_train)): + temp = [index_to_word[ids] for ids in X_train[i]] + sentences.append(temp) + """ + tokenizer = Tokenizer() + tokenizer.fit_on_texts(sentences) + word_indexes = tokenizer.word_index + """ + return sentences,word_to_index + + +def prepare_data_for_word_vectors(X): + sentences_as_words=[] + word_to_index={} + count=1 + for sent in X: + temp = sent.split() + sentences_as_words.append(temp) + for sent in sentences_as_words: + for word in sent: + if word_to_index.get(word,None) is None: + word_to_index[word] = count + count +=1 + index_to_word = {v:k for k,v in word_to_index.items()} + sentences=[] + for i in range(len(sentences_as_words)): + temp = [word_to_index[w] for w in sentences_as_words[i]] + sentences.append(temp) + + + return sentences_as_words,sentences,word_to_index + +def 
data_prep_ELMo(train_x,train_y,test_x,test_y,max_len): + + INDEX_FROM = 3 + word_to_index = imdb.get_word_index() + word_to_index = {k:(v+INDEX_FROM) for k,v in word_to_index.items()} + + word_to_index[""] =1 + word_to_index[""]=2 + + index_to_word = {v:k for k,v in word_to_index.items()} + + sentences=[] + for i in range(len(train_x)): + temp = [index_to_word[ids] for ids in train_x[i]] + sentences.append(temp) + + test_sentences=[] + for i in range(len(test_x)): + temp = [index_to_word[ids] for ids in test_x[i]] + test_sentences.append(temp) + + train_text = [' '.join(sentences[i][:max_len]) for i in range(len(sentences))] + train_text = np.array(train_text, dtype=object)[:, np.newaxis] + train_label = train_y.tolist() + + test_text = [' '.join(test_sentences[i][:500]) for i in range(len(test_sentences))] + test_text = np.array(test_text , dtype=object)[:, np.newaxis] + test_label = test_y.tolist() + + return train_text,train_label,test_text,test_label + + +def building_word_vector_model(option,sentences,embed_dim,workers,window,y_train): + """ + Builds the word vector + + Args: + type = {bool} 0 for Word2vec. 1 for gensim Fastext. 2 for Fasttext 2018. + sentences = {list} list of tokenized words + embed_dim = {int} embedding dimension of the word vectors + workers = {int} no. of worker threads to train the model (faster training with multicore machines) + window = {int} max distance between current and predicted word + y_train = y_train + + Returns: + model = Word2vec/Gensim fastText/ Fastext_2018 model trained on the training corpus + + + """ + if option == 0: + print("Training a word2vec model") + model = Word2Vec(sentences=sentences, size = embed_dim, workers = workers, window = window) + print("Training complete") + + elif option == 1: + print("Training a Gensim FastText model") + model = FastText(sentences=sentences, size = embed_dim, workers = workers, window = window) + print("Training complete") + + elif option == 2: + print("Training a Fasttext model from Facebook Research") + y_train = ["__label__positive" if i==1 else "__label__negative" for i in y_train] + + with open("imdb_train.txt","w") as text_file: + for i in range(len(sentences)): + print(sentences[i],y_train[i],file = text_file) + + model = fasttext.skipgram("imdb_train.txt","model_ft_2018_imdb",dim = embed_dim) + print("Training complete") + + return model + +def padding_input(X_train,X_test,maxlen): + """ + Pads the input upto considered max length + + Args: + X_train = tokenized train data + X_test = tokenized test data + + Returns: + X_train_pad = padded tokenized train data + X_test_pad = padded tokenized test data + + """ + + X_train_pad = pad_sequences(X_train,maxlen=maxlen,padding="post") + + X_test_pad = pad_sequences(X_test,maxlen=maxlen,padding="post") + + return X_train_pad,X_test_pad + + +def ELMoEmbedding(x): + elmo_model = hub.Module("https://tfhub.dev/google/elmo/1", trainable=True) + return elmo_model(tf.squeeze(tf.cast(x, tf.string)), signature="default", as_dict=True)["default"] + + +def classification_model(args,embed_dim,X_train_pad,X_test_pad,y_train,y_test,vocab_size,word_index,w2vmodel, + trainable_param,option): + """ + Builds the classification model for sentiment analysis + + Args: + embded_dim = {int} dimension of the word vectors + X_train_pad = padded tokenized train data + X_test_pad = padded tokenized test data + vocab_size = {int} size of the vocabulary + word_index = {dict} word and its indexes in whole of imdb corpus + w2vmodel = Word2Vec model + trainable_param = {bool} whether to 
train the word embeddings in the Embedding layer + option = {int} choice of word embedding + """ + sess_config = tf.ConfigProto() + custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + # custom_op.parameter_map["dynamic_input"].b = True + # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode) + sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + sess_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + if args.data_dump_flag.strip()=="True": + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.data_dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(args.data_dump_step) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + if args.over_dump.strip()=="True": + # dump_path:dump数据存放路径,该参数指定的目录需要在启动训练的环境上(容器或Host侧)提前创建且确保安装时配置的运行用户具有读写权限 + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.over_dump_path) + # enable_dump_debug:是否开启溢出检测功能 + custom_op.parameter_map["enable_dump_debug"].b = True + # dump_debug_mode:溢出检测模式,取值:all/aicore_overflow/atomic_overflow + custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all") + if args.profiling.strip()=="True": + custom_op.parameter_map["profiling_mode"].b = False + profilingvalue=('{"output":"%s","training_trace":"on","task_trace":"on","aicpu":"on","fp_point":"","bp_point":""}' %(args.profiling_dump_path)) + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profilingvalue) + + sess = tf.Session(config=sess_config) + K.set_session(sess) + + embedding_matrix = np.zeros((vocab_size,embed_dim)) + for word, i in word_index.items(): + try: + embedding_vector = w2vmodel[word] + except: + pass + try: + if embedding_vector is not None: + embedding_matrix[i]=embedding_vector + except: + pass + + embed_layer = Embedding(vocab_size,embed_dim,weights =[embedding_matrix],trainable=trainable_param) + + input_seq = Input(shape=(X_train_pad.shape[1],)) + embed_seq = embed_layer(input_seq) + x = Dense(256,activation ="relu")(embed_seq) + x = Flatten()(x) + preds = Dense(1,activation="sigmoid")(x) + + model = Model(input_seq,preds) + + optimizer = tf.train.AdamOptimizer(learning_rate=args.lr) + if args.loss_scale == 'True': + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, + incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, + decr_ratio=0.8) + if int(os.getenv('RANK_SIZE')) == 1: + optimizer = NPULossScaleOptimizer(optimizer, loss_scale_manager) + else: + optimizer = NPULossScaleOptimizer(optimizer, loss_scale_manager, is_distributed=True) + optim = npu_tf_optimizer(optimizer) + + model.compile(loss=model_params["loss"],optimizer=optim,metrics= model_params["metrics"]) + model.fit(X_train_pad,y_train,epochs=args.epoch,batch_size=args.batch_size,validation_data=(X_test_pad,y_test)) + + print('====save model====') + #model.save_weights('./ckpt_gpu/model_weigits.h5') + #model.save('./ckpt_gpu/model.h5') + predictions = model.predict(X_test_pad, batch_size=1) + predictions = [0 if i<0.5 else 1 for i in predictions] + print("Accuracy: ",accuracy_score(y_test,predictions)) + print("Classification Report: ",classification_report(y_test,predictions)) + sess.close() + return model + +def Classification_model_with_ELMo(X_train_pad,y_train,X_test_pad,y_test): + 
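+    # Builds an IMDB sentiment classifier on top of pre-trained ELMo embeddings from
+    # TF-Hub (see ELMoEmbedding above): raw review strings pass through a Lambda
+    # layer that returns the 1024-dim "default" ELMo vector, then Dense(256, relu)
+    # and a sigmoid output. Unlike classification_model(), this path compiles with
+    # the stock "adam" optimizer and does not attach the NPU session config or the
+    # loss-scale optimizer, so it runs with the default Keras session.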
input_text = layers.Input(shape=(1,), dtype=tf.string) + embed_seq = layers.Lambda(ELMoEmbedding, output_shape=(1024,))(input_text) + x = Dense(256,activation ="relu")(embed_seq) + preds = Dense(1,activation="sigmoid")(x) + model = Model(input_text,preds) + + + model.compile(loss="binary_crossentropy",optimizer="adam",metrics=["accuracy"]) + + model.fit(X_train_pad,y_train,epochs=10,batch_size=512,validation_data=(X_test_pad,y_test)) + + predictions = model.predict(X_test_pad) + predictions = [0 if i<0.5 else 1 for i in predictions] + print("Accuracy: ",accuracy_score(y_test,predictions)) + print("Classification Report: ",classification_report(y_test,predictions)) + + return model -- Gitee From ee6b8f7ab13035b0778f3f1ac018c0a93c6b2f6e Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:14:48 +0800 Subject: [PATCH 3/5] =?UTF-8?q?texting=5FID0193=5Ffor=5FTensorFlow?= =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_RT2_performance_1p.sh | 168 +++++++++++++ .../texting_ID0193_for_TensorFlow/train_rt.py | 237 ++++++++++++++++++ 2 files changed, 405 insertions(+) create mode 100644 TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py diff --git a/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..7dd277da3 --- /dev/null +++ b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="texting_ID0193_for_TensorFlow" +#训练epoch +epoch=6 +#训练batch_size +RANK_SIZE=1 +batch_size=256 +#迭代数iteration +iteration=100 +#训练step +#train_steps=100 +#学习率 +#learning_rate=3.96 + +#cp $data_path $cur_path/../datasets/ + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then 
+ data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + #开autotune特有环境变量 + autotune=True + export autotune=True + export REPEAT_TUNE=True + export ASCEND_DEVICE_ID=0 + export ENABLE_TUNE_BANK=True + export TE_PARALLEL_COMPILER=32 + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + export DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + nohup python3 train_rt.py \ + --data_url=$data_path \ + --learning_rate=0.005 \ + --epochs=$epoch \ + --batch_size=256 \ + --hidden=96 \ + > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep "time=" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $14}'` +ActualFPS=`echo "scale=2;${batch_size} / ${TrainingTime}"|bc` + +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +ActualLoss=`grep "train_loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk 'END {print $4}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py new file mode 100644 index 000000000..837d0442d --- /dev/null +++ 
b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py @@ -0,0 +1,237 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import time +import tensorflow as tf +#from sklearn import metrics +#import pickle as pkl + +from utils import * +from models import GNN, MLP + +import npu_bridge +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig + +config = tf.ConfigProto() +custom_op = config.graph_options.rewrite_options.custom_optimizers.add() +custom_op.name = "NpuOptimizer" +custom_op.parameter_map["use_off_line"].b = True +#custom_op.parameter_map["dynamic_input"].b =True +#custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + +# Set random seed +seed = 123 +np.random.seed(seed) +tf.set_random_seed(seed) + +# Settings +flags = tf.app.flags +FLAGS = flags.FLAGS +flags.DEFINE_string('dataset', 'mr', 'Dataset string.') # 'mr','ohsumed','R8','R52' +flags.DEFINE_string('data_url', './data', 'Path to dataset directory.') +flags.DEFINE_string('train_url', './output', 'Path to output directory.') +flags.DEFINE_string('model', 'gnn', 'Model string.') +flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.') +flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.') +flags.DEFINE_integer('batch_size', 1024, 'Size of batches per epoch.') +flags.DEFINE_integer('input_dim', 300, 'Dimension of input.') +flags.DEFINE_integer('hidden', 96, 'Number of units in hidden layer.') # 32, 64, 96, 128 +flags.DEFINE_integer('steps', 2, 'Number of graph layers.') +flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).') +flags.DEFINE_float('weight_decay', 0, 'Weight for L2 loss on embedding matrix.') # 5e-4 +flags.DEFINE_integer('early_stopping', -1, 'Tolerance for early stopping (# of epochs).') +flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.') # Not used + + +# Load data +train_adj, train_feature, train_y, val_adj, val_feature, val_y, test_adj, test_feature, test_y = load_data(FLAGS.dataset, FLAGS.data_url) + +max_length = max([len(i) for i in train_adj] + [len(j) for j in val_adj] + [len(k) for k in 
test_adj]) + +# Some preprocessing +print('loading training set') +train_adj, train_mask = preprocess_adj(train_adj, max_length) +train_feature = preprocess_features(train_feature, max_length) +print('loading validation set') +val_adj, val_mask = preprocess_adj(val_adj, max_length) +val_feature = preprocess_features(val_feature, max_length) +print('loading test set') +test_adj, test_mask = preprocess_adj(test_adj, max_length) +test_feature = preprocess_features(test_feature, max_length) + + +if FLAGS.model == 'gnn': + # support = [preprocess_adj(adj)] + # num_supports = 1 + model_func = GNN +elif FLAGS.model == 'gcn_cheby': # not used + # support = chebyshev_polynomials(adj, FLAGS.max_degree) + num_supports = 1 + FLAGS.max_degree + model_func = GNN +elif FLAGS.model == 'dense': # not used + # support = [preprocess_adj(adj)] + num_supports = 1 + model_func = MLP +else: + raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) + + +# Define placeholders +placeholders = { + 'support': tf.placeholder(tf.float32, shape=(None, max_length, max_length)), + 'features': tf.placeholder(tf.float32, shape=(None, max_length, FLAGS.input_dim)), + 'mask': tf.placeholder(tf.float32, shape=(None, max_length, 1)), + 'labels': tf.placeholder(tf.float32, shape=(None, train_y.shape[1])), + 'dropout': tf.placeholder_with_default(0., shape=()), + 'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout +} + + +# label smoothing +# label_smoothing = 0.1 +# num_classes = y_train.shape[1] +# y_train = (1.0 - label_smoothing) * y_train + label_smoothing / num_classes + + +# Create model +model = model_func(placeholders, input_dim=FLAGS.input_dim, logging=True) + +# merged = tf.summary.merge_all() +# writer = tf.summary.FileWriter('logs/', sess.graph) + +# Define model evaluation function +def evaluate(features, support, mask, labels, placeholders): + t_test = time.time() + feed_dict_val = construct_feed_dict(features, support, mask, labels, placeholders) + outs_val = sess.run([model.loss, model.accuracy, model.embeddings, model.preds, model.labels], feed_dict=feed_dict_val) + return outs_val[0], outs_val[1], (time.time() - t_test), outs_val[2], outs_val[3], outs_val[4] + + +cost_val = [] +best_val = 0 +best_epoch = 0 +best_acc = 0 +best_cost = 0 +test_doc_embeddings = None +preds = None +labels = None +#tf.summary.scalar('loss', model.loss) +#tf.summary.scalar('accuracy', model.accuracy) +#summary_op = tf.summary.merge_all() + +print('train start...') +# Train model +# Initialize session +with tf.Session(config=config) as sess: + # Init variables + init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()) + sess.run(init_op) + #train_writer = tf.summary.FileWriter(logdir=os.path.join(FLAGS.train_url, "train"), graph=sess.graph) + #test_writer = tf.summary.FileWriter(logdir=os.path.join(FLAGS.train_url, "test"), graph=sess.graph) + + for epoch in range(FLAGS.epochs): + t = time.time() + + # Training step + indices = np.arange(0, len(train_y)) + np.random.shuffle(indices) + + train_loss, train_acc = 0, 0 + for start in range(0, len(train_y), FLAGS.batch_size): + end = start + FLAGS.batch_size + idx = indices[start:end] + # Construct feed dictionary + feed_dict = construct_feed_dict(train_feature[idx], train_adj[idx], train_mask[idx], train_y[idx], placeholders) + feed_dict.update({placeholders['dropout']: FLAGS.dropout}) + + outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict) + train_loss += outs[1]*len(idx) + train_acc 
+= outs[2]*len(idx)
+        train_loss /= len(train_y)
+        train_acc /= len(train_y)
+        #train_writer.add_summary(outs[3], epoch)
+
+        # Validation
+        val_cost, val_acc, val_duration, _, _, _ = evaluate(val_feature, val_adj, val_mask, val_y, placeholders)
+        cost_val.append(val_cost)
+
+        # Test
+        test_cost, test_acc, test_duration, embeddings, pred, labels = evaluate(test_feature, test_adj, test_mask, test_y, placeholders)
+        #test_writer.add_summary(summary, epoch)
+
+        #if val_acc >= best_val:
+        #    best_val = val_acc
+        #    best_epoch = epoch
+        best_acc = test_acc
+        best_cost = test_cost
+        test_doc_embeddings = embeddings
+        preds = pred
+        #test_writer.add_summary(summary=summary, global_step=epoch)
+
+        # Print results
+        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_loss),
+              "train_acc=", "{:.5f}".format(train_acc), "val_loss=", "{:.5f}".format(val_cost),
+              "val_acc=", "{:.5f}".format(val_acc), "test_acc=", "{:.5f}".format(test_acc),
+              "time=", "{:.5f}".format(time.time() - t))
+
+        if FLAGS.early_stopping > 0 and epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
+            print("Early stopping...")
+            break
+
+    #train_writer.close()
+    #test_writer.close()
+    print("Optimization Finished!")
+
+    # Best results
+    #print('Best epoch:', best_epoch)
+    print("Test set results:", "cost=", "{:.5f}".format(best_cost),
+          "accuracy=", "{:.5f}".format(best_acc))
+
+    #print("Test Precision, Recall and F1-Score...")
+    #print(metrics.classification_report(labels, preds, digits=4))
+    #print("Macro average Test Precision, Recall and F1-Score...")
+    #print(metrics.precision_recall_fscore_support(labels, preds, average='macro'))
+    #print("Micro average Test Precision, Recall and F1-Score...")
+    #print(metrics.precision_recall_fscore_support(labels, preds, average='micro'))
+
+'''
+# For visualization
+doc_vectors = []
+for i in range(len(test_doc_embeddings)):
+    doc_vector = test_doc_embeddings[i]
+    doc_vector_str = ' '.join([str(x) for x in doc_vector])
+    doc_vectors.append(str(np.argmax(test_y[i])) + ' ' + doc_vector_str)
+
+doc_embeddings_str = '\n'.join(doc_vectors)
+with open('data/' + FLAGS.dataset + '_doc_vectors.txt', 'w') as f:
+    f.write(doc_embeddings_str)
+'''
--
Gitee

From 5cb0d04f0640d40bd4204c8989d5324f2544497a Mon Sep 17 00:00:00 2001
From: ykxia
Date: Mon, 21 Nov 2022 19:23:44 +0800
Subject: [PATCH 4/5] =?UTF-8?q?Oct-ResNet=5FID0251=5Ffor=5FTensorFlow?=
 =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2=E4=BA=8C=E8=BF=9B=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_RT2_performance_1p.sh | 191 ++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh

diff --git a/TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..892421fb5
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+# Enable RT2.0
+export ENABLE_RUNTIME_V2=1
+# Dataset path; keep empty, no modification needed
+data_path=""
+
+# Set the default log level; no modification needed
+#export ASCEND_GLOBAL_LOG_LEVEL=3
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Oct-ResNet_ID0251_for_TensorFlow"
+# Training epochs
+train_epochs=3
+# Training steps
+train_steps=1000
+# Training batch_size
+batch_size=32
+# Learning rate
+learning_rate=1e-3
+
+# Diagnostic parameters; precision_mode needs to be reviewed per model
+precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether or not to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether or not to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    --max_step               # of steps for training
+    --learning_rate          learning rate
+    --batch                  batch size
+    --modeldir               model dir
+    --save_interval          save interval for ckpt
+    --loss_scale             enable loss scale, default is False
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --max_step* ]];then
+        train_steps=`echo ${para#*=}`
+    elif [[ $para == --learning_rate* ]];then
+        learning_rate=`echo ${para#*=}`
+    elif [[ $para == --batch* ]];then
+        batch_size=`echo ${para#*=}`
+    elif [[ $para == --modeldir* ]];then
+        modeldir=`echo ${para#*=}`
+    elif [[ $para == --save_interval* ]];then
+        save_interval=`echo ${para#*=}`
+    elif [[ $para == --loss_scale* ]];then
+        loss_scale=`echo ${para#*=}`
+    fi
+done
+
+# Check that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+tmp="/root/.keras/datasets"
+
+if [ ! -f $tmp/cifar-10-python.tar.gz ];then
+    cp $data_path/cifar-10-python.tar.gz $tmp
+    #mv $tmp/cifar-10-python.tar.gz $tmp/cifar-10-batches-py.tar.gz
+    chmod 700 $tmp/cifar-10-python.tar.gz
+fi
+
+if [ ! -f $tmp/cifar-10-batches-py.tar.gz ];then
+    cp $data_path/cifar-10-batches-py.tar.gz $tmp
+    chmod 700 $tmp/cifar-10-batches-py.tar.gz
+fi
+
+
+#############  Run training  #############
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+cd ${cur_path}/../
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; the arguments below need no modification, others need review per model
+    #--data_path, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+    nohup python3 train-npu.py 1 3 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+if [ $? -ne 0 ];then
+    exit 1
+fi
+done
+wait

+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+FPS=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "FPS:" | awk -F "FPS: " '{print $2}' | awk -F 'loss:' '{print $1}' | tail -n +2 | awk '{sum+=$1} END {print sum/NR}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n",1000*'${batch_size}'/'${FPS}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+train_accuracy=`grep "acc =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | tail -n 1 | awk -F " " '{print $9}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance monitoring result summary
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+## Get performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+
+## Clean up generated files
+rm -rf $cur_path/../model_dir/*
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+#cat $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | tr -d '\b\r' | grep -Eo "loss: [0-9]*\.[0-9]*" | awk -F " " '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt
+# Loss value of the last iteration; no modification needed
+ActualLoss=`grep "Loss for final step:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | tail -n 1 | awk -F " " '{print $5}'`
+ActualLoss=${ActualLoss%?}
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
--
Gitee

From 3cbf5a57209b3685e33662161a770f2dc013482d Mon Sep 17 00:00:00 2001
From: ykxia
Date: Mon, 21 Nov 2022 19:30:18 +0800
Subject: [PATCH 5/5] =?UTF-8?q?ResUNetplusplus=5FID0275=5Ffor=5FTensorflow?=
=?UTF-8?q?=E4=BD=BF=E8=83=BDRT2=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../run_rt.py | 183 ++++++++++++++++++ .../test/train_RT2_performance_1p.sh | 174 +++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py create mode 100644 TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py new file mode 100644 index 000000000..b9a40a66c --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py @@ -0,0 +1,183 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
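+#
+# Note: this run_rt.py variant trains ResUNet++ (ResUnetPlusPlus from m_resunet)
+# on data laid out under --data_path (train/ and valid/ folders with images and
+# masks, Kvasir-SEG by default). RT2.0 itself is enabled by the calling test
+# script through ENABLE_RUNTIME_V2=1.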
+ +import os +import numpy as np +import cv2 +from glob import glob +import tensorflow as tf +from tensorflow.keras.metrics import Precision, Recall, MeanIoU +from tensorflow.keras.optimizers import Adam, Nadam, SGD, Adagrad +from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger, TensorBoard +from npu_bridge.npu_init import * +from tensorflow.python.keras import backend as K +from time import time +import random +import argparse +import precision_tool.tf_config as npu_tf_config +from tensorflow.keras.losses import binary_crossentropy +from data_generator import DataGen +from unet import Unet +from resunet import ResUnet +from m_resunet import ResUnetPlusPlus +from metrics import dice_coef, dice_loss + +random.seed(0) +tf.random.set_random_seed(0) +np.random.seed(0) + +parser = argparse.ArgumentParser() +parser.add_argument('--save_dir', dest='save_dir', default='./test/out/checkpoints') +parser.add_argument('--data_path', dest='data_path', default='./new_data/Kvasir-SEG', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_mix_precision', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--autotune', dest='autotune', default='False', help='whether to enable autotune, default is False') +parser.add_argument('--npu_loss_scale', dest='npu_loss_scale', type=int, default=1) +parser.add_argument('--mode', dest='mode', default='train', choices=('train', 'test', 'train_and_eval')) +parser.add_argument('--batch_size', dest='batch_size', type=int, default=8) +parser.add_argument('--learning_rate', dest='learning_rate', type=float, default=0.001) +parser.add_argument('--num_epochs', dest='num_epochs', type=int, default=100) +args = parser.parse_args() + +# os.environ["DUMP_GE_GRAPH"] = "2" +# os.environ["DUMP_GRAPH_LEVEL"] = "2" +# os.environ["ASCEND_GLOBAL_LOG_LEVEL"] = "0" +# os.environ["EXPERIMENTAL_DYNAMIC_PARTITION"] = "1" + +sess_config = tf.ConfigProto() +sess_config = npu_tf_config.session_dump_config(sess_config, action='fusion_switch') +sess_config.allow_soft_placement = True +sess_config.log_device_placement = False +sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF +sess_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF +sess_config.graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.OFF + +custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() +custom_op.name = "NpuOptimizer" +#custom_op.parameter_map["dynamic_input"].b = True +#custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + +# 
custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes(str(8*1024 * 1024 * 1024))
+# custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes("/home/ma-user/work/rupp/data_dump")
+# custom_op.parameter_map["enable_dump_debug"].b = True
+# custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all")
+# custom_op.parameter_map["enable_dump"].b = True
+# custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes("0")
+# custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all")
+
+sess = tf.Session(config=sess_config)
+K.set_session(sess)
+
+if __name__ == "__main__":
+    ## Path
+    file_path = "files/"
+    model_path = file_path + "resunetplusplus.h5"
+
+    ## Create files folder
+    try:
+        os.makedirs(file_path)
+    except:
+        pass
+
+    train_path = os.path.join(args.data_path, "train")
+    valid_path = os.path.join(args.data_path, "valid")
+
+    ## Training
+    train_image_paths = glob(os.path.join(train_path, "images", "*"))
+    train_mask_paths = glob(os.path.join(train_path, "masks", "*"))
+    train_image_paths.sort()
+    train_mask_paths.sort()
+
+    # train_image_paths = train_image_paths[:2000]
+    # train_mask_paths = train_mask_paths[:2000]
+
+    ## Validation
+    valid_image_paths = glob(os.path.join(valid_path, "images", "*"))
+    valid_mask_paths = glob(os.path.join(valid_path, "masks", "*"))
+    valid_image_paths.sort()
+    valid_mask_paths.sort()
+
+    ## Parameters
+    image_size = 256
+    batch_size = args.batch_size
+    lr = 1e-5
+    epochs = args.num_epochs
+
+    train_steps = len(train_image_paths) // batch_size
+    valid_steps = len(valid_image_paths) // batch_size
+
+    ## Generator
+    train_gen = DataGen(image_size, train_image_paths, train_mask_paths, batch_size=batch_size)
+    valid_gen = DataGen(image_size, valid_image_paths, valid_mask_paths, batch_size=batch_size)
+
+    ## Unet
+    # arch = Unet(input_size=image_size)
+    # model = arch.build_model()
+
+    ## ResUnet
+    # arch = ResUnet(input_size=image_size)
+    # model = arch.build_model()
+
+    ## ResUnet++
+    arch = ResUnetPlusPlus(input_size=image_size)
+    model = arch.build_model()
+    opt = Nadam(lr)
+    '''
+    # Loss scale is not supported by the Keras release available as of 2021-12, so this approach does not work.
+    # opt = tf.train.AdamOptimizer(lr, name='AdamOptimizer')
+    # loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2**32, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+    # opt= NPULossScaleOptimizer(opt, loss_scale_manager)
+    '''
+    metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2)]
+    # binary_crossentropy dice_loss
+    model.compile(loss=dice_loss, optimizer=opt, metrics=metrics)
+
+    csv_logger = CSVLogger(f"{file_path}unet_{batch_size}.csv", append=False)
+    checkpoint = ModelCheckpoint(model_path, verbose=1, save_best_only=True, monitor="val_precision", mode="max")
+    reduce_lr = ReduceLROnPlateau(monitor='val_precision', factor=0.1, patience=10, min_lr=1e-6, verbose=1, mode="max")
+    early_stopping = EarlyStopping(monitor='val_precision', patience=30, restore_best_weights=False, mode="max")
+    tb = TensorBoard(log_dir=file_path, write_grads=True, histogram_freq=0, update_freq=100)
+    callbacks = [checkpoint, early_stopping, reduce_lr, tb, csv_logger]
+    StartTime1 = time()  # timing added
+    model.fit_generator(train_gen,
+                        validation_data=valid_gen,
+                        steps_per_epoch=train_steps,
+                        validation_steps=valid_steps,
+                        epochs=epochs,
+                        callbacks=callbacks)
+    EndTime1 = time()  # timing added
+    print('-------All epoch time : ' + str(EndTime1 - StartTime1))  # timing added; performance calculation requires division
+    print('-------Use all time average steps : ' + str(
+        (train_steps + valid_steps) * args.batch_size / (EndTime1 - StartTime1)))  # timing added; performance calculation requires division
diff --git a/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..d073d82c0
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Enable RT2.0
+export ENABLE_RUNTIME_V2=1
+# Dataset path; keep empty, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="ResUNetplusplus_ID0275_for_Tensorflow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=8
+# Learning rate
+learning_rate=0.00001
+# Training mode
+mode="train_and_eval"
+npu_loss_scale=1
+
+# Diagnostic parameters; precision_mode needs to be reviewed per model
+precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether or not to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether or not to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --learning_rate* ]];then
+        learning_rate=`echo ${para#*=}`
+    elif [[ $para == --mode* ]];then
+        mode=`echo ${para#*=}`
+    elif [[ $para == --npu_loss_scale* ]];then
+        npu_loss_scale=`echo ${para#*=}`
+    fi
+done
+
+# Check that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+cd $cur_path/..
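+
+# With RANK_SIZE=1 the loop below launches a single training process for the
+# current ASCEND_DEVICE_ID.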
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; the arguments below need no modification, others need review per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    nohup python3 run_rt.py \
+        --data_path=${data_path} \
+        --num_epochs=$train_epochs \
+        --save_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+#time=(`grep -r "Time: " $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F '(' '{print $NF}' | cut -d ')' -f 1`)
+#i=${#time[*]}
+#train_time=`echo "${time[i-1]} ${time[1]} $i"|awk '{print ($1-$2)*10/($3-2)}'`
+train_time=(`grep -r "All epoch time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $5}'`)
+epochsNum=(`grep -r "Epoch 00" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $2}'|awk -F : '{print $1}'`)
+averageFPS=(`grep -r "Use all time average steps" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}'`)
+#FPS=`echo "$batch_size $train_time"|awk '{print $1*1000/$2}'`
+FPS=`echo "$averageFPS $epochsNum"|awk '{print $1*$2}'`
+# Print; no modification needed (this model prints the time spent per epoch)
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=(`grep -r "val_precision did not improve from" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $8}'`)
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance monitoring result summary
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+## Get performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration
+TrainingTime=$train_time
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep -r "loss:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $8}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee