From 8e653e9f4ab3730857580d0e7b0a149385710b08 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 17:23:20 +0800 Subject: [PATCH 1/5] =?UTF-8?q?OpenPose=5FID0117=5Ffor=5FTensorFlow?= =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2.0=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Action/training/train_rt.py | 268 ++++++++++++++++++ .../test/train_RT2_performance_1p.sh | 205 ++++++++++++++ 2 files changed, 473 insertions(+) create mode 100644 TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py create mode 100644 TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py new file mode 100644 index 000000000..ecb86722b --- /dev/null +++ b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/Action/training/train_rt.py @@ -0,0 +1,268 @@ +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
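+#
+# train_rt.py (RT2.0 variant): trains the frame-wise action-recognition classifier
+# for OpenPose skeleton keypoints on an Ascend NPU. The script loads 36-dimensional
+# keypoint features from data_with_scene.csv, builds a small Dense/BatchNormalization
+# Keras network with four softmax outputs (stand/walk/operate/fall_down), binds a
+# tf.Session configured with the NpuOptimizer to the Keras backend, and saves the
+# trained weights under --modeldir.
+import os  # os.makedirs() is used when saving checkpoints; imported explicitly here
+           # rather than relying on names re-exported by the npu_bridge wildcard import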
+from npu_bridge.npu_init import * +import pandas as pd +from enum import Enum +import numpy as np +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from keras.utils import np_utils +from keras.models import Sequential +from keras.layers import Dense, Dropout +from keras.layers.normalization import BatchNormalization +from keras.optimizers import Adam +from keras.models import load_model + +import matplotlib.pyplot as plt +from keras.callbacks import Callback +import itertools +from sklearn.metrics import confusion_matrix +#*****npu modify begin***** +import tensorflow as tf +from keras import backend as K +from npu_bridge.npu_init import * + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('--data_path', dest='data_path', default='./data/', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_mix_precision', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--autotune', dest='autotune', default='False', help='whether to enable autotune, default is False') + +parser.add_argument('--train_epoch', dest='train_epoch', type=int, default=2000, help='# of step for training') +parser.add_argument('--modeldir', dest='modeldir', default='./ckpt', help='ckpt dir') +parser.add_argument('--learning_rate', dest='learning_rate', type=float, default=0.0001, help='learning rate') +parser.add_argument('--batch_size', dest='batch_size', type=int, default=64, help='# images in batch') + +parser.add_argument("--dynamic_input", type=str, default='1', help="--dynamic_input=1 Use fuzzy compilation. 
--dynamic_input=lazy_recompile Compile using lazy static graph") +args = parser.parse_args() + + +def npu_keras_optimizer(opt): + npu_opt = KerasDistributeOptimizer(opt) + return npu_opt +#*****npu modify end***** + +class Actions(Enum): + # framewise_recognition.h5 + # squat = 0 + # stand = 1 + # walk = 2 + # wave = 3 + + # framewise_recognition_under_scene.h5 + stand = 0 + walk = 1 + operate = 2 + fall_down = 3 + # run = 4 + + +# Callback class to visialize training progress +class LossHistory(Callback): + def on_train_begin(self, logs={}): + self.losses = {'batch':[], 'epoch':[]} + self.accuracy = {'batch':[], 'epoch':[]} + self.val_loss = {'batch':[], 'epoch':[]} + self.val_acc = {'batch':[], 'epoch':[]} + + def on_batch_end(self, batch, logs={}): + self.losses['batch'].append(logs.get('loss')) + self.accuracy['batch'].append(logs.get('acc')) + self.val_loss['batch'].append(logs.get('val_loss')) + self.val_acc['batch'].append(logs.get('val_acc')) + + def on_epoch_end(self, batch, logs={}): + self.losses['epoch'].append(logs.get('loss')) + self.accuracy['epoch'].append(logs.get('acc')) + self.val_loss['epoch'].append(logs.get('val_loss')) + self.val_acc['epoch'].append(logs.get('val_acc')) + + def loss_plot(self, loss_type): + iters = range(len(self.losses[loss_type])) + plt.figure() + # acc + plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc') + # loss + plt.plot(iters, self.losses[loss_type], 'g', label='train loss') + if loss_type == 'epoch': + # val_acc + plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc') + # val_loss + plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss') + plt.grid(True) + plt.xlabel(loss_type) + plt.ylabel('acc-loss') + plt.legend(loc="upper right") + plt.show() + + +def plot_confusion_matrix(cm, classes, + normalize=False, + title='Confusion matrix', + cmap=plt.cm.Blues): + """ + This function prints and plots the confusion matrix. + Normalization can be applied by setting `normalize=True`. + """ + if normalize: + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + print("Normalized confusion matrix") + else: + print('Confusion matrix, without normalization') + + print(cm) + + plt.imshow(cm, interpolation='nearest', cmap=cmap) + plt.title(title) + plt.colorbar() + tick_marks = np.arange(len(classes)) + plt.xticks(tick_marks, classes, rotation=45) + plt.yticks(tick_marks, classes) + + fmt = '.2f' if normalize else 'd' + thresh = cm.max() / 2. 
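+    # Annotate each cell of the confusion matrix with its value; use white text on
+    # cells darker than half of the maximum so the labels stay readable.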
+ for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): + plt.text(j, i, format(cm[i, j], fmt), + horizontalalignment="center", + color="white" if cm[i, j] > thresh else "black") + + plt.tight_layout() + plt.ylabel('True label') + plt.xlabel('Predicted label') + + +# load data +datapath = ("%s/data_with_scene.csv" %(args.data_path)) +raw_data = pd.read_csv(datapath, header=0) +dataset = raw_data.values +# X = dataset[:, 0:36].astype(float) +# Y = dataset[:, 36] +X = dataset[0:3289, 0:36].astype(float) # 忽略run数据 +Y = dataset[0:3289, 36] + +# 将类别编码为数字 +# encoder = LabelEncoder() +# encoder_Y = encoder.fit_transform(Y) +# print(encoder_Y[0], encoder_Y[900], encoder_Y[1800], encoder_Y[2700]) +# encoder_Y = [0]*744 + [1]*722 + [2]*815 + [3]*1008 + [4]*811 +encoder_Y = [0]*744 + [1]*722 + [2]*815 + [3]*1008 +# one hot 编码 +dummy_Y = np_utils.to_categorical(encoder_Y) + +# train test split +X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_Y, test_size=0.1, random_state=9) + +# build keras model +model = Sequential() +model.add(Dense(units=128, activation='relu')) +model.add(BatchNormalization()) +model.add(Dense(units=64, activation='relu')) +model.add(BatchNormalization()) +model.add(Dense(units=16, activation='relu')) +model.add(BatchNormalization()) +model.add(Dense(units=4, activation='softmax')) # units = nums of classes + +# training +#*****npu modify begin***** +sess_config = tf.ConfigProto() +custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() +custom_op.name = "NpuOptimizer" +#custom_op.parameter_map["dynamic_input"].b = True +#if args.dynamic_input == "lazy_recompile": +# custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +#elif args.dynamic_input == "1": +# custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("dynamic_execute") +#else: +# print("Enter correct compilation parameters.") +#custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode) +if args.data_dump_flag.strip() == "True": + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.data_dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(args.data_dump_step) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") +if args.over_dump.strip() == "True": + # dump_path:dump数据存放路径,该参数指定的目录需要在启动训练的环境上(容器或Host侧)提前创建且确保安装时配置的运行用户具有读写权限 + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.over_dump_path) + # enable_dump_debug:是否开启溢出检测功能 + custom_op.parameter_map["enable_dump_debug"].b = True + # dump_debug_mode:溢出检测模式,取值:all/aicore_overflow/atomic_overflow + custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all") +if args.profiling.strip() == "True": + custom_op.parameter_map["profiling_mode"].b = False + profilingvalue = ( + '{"output":"%s","training_trace":"on","task_trace":"on","aicpu":"on","fp_point":"","bp_point":""}' % ( + args.profiling_dump_path)) + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profilingvalue) +sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF +sess = tf.Session(config=sess_config) +K.set_session(sess) +#*****npu modify end***** + +his = LossHistory() +model.compile(loss='categorical_crossentropy', optimizer=Adam(args.learning_rate), metrics=['accuracy']) +model.fit(X_train, Y_train, 
batch_size=args.batch_size, epochs=args.train_epoch, verbose=1, validation_data=(X_test, Y_test), callbacks=[his]) +model.summary() +his.loss_plot('epoch') + +#*****npu modify begin***** +print('====save model====') +os.makedirs(args.modeldir, exist_ok=True) +ckptparams = ("%s/model_weights.h5" %(args.modeldir)) +ckptall = ("%s/model.h5" %(args.modeldir)) +model.save_weights(ckptparams) +model.save(ckptall) +sess.close() +#*****npu modify end***** + +# model.save('framewise_recognition.h5') + +# # evaluate and draw confusion matrix +# print('Test:') +# score, accuracy = model.evaluate(X_test,Y_test,batch_size=32) +# print('Test Score:{:.3}'.format(score)) +# print('Test accuracy:{:.3}'.format(accuracy)) +# # confusion matrix +# Y_pred = model.predict(X_test) +# cfm = confusion_matrix(np.argmax(Y_test,axis=1), np.argmax(Y_pred, axis=1)) +# np.set_printoptions(precision=2) +# +# plt.figure() +# class_names = ['squat', 'stand', 'walk', 'wave'] +# plot_confusion_matrix(cfm, classes=class_names, title='Confusion Matrix') +# plt.show() + +# # test +# model = load_model('framewise_recognition.h5') +# +# test_input = [0.43, 0.46, 0.43, 0.52, 0.4, 0.52, 0.39, 0.61, 0.4, +# 0.67, 0.46, 0.52, 0.46, 0.61, 0.46, 0.67, 0.42, 0.67, +# 0.42, 0.81, 0.43, 0.91, 0.45, 0.67, 0.45, 0.81, 0.45, +# 0.91, 0.42, 0.44, 0.43, 0.44, 0.42, 0.46, 0.44, 0.46] +# test_np = np.array(test_input) +# test_np = test_np.reshape(-1, 36) +# +# test_np = np.array(X[1033]).reshape(-1, 36) +# if test_np.size > 0: +# pred = np.argmax(model.predict(test_np)) +# init_label = Actions(pred).name +# print(init_label) diff --git a/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..b9c791e81 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/OpenPose_ID0117_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="OpenPose_ID0117_for_TensorFlow" +#训练step +train_epoch=20 +#训练batch_size +batch_size=32 +#学习率 +learning_rate=0.0001 +#动态输入模式,不需要修改 +dynamic_input="" + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + --train_epoch # of epoch for training + --learning_rate learning rate + --batch batch size + --modeldir model dir + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == 
--data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --train_epoch* ]];then + train_epoch=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --batch* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --modeldir* ]];then + modeldir=`echo ${para#*=}` + elif [[ $para == --dynamic_input* ]];then + dynamic_input=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + + +#############执行训练######################### +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_path, --model_dir, --precision_mode, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune + nohup python3 ${cur_path}/../Action/training/train_rt.py \ + --train_epoch=${train_epoch} \ + --learning_rate=${learning_rate} \ + --data_path=${data_path} \ + --modeldir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ + --precision_mode=${precision_mode} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} \ + --dynamic_input=${dynamic_input} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +if [ $? 
-ne 0 ];then + exit 1 +fi +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +stepvalue=(`grep -r "/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F - 'END {print $2}' | awk -F / '{print $1}'`) + +function strindex() { + x="${1%%$2*}" + if [[ $x = $1 ]];then + echo -1 + else + echo ${#x} + return ${#x} + fi +} +index=$(strindex "${stepvalue[0]}" "s") +second=${stepvalue[0]:0:$index} +uindex=$(strindex "${stepvalue[1]}" "us") +usecond=${stepvalue[1]:0:$uindex} +step_sec=$(awk 'BEGIN{printf "%.4f\n",('$usecond'/'1000')}') +FPS=`awk 'BEGIN {printf "%.2f\n", '1000'*'${batch_size}'/'${step_sec}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=(`grep -r "/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F : 'END {print $3}' | awk '{print $1}'`) +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=${step_sec} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F : '{print $2}' | awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DynamicInput = ${dynamic_input}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 121221c55a9d94d61e88cde80e89a68320a06093 Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:04:06 +0800 Subject: [PATCH 2/5] =?UTF-8?q?FastText=5FID0135=5Ffor=5FTensorFlow?= =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2.0=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../FastText_ID0135_for_TensorFlow/main_rt.py | 114 ++++++ .../test/train_RT2_performance_1p.sh | 186 +++++++++ .../word_embeddings_rt.py | 352 ++++++++++++++++++ 3 files changed, 652 insertions(+) create mode 100644 TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py create mode 100644 TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py diff --git a/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py 
b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py new file mode 100644 index 000000000..66213b733 --- /dev/null +++ b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/main_rt.py @@ -0,0 +1,114 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import numpy as np +from sklearn.model_selection import train_test_split +from word_embeddings_rt import load_data,prepare_data_for_word_vectors,building_word_vector_model,\ +classification_model,padding_input,prepare_data_for_word_vectors_imdb,ELMoEmbedding,data_prep_ELMo,Classification_model_with_ELMo +import argparse + +parser = argparse.ArgumentParser(description='') +parser.add_argument('--data_path', dest='data_path', default='/root/.keras/datasets/', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_mix_precision', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--lr', dest='lr', type=float, default=0.01, help='initial learning rate for adam') +parser.add_argument('--loss_scale', dest='loss_scale', default='True', help='enable loss scale ,default is True') +parser.add_argument('--epoch', dest='epoch', type=int, default=200, help='# of epoch') +parser.add_argument('--batch_size', dest='batch_size', type=int, default=1, help='# images in batch') +args = parser.parse_args() +def json_to_dict(json_set): + for k,v in json_set.items(): + if v == "True": + json_set[k]= True + elif v == "False": + json_set[k]=False + else: + json_set[k]=v + return json_set + +with 
open("config.json","r") as f: + params_set = json.load(f) +params_set = json_to_dict(params_set) + + +with open("model_params.json", "r") as f: + model_params = json.load(f) +model_params = json_to_dict(model_params) + +''' + load_data function works on imdb data. In order to load your data, comment line 27 and pass your data in the form of X,y + X = text data column + y = label column(0,1 etc) + +''' +# for imdb data +if params_set["option"]in [0,1,2]: + x_train,x_test,y_train,y_test = load_data(args.data_path, params_set["vocab_size"],params_set["max_len"]) + sentences,word_ix = prepare_data_for_word_vectors_imdb(args.data_path, x_train) + model_wv = building_word_vector_model(params_set["option"],sentences,params_set["embed_dim"], + params_set["workers"],params_set["window"],y_train) + + # for other data: + # put your data in the form of X,y + ''' + X = ["this is a sentence","this is another sentence by me","yet another sentence for training","one more again"] + y=np.array([0,1,1,0]) + + sentences_as_words,sentences,word_ix = prepare_data_for_word_vectors(X) + print("sentences loaded") + model_wv = building_word_vector_model(params_set["option"],sentences,params_set["embed_dim"], + params_set["workers"],params_set["window"],y) + + + print("word vector model built") + x_train, x_test, y_train, y_test = train_test_split(sentences, y, test_size=params_set["split_ratio"], random_state=42) + print("Data split done") + ''' + x_train_pad,x_test_pad = padding_input(x_train,x_test,params_set["max_len"]) + + model = classification_model(args,params_set["embed_dim"],x_train_pad,x_test_pad,y_train,y_test, + params_set["vocab_size"],word_ix,model_wv, + params_set["trainable_param"], + params_set["option"]) + print(model.summary()) + +else: + x_train,x_test,y_train,y_test = load_data(args.data_path, params_set["vocab_size"],params_set["max_len"]) + + train_text,train_label,test_text,test_label = data_prep_ELMo(x_train,y_train,x_test,y_test,params_set["max_len"]) + + model = Classification_model_with_ELMo(train_text,train_label,test_text,test_label) + print(model.summary()) diff --git a/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..c85d1c4fb --- /dev/null +++ b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FastText_ID0135_for_TensorFlow" +#训练epoch +train_epochs=10 +#训练step +train_steps=25000 +#训练batch_size +batch_size=1024 +#学习率 +learning_rate=0.01 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +loss_scale=True + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + --max_step # 
of step for training + --learning_rate learning rate + --batch batch size + --modeldir model dir + --save_interval save interval for ckpt + --loss_scale enable loss scale ,default is False + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --max_step* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --batch* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --modeldir* ]];then + modeldir=`echo ${para#*=}` + elif [[ $para == --save_interval* ]];then + save_interval=`echo ${para#*=}` + elif [[ $para == --loss_scale* ]];then + loss_scale=`echo ${para#*=}` + elif [[ $para == --epoch* ]];then + train_epochs=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +#############执行训练######################### +#训练开始时间,不需要修改 +start_time=$(date +%s) +cd $cur_path/../ +#进入训练脚本目录,需要模型审视修改 +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_path, --model_dir, --precision_mode, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune + nohup python3 main_rt.py \ + --epoch=${train_epochs} \ + --data_path=${data_path} \ + --precision_mode=${precision_mode} \ + --loss_scale=${loss_scale} \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ + --data_dump_flag=${data_dump_flag} \ + --data_dump_step=${data_dump_step} \ + --data_dump_path=${data_dump_path} \ + --batch_size=${batch_size} \ + --profiling=${profiling} \ + --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +if [ $? 
-ne 0 ];then + exit 1 +fi +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep "us/step" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "/" 'END {print $2}' | awk -F " " '{print $5}' | awk -F "us" '{print $1}'` +FPS=`awk 'BEGIN {printf "%.2f\n", '1000'*'1000'*'${batch_size}'/'${TrainingTime}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "Accuracy:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F " " '{print $2}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss:" '{print $2}' | awk -F " " '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py new file mode 100644 index 000000000..0b87e8a0a --- /dev/null +++ b/TensorFlow/built-in/nlp/FastText_ID0135_for_TensorFlow/word_embeddings_rt.py @@ -0,0 +1,352 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from npu_bridge.npu_init import * +import json +import fasttext +import numpy as np +import pandas as pd +import tensorflow as tf +import tensorflow_hub as hub +import keras.layers as layers +from keras.models import Model +from keras.datasets import imdb +from gensim.models import Word2Vec +from gensim.models import FastText +from keras.preprocessing.text import Tokenizer +from keras.preprocessing.sequence import pad_sequences +from keras.layers import Input,Embedding,Dense,Flatten +from sklearn.metrics import accuracy_score,classification_report + +#import tensorflow as tf +import tensorflow.python.keras as keras +#from tensorflow.python.keras import backend as K +from keras import backend as K +from npu_bridge.npu_init import * + +def npu_keras_optimizer(opt): + npu_opt = KerasDistributeOptimizer(opt) + return npu_opt + +def json_to_dict(json_set): + for k,v in json_set.items(): + if v == "True": + json_set[k]= True + elif v == "False": + json_set[k]=False + else: + json_set[k]=v + return json_set + + +with open("model_params.json", "r") as f: + model_params = json.load(f) +model_params = json_to_dict(model_params) + + +def load_data(datapath,vocab_size,max_len): + """ + Loads the keras imdb dataset + + Args: + vocab_size = {int} the size of the vocabulary + max_len = {int} the maximum length of input considered for padding + + Returns: + X_train = tokenized train data + X_test = tokenized test data + + """ + INDEX_FROM = 3 + data_path = ('%s/keras/datasets/imdb.npz' % (datapath)) + (X_train,y_train),(X_test,y_test) = imdb.load_data(path = data_path, num_words = vocab_size,index_from = INDEX_FROM) + + return X_train,X_test,y_train,y_test + + +def prepare_data_for_word_vectors_imdb(data_path, X_train): + """ + Prepares the input + + Args: + X_train = tokenized train data + + Returns: + sentences = {list} sentences containing words as tokens + word_index = {dict} word and its indexes in whole of imdb corpus + + """ + INDEX_FROM = 3 + wordpath = ('%s/keras/datasets/imdb_word_index.json' % (data_path)) + word_to_index = imdb.get_word_index(path = wordpath) + word_to_index = {k:(v+INDEX_FROM) for k,v in word_to_index.items()} + + word_to_index[""] =1 + word_to_index[""]=2 + + index_to_word = {v:k for k,v in word_to_index.items()} + + sentences = [] + for i in range(len(X_train)): + temp = [index_to_word[ids] for ids in X_train[i]] + sentences.append(temp) + """ + tokenizer = Tokenizer() + tokenizer.fit_on_texts(sentences) + word_indexes = tokenizer.word_index + """ + return sentences,word_to_index + + +def prepare_data_for_word_vectors(X): + sentences_as_words=[] + word_to_index={} + count=1 + for sent in X: + temp = sent.split() + sentences_as_words.append(temp) + for sent in sentences_as_words: + for word in sent: + if word_to_index.get(word,None) is None: + word_to_index[word] = count + count +=1 + index_to_word = {v:k for k,v in word_to_index.items()} + sentences=[] + for i in range(len(sentences_as_words)): + temp = [word_to_index[w] for w in sentences_as_words[i]] + sentences.append(temp) + + + return sentences_as_words,sentences,word_to_index + +def 
data_prep_ELMo(train_x,train_y,test_x,test_y,max_len): + + INDEX_FROM = 3 + word_to_index = imdb.get_word_index() + word_to_index = {k:(v+INDEX_FROM) for k,v in word_to_index.items()} + + word_to_index[""] =1 + word_to_index[""]=2 + + index_to_word = {v:k for k,v in word_to_index.items()} + + sentences=[] + for i in range(len(train_x)): + temp = [index_to_word[ids] for ids in train_x[i]] + sentences.append(temp) + + test_sentences=[] + for i in range(len(test_x)): + temp = [index_to_word[ids] for ids in test_x[i]] + test_sentences.append(temp) + + train_text = [' '.join(sentences[i][:max_len]) for i in range(len(sentences))] + train_text = np.array(train_text, dtype=object)[:, np.newaxis] + train_label = train_y.tolist() + + test_text = [' '.join(test_sentences[i][:500]) for i in range(len(test_sentences))] + test_text = np.array(test_text , dtype=object)[:, np.newaxis] + test_label = test_y.tolist() + + return train_text,train_label,test_text,test_label + + +def building_word_vector_model(option,sentences,embed_dim,workers,window,y_train): + """ + Builds the word vector + + Args: + type = {bool} 0 for Word2vec. 1 for gensim Fastext. 2 for Fasttext 2018. + sentences = {list} list of tokenized words + embed_dim = {int} embedding dimension of the word vectors + workers = {int} no. of worker threads to train the model (faster training with multicore machines) + window = {int} max distance between current and predicted word + y_train = y_train + + Returns: + model = Word2vec/Gensim fastText/ Fastext_2018 model trained on the training corpus + + + """ + if option == 0: + print("Training a word2vec model") + model = Word2Vec(sentences=sentences, size = embed_dim, workers = workers, window = window) + print("Training complete") + + elif option == 1: + print("Training a Gensim FastText model") + model = FastText(sentences=sentences, size = embed_dim, workers = workers, window = window) + print("Training complete") + + elif option == 2: + print("Training a Fasttext model from Facebook Research") + y_train = ["__label__positive" if i==1 else "__label__negative" for i in y_train] + + with open("imdb_train.txt","w") as text_file: + for i in range(len(sentences)): + print(sentences[i],y_train[i],file = text_file) + + model = fasttext.skipgram("imdb_train.txt","model_ft_2018_imdb",dim = embed_dim) + print("Training complete") + + return model + +def padding_input(X_train,X_test,maxlen): + """ + Pads the input upto considered max length + + Args: + X_train = tokenized train data + X_test = tokenized test data + + Returns: + X_train_pad = padded tokenized train data + X_test_pad = padded tokenized test data + + """ + + X_train_pad = pad_sequences(X_train,maxlen=maxlen,padding="post") + + X_test_pad = pad_sequences(X_test,maxlen=maxlen,padding="post") + + return X_train_pad,X_test_pad + + +def ELMoEmbedding(x): + elmo_model = hub.Module("https://tfhub.dev/google/elmo/1", trainable=True) + return elmo_model(tf.squeeze(tf.cast(x, tf.string)), signature="default", as_dict=True)["default"] + + +def classification_model(args,embed_dim,X_train_pad,X_test_pad,y_train,y_test,vocab_size,word_index,w2vmodel, + trainable_param,option): + """ + Builds the classification model for sentiment analysis + + Args: + embded_dim = {int} dimension of the word vectors + X_train_pad = padded tokenized train data + X_test_pad = padded tokenized test data + vocab_size = {int} size of the vocabulary + word_index = {dict} word and its indexes in whole of imdb corpus + w2vmodel = Word2Vec model + trainable_param = {bool} whether to 
train the word embeddings in the Embedding layer + option = {int} choice of word embedding + """ + sess_config = tf.ConfigProto() + custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + # custom_op.parameter_map["dynamic_input"].b = True + # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes(args.precision_mode) + sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + sess_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + if args.data_dump_flag.strip()=="True": + custom_op.parameter_map["enable_dump"].b = True + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.data_dump_path) + custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes(args.data_dump_step) + custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all") + if args.over_dump.strip()=="True": + # dump_path:dump数据存放路径,该参数指定的目录需要在启动训练的环境上(容器或Host侧)提前创建且确保安装时配置的运行用户具有读写权限 + custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes(args.over_dump_path) + # enable_dump_debug:是否开启溢出检测功能 + custom_op.parameter_map["enable_dump_debug"].b = True + # dump_debug_mode:溢出检测模式,取值:all/aicore_overflow/atomic_overflow + custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all") + if args.profiling.strip()=="True": + custom_op.parameter_map["profiling_mode"].b = False + profilingvalue=('{"output":"%s","training_trace":"on","task_trace":"on","aicpu":"on","fp_point":"","bp_point":""}' %(args.profiling_dump_path)) + custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(profilingvalue) + + sess = tf.Session(config=sess_config) + K.set_session(sess) + + embedding_matrix = np.zeros((vocab_size,embed_dim)) + for word, i in word_index.items(): + try: + embedding_vector = w2vmodel[word] + except: + pass + try: + if embedding_vector is not None: + embedding_matrix[i]=embedding_vector + except: + pass + + embed_layer = Embedding(vocab_size,embed_dim,weights =[embedding_matrix],trainable=trainable_param) + + input_seq = Input(shape=(X_train_pad.shape[1],)) + embed_seq = embed_layer(input_seq) + x = Dense(256,activation ="relu")(embed_seq) + x = Flatten()(x) + preds = Dense(1,activation="sigmoid")(x) + + model = Model(input_seq,preds) + + optimizer = tf.train.AdamOptimizer(learning_rate=args.lr) + if args.loss_scale == 'True': + loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2 ** 32, + incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, + decr_ratio=0.8) + if int(os.getenv('RANK_SIZE')) == 1: + optimizer = NPULossScaleOptimizer(optimizer, loss_scale_manager) + else: + optimizer = NPULossScaleOptimizer(optimizer, loss_scale_manager, is_distributed=True) + optim = npu_tf_optimizer(optimizer) + + model.compile(loss=model_params["loss"],optimizer=optim,metrics= model_params["metrics"]) + model.fit(X_train_pad,y_train,epochs=args.epoch,batch_size=args.batch_size,validation_data=(X_test_pad,y_test)) + + print('====save model====') + #model.save_weights('./ckpt_gpu/model_weigits.h5') + #model.save('./ckpt_gpu/model.h5') + predictions = model.predict(X_test_pad, batch_size=1) + predictions = [0 if i<0.5 else 1 for i in predictions] + print("Accuracy: ",accuracy_score(y_test,predictions)) + print("Classification Report: ",classification_report(y_test,predictions)) + sess.close() + return model + +def Classification_model_with_ELMo(X_train_pad,y_train,X_test_pad,y_test): + 
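+    # Builds an IMDB sentiment classifier on top of pre-trained ELMo embeddings from
+    # TF-Hub (see ELMoEmbedding above): raw review strings pass through a Lambda
+    # layer that returns the 1024-dim "default" ELMo vector, then Dense(256, relu)
+    # and a sigmoid output. Unlike classification_model(), this path compiles with
+    # the stock "adam" optimizer and does not attach the NPU session config or the
+    # loss-scale optimizer, so it runs with the default Keras session.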
input_text = layers.Input(shape=(1,), dtype=tf.string) + embed_seq = layers.Lambda(ELMoEmbedding, output_shape=(1024,))(input_text) + x = Dense(256,activation ="relu")(embed_seq) + preds = Dense(1,activation="sigmoid")(x) + model = Model(input_text,preds) + + + model.compile(loss="binary_crossentropy",optimizer="adam",metrics=["accuracy"]) + + model.fit(X_train_pad,y_train,epochs=10,batch_size=512,validation_data=(X_test_pad,y_test)) + + predictions = model.predict(X_test_pad) + predictions = [0 if i<0.5 else 1 for i in predictions] + print("Accuracy: ",accuracy_score(y_test,predictions)) + print("Classification Report: ",classification_report(y_test,predictions)) + + return model -- Gitee From ee6b8f7ab13035b0778f3f1ac018c0a93c6b2f6e Mon Sep 17 00:00:00 2001 From: ykxia Date: Mon, 21 Nov 2022 19:14:48 +0800 Subject: [PATCH 3/5] =?UTF-8?q?texting=5FID0193=5Ffor=5FTensorFlow?= =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_RT2_performance_1p.sh | 168 +++++++++++++ .../texting_ID0193_for_TensorFlow/train_rt.py | 237 ++++++++++++++++++ 2 files changed, 405 insertions(+) create mode 100644 TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py diff --git a/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..7dd277da3 --- /dev/null +++ b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="texting_ID0193_for_TensorFlow" +#训练epoch +epoch=6 +#训练batch_size +RANK_SIZE=1 +batch_size=256 +#迭代数iteration +iteration=100 +#训练step +#train_steps=100 +#学习率 +#learning_rate=3.96 + +#cp $data_path $cur_path/../datasets/ + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then 
+ data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + #开autotune特有环境变量 + autotune=True + export autotune=True + export REPEAT_TUNE=True + export ASCEND_DEVICE_ID=0 + export ENABLE_TUNE_BANK=True + export TE_PARALLEL_COMPILER=32 + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + export DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + nohup python3 train_rt.py \ + --data_url=$data_path \ + --learning_rate=0.005 \ + --epochs=$epoch \ + --batch_size=256 \ + --hidden=96 \ + > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep "time=" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $14}'` +ActualFPS=`echo "scale=2;${batch_size} / ${TrainingTime}"|bc` + +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +ActualLoss=`grep "train_loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk 'END {print $4}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py new file mode 100644 index 000000000..837d0442d --- /dev/null +++ 
b/TensorFlow/contrib/nlp/texting/texting_ID0193_for_TensorFlow/train_rt.py @@ -0,0 +1,237 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import time +import tensorflow as tf +#from sklearn import metrics +#import pickle as pkl + +from utils import * +from models import GNN, MLP + +import npu_bridge +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig + +config = tf.ConfigProto() +custom_op = config.graph_options.rewrite_options.custom_optimizers.add() +custom_op.name = "NpuOptimizer" +custom_op.parameter_map["use_off_line"].b = True +#custom_op.parameter_map["dynamic_input"].b =True +#custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + +# Set random seed +seed = 123 +np.random.seed(seed) +tf.set_random_seed(seed) + +# Settings +flags = tf.app.flags +FLAGS = flags.FLAGS +flags.DEFINE_string('dataset', 'mr', 'Dataset string.') # 'mr','ohsumed','R8','R52' +flags.DEFINE_string('data_url', './data', 'Path to dataset directory.') +flags.DEFINE_string('train_url', './output', 'Path to output directory.') +flags.DEFINE_string('model', 'gnn', 'Model string.') +flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.') +flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.') +flags.DEFINE_integer('batch_size', 1024, 'Size of batches per epoch.') +flags.DEFINE_integer('input_dim', 300, 'Dimension of input.') +flags.DEFINE_integer('hidden', 96, 'Number of units in hidden layer.') # 32, 64, 96, 128 +flags.DEFINE_integer('steps', 2, 'Number of graph layers.') +flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).') +flags.DEFINE_float('weight_decay', 0, 'Weight for L2 loss on embedding matrix.') # 5e-4 +flags.DEFINE_integer('early_stopping', -1, 'Tolerance for early stopping (# of epochs).') +flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.') # Not used + + +# Load data +train_adj, train_feature, train_y, val_adj, val_feature, val_y, test_adj, test_feature, test_y = load_data(FLAGS.dataset, FLAGS.data_url) + +max_length = max([len(i) for i in train_adj] + [len(j) for j in val_adj] + [len(k) for k in 
test_adj]) + +# Some preprocessing +print('loading training set') +train_adj, train_mask = preprocess_adj(train_adj, max_length) +train_feature = preprocess_features(train_feature, max_length) +print('loading validation set') +val_adj, val_mask = preprocess_adj(val_adj, max_length) +val_feature = preprocess_features(val_feature, max_length) +print('loading test set') +test_adj, test_mask = preprocess_adj(test_adj, max_length) +test_feature = preprocess_features(test_feature, max_length) + + +if FLAGS.model == 'gnn': + # support = [preprocess_adj(adj)] + # num_supports = 1 + model_func = GNN +elif FLAGS.model == 'gcn_cheby': # not used + # support = chebyshev_polynomials(adj, FLAGS.max_degree) + num_supports = 1 + FLAGS.max_degree + model_func = GNN +elif FLAGS.model == 'dense': # not used + # support = [preprocess_adj(adj)] + num_supports = 1 + model_func = MLP +else: + raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) + + +# Define placeholders +placeholders = { + 'support': tf.placeholder(tf.float32, shape=(None, max_length, max_length)), + 'features': tf.placeholder(tf.float32, shape=(None, max_length, FLAGS.input_dim)), + 'mask': tf.placeholder(tf.float32, shape=(None, max_length, 1)), + 'labels': tf.placeholder(tf.float32, shape=(None, train_y.shape[1])), + 'dropout': tf.placeholder_with_default(0., shape=()), + 'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout +} + + +# label smoothing +# label_smoothing = 0.1 +# num_classes = y_train.shape[1] +# y_train = (1.0 - label_smoothing) * y_train + label_smoothing / num_classes + + +# Create model +model = model_func(placeholders, input_dim=FLAGS.input_dim, logging=True) + +# merged = tf.summary.merge_all() +# writer = tf.summary.FileWriter('logs/', sess.graph) + +# Define model evaluation function +def evaluate(features, support, mask, labels, placeholders): + t_test = time.time() + feed_dict_val = construct_feed_dict(features, support, mask, labels, placeholders) + outs_val = sess.run([model.loss, model.accuracy, model.embeddings, model.preds, model.labels], feed_dict=feed_dict_val) + return outs_val[0], outs_val[1], (time.time() - t_test), outs_val[2], outs_val[3], outs_val[4] + + +cost_val = [] +best_val = 0 +best_epoch = 0 +best_acc = 0 +best_cost = 0 +test_doc_embeddings = None +preds = None +labels = None +#tf.summary.scalar('loss', model.loss) +#tf.summary.scalar('accuracy', model.accuracy) +#summary_op = tf.summary.merge_all() + +print('train start...') +# Train model +# Initialize session +with tf.Session(config=config) as sess: + # Init variables + init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()) + sess.run(init_op) + #train_writer = tf.summary.FileWriter(logdir=os.path.join(FLAGS.train_url, "train"), graph=sess.graph) + #test_writer = tf.summary.FileWriter(logdir=os.path.join(FLAGS.train_url, "test"), graph=sess.graph) + + for epoch in range(FLAGS.epochs): + t = time.time() + + # Training step + indices = np.arange(0, len(train_y)) + np.random.shuffle(indices) + + train_loss, train_acc = 0, 0 + for start in range(0, len(train_y), FLAGS.batch_size): + end = start + FLAGS.batch_size + idx = indices[start:end] + # Construct feed dictionary + feed_dict = construct_feed_dict(train_feature[idx], train_adj[idx], train_mask[idx], train_y[idx], placeholders) + feed_dict.update({placeholders['dropout']: FLAGS.dropout}) + + outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict) + train_loss += outs[1]*len(idx) + train_acc 
+= outs[2]*len(idx)
+        train_loss /= len(train_y)
+        train_acc /= len(train_y)
+        #train_writer.add_summary(outs[3], epoch)
+
+        # Validation
+        val_cost, val_acc, val_duration, _, _, _ = evaluate(val_feature, val_adj, val_mask, val_y, placeholders)
+        cost_val.append(val_cost)
+
+        # Test
+        test_cost, test_acc, test_duration, embeddings, pred, labels = evaluate(test_feature, test_adj, test_mask, test_y, placeholders)
+        #test_writer.add_summary(summary, epoch)
+
+        #if val_acc >= best_val:
+        #    best_val = val_acc
+        #    best_epoch = epoch
+        best_acc = test_acc
+        best_cost = test_cost
+        test_doc_embeddings = embeddings
+        preds = pred
+        #test_writer.add_summary(summary=summary, global_step=epoch)
+
+        # Print results
+        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_loss),
+              "train_acc=", "{:.5f}".format(train_acc), "val_loss=", "{:.5f}".format(val_cost),
+              "val_acc=", "{:.5f}".format(val_acc), "test_acc=", "{:.5f}".format(test_acc),
+              "time=", "{:.5f}".format(time.time() - t))
+
+        if FLAGS.early_stopping > 0 and epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
+            print("Early stopping...")
+            break
+
+    #train_writer.close()
+    #test_writer.close()
+    print("Optimization Finished!")
+
+    # Best results
+    #print('Best epoch:', best_epoch)
+    print("Test set results:", "cost=", "{:.5f}".format(best_cost),
+          "accuracy=", "{:.5f}".format(best_acc))
+
+    #print("Test Precision, Recall and F1-Score...")
+    #print(metrics.classification_report(labels, preds, digits=4))
+    #print("Macro average Test Precision, Recall and F1-Score...")
+    #print(metrics.precision_recall_fscore_support(labels, preds, average='macro'))
+    #print("Micro average Test Precision, Recall and F1-Score...")
+    #print(metrics.precision_recall_fscore_support(labels, preds, average='micro'))
+
+'''
+# For visualization
+doc_vectors = []
+for i in range(len(test_doc_embeddings)):
+    doc_vector = test_doc_embeddings[i]
+    doc_vector_str = ' '.join([str(x) for x in doc_vector])
+    doc_vectors.append(str(np.argmax(test_y[i])) + ' ' + doc_vector_str)
+
+doc_embeddings_str = '\n'.join(doc_vectors)
+with open('data/' + FLAGS.dataset + '_doc_vectors.txt', 'w') as f:
+    f.write(doc_embeddings_str)
+'''
--
Gitee

From 5cb0d04f0640d40bd4204c8989d5324f2544497a Mon Sep 17 00:00:00 2001
From: ykxia
Date: Mon, 21 Nov 2022 19:23:44 +0800
Subject: [PATCH 4/5] =?UTF-8?q?Oct-ResNet=5FID0251=5Ffor=5FTensorFlow?=
 =?UTF-8?q?=E4=BD=BF=E8=83=BDRT2=E4=BA=8C=E8=BF=9B=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_RT2_performance_1p.sh | 191 ++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh

diff --git a/TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..892421fb5
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/Oct-ResNet_ID0251_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+# Enable RT2.0
+export ENABLE_RUNTIME_V2=1
+# Dataset path; keep empty, no modification needed
+data_path=""
+
+# Set the default log level; no modification needed
+#export ASCEND_GLOBAL_LOG_LEVEL=3
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="Oct-ResNet_ID0251_for_TensorFlow"
+# Training epochs
+train_epochs=3
+# Training steps
+train_steps=1000
+# Training batch_size
+batch_size=32
+# Learning rate
+learning_rate=1e-3
+
+# Diagnostic parameters; precision_mode needs to be reviewed per model
+precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether or not to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether or not to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    --max_step               # of steps for training
+    --learning_rate          learning rate
+    --batch                  batch size
+    --modeldir               model dir
+    --save_interval          save interval for ckpt
+    --loss_scale             enable loss scale, default is False
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --max_step* ]];then
+        train_steps=`echo ${para#*=}`
+    elif [[ $para == --learning_rate* ]];then
+        learning_rate=`echo ${para#*=}`
+    elif [[ $para == --batch* ]];then
+        batch_size=`echo ${para#*=}`
+    elif [[ $para == --modeldir* ]];then
+        modeldir=`echo ${para#*=}`
+    elif [[ $para == --save_interval* ]];then
+        save_interval=`echo ${para#*=}`
+    elif [[ $para == --loss_scale* ]];then
+        loss_scale=`echo ${para#*=}`
+    fi
+done
+
+# Check that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+tmp="/root/.keras/datasets"
+
+if [ ! -f $tmp/cifar-10-python.tar.gz ];then
+    cp $data_path/cifar-10-python.tar.gz $tmp
+    #mv $tmp/cifar-10-python.tar.gz $tmp/cifar-10-batches-py.tar.gz
+    chmod 700 $tmp/cifar-10-python.tar.gz
+fi
+
+if [ ! -f $tmp/cifar-10-batches-py.tar.gz ];then
+    cp $data_path/cifar-10-batches-py.tar.gz $tmp
+    chmod 700 $tmp/cifar-10-batches-py.tar.gz
+fi
+
+
+#############  Run training  #############
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+cd ${cur_path}/../
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; the arguments below need no modification, others need review per model
+    #--data_path, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+    nohup python3 train-npu.py 1 3 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+if [ $? -ne 0 ];then
+    exit 1
+fi
+done
+wait

+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+FPS=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "FPS:" | awk -F "FPS: " '{print $2}' | awk -F 'loss:' '{print $1}' | tail -n +2 | awk '{sum+=$1} END {print sum/NR}'`
+TrainingTime=`awk 'BEGIN {printf "%.2f\n",1000*'${batch_size}'/'${FPS}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+train_accuracy=`grep "acc =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | tail -n 1 | awk -F " " '{print $9}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance monitoring result summary
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+## Get performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+
+## Clean up generated files
+rm -rf $cur_path/../model_dir/*
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+#cat $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | tr -d '\b\r' | grep -Eo "loss: [0-9]*\.[0-9]*" | awk -F " " '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt
+# Loss value of the last iteration; no modification needed
+ActualLoss=`grep "Loss for final step:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | tail -n 1 | awk -F " " '{print $5}'`
+ActualLoss=${ActualLoss%?}
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
--
Gitee

From 3cbf5a57209b3685e33662161a770f2dc013482d Mon Sep 17 00:00:00 2001
From: ykxia
Date: Mon, 21 Nov 2022 19:30:18 +0800
Subject: [PATCH 5/5] =?UTF-8?q?ResUNetplusplus=5FID0275=5Ffor=5FTensorflow?=
=?UTF-8?q?=E4=BD=BF=E8=83=BDRT2=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../run_rt.py | 183 ++++++++++++++++++ .../test/train_RT2_performance_1p.sh | 174 +++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py create mode 100644 TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py new file mode 100644 index 000000000..b9a40a66c --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/run_rt.py @@ -0,0 +1,183 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
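+#
+# Note: this run_rt.py variant trains ResUNet++ (ResUnetPlusPlus from m_resunet)
+# on data laid out under --data_path (train/ and valid/ folders with images and
+# masks, Kvasir-SEG by default). RT2.0 itself is enabled by the calling test
+# script through ENABLE_RUNTIME_V2=1.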
+ +import os +import numpy as np +import cv2 +from glob import glob +import tensorflow as tf +from tensorflow.keras.metrics import Precision, Recall, MeanIoU +from tensorflow.keras.optimizers import Adam, Nadam, SGD, Adagrad +from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger, TensorBoard +from npu_bridge.npu_init import * +from tensorflow.python.keras import backend as K +from time import time +import random +import argparse +import precision_tool.tf_config as npu_tf_config +from tensorflow.keras.losses import binary_crossentropy +from data_generator import DataGen +from unet import Unet +from resunet import ResUnet +from m_resunet import ResUnetPlusPlus +from metrics import dice_coef, dice_loss + +random.seed(0) +tf.random.set_random_seed(0) +np.random.seed(0) + +parser = argparse.ArgumentParser() +parser.add_argument('--save_dir', dest='save_dir', default='./test/out/checkpoints') +parser.add_argument('--data_path', dest='data_path', default='./new_data/Kvasir-SEG', help='path of the dataset') +parser.add_argument('--precision_mode', dest='precision_mode', default='allow_mix_precision', help='precision mode') +parser.add_argument('--over_dump', dest='over_dump', default='False', help='if or not over detection') +parser.add_argument('--over_dump_path', dest='over_dump_path', default='./overdump', help='over dump path') +parser.add_argument('--data_dump_flag', dest='data_dump_flag', default='False', help='data dump flag') +parser.add_argument('--data_dump_step', dest='data_dump_step', default='10', help='data dump step') +parser.add_argument('--data_dump_path', dest='data_dump_path', default='./datadump', help='data dump path') +parser.add_argument('--profiling', dest='profiling', default='False', help='if or not profiling for performance debug') +parser.add_argument('--profiling_dump_path', dest='profiling_dump_path', default='./profiling', help='profiling path') +parser.add_argument('--autotune', dest='autotune', default='False', help='whether to enable autotune, default is False') +parser.add_argument('--npu_loss_scale', dest='npu_loss_scale', type=int, default=1) +parser.add_argument('--mode', dest='mode', default='train', choices=('train', 'test', 'train_and_eval')) +parser.add_argument('--batch_size', dest='batch_size', type=int, default=8) +parser.add_argument('--learning_rate', dest='learning_rate', type=float, default=0.001) +parser.add_argument('--num_epochs', dest='num_epochs', type=int, default=100) +args = parser.parse_args() + +# os.environ["DUMP_GE_GRAPH"] = "2" +# os.environ["DUMP_GRAPH_LEVEL"] = "2" +# os.environ["ASCEND_GLOBAL_LOG_LEVEL"] = "0" +# os.environ["EXPERIMENTAL_DYNAMIC_PARTITION"] = "1" + +sess_config = tf.ConfigProto() +sess_config = npu_tf_config.session_dump_config(sess_config, action='fusion_switch') +sess_config.allow_soft_placement = True +sess_config.log_device_placement = False +sess_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF +sess_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF +sess_config.graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.OFF + +custom_op = sess_config.graph_options.rewrite_options.custom_optimizers.add() +custom_op.name = "NpuOptimizer" +#custom_op.parameter_map["dynamic_input"].b = True +#custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") +custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + +# 
custom_op.parameter_map["variable_memory_max_size"].s = tf.compat.as_bytes(str(8*1024 * 1024 * 1024))
+# custom_op.parameter_map["dump_path"].s = tf.compat.as_bytes("/home/ma-user/work/rupp/data_dump")
+# custom_op.parameter_map["enable_dump_debug"].b = True
+# custom_op.parameter_map["dump_debug_mode"].s = tf.compat.as_bytes("all")
+# custom_op.parameter_map["enable_dump"].b = True
+# custom_op.parameter_map["dump_step"].s = tf.compat.as_bytes("0")
+# custom_op.parameter_map["dump_mode"].s = tf.compat.as_bytes("all")
+
+sess = tf.Session(config=sess_config)
+K.set_session(sess)
+
+if __name__ == "__main__":
+    ## Path
+    file_path = "files/"
+    model_path = file_path + "resunetplusplus.h5"
+
+    ## Create files folder
+    try:
+        os.makedirs(file_path)
+    except:
+        pass
+
+    train_path = os.path.join(args.data_path, "train")
+    valid_path = os.path.join(args.data_path, "valid")
+
+    ## Training
+    train_image_paths = glob(os.path.join(train_path, "images", "*"))
+    train_mask_paths = glob(os.path.join(train_path, "masks", "*"))
+    train_image_paths.sort()
+    train_mask_paths.sort()
+
+    # train_image_paths = train_image_paths[:2000]
+    # train_mask_paths = train_mask_paths[:2000]
+
+    ## Validation
+    valid_image_paths = glob(os.path.join(valid_path, "images", "*"))
+    valid_mask_paths = glob(os.path.join(valid_path, "masks", "*"))
+    valid_image_paths.sort()
+    valid_mask_paths.sort()
+
+    ## Parameters
+    image_size = 256
+    batch_size = args.batch_size
+    lr = 1e-5
+    epochs = args.num_epochs
+
+    train_steps = len(train_image_paths) // batch_size
+    valid_steps = len(valid_image_paths) // batch_size
+
+    ## Generator
+    train_gen = DataGen(image_size, train_image_paths, train_mask_paths, batch_size=batch_size)
+    valid_gen = DataGen(image_size, valid_image_paths, valid_mask_paths, batch_size=batch_size)
+
+    ## Unet
+    # arch = Unet(input_size=image_size)
+    # model = arch.build_model()
+
+    ## ResUnet
+    # arch = ResUnet(input_size=image_size)
+    # model = arch.build_model()
+
+    ## ResUnet++
+    arch = ResUnetPlusPlus(input_size=image_size)
+    model = arch.build_model()
+    opt = Nadam(lr)
+    '''
+    # Loss scale is not supported by the Keras release available as of 2021-12, so this approach does not work.
+    # opt = tf.train.AdamOptimizer(lr, name='AdamOptimizer')
+    # loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=2**32, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5)
+    # opt= NPULossScaleOptimizer(opt, loss_scale_manager)
+    '''
+    metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2)]
+    # binary_crossentropy dice_loss
+    model.compile(loss=dice_loss, optimizer=opt, metrics=metrics)
+
+    csv_logger = CSVLogger(f"{file_path}unet_{batch_size}.csv", append=False)
+    checkpoint = ModelCheckpoint(model_path, verbose=1, save_best_only=True, monitor="val_precision", mode="max")
+    reduce_lr = ReduceLROnPlateau(monitor='val_precision', factor=0.1, patience=10, min_lr=1e-6, verbose=1, mode="max")
+    early_stopping = EarlyStopping(monitor='val_precision', patience=30, restore_best_weights=False, mode="max")
+    tb = TensorBoard(log_dir=file_path, write_grads=True, histogram_freq=0, update_freq=100)
+    callbacks = [checkpoint, early_stopping, reduce_lr, tb, csv_logger]
+    StartTime1 = time()  # timing added
+    model.fit_generator(train_gen,
+                        validation_data=valid_gen,
+                        steps_per_epoch=train_steps,
+                        validation_steps=valid_steps,
+                        epochs=epochs,
+                        callbacks=callbacks)
+    EndTime1 = time()  # timing added
+    print('-------All epoch time : ' + str(EndTime1 - StartTime1))  # timing added; performance calculation requires division
+    print('-------Use all time average steps : ' + str(
+        (train_steps + valid_steps) * args.batch_size / (EndTime1 - StartTime1)))  # timing added; performance calculation requires division
diff --git a/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..d073d82c0
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/ResUNetplusplus_ID0275_for_Tensorflow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Enable RT2.0
+export ENABLE_RUNTIME_V2=1
+# Dataset path; keep empty, no modification needed
+data_path=""
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="ResUNetplusplus_ID0275_for_Tensorflow"
+# Training epochs
+train_epochs=1
+# Training batch_size
+batch_size=8
+# Learning rate
+learning_rate=0.00001
+# Training mode
+mode="train_and_eval"
+npu_loss_scale=1
+
+# Diagnostic parameters; precision_mode needs to be reviewed per model
+precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether or not to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether or not to enable profiling for performance debugging, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --learning_rate* ]];then
+        learning_rate=`echo ${para#*=}`
+    elif [[ $para == --mode* ]];then
+        mode=`echo ${para#*=}`
+    elif [[ $para == --npu_loss_scale* ]];then
+        npu_loss_scale=`echo ${para#*=}`
+    fi
+done
+
+# Check that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+# Training start time; no modification needed
+start_time=$(date +%s)
+
+# Enter the training script directory; review and modify per model
+cd $cur_path/..
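+
+# With RANK_SIZE=1 the loop below launches a single training process for the
+# current ASCEND_DEVICE_ID.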
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; the arguments below need no modification, others need review per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    nohup python3 run_rt.py \
+        --data_path=${data_path} \
+        --num_epochs=$train_epochs \
+        --save_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and modify per model
+#time=(`grep -r "Time: " $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F '(' '{print $NF}' | cut -d ')' -f 1`)
+#i=${#time[*]}
+#train_time=`echo "${time[i-1]} ${time[1]} $i"|awk '{print ($1-$2)*10/($3-2)}'`
+train_time=(`grep -r "All epoch time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $5}'`)
+epochsNum=(`grep -r "Epoch 00" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $2}'|awk -F : '{print $1}'`)
+averageFPS=(`grep -r "Use all time average steps" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}'`)
+#FPS=`echo "$batch_size $train_time"|awk '{print $1*1000/$2}'`
+FPS=`echo "$averageFPS $epochsNum"|awk '{print $1*$2}'`
+# Print; no modification needed (this model prints the time spent per epoch)
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=(`grep -r "val_precision did not improve from" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $8}'`)
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance monitoring result summary
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+## Get performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration
+TrainingTime=$train_time
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep -r "loss:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $8}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee