diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py new file mode 100644 index 0000000000000000000000000000000000000000..2fa6753260e56a296e3ea4075dd224be875bd3a4 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py @@ -0,0 +1,249 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import datetime +import os +import sys +import time + +import tensorflow as tf +import numpy as np +sys.path.append(os.getcwd()) + +cur_path = os.path.abspath(os.path.dirname(__file__)) +working_dir = os.path.join(cur_path, '../') +sys.path.append(working_dir) + +from tensorflow.contrib import slim + +tf.app.flags.DEFINE_float('learning_rate', 1e-5, '') +tf.app.flags.DEFINE_integer('max_steps', 50000, '') +tf.app.flags.DEFINE_integer('decay_steps', 30000, '') +tf.app.flags.DEFINE_float('decay_rate', 0.1, '') +tf.app.flags.DEFINE_float('moving_average_decay', 0.997, '') +tf.app.flags.DEFINE_integer('num_readers', 4, '') +tf.app.flags.DEFINE_string('gpu', '0', '') +tf.app.flags.DEFINE_string('checkpoint_path',"checkpoints_mlt/" , '') +tf.app.flags.DEFINE_string('logs_path', 'logs_mlt/', '') +tf.app.flags.DEFINE_string('pretrained_model_path', 'data/vgg_16.ckpt', '') +tf.app.flags.DEFINE_boolean('restore', False, '') +tf.app.flags.DEFINE_integer('save_checkpoint_steps', 2000, '') +tf.app.flags.DEFINE_string('dataset_dir', 'resized/', '') +tf.app.flags.DEFINE_integer('num_bbox', 256, '') +tf.app.flags.DEFINE_integer('loss_scale', 4096, '') +tf.app.flags.DEFINE_integer('inputs_height', 600, '') +tf.app.flags.DEFINE_integer('inputs_width', 900, '') +tf.app.flags.DEFINE_integer('device_id', 1, '') +tf.app.flags.DEFINE_integer('npu_nums', 1, '') +tf.app.flags.DEFINE_string('DEVICE_ID', '0', '') +#modify for NPU start +tf.app.flags.DEFINE_string('precision_mode', 'allow_fp32_to_fp16', '') +#modify for NPU end + +FLAGS = tf.app.flags.FLAGS + + +from nets import model_train as model +from utils.dataset import data_provider as data_provider +from hccl.split.api import set_split_strategy_by_size +# npu libs +from npu_bridge.estimator import npu_ops +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +from npu_bridge.estimator.npu.npu_estimator import NPUEstimator +from npu_bridge.estimator.npu.npu_optimizer import allreduce +from 
def pad_input(inputs, target_shape=(1216, 1216, 3)):
    """Zero-pad an image into the top-left corner of a fixed-size uint8 canvas.

    Args:
        inputs: Array indexed as (height, width, channels); it is written to
            ``out[0:h, 0:w, :]``.
        target_shape: Shape of the returned canvas. The default is a tuple so
            that no mutable object is shared between calls; a list is still
            accepted.

    Returns:
        A ``np.uint8`` array of shape ``target_shape``.
    """
    h, w = inputs.shape[:2]
    # Allocate directly as uint8 instead of float64-then-cast.
    out = np.zeros(target_shape, dtype=np.uint8)
    out[0:h, 0:w, :] = inputs
    return out


def pad_bbox(inputs, count=256):
    """Return a copy of ``inputs`` truncated or padded to exactly ``count`` rows.

    Args:
        inputs: List of boxes (each a 5-element list).
        count: Number of rows the result must have.

    Returns:
        A new list. Extra rows are dropped; missing rows are filled with the
        sentinel ``[0, 0, 0, 0, 1]`` (one fresh list per row).
    """
    if len(inputs) > count:
        return inputs[:count].copy()

    out = inputs.copy()
    out.extend([0, 0, 0, 0, 1] for _ in range(count - len(out)))
    return out


def broadcast_global_variables(root_rank, index):
    """Build an op that overwrites every float global variable with root's value.

    Args:
        root_rank: Rank whose variable values are broadcast.
        index: Unused; kept so existing call sites keep working.

    Returns:
        A grouped TensorFlow op covering the broadcasts and the assignments.
    """
    # Imported lazily so the pure helpers in this module can be imported
    # without the NPU toolchain installed.
    from npu_bridge.hccl import hccl_ops

    op_list = []
    for var in tf.global_variables():
        if "float" in var.dtype.name:
            outputs = hccl_ops.broadcast(tensor=[var], root_rank=root_rank)
            if outputs is not None:
                op_list.append(outputs[0].op)
                op_list.append(tf.assign(var, outputs[0]))
    return tf.group(op_list)


def main(argv=None):
    """Build the CTPN training graph and run the training loop.

    Reads its whole configuration from ``FLAGS``. ``argv`` is accepted because
    ``tf.app.run`` passes it, but it is not used.

    Raises:
        ValueError: ``--restore`` is set but ``FLAGS.checkpoint_path`` holds no
            checkpoint.
    """
    # NPU-only dependencies are imported here rather than at module level so
    # that importing this module does not require the NPU toolchain.
    from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
    from npu_bridge.estimator.npu.npu_loss_scale_optimizer import NPULossScaleOptimizer
    from npu_bridge.estimator.npu.npu_loss_scale_manager import FixedLossScaleManager
    from npu_bridge.npu_init import ExponentialUpdateLossScaleManager

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    style_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # exist_ok: a rerun with the same DEVICE_ID must not crash on the log dir.
    os.makedirs(FLAGS.logs_path + FLAGS.DEVICE_ID, exist_ok=True)
    os.makedirs(FLAGS.checkpoint_path, exist_ok=True)

    input_image = tf.placeholder(tf.float32,
                                 shape=[1, FLAGS.inputs_height, FLAGS.inputs_width, 3],
                                 name='input_image')
    input_bbox = tf.placeholder(tf.float32,
                                shape=[FLAGS.num_bbox, 5], name='input_bbox')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    if FLAGS.npu_nums == 8:
        opt = NPUDistributedOptimizer(opt)

    # Dynamic loss scaling for mixed precision, fixed scale otherwise.
    if FLAGS.precision_mode == "allow_mix_precision":
        loss_scale_manager = ExponentialUpdateLossScaleManager(
            init_loss_scale=2**32,
            incr_every_n_steps=1000,
            decr_every_n_nan_or_inf=2,
            decr_ratio=0.5)
    else:
        loss_scale_manager = FixedLossScaleManager(loss_scale=FLAGS.loss_scale)

    opt = NPULossScaleOptimizer(opt, loss_scale_manager)

    with tf.name_scope('model') as scope:
        bbox_pred, cls_pred, _ = model.model(input_image)

        total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss_v2(bbox_pred, cls_pred, input_bbox)

        batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
        grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_path + style_time, tf.get_default_graph())

    init = tf.global_variables_initializer()
    # Built once here: creating the assign op inside the loop would add a new
    # node to the graph on every decay.
    decay_lr_op = tf.assign(learning_rate, learning_rate * FLAGS.decay_rate)

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    # NPU session configuration.
    config = tf.ConfigProto(allow_soft_placement=True)
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    custom_op.parameter_map["use_off_line"].b = True
    custom_op.parameter_map["hcom_parallel"].b = True
    custom_op.parameter_map["jit_compile"].b = False
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    if FLAGS.precision_mode == "allow_mix_precision":
        custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")

    if FLAGS.npu_nums == 8:
        bcast_op = broadcast_global_variables(0, 1)

    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            if ckpt is None:
                raise ValueError(
                    'restore requested but no checkpoint found in {}'.format(FLAGS.checkpoint_path))
            # Parse the step from the file name only ("ctpn_<step>.ckpt"); a
            # '.' in a directory name must not affect the result.
            restore_step = int(os.path.basename(ckpt).split('.')[0].split('_')[-1])
            print("continue training from previous checkpoint {}".format(restore_step))
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.npu_nums == 8:
                set_split_strategy_by_size([80, 20])
            restore_step = 0
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # Broadcast only after the variables hold their initial/restored
        # values; otherwise the later init would overwrite the synced state.
        if FLAGS.npu_nums == 8:
            sess.run(bcast_op)

        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers)
        start = time.time()

        for step in range(restore_step, FLAGS.max_steps):
            data = next(data_generator)
            input_image_np = data[0]
            input_bbox_np = pad_bbox(data[1], FLAGS.num_bbox)

            ml, tl, ce_loss, bbox_loss, _, summary_str = sess.run([model_loss, total_loss,
                                                                   rpn_cross_entropy,
                                                                   rpn_loss_box,
                                                                   train_op, summary_op],
                                                                  feed_dict={input_image: input_image_np,
                                                                             input_bbox: input_bbox_np})
            summary_writer.add_summary(summary_str, global_step=step)
            print('model loss :', ml, 'ce_loss: ', ce_loss, 'box_loss:', bbox_loss)
            if step != 0 and step % FLAGS.decay_steps == 0:
                sess.run(decay_lr_op)

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                start = time.time()
                print('Step {:06d}, ce_loss {:.6f}, bbox_loss {:.6f} model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'.format(
                    step, ce_loss, bbox_loss, ml, tl, avg_time_per_step, learning_rate.eval()))

            if (step + 1) % FLAGS.save_checkpoint_steps == 0:
                filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt')
                filename = os.path.join(FLAGS.checkpoint_path, filename)
                saver.save(sess, filename)
                print('Write model to: {:s}'.format(filename))


if __name__ == '__main__':
    tf.app.run()
# ===========================================================================
# test/train_RT2_performance_1p.sh
# Single-device RT2 performance run for CTPN: builds the bbox extension,
# trains for $train_steps steps, runs eval.sh on the last checkpoint and
# writes a summary to output/$ASCEND_DEVICE_ID/<CaseName>.log.
# ASCEND_DEVICE_ID is expected to be set by the caller's environment.
# ===========================================================================
#!/bin/bash

# Current path; does not need to be modified
cur_path=`pwd`

# Collective-communication parameters; do not need to be modified
export RANK_SIZE=1
export JOB_ID=10087
RANK_ID_START=0
export PATH=$PATH:/autotest/anaconda3/bin
source activate python3.7.5

# Enable RT2.0
export ENABLE_RUNTIME_V2=1

# Dataset path; keep empty here, it is supplied with --data_path
data_path=""

# Basic parameters; review per model
# Network name, same as the directory name
Network="CTPN_ID0054_for_TensorFlow"
# Training epochs
#train_epochs=1
# Training batch size
batch_size=1
# Training steps
train_steps=100
save_checkpoint_steps=20
# Learning rate
learning_rate=1e-5

# TF2.X only; does not need to be modified
#export NPU_LOOP_SIZE=${train_steps}

# Debug parameters; precision_mode should be reviewed per model
precision_mode="allow_mix_precision"
# Maintenance parameters; the following do not need to be modified
over_dump=False
data_dump_flag=False
data_dump_step="10"
profiling=False

# Help message; does not need to be modified
if [[ $1 == --help || $1 == -h ]];then
    # A space after `echo` is required, otherwise the shell looks for a
    # command literally named `echousage:...`.
    echo "usage:./train_performance_1p.sh "
    echo " "
    echo "parameter explain:
    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
    --over_dump              if or not over detection, default is False
    --data_dump_flag         data dump flag, default is False
    --data_dump_step         data dump step, default is 10
    --profiling              if or not profiling for performance debug, default is False
    --data_path              source data of training
    -h/--help                show help message
    "
    exit 1
fi

# Argument parsing; does not need to be modified.
# "$@" keeps each argument intact even if it contains whitespace.
for para in "$@"
do
    if [[ $para == --precision_mode* ]];then
        precision_mode=`echo ${para#*=}`
    elif [[ $para == --over_dump* ]];then
        over_dump=`echo ${para#*=}`
        over_dump_path=${cur_path}/output/overflow_dump
        mkdir -p ${over_dump_path}
    elif [[ $para == --data_dump_flag* ]];then
        data_dump_flag=`echo ${para#*=}`
        data_dump_path=${cur_path}/output/data_dump
        mkdir -p ${data_dump_path}
    elif [[ $para == --data_dump_step* ]];then
        data_dump_step=`echo ${para#*=}`
    elif [[ $para == --profiling* ]];then
        profiling=`echo ${para#*=}`
        profiling_dump_path=${cur_path}/output/profiling
        mkdir -p ${profiling_dump_path}
    elif [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    fi
done

# Check that data_path was passed; does not need to be modified
if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be confing"
    exit 1
fi

# CTPN only: build the bbox extension
cd $cur_path/../
cd utils/bbox/
chmod +x make.sh
./make.sh

# Training start time; does not need to be modified
start_time=$(date +%s)
# Enter the training script directory; review per model
cd $cur_path/..
for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
    # Set environment variables; does not need to be modified
    echo "Device ID: $ASCEND_DEVICE_ID"
    export RANK_ID=$RANK_ID

    # Recreate the per-device output directory; does not need to be modified
    rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
    mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID

    # Run the training script; the arguments below do not need to be modified, others should be reviewed per model
    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
    nohup python3 main/train_npu_rt.py \
        --precision_mode=$precision_mode \
        --pretrained_model_path=$data_path/vgg_16.ckpt \
        --dataset_dir=$data_path \
        --max_steps=$train_steps \
        --save_checkpoint_steps=$save_checkpoint_steps \
        --checkpoint_path=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
done
wait

# Training end time; does not need to be modified
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

# CTPN only: evaluate the last checkpoint
bash eval.sh ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt/ctpn_$train_steps.ckpt >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1

# Print results; does not need to be modified
echo "------------------ Final result ------------------"
# Performance FPS; review per model (field 13 of the last "Step" line is seconds/step)
train_time=`grep Step $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${train_time}'}'`
# Print; does not need to be modified
echo "Final Performance images/sec : $FPS"
# Training accuracy; review per model
train_accuracy=`grep Calculated $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}' | awk -F '}' '{print $1}'`
# Print; does not need to be modified
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
# Summary for stability/accuracy monitoring
# Test-case information; does not need to be modified
BatchSize=${batch_size}
DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
## Collect performance data
# Throughput; does not need to be modified
ActualFPS=${FPS}
# Time per training iteration; does not need to be modified
TrainingTime=$train_time
# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
grep "total loss" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $12}' | awk -F ',' '{print $1}'>> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
# Loss of the last iteration; does not need to be modified
ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
# Write the key information to ${CaseName}.log; does not need to be modified
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "train_accuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
sed -i "s/ModuleNotFoundError: No module named 'impl.unsorted_segment_sum'/ /g" `grep ModuleNotFoundError -rl $cur_path/output/$ASCEND_DEVICE_ID/train_*.log`

# ===========================================================================
# test/train_RT2_performance_8p.sh
# Eight-device RT2 performance run for CTPN: builds the bbox extension,
# launches one training process per rank (ASCEND_DEVICE_ID = rank id) and
# summarises the log of the last launched device.
# ===========================================================================
#!/bin/bash

# Current path; does not need to be modified
cur_path=`pwd`

# Collective-communication parameters; do not need to be modified
export RANK_SIZE=8
export RANK_TABLE_FILE=$cur_path/${RANK_SIZE}p.json
export JOB_ID=10087
export DEVICE_INDEX=0
RANK_ID_START=0

# Enable RT2.0
export ENABLE_RUNTIME_V2=1

# Dataset path; keep empty here, it is supplied with --data_path
data_path=""
# Default log level; does not need to be modified
export ASCEND_GLOBAL_LOG_LEVEL_ETP=3

Network="CTPN_ID0054_for_TensorFlow"
# Training epochs
#train_epochs=1
# Training batch size
batch_size=1
# Training steps
train_steps=100
save_checkpoint_steps=20
# Learning rate
learning_rate=8e-5

# TF2.X only; does not need to be modified
#export NPU_LOOP_SIZE=${train_steps}

# Debug parameters; precision_mode should be reviewed per model
precision_mode="allow_mix_precision"
# Maintenance parameters; the following do not need to be modified
over_dump=False
data_dump_flag=False
data_dump_step="10"
profiling=False

# Help message: lists exactly the options parsed by the loop below
if [[ $1 == --help || $1 == -h ]];then
    echo "usage: ./train_performance_8p.sh "

    echo ""
    echo "parameter explain:
    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
    --over_dump              if or not over detection, default is False
    --data_dump_flag         data dump flag, default is False
    --data_dump_step         data dump step, default is 10
    --profiling              if or not profiling for performance debug, default is False
    --data_path              source data of training
    -h/--help                Show help message
    "
    exit 1
fi

# Start from a clean output directory
if [ -d $cur_path/output ];then
    rm -rf $cur_path/output/*
    mkdir -p $cur_path/output/$ASCEND_DEVICE_ID
else
    mkdir -p $cur_path/output/$ASCEND_DEVICE_ID
fi

# Argument parsing. "$@" keeps each argument intact even if it contains whitespace.
for para in "$@"
do
    if [[ $para == --precision_mode* ]];then
        precision_mode=`echo ${para#*=}`
    elif [[ $para == --over_dump* ]];then
        over_dump=`echo ${para#*=}`
        over_dump_path=${cur_path}/output/overflow_dump
        mkdir -p ${over_dump_path}
    elif [[ $para == --data_dump_flag* ]];then
        data_dump_flag=`echo ${para#*=}`
        data_dump_path=${cur_path}/output/data_dump
        mkdir -p ${data_dump_path}
    elif [[ $para == --data_dump_step* ]];then
        data_dump_step=`echo ${para#*=}`
    elif [[ $para == --profiling* ]];then
        profiling=`echo ${para#*=}`
        profiling_dump_path=${cur_path}/output/profiling
        mkdir -p ${profiling_dump_path}
    elif [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    fi
done

if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be confing"
    exit 1
fi

# CTPN only: build the bbox extension
cd $cur_path/../
cd utils/bbox/
chmod +x make.sh
./make.sh

############# Run training #########################
start=$(date +%s)

for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
    # Set environment variables; does not need to be modified.
    # The device id is exported before it is printed so the message names
    # the device this iteration actually launches on.
    export RANK_ID=$RANK_ID
    export ASCEND_DEVICE_ID=$RANK_ID
    echo "Device ID: $ASCEND_DEVICE_ID"

    # Recreate the per-device output directory; does not need to be modified
    rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
    mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt

    # Run the training script; the arguments below do not need to be modified, others should be reviewed per model
    cd $cur_path/../
    nohup python3 main/train_npu_rt.py \
        --precision_mode=$precision_mode \
        --pretrained_model_path=$data_path/vgg_16.ckpt \
        --dataset_dir=$data_path \
        --max_steps=$train_steps \
        --device_id=$ASCEND_DEVICE_ID \
        --npu_nums=$RANK_SIZE \
        --DEVICE_ID=$ASCEND_DEVICE_ID \
        --save_checkpoint_steps=$save_checkpoint_steps \
        --checkpoint_path=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
done
wait

end=$(date +%s)
e2etime=$(( $end - $start ))

# Print results; does not need to be modified
echo "------------------ Final result ------------------"
# Performance FPS; review per model (field 13 of the last "Step" line is seconds/step)
train_time=`grep Step $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
FPS=`awk 'BEGIN{printf "%.2f\n",'${RANK_SIZE}'*'1000'*'${batch_size}'/'${train_time}'}'`
# Print; does not need to be modified
echo "Final Performance images/sec : $FPS"
# Training accuracy; review per model
train_accuracy=`grep Calculated $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}' | awk -F '}' '{print $1}'`
# Print; does not need to be modified
echo "Final Train Accuracy : ${train_accuracy}"
# The elapsed time is stored in e2etime (no underscore) in this script.
echo "E2E Training Duration sec : $e2etime"
# Summary for stability/accuracy monitoring
# Test-case information; does not need to be modified
BatchSize=${batch_size}
DeviceType=`uname -m`
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
## Collect performance data
# Throughput; does not need to be modified
ActualFPS=${FPS}
# Time per training iteration; does not need to be modified
TrainingTime=$train_time
# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
grep "total loss" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $12}' | awk -F ',' '{print $1}'>> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
# Loss of the last iteration; does not need to be modified
ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
# Write the key information to ${CaseName}.log; does not need to be modified
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2etime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
sed -i "s/ModuleNotFoundError: No module named 'impl.unsorted_segment_sum'/ /g" `grep ModuleNotFoundError -rl $cur_path/output/$ASCEND_DEVICE_ID/train_*.log`
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +'Constructs model, inputs, and training environment.' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from npu_bridge.npu_init import * +from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig +from tensorflow.core.protobuf import config_pb2 +import copy +import functools +import os +import tensorflow as tf +#import horovod.tensorflow as hvd +from object_detection import eval_util +from object_detection import exporter as exporter_lib +from object_detection import inputs +from object_detection.builders import graph_rewriter_builder +from object_detection.builders import model_builder +from object_detection.builders import optimizer_builder +from object_detection.core import standard_fields as fields +from object_detection.utils import config_util +from object_detection.utils import label_map_util +from object_detection.utils import shape_utils +from object_detection.utils import variables_helper +from object_detection.utils import visualization_utils as vis_utils + +#2021.5版本升级,下面代码版本包归档 +'''class NpuEmptyHook(tf.train.SessionRunHook): + pass + +def npu_tf_optimizer(opt): + npu_opt = NPUDistributedOptimizer(opt) + return npu_opt + +def npu_session_config_init(session_config=None): + if ((not isinstance(session_config, config_pb2.ConfigProto)) and (not issubclass(type(session_config), config_pb2.ConfigProto))): + session_config = config_pb2.ConfigProto() + if (isinstance(session_config, config_pb2.ConfigProto) or issubclass(type(session_config), config_pb2.ConfigProto)): + 
custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + session_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + return session_config + +def npu_run_config_init(run_config=None): + if ((not isinstance(run_config, tf.estimator.RunConfig)) and (not issubclass(type(run_config), tf.estimator.RunConfig))): + run_config = tf.estimator.RunConfig() + if (isinstance(run_config, tf.estimator.RunConfig) or issubclass(type(run_config), tf.estimator.RunConfig)): + run_config.__dict__['_session_config'] = npu_session_config_init(run_config.session_config) + return run_config''' +MODEL_BUILD_UTIL_MAP = {'get_configs_from_pipeline_file': config_util.get_configs_from_pipeline_file, 'create_pipeline_proto_from_configs': config_util.create_pipeline_proto_from_configs, 'merge_external_params_with_configs': config_util.merge_external_params_with_configs, 'create_train_input_fn': inputs.create_train_input_fn, 'create_eval_input_fn': inputs.create_eval_input_fn, 'create_predict_input_fn': inputs.create_predict_input_fn} + +def _prepare_groundtruth_for_eval(detection_model, class_agnostic, max_number_of_boxes): + "Extracts groundtruth data from detection_model and prepares it for eval.\n\n Args:\n detection_model: A `DetectionModel` object.\n class_agnostic: Whether the detections are class_agnostic.\n max_number_of_boxes: Max number of groundtruth boxes.\n\n Returns:\n A tuple of:\n groundtruth: Dictionary with the following fields:\n 'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes,\n in normalized coordinates.\n 'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed\n classes.\n 'groundtruth_masks': 4D float32 tensor of instance masks (if provided in\n groundtruth)\n 'groundtruth_is_crowd': [batch_size, num_boxes] bool tensor indicating\n is_crowd annotations (if provided in groundtruth).\n 'num_groundtruth_boxes': [batch_size] tensor containing the maximum 
number\n of groundtruth boxes per image..\n class_agnostic: Boolean indicating whether detections are class agnostic.\n " + input_data_fields = fields.InputDataFields() + groundtruth_boxes = tf.stack(detection_model.groundtruth_lists(fields.BoxListFields.boxes)) + groundtruth_boxes_shape = tf.shape(groundtruth_boxes) + if class_agnostic: + groundtruth_classes_one_hot = tf.ones([groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1]) + else: + groundtruth_classes_one_hot = tf.stack(detection_model.groundtruth_lists(fields.BoxListFields.classes)) + label_id_offset = 1 + groundtruth_classes = (tf.argmax(groundtruth_classes_one_hot, axis=2) + label_id_offset) + groundtruth = {input_data_fields.groundtruth_boxes: groundtruth_boxes, input_data_fields.groundtruth_classes: groundtruth_classes} + if detection_model.groundtruth_has_field(fields.BoxListFields.masks): + groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(detection_model.groundtruth_lists(fields.BoxListFields.masks)) + if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd): + groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(detection_model.groundtruth_lists(fields.BoxListFields.is_crowd)) + groundtruth[input_data_fields.num_groundtruth_boxes] = tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]) + return groundtruth + +def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): + 'Unstacks all tensors in `tensor_dict` along 0th dimension.\n\n Unstacks tensor from the tensor dict along 0th dimension and returns a\n tensor_dict containing values that are lists of unstacked, unpadded tensors.\n\n Tensors in the `tensor_dict` are expected to be of one of the three shapes:\n 1. [batch_size]\n 2. [batch_size, height, width, channels]\n 3. [batch_size, num_boxes, d1, d2, ... 
dn]\n\n When unpad_groundtruth_tensors is set to true, unstacked tensors of form 3\n above are sliced along the `num_boxes` dimension using the value in tensor\n field.InputDataFields.num_groundtruth_boxes.\n\n Note that this function has a static list of input data fields and has to be\n kept in sync with the InputDataFields defined in core/standard_fields.py\n\n Args:\n tensor_dict: A dictionary of batched groundtruth tensors.\n unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`\n dimension of the groundtruth tensors.\n\n Returns:\n A dictionary where the keys are from fields.InputDataFields and values are\n a list of unstacked (optionally unpadded) tensors.\n\n Raises:\n ValueError: If unpad_tensors is True and `tensor_dict` does not contain\n `num_groundtruth_boxes` tensor.\n ' + unbatched_tensor_dict = {key: tf.unstack(tensor) for (key, tensor) in tensor_dict.items()} + if unpad_groundtruth_tensors: + if (fields.InputDataFields.num_groundtruth_boxes not in unbatched_tensor_dict): + raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. 
Keys available: {}'.format(unbatched_tensor_dict.keys())) + unbatched_unpadded_tensor_dict = {} + unpad_keys = set([fields.InputDataFields.groundtruth_instance_masks, fields.InputDataFields.groundtruth_classes, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_keypoints, fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_is_crowd, fields.InputDataFields.groundtruth_area, fields.InputDataFields.groundtruth_weights]).intersection(set(unbatched_tensor_dict.keys())) + for key in unpad_keys: + unpadded_tensor_list = [] + for (num_gt, padded_tensor) in zip(unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes], unbatched_tensor_dict[key]): + tensor_shape = shape_utils.combined_static_and_dynamic_shape(padded_tensor) + slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32) + slice_size = tf.stack(([num_gt] + [((- 1) if (dim is None) else dim) for dim in tensor_shape[1:]])) + unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size) + unpadded_tensor_list.append(unpadded_tensor) + unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list + unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict) + return unbatched_tensor_dict + +def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): + 'Creates a model function for `Estimator`.\n\n Args:\n detection_model_fn: Function that returns a `DetectionModel` instance.\n configs: Dictionary of pipeline config objects.\n hparams: `HParams` object.\n use_tpu: Boolean indicating whether model should be constructed for\n use on TPU.\n\n Returns:\n `model_fn` for `Estimator`.\n ' + train_config = configs['train_config'] + eval_input_config = configs['eval_input_config'] + eval_config = configs['eval_config'] + + def model_fn(features, labels, mode, params=None): + 'Constructs the object detection model.\n\n Args:\n features: Dictionary of feature tensors, returned from `input_fn`.\n 
labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,\n otherwise None.\n mode: Mode key from tf.estimator.ModeKeys.\n params: Parameter dictionary passed from the estimator.\n\n Returns:\n An `EstimatorSpec` that encapsulates the model and its serving\n configurations.\n ' + params = (params or {}) + (total_loss, train_op, detections, export_outputs) = (None, None, None, None) + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + tf.keras.backend.set_learning_phase(is_training) + detection_model = detection_model_fn(is_training=is_training, add_summaries=(not use_tpu)) + scaffold_fn = None + + #数据预处理 + if (mode == tf.estimator.ModeKeys.TRAIN): + labels = unstack_batch(labels, unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors) + elif (mode == tf.estimator.ModeKeys.EVAL): + boxes_shape = labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list() + unpad_groundtruth_tensors = ((boxes_shape[1] is not None) and (not use_tpu)) + labels = unstack_batch(labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) + if (mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)): + gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] + gt_classes_list = labels[fields.InputDataFields.groundtruth_classes] + gt_masks_list = None + if (fields.InputDataFields.groundtruth_instance_masks in labels): + gt_masks_list = labels[fields.InputDataFields.groundtruth_instance_masks] + gt_keypoints_list = None + if (fields.InputDataFields.groundtruth_keypoints in labels): + gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] + gt_weights_list = None + if (fields.InputDataFields.groundtruth_weights in labels): + gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] + gt_confidences_list = None + if (fields.InputDataFields.groundtruth_confidences in labels): + gt_confidences_list = labels[fields.InputDataFields.groundtruth_confidences] + gt_is_crowd_list = None + if 
(fields.InputDataFields.groundtruth_is_crowd in labels): + gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd] + detection_model.provide_groundtruth(groundtruth_boxes_list=gt_boxes_list, groundtruth_classes_list=gt_classes_list, groundtruth_confidences_list=gt_confidences_list, groundtruth_masks_list=gt_masks_list, groundtruth_keypoints_list=gt_keypoints_list, groundtruth_weights_list=gt_weights_list, groundtruth_is_crowd_list=gt_is_crowd_list) + preprocessed_images = features[fields.InputDataFields.image] + + #预测输出 + if (use_tpu and train_config.use_bfloat16): + with tf.contrib.tpu.bfloat16_scope(): + prediction_dict = detection_model.predict(preprocessed_images, features[fields.InputDataFields.true_image_shape]) + for (k, v) in prediction_dict.items(): + if (v.dtype == tf.bfloat16): + prediction_dict[k] = tf.cast(v, tf.float32) + else: + prediction_dict = detection_model.predict(preprocessed_images, features[fields.InputDataFields.true_image_shape]) + + #后处理 + if (mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT)): + detections = detection_model.postprocess(prediction_dict, features[fields.InputDataFields.true_image_shape]) + if (mode == tf.estimator.ModeKeys.TRAIN): + if (train_config.fine_tune_checkpoint and hparams.load_pretrained): + if (not train_config.fine_tune_checkpoint_type): + if train_config.from_detection_checkpoint: + train_config.fine_tune_checkpoint_type = 'detection' + else: + train_config.fine_tune_checkpoint_type = 'classification' + asg_map = detection_model.restore_map(fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type, load_all_detection_checkpoint_vars=train_config.load_all_detection_checkpoint_vars) + available_var_map = variables_helper.get_variables_available_in_checkpoint(asg_map, train_config.fine_tune_checkpoint, include_global_step=False) + if use_tpu: + + def tpu_scaffold(): + tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, available_var_map) + return 
tf.train.Scaffold() + scaffold_fn = tpu_scaffold + else: + tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, available_var_map) + #loss计算 + if (mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)): + losses_dict = detection_model.loss(prediction_dict, features[fields.InputDataFields.true_image_shape]) + losses = [loss_tensor for loss_tensor in losses_dict.values()] + if train_config.add_regularization_loss: + regularization_losses = detection_model.regularization_losses() + if regularization_losses: + regularization_loss = tf.add_n(regularization_losses, name='regularization_loss') + losses.append(regularization_loss) + losses_dict['Loss/regularization_loss'] = regularization_loss + total_loss = tf.add_n(losses, name='total_loss') + losses_dict['Loss/total_loss'] = total_loss + if ('graph_rewriter_config' in configs): + graph_rewriter_fn = graph_rewriter_builder.build(configs['graph_rewriter_config'], is_training=is_training) + graph_rewriter_fn() + global_step = tf.train.get_or_create_global_step() + (training_optimizer, optimizer_summary_vars) = optimizer_builder.build(train_config.optimizer) + #loss scale + #training_optimizer = NPULossScaleOptimizer(training_optimizer, loss_scale_manager, is_distributed=True) + + + #训练场景 + if (mode == tf.estimator.ModeKeys.TRAIN): + if use_tpu: + training_optimizer = npu_tf_optimizer(tf.contrib.tpu.CrossShardOptimizer(training_optimizer)) + trainable_variables = None + include_variables = (train_config.update_trainable_variables if train_config.update_trainable_variables else None) + exclude_variables = (train_config.freeze_variables if train_config.freeze_variables else None) + trainable_variables = tf.contrib.framework.filter_variables(tf.trainable_variables(), include_patterns=include_variables, exclude_patterns=exclude_variables) + clip_gradients_value = None + if (train_config.gradient_clipping_by_norm > 0): + clip_gradients_value = train_config.gradient_clipping_by_norm + if (not use_tpu): + 
for var in optimizer_summary_vars: + tf.summary.scalar(var.op.name, var) + summaries = ([] if use_tpu else None) + if train_config.summarize_gradients: + summaries = ['gradients', 'gradient_norm', 'global_gradient_norm'] + #add + print("[DEBUG in model_lib] enter optimize_loss,total_loss:",total_loss) + + train_op = tf.contrib.layers.optimize_loss(loss=total_loss, global_step=global_step, learning_rate=None, clip_gradients=clip_gradients_value, optimizer=training_optimizer, update_ops=detection_model.updates(), variables=trainable_variables, summaries=summaries, name='') + + + #预测场景 + if (mode == tf.estimator.ModeKeys.PREDICT): + exported_output = exporter_lib.add_output_tensor_nodes(detections) + export_outputs = {tf.saved_model.signature_constants.PREDICT_METHOD_NAME: tf.estimator.export.PredictOutput(exported_output)} + eval_metric_ops = None + scaffold = None + + #eval场景 + if (mode == tf.estimator.ModeKeys.EVAL): + class_agnostic = (fields.DetectionResultFields.detection_classes not in detections) + groundtruth = _prepare_groundtruth_for_eval(detection_model, class_agnostic, eval_input_config.max_number_of_boxes) + use_original_images = (fields.InputDataFields.original_image in features) + if use_original_images: + eval_images = features[fields.InputDataFields.original_image] + true_image_shapes = tf.slice(features[fields.InputDataFields.true_image_shape], [0, 0], [(- 1), 3]) + original_image_spatial_shapes = features[fields.InputDataFields.original_image_spatial_shape] + else: + eval_images = features[fields.InputDataFields.image] + true_image_shapes = None + original_image_spatial_shapes = None + eval_dict = eval_util.result_dict_for_batched_example(eval_images, features[inputs.HASH_KEY], detections, groundtruth, class_agnostic=class_agnostic, scale_to_absolute=True, original_image_spatial_shapes=original_image_spatial_shapes, true_image_shapes=true_image_shapes) + if class_agnostic: + category_index = label_map_util.create_class_agnostic_category_index() + 
else: + category_index = label_map_util.create_category_index_from_labelmap(eval_input_config.label_map_path) + vis_metric_ops = None + if ((not use_tpu) and use_original_images): + eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(category_index, max_examples_to_draw=eval_config.num_visualizations, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, min_score_thresh=eval_config.min_score_threshold, use_normalized_coordinates=False) + vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(eval_dict) + eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(eval_config, list(category_index.values()), eval_dict) + for (loss_key, loss_tensor) in iter(losses_dict.items()): + eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) + for var in optimizer_summary_vars: + eval_metric_ops[var.op.name] = (var, tf.no_op()) + if (vis_metric_ops is not None): + eval_metric_ops.update(vis_metric_ops) + eval_metric_ops = {str(k): v for (k, v) in eval_metric_ops.items()} + if eval_config.use_moving_averages: + variable_averages = tf.train.ExponentialMovingAverage(0.0) + variables_to_restore = variable_averages.variables_to_restore() + keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours + saver = tf.train.Saver(variables_to_restore, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) + scaffold = tf.train.Scaffold(saver=saver) + + #训练实例 + if (use_tpu and (mode != tf.estimator.ModeKeys.EVAL)): + return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, scaffold_fn=scaffold_fn, predictions=detections, loss=total_loss, train_op=train_op, eval_metrics=eval_metric_ops, export_outputs=export_outputs) + + else: + if (scaffold is None): + keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours + saver = tf.train.Saver(sharded=True, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, save_relative_paths=True) + tf.add_to_collection(tf.GraphKeys.SAVERS, saver) + scaffold = tf.train.Scaffold(saver=saver) 
+ return tf.estimator.EstimatorSpec(mode=mode, predictions=detections, loss=total_loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs, scaffold=scaffold) + return model_fn + +def create_estimator_and_inputs(run_config, hparams, pipeline_config_path, eval_count=1, config_override=None, train_steps=None, sample_1_of_n_eval_examples=1, sample_1_of_n_eval_on_train_examples=1, model_fn_creator=create_model_fn, use_tpu_estimator=False, use_tpu=False, num_shards=1, params=None, override_eval_num_epochs=True, save_final_config=False, **kwargs): + "Creates `Estimator`, input functions, and steps.\n\n Args:\n run_config: A `RunConfig`.\n hparams: A `HParams`.\n pipeline_config_path: A path to a pipeline config file.\n config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to\n override the config from `pipeline_config_path`.\n train_steps: Number of training steps. If None, the number of training steps\n is set from the `TrainConfig` proto.\n sample_1_of_n_eval_examples: Integer representing how often an eval example\n should be sampled. If 1, will sample all examples.\n sample_1_of_n_eval_on_train_examples: Similar to\n `sample_1_of_n_eval_examples`, except controls the sampling of training\n data for evaluation.\n model_fn_creator: A function that creates a `model_fn` for `Estimator`.\n Follows the signature:\n\n * Args:\n * `detection_model_fn`: Function that returns `DetectionModel` instance.\n * `configs`: Dictionary of pipeline config objects.\n * `hparams`: `HParams` object.\n * Returns:\n `model_fn` for `Estimator`.\n\n use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False,\n an `Estimator` will be returned.\n use_tpu: Boolean, whether training and evaluation should run on TPU. Only\n used if `use_tpu_estimator` is True.\n num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator`\n is True.\n params: Parameter dictionary passed from the estimator. 
Only used if\n `use_tpu_estimator` is True.\n override_eval_num_epochs: Whether to overwrite the number of epochs to\n 1 for eval_input.\n save_final_config: Whether to save final config (obtained after applying\n overrides) to `estimator.model_dir`.\n **kwargs: Additional keyword arguments for configuration override.\n\n Returns:\n A dictionary with the following fields:\n 'estimator': An `Estimator` or `TPUEstimator`.\n 'train_input_fn': A training input function.\n 'eval_input_fns': A list of all evaluation input functions.\n 'eval_input_names': A list of names for each evaluation input.\n 'eval_on_train_input_fn': An evaluation-on-train input function.\n 'predict_input_fn': A prediction input function.\n 'train_steps': Number of training steps. Either directly from input or from\n configuration.\n 'train_batch_size': train batch size per GPU\n " + get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP['get_configs_from_pipeline_file'] + merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP['merge_external_params_with_configs'] + create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP['create_pipeline_proto_from_configs'] + create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn'] + create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn'] + create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] + configs = get_configs_from_pipeline_file(pipeline_config_path, config_override=config_override) + kwargs.update({'train_steps': train_steps, 'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples}) + if override_eval_num_epochs: + kwargs.update({'eval_num_epochs': 1}) + tf.logging.warning('Forced number of epochs for all eval validations to be 1.') + configs = merge_external_params_with_configs(configs, hparams, kwargs_dict=kwargs) + model_config = configs['model'] + train_config = configs['train_config'] + train_input_config = configs['train_input_config'] + eval_config = configs['eval_config'] + eval_input_configs = 
configs['eval_input_configs'] + eval_on_train_input_config = copy.deepcopy(train_input_config) + eval_on_train_input_config.sample_1_of_n_examples = sample_1_of_n_eval_on_train_examples + if (override_eval_num_epochs and (eval_on_train_input_config.num_epochs != 1)): + tf.logging.warning('Expected number of evaluation epochs is 1, but instead encountered `eval_on_train_input_config.num_epochs` = {}. Overwriting `num_epochs` to 1.'.format(eval_on_train_input_config.num_epochs)) + eval_on_train_input_config.num_epochs = 1 + if ((train_steps is None) and (train_config.num_steps != 0)): + train_steps = train_config.num_steps + detection_model_fn = functools.partial(model_builder.build, model_config=model_config) + train_input_fn = create_train_input_fn(train_config=train_config, train_input_config=train_input_config, model_config=model_config) + eval_input_fns = [create_eval_input_fn(eval_config=eval_config, eval_input_config=eval_input_config, model_config=model_config) for eval_input_config in eval_input_configs] + eval_input_names = [eval_input_config.name for eval_input_config in eval_input_configs] + eval_on_train_input_fn = create_eval_input_fn(eval_config=eval_config, eval_input_config=eval_on_train_input_config, model_config=model_config) + predict_input_fn = create_predict_input_fn(model_config=model_config, predict_input_config=eval_input_configs[0]) + export_to_tpu = hparams.get('export_to_tpu', False) + tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s', use_tpu, export_to_tpu) + model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu) + custom_op = run_config.session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = 'NpuOptimizer' + custom_op.parameter_map['precision_mode'].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map['mix_compile_mode'].b = True + # custom_op.parameter_map["dynamic_input"].b = True + # custom_op.parameter_map["dynamic_graph_execute_mode"].s 
= tf.compat.as_bytes("lazy_recompile") + custom_op.parameter_map["hcom_parallel"].b = True + custom_op.parameter_map["jit_compile"].b = False + + run_config = tf.estimator.RunConfig(model_dir=run_config.model_dir, session_config=run_config.session_config, + save_checkpoints_steps=train_steps // eval_count) + if use_tpu_estimator: + estimator = tf.contrib.tpu.TPUEstimator(model_fn=model_fn, train_batch_size=train_config.batch_size, eval_batch_size=((num_shards * 1) if use_tpu else 1), use_tpu=False, config=run_config, params=(params if params else {}), eval_on_tpu=False, export_to_tpu=False) + else: + estimator = tf.estimator.Estimator(model_fn=model_fn, config=npu_run_config_init(run_config=run_config)) + if (run_config.is_chief and save_final_config): + pipeline_config_final = create_pipeline_proto_from_configs(configs) + config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir) + return dict(estimator=estimator, train_input_fn=train_input_fn, eval_input_fns=eval_input_fns, eval_input_names=eval_input_names, eval_on_train_input_fn=eval_on_train_input_fn, predict_input_fn=predict_input_fn, train_steps=train_steps, train_batch_size=train_config.batch_size) + +def create_train_and_eval_specs(train_input_fn, eval_input_fns, eval_on_train_input_fn, predict_input_fn, train_steps, eval_on_train_data=False, final_exporter_name='Servo', eval_spec_names=None): + 'Creates a `TrainSpec` and `EvalSpec`s.\n\n Args:\n train_input_fn: Function that produces features and labels on train data.\n eval_input_fns: A list of functions that produce features and labels on eval\n data.\n eval_on_train_input_fn: Function that produces features and labels for\n evaluation on train data.\n predict_input_fn: Function that produces features for inference.\n train_steps: Number of training steps.\n eval_on_train_data: Whether to evaluate model on training data. 
Default is\n False.\n final_exporter_name: String name given to `FinalExporter`.\n eval_spec_names: A list of string names for each `EvalSpec`.\n\n Returns:\n Tuple of `TrainSpec` and list of `EvalSpecs`. If `eval_on_train_data` is\n True, the last `EvalSpec` in the list will correspond to training data. The\n rest EvalSpecs in the list are evaluation datas.\n ' + train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=(train_steps // get_rank_size()), hooks=[NpuEmptyHook()]) + if (eval_spec_names is None): + eval_spec_names = [str(i) for i in range(len(eval_input_fns))] + eval_specs = [] + for (index, (eval_spec_name, eval_input_fn)) in enumerate(zip(eval_spec_names, eval_input_fns)): + if (index == 0): + exporter_name = final_exporter_name + else: + exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name) + exporter = tf.estimator.FinalExporter(name=exporter_name, serving_input_receiver_fn=predict_input_fn) + eval_specs.append(tf.estimator.EvalSpec(name=eval_spec_name, input_fn=eval_input_fn, steps=None, exporters=exporter,hooks=npu_hooks_append())) + if eval_on_train_data: + eval_specs.append(tf.estimator.EvalSpec(name='eval_on_train', input_fn=eval_on_train_input_fn, steps=None,hooks=npu_hooks_append())) + return (train_spec, eval_specs) + +def continuous_eval(estimator, model_dir, input_fn, train_steps, name): + 'Perform continuous evaluation on checkpoints written to a model directory.\n\n Args:\n estimator: Estimator object to use for evaluation.\n model_dir: Model directory to read checkpoints for continuous evaluation.\n input_fn: Input function to use for evaluation.\n train_steps: Number of training steps. 
This is used to infer the last\n checkpoint and stop evaluation loop.\n name: Namescope for eval summary.\n ' + + def terminate_eval(): + tf.logging.info('Terminating eval after 180 seconds of no checkpoints') + return True + for ckpt in tf.contrib.training.checkpoints_iterator(model_dir, min_interval_secs=180, timeout=None, timeout_fn=terminate_eval): + tf.logging.info('Starting Evaluation.') + try: + eval_results = estimator.evaluate(input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name) + tf.logging.info(('Eval results: %s' % eval_results)) + current_step = int(os.path.basename(ckpt).split('-')[1]) + if (current_step >= train_steps): + tf.logging.info(('Evaluation finished after training step %d' % current_step)) + break + except tf.errors.NotFoundError: + tf.logging.info(('Checkpoint %s no longer exists, skipping checkpoint' % ckpt)) + +def populate_experiment(run_config, hparams, pipeline_config_path, train_steps=None, eval_steps=None, model_fn_creator=create_model_fn, **kwargs): + 'Populates an `Experiment` object.\n\n EXPERIMENT CLASS IS DEPRECATED. Please switch to\n tf.estimator.train_and_evaluate. As an example, see model_main.py.\n\n Args:\n run_config: A `RunConfig`.\n hparams: A `HParams`.\n pipeline_config_path: A path to a pipeline config file.\n train_steps: Number of training steps. If None, the number of training steps\n is set from the `TrainConfig` proto.\n eval_steps: Number of evaluation steps per evaluation cycle. 
If None, the\n number of evaluation steps is set from the `EvalConfig` proto.\n model_fn_creator: A function that creates a `model_fn` for `Estimator`.\n Follows the signature:\n\n * Args:\n * `detection_model_fn`: Function that returns `DetectionModel` instance.\n * `configs`: Dictionary of pipeline config objects.\n * `hparams`: `HParams` object.\n * Returns:\n `model_fn` for `Estimator`.\n\n **kwargs: Additional keyword arguments for configuration override.\n\n Returns:\n An `Experiment` that defines all aspects of training, evaluation, and\n export.\n ' + tf.logging.warning('Experiment is being deprecated. Please use tf.estimator.train_and_evaluate(). See model_main.py for an example.') + train_and_eval_dict = create_estimator_and_inputs(run_config, hparams, pipeline_config_path, train_steps=train_steps, eval_steps=eval_steps, model_fn_creator=model_fn_creator, save_final_config=True, **kwargs) + estimator = train_and_eval_dict['estimator'] + train_input_fn = train_and_eval_dict['train_input_fn'] + eval_input_fns = train_and_eval_dict['eval_input_fns'] + predict_input_fn = train_and_eval_dict['predict_input_fn'] + train_steps = train_and_eval_dict['train_steps'] + export_strategies = [tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(serving_input_fn=predict_input_fn)] + return tf.contrib.learn.Experiment(estimator=estimator, train_input_fn=train_input_fn, eval_input_fn=eval_input_fns[0], train_steps=train_steps, eval_steps=None, export_strategies=export_strategies, eval_delay_secs=120) diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_main_rt.py b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_main_rt.py new file mode 100644 index 0000000000000000000000000000000000000000..46bcfb668728927670383d130dfa5d004904f09e --- /dev/null +++ 
b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_main_rt.py @@ -0,0 +1,175 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +'Binary to run train and evaluation on object detection model.' 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from npu_bridge.npu_init import * +from tensorflow.core.protobuf import config_pb2 +from absl import flags +import tensorflow as tf +#import horovod.tensorflow as hvd +import dllogger +import time +import os +from object_detection import model_hparams +from object_detection import model_lib_rt +from object_detection.utils.exp_utils import AverageMeter, setup_dllogger + +flags.DEFINE_string('model_dir', None, 'Path to output model directory where event and checkpoint files will be written.') +flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config file.') +flags.DEFINE_string('raport_file', default='summary.json', help='Path to dlloger json') +flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.') +flags.DEFINE_boolean('eval_training_data', False, 'If training data should be evaluated for this job. Note that one call only use this in eval-only mode, and `checkpoint_dir` must be supplied.') +flags.DEFINE_integer('sample_1_of_n_eval_examples', 1, 'Will sample one of every n eval input examples, where n is provided.') +flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample one of every n train input examples for evaluation, where n is provided. This is only used if `eval_training_data` is True.') +flags.DEFINE_integer('eval_count', 1, 'How many times the evaluation should be run') +flags.DEFINE_string('hparams_overrides', None, 'Hyperparameter overrides, represented as a string containing comma-separated hparam_name=value pairs.') +flags.DEFINE_string('checkpoint_dir', None, 'Path to directory holding a checkpoint. If `checkpoint_dir` is provided, this binary operates in eval-only mode, writing resulting metrics to `model_dir`.') +flags.DEFINE_boolean('allow_xla', False, 'Enable XLA compilation') +flags.DEFINE_boolean('amp', False, 'Whether to enable AMP ops. 
When false, uses TF32 on A100 and FP32 on V100 GPUS.') +flags.DEFINE_boolean('run_once', False, 'If running in eval-only mode, whether to run just one round of eval vs running continuously (default).') +############################NPU_modify add######################################## +flags.DEFINE_boolean('overflow_dump', False, 'Enable overflow op detection') +flags.DEFINE_string('overflow_dump_path', None, 'Path to directory dump overflow ops data.') +flags.DEFINE_boolean('check_loss_scale', False, 'check whether loss scale is valid') +flags.DEFINE_boolean('step_dump', False, 'Enable dump step data, can only set when overflow_dump is not set') +flags.DEFINE_string('step_dump_path', None, 'Path to directory dump step0 ops data.') +flags.DEFINE_boolean('skip_eval', False, 'Whether to skip eval') +############################NPU_modify end######################################## +FLAGS = flags.FLAGS + + +class DLLoggerHook(tf.estimator.SessionRunHook): + + def __init__(self, global_batch_size, rank=(- 1)): + self.global_batch_size = global_batch_size + self.rank = rank + setup_dllogger(enabled=True, filename=FLAGS.raport_file, rank=rank) + + def after_create_session(self, session, coord): + self.meters = {} + warmup = 100 + self.meters['train_throughput'] = AverageMeter(warmup=warmup) + + def before_run(self, run_context): + self.t0 = time.time() + return tf.estimator.SessionRunArgs(fetches=['global_step:0', 'learning_rate:0']) + + def after_run(self, run_context, run_values): + throughput = (self.global_batch_size / (time.time() - self.t0)) + (global_step, lr) = run_values.results + self.meters['train_throughput'].update(throughput) + + def end(self, session): + summary = {'train_throughput': self.meters['train_throughput'].avg} + dllogger.log(step=tuple(), data=summary) + +###############################NPU_modify add##################################### +class _LogSessionRunHook(tf.train.SessionRunHook): + def before_run(self, run_context): + return 
tf.estimator.SessionRunArgs(fetches=['overflow_status_reduce_all:0', 'loss_scale:0']) + + def after_run(self, run_context, run_values): + if not run_values.results[0]: + print('Find overflow in this step, skip apply gradients, loss scale value=%d' % run_values.results[1],flush=True) + else: + print('Apply gradients, loss scale value=%d' % run_values.results[1],flush=True) +###############################NPU_modify end##################################### +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + #tf的混合精度 + if FLAGS.amp: + os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1' + else: + os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0' + flags.mark_flag_as_required('model_dir') + flags.mark_flag_as_required('pipeline_config_path') + if True: + session_config = npu_config_proto(config_proto=tf.ConfigProto()) + + session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 + session_config.gpu_options.visible_device_list = str(get_npu_local_rank_id()) + if FLAGS.allow_xla: + if True: + session_config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 + model_dir = (FLAGS.model_dir if (get_npu_rank_id() == 0) else None) + config = tf.estimator.RunConfig(model_dir=model_dir, session_config=session_config) + + train_and_eval_dict = model_lib.create_estimator_and_inputs(run_config=config, eval_count=FLAGS.eval_count, hparams=model_hparams.create_hparams(FLAGS.hparams_overrides), pipeline_config_path=FLAGS.pipeline_config_path, train_steps=FLAGS.num_train_steps, sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples, sample_1_of_n_eval_on_train_examples=FLAGS.sample_1_of_n_eval_on_train_examples) + estimator = train_and_eval_dict['estimator'] + train_input_fn = train_and_eval_dict['train_input_fn'] + eval_input_fns = train_and_eval_dict['eval_input_fns'] + eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] + predict_input_fn = train_and_eval_dict['predict_input_fn'] + train_steps = 
train_and_eval_dict['train_steps'] + if FLAGS.checkpoint_dir: + if FLAGS.eval_training_data: + name = 'training_data' + input_fn = eval_on_train_input_fn + else: + name = 'validation_data' + input_fn = eval_input_fns[0] + #if FLAGS.run_once: + # estimator.evaluate(input_fn, steps=None, checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir)) + #else: + # model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn, train_steps, name) + else: + (train_spec, eval_specs) = model_lib.create_train_and_eval_specs(train_input_fn, eval_input_fns, eval_on_train_input_fn, predict_input_fn, train_steps, eval_on_train_data=False) + ##################################NPU_modify add################################### + if FLAGS.check_loss_scale: + train_hooks = [NpuEmptyHook(), DLLoggerHook((get_rank_size() * train_and_eval_dict['train_batch_size']), get_npu_rank_id()),_LogSessionRunHook()] + else: + train_hooks = [NpuEmptyHook(), DLLoggerHook((get_rank_size() * train_and_eval_dict['train_batch_size']), get_npu_rank_id())] + #train_hooks = [NpuEmptyHook(), DLLoggerHook((get_rank_size() * train_and_eval_dict['train_batch_size']), get_rank_id())] + ##################################NPU_modify end################################### + eval_hooks = [] + for x in range(FLAGS.eval_count): + estimator.train(train_input_fn, hooks=npu_hooks_append(hooks_list=train_hooks), steps=(train_steps // FLAGS.eval_count)) + if (get_npu_rank_id() == 0): + eval_input_fn = eval_input_fns[0] + #eval阻塞,临时规避 + if FLAGS.skip_eval: + print("[debug]skip eval.") + else: + print("[debug]enter eval process ...") + results = estimator.evaluate(eval_input_fn, steps=None, hooks=eval_hooks) + +if (__name__ == '__main__'): + session_config = tf.ConfigProto() + custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + # 
custom_op.parameter_map["mix_compile_mode"].b = True + custom_op.parameter_map["modify_mixlist"].s = tf.compat.as_bytes("../../configs/ops_info.json") + (npu_sess, npu_shutdown) = init_resource(config=session_config) + tf.app.run() + shutdown_resource(npu_sess, npu_shutdown) + close_session(npu_sess) diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_inceptionv2.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_inceptionv2.sh new file mode 100644 index 0000000000000000000000000000000000000000..b70f401d407481b1c238f4afbd3efe7faf6688ce --- /dev/null +++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_inceptionv2.sh @@ -0,0 +1,142 @@ +#!/bin/bash +cur_path=`pwd` +export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH + +#集合通信 +export RANK_SIZE=1 +export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p_${ASCEND_DEVICE_ID}.json +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#数据集参数 +data_path="/data" +use_conda=0 + +#训练参数,需要根据模型修改 +Network="SSD-InceptionV2_ID0510_for_TensorFlow" +num_train_steps=300 +batch_size=24 +ckpt_path=/checkpoints +pipeline_config=$cur_path/../models/research/configs/ssd_inception_v2_coco_1p.config + + +#帮助提示,需要根据网络修改 +if [[ $1 == --help || $1 == -h ]];then + echo "usage: ./train_performance_1p.sh " + + echo "" + echo "parameter explain: + --num_train_steps training steps + --data_path source data of training + --ckpt_path pre-checkpoint path + --pipeline_config pipeline config path + --skip_eval whether to skip eval + -h/--help Show help message + " + exit 1 +fi + +#入参设置,需要根据网络修改 +for para in $* +do + if [[ $para == --num_train_steps* ]];then + num_train_steps=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + 
ckpt_path=`echo ${para#*=}` + elif [[ $para == --pipeline_config* ]];then + pipeline_config=`echo ${para#*=}` + elif [[ $para == --use_conda* ]];then + use_conda=`echo ${para#*=}` + elif [[ $para == --skip_eval* ]];then + skip_eval=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + + + +##########################执行训练######################### +start_time=$(date +%s) +cd $cur_path/../models/research +if [ -f ${pipeline_config}.bak ];then + cp ${pipeline_config}.bak ${pipeline_config} +else + cp ${pipeline_config} ${pipeline_config}.bak +fi + +sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config} +sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config} + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); + do + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/${ASCEND_DEVICE_ID} + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + else + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + fi + +#训练执行脚本,需要根据网络修改 + nohup python3 -u ./object_detection/model_main_rt.py \ + --pipeline_config_path=${pipeline_config} \ + --model_dir=$cur_path/output/${ASCEND_DEVICE_ID} \ + --data_path=${data_path} \ + --overflow_dump_path=${overflow_dump_path} \ + --step_dump_path=${step_dump_path} \ + --alsologtostder \ + --amp \ + --num_train_steps=${num_train_steps} \ + --skip_eval=True \ + "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +##########################业务日志######################### +grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/$ASCEND_DEVICE_ID/plog_err.log + +###########################性能结果处理######################### +echo "-----------------------Final result------------------------" +#性能FPS计算,需要根据网络修改 +#FPS=`grep -a 
'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $2}'` +FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==2{print $2}'` + +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*'${FPS}'}'` +echo "Final Performance images/sec : $FPS" + +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log + diff --git 
a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv2.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv2.sh new file mode 100644 index 0000000000000000000000000000000000000000..b7251d32828716906667d7b9f564079d1fe45d50 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv2.sh @@ -0,0 +1,173 @@ +#!/bin/bash +cur_path=`pwd` + +# Environment setup; adjust per network +export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH + +# Collective communication +export RANK_SIZE=1 +export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p_${ASCEND_DEVICE_ID}.json +export JOB_ID=10087 +RANK_ID_START=0 + +# Enable RT2.0 +export ENABLE_RUNTIME_V2=1 + +# Dataset parameters +data_path="/data" +use_conda=0 + +# Training parameters; adjust per model +Network="SSD-MobilenetV2_ID0499_for_TensorFlow" +num_train_steps=1000 +batch_size=24 +ckpt_path=/checkpoints +pipeline_config=$cur_path/../models/research/configs/ssd_mobilenet_v2_coco_1p.config + +# Maintenance and debugging parameters +overflow_dump=False +overflow_dump_path=$cur_path/output/overflow_dump +step_dump=False +step_dump_path=$cur_path/output/step_dump +check_loss_scale=False + +# Help message; adjust per network +if [[ $1 == --help || $1 == -h ]];then + echo "usage: ./train_performance_1p.sh " + echo "" + echo "parameter explain: + --num_train_steps training steps + --data_path source data of training + --ckpt_path pre-checkpoint path + --pipeline_config pipeline config path + --overflow_dump overflow detection,default is False + --overflow_dump_path overflow dump path + --check_loss_scale check whether loss scale is valid, default is False + --step_dump Dump step data, default is False, can only set when overflow_dump is False + --step_dump_path step_dump_path + -h/--help Show help message + " + exit 1 +fi + +# Argument parsing; adjust per network +for para in $* +do + if [[ $para == --num_train_steps* ]];then + num_train_steps=`echo 
${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --pipeline_config* ]];then + pipeline_config=`echo ${para#*=}` + elif [[ $para == --overflow_dump* ]];then + overflow_dump=`echo ${para#*=}` + if [ -d ${overflow_dump_path} ];then + echo "overflow dump path: ${overflow_dump_path}" + else + mkdir -p ${overflow_dump_path} + fi + elif [[ $para == --check_loss_scale* ]];then + check_loss_scale=`echo ${para#*=}` + elif [[ $para == --step_dump* ]];then + step_dump=`echo ${para#*=}` + if [ -d ${step_dump_path} ];then + echo "step dump path: ${step_dump_path}" + else + mkdir -p ${step_dump_path} + fi + elif [[ $para == --use_conda* ]];then + use_conda=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +##########################执行训练######################### +start_time=$(date +%s) +cd $cur_path/../models/research +if [ -f ${pipeline_config}.bak ];then + cp ${pipeline_config}.bak ${pipeline_config} +else + cp ${pipeline_config} ${pipeline_config}.bak +fi + +# 更改参数 +sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config} + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); + do + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/${ASCEND_DEVICE_ID} + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + else + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + fi + + #训练执行脚本,需要根据网络修改 + nohup python3 -u ./object_detection/model_main_rt.py \ + --pipeline_config_path=${pipeline_config} \ + --model_dir=$cur_path/output/${ASCEND_DEVICE_ID}/npu_ckpt_mobilenetv2_${RANK_SIZE}p\ + --data_path=${data_path} \ + --overflow_dump_path=${overflow_dump_path} \ + --step_dump_path=${step_dump_path} \ + --alsologtostder \ + --amp \ + 
--num_train_steps=${num_train_steps} \ + --skip_eval=True \ + "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +echo "Final Training Duration sec : $e2e_time" + +# Revert the parameter change +sed -i "s%${data_path}/coco2017_tfrecords%/data/coco2017_tfrecords%p" ${pipeline_config} + + +################################Performance result processing######################### +echo "-----------------------Final result------------------------" +# Compute performance FPS; adjust per network +step_sec=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $2}'|tail -2|head -n 1` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${step_sec}'}'` +echo "Final Performance images/sec : ${FPS}" + +#################################Accuracy result processing######################### +# Compute accuracy; adjust per network +train_accuracy=`grep Precision $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'NR==1 {print $13}'` +echo "Final Training Accuracy mAP: ${train_accuracy}" + +#################################Performance monitoring############################# +# Training case information; no need to modify +DeviceType=`uname -m` +BatchSize=${batch_size} +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#################################Loss######################### +# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; adjust per model +grep INFO:tensorflow:loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +# Read the last loss back from the file written on the previous line +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +# Eval support is still under development; temporarily track the final loss as the accuracy result +echo "Final Training Accuracy loss: ${ActualLoss}" + +# Write key information to ${CaseName}.log; no need to modify +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_inceptionv2.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_inceptionv2.sh new file mode 100644 index 0000000000000000000000000000000000000000..69611f27b45cf91737c97f8ee77f688665c9dafa --- /dev/null +++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_inceptionv2.sh @@ -0,0 +1,145 @@ +#!/bin/bash +cur_path=`pwd` +export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH +export HCCL_CONNECT_TIMEOUT=200 +#集合通信 +export RANK_SIZE=8 +export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p.json +export JOB_ID=10087 +RANK_ID_START=0 +ASCEND_DEVICE_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#数据集参数 +data_path="/data" +use_conda=0 + +#训练参数,需要根据模型修改 +Network="SSD-InceptionV2_ID0510_for_TensorFlow" +num_train_steps=300 +batch_size=24 +ckpt_path=/checkpoints +pipeline_config=$cur_path/../models/research/configs/ssd_inception_v2_coco_8p.config + +#帮助提示,需要根据网络修改 +if [[ $1 == --help || $1 == -h ]];then + echo "usage: 
./train_performance_8p_inceptionv2.sh " + + echo "" + echo "parameter explain: + --num_train_steps training steps + --data_path source data of training + --ckpt_path pre-checkpoint path + --pipeline_config pipeline config path + --skip_eval whether to skip eval + -h/--help Show help message + " + exit 1 +fi + +#入参设置,需要根据网络修改 +for para in $* +do + if [[ $para == --num_train_steps* ]];then + num_train_steps=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --pipeline_config* ]];then + pipeline_config=`echo ${para#*=}` + elif [[ $para == --use_conda* ]];then + use_conda=`echo ${para#*=}` + elif [[ $para == --skip_eval* ]];then + skip_eval=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + + +##########################执行训练######################### +start_time=$(date +%s) +cd $cur_path/../models/research +if [ -f ${pipeline_config}.bak ];then + cp ${pipeline_config}.bak ${pipeline_config} +else + cp ${pipeline_config} ${pipeline_config}.bak +fi + +sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config} +sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config} + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); + do + export RANK_ID=$RANK_ID + export ASCEND_DEVICE_ID=$((ASCEND_DEVICE_ID_START+RANK_ID)) + echo "Device ID: $ASCEND_DEVICE_ID" + if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/${ASCEND_DEVICE_ID} + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + else + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + fi + +#训练执行脚本,需要根据网络修改 + nohup python3 -u ./object_detection/model_main_rt.py \ + --pipeline_config_path=${pipeline_config} \ + --model_dir=$cur_path/output/${ASCEND_DEVICE_ID} \ + --data_path=${data_path} \ + --alsologtostder \ + --amp \ + 
--num_train_steps=${num_train_steps} \ + --skip_eval=True \ + "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +ASCEND_DEVICE_ID=0 + + +##########################业务日志######################### +grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/${ASCEND_DEVICE_ID}/plog_err.log + +################################性能结果处理######################### +echo "-----------------------Final result------------------------" +#性能FPS计算,需要根据网络修改 +#FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $2}'` +FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==2{print $2}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*'${FPS}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > 
$cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log + diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv2.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv2.sh new file mode 100644 index 0000000000000000000000000000000000000000..03dbc43bb9f47cda50606fdd222709a77a5e9251 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv2.sh @@ -0,0 +1,149 @@ +#!/bin/bash +cur_path=`pwd` + +# Environment setup; adjust per network +export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH +export HCCL_CONNECT_TIMEOUT=200 +# Collective communication +export RANK_SIZE=8 +export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p.json +export JOB_ID=10087 +RANK_ID_START=0 +ASCEND_DEVICE_ID_START=0 + +# Enable RT2.0 +export ENABLE_RUNTIME_V2=1 + +# Dataset parameters +data_path="/data" +use_conda=0 + +# Training parameters; adjust per model +Network="SSD-MobilenetV2_ID0499_for_TensorFlow" +num_train_steps=1000 +batch_size=24 +ckpt_path=/checkpoints +pipeline_config=$cur_path/../models/research/configs/ssd_mobilenet_v2_coco_8p.config + +# Help message; adjust per network +if [[ $1 == --help || $1 == -h ]];then + echo "usage: 
./train_performance_8p.sh " + echo "" + echo "parameter explain: + --num_train_steps training steps + --data_path source data of training + --ckpt_path pre-checkpoint path + --pipeline_config pipeline config path + --skip_eval whether to skip eval + -h/--help Show help message + " + exit 1 +fi + +#入参设置,需要根据网络修改 +for para in $* +do + if [[ $para == --num_train_steps* ]];then + num_train_steps=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --pipeline_config* ]];then + pipeline_config=`echo ${para#*=}` + elif [[ $para == --use_conda* ]];then + use_conda=`echo ${para#*=}` + elif [[ $para == --skip_eval* ]];then + skip_eval=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +##########################执行训练######################### +start_time=$(date +%s) +cd $cur_path/../models/research +if [ -f ${pipeline_config}.bak ];then + cp ${pipeline_config}.bak ${pipeline_config} +else + cp ${pipeline_config} ${pipeline_config}.bak +fi + +# 更改参数 +sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config} + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); + do + export RANK_ID=$RANK_ID + export ASCEND_DEVICE_ID=$((ASCEND_DEVICE_ID_START+RANK_ID)) + echo "Device ID: $ASCEND_DEVICE_ID" + if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/${ASCEND_DEVICE_ID} + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + else + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + fi + + #训练执行脚本,需要根据网络修改 + nohup python3 -u ./object_detection/model_main_rt.py \ + --pipeline_config_path=${pipeline_config} \ + --model_dir=$cur_path/output/${ASCEND_DEVICE_ID}/npu_ckpt_mobilenetv2_${RANK_SIZE}p\ + --data_path=${data_path} \ + --alsologtostder \ + --amp \ + --skip_eval=True \ + --num_train_steps=${num_train_steps} \ + 
"${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +echo "Final Training Duration sec : $e2e_time" +ASCEND_DEVICE_ID=0 + +# 参数回改 +sed -i "s%${data_path}/coco2017_tfrecords%/data/coco2017_tfrecords%p" ${pipeline_config} + + +################################性能结果处理######################### +echo "-----------------------Final result------------------------" +# 性能FPS计算,需要根据网络修改 +step_sec=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $2}'|tail -2|head -n 1` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${step_sec}'}'` +echo "Final Performance images/sec : ${FPS}" + +#################################精度结果处理######################### +# 精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'NR==1 {print $13}'` +echo "Final Training Accuracy mAP: ${train_accuracy}" + +#################################性能看护############################# +# 训练用例信息,不需要修改 +DeviceType=`uname -m` +BatchSize=${batch_size} +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#################################Loss######################### +ASCEND_DEVICE_ID=7 +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "Final Training Accuracy loss: ${ActualLoss}" + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/cnn_lstm_otc_ocr_rt.py b/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/cnn_lstm_otc_ocr_rt.py new file mode 100644 index 0000000000000000000000000000000000000000..6207ae2dd761573bcfa750621ebd50b1c7ff7fd2 --- /dev/null +++ b/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/cnn_lstm_otc_ocr_rt.py @@ -0,0 +1,242 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" + +""" +from npu_bridge.npu_init import * + +import tensorflow as tf +import utils +from tensorflow.python.framework import dtypes +from npu_bridge.estimator.npu.npu_dynamic_rnn import DynamicRNN + +FLAGS = utils.FLAGS +num_classes = utils.num_classes + + +class LSTMOCR(object): + def __init__(self, mode): + self.mode = mode + # image + self.inputs = tf.placeholder(tf.float32, [None, FLAGS.image_height, FLAGS.image_width, FLAGS.image_channel]) + # SparseTensor required by ctc_loss op + self.labels = tf.sparse_placeholder(tf.int32) + # 1d array of size [batch_size] + # self.seq_len = tf.placeholder(tf.int32, [None]) + # l2 + self._extra_train_ops = [] + + def build_graph(self): + self._build_model() + self._build_train_op() + + self.merged_summay = tf.summary.merge_all() + + def _build_model(self): + filters = [1, 64, 128, 128, FLAGS.out_channels] + strides = [1, 2] + + feature_h = FLAGS.image_height + feature_w = FLAGS.image_width + + count_ = 0 + min_size = min(FLAGS.image_height, FLAGS.image_width) + while min_size > 1: + min_size = (min_size + 1) // 2 + count_ += 1 + assert (FLAGS.cnn_count <= count_, "FLAGS.cnn_count should be <= {}!".format(count_)) + + # CNN part + with tf.variable_scope('cnn'): + x = self.inputs + for i in range(FLAGS.cnn_count): + with tf.variable_scope('unit-%d' % 
(i + 1)): + x = self._conv2d(x, 'cnn-%d' % (i + 1), 3, filters[i], filters[i + 1], strides[0]) + x = self._batch_norm('bn%d' % (i + 1), x) + x = self._leaky_relu(x, FLAGS.leakiness) + x = self._max_pool(x, 2, strides[1]) + + # print('----x.get_shape().as_list(): {}'.format(x.get_shape().as_list())) + _, feature_h, feature_w, _ = x.get_shape().as_list() + print('\nfeature_h: {}, feature_w: {}'.format(feature_h, feature_w)) + + # LSTM part + with tf.variable_scope('lstm'): + x = tf.transpose(x, [0, 2, 1, 3]) # [batch_size, feature_w, feature_h, FLAGS.out_channels] + # treat `feature_w` as max_timestep in lstm. + x = tf.reshape(x, [FLAGS.batch_size, feature_w, feature_h * FLAGS.out_channels]) + print('lstm input shape: {}'.format(x.get_shape().as_list())) + self.seq_len = tf.fill([x.get_shape().as_list()[0]], feature_w) + # print('self.seq_len.shape: {}'.format(self.seq_len.shape.as_list())) + + # tf.nn.rnn_cell.RNNCell, tf.nn.rnn_cell.GRUCell + ''' + cell = tf.nn.rnn_cell.LSTMCell(FLAGS.num_hidden, state_is_tuple=True) + if self.mode == 'train': + cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=FLAGS.output_keep_prob) + + cell1 = tf.nn.rnn_cell.LSTMCell(FLAGS.num_hidden, state_is_tuple=True) + if self.mode == 'train': + cell1 = tf.nn.rnn_cell.DropoutWrapper(cell=cell1, output_keep_prob=FLAGS.output_keep_prob) + + # Stacking rnn cells + stack = tf.nn.rnn_cell.MultiRNNCell([cell, cell1], state_is_tuple=True) + initial_state = stack.zero_state(FLAGS.batch_size, dtype=tf.float32) + + # The second output is the last state and we will not use that + outputs, _ = tf.nn.dynamic_rnn( + cell=stack, + inputs=x, + sequence_length=self.seq_len, + initial_state=initial_state, + dtype=tf.float32, + time_major=False + ) # [batch_size, max_stepsize, FLAGS.num_hidden] + ''' + # replace lstm compose above + inputdata = tf.transpose(x, [1, 0, 2]) + fw_cell1 = DynamicRNN(FLAGS.num_hidden, dtypes.float32, time_major=True, forget_bias=1.0) + fw_cell2 = 
# NOTE(review): the definitions below are methods of the OCR model class whose
# header lies outside this chunk (presumably ``LSTMOCR`` -- confirm).


def _build_train_op(self):
    """Create the loss, learning-rate schedule, train op and decoding ops.

    Sets ``global_step``, ``loss``, ``cost``, ``lrn_rate``, ``optimizer``,
    ``train_op``, ``decoded``, ``log_prob`` and ``dense_decoded`` on ``self``.
    Reads ``self.labels``, ``self.logits``, ``self.seq_len`` and
    ``self._extra_train_ops``.
    """
    self.global_step = tf.train.get_or_create_global_step()

    # Per-example CTC loss; ``cost`` is its mean and is what gets summarised.
    self.loss = tf.nn.ctc_loss(
        labels=self.labels,
        inputs=self.logits,
        sequence_length=self.seq_len,
    )
    self.cost = tf.reduce_mean(self.loss)
    tf.summary.scalar('cost', self.cost)

    # Staircase exponential decay driven by the global step.
    self.lrn_rate = tf.train.exponential_decay(
        learning_rate=FLAGS.initial_learning_rate,
        global_step=self.global_step,
        decay_steps=FLAGS.decay_steps,
        decay_rate=FLAGS.decay_rate,
        staircase=True,
    )
    tf.summary.scalar('learning_rate', self.lrn_rate)

    # Adam wrapped by the NPU optimizer helper.  Despite its name,
    # ``self.optimizer`` holds the op returned by ``minimize``.
    # NOTE(review): the un-reduced ``self.loss`` is minimised, not ``self.cost``
    # -- confirm this is intended.
    adam = tf.train.AdamOptimizer(
        learning_rate=self.lrn_rate,
        beta1=FLAGS.beta1,
        beta2=FLAGS.beta2,
    )
    self.optimizer = npu_tf_optimizer(adam).minimize(
        self.loss,
        global_step=self.global_step,
    )
    self.train_op = tf.group(self.optimizer, *self._extra_train_ops)

    # Beam-search decoding without merging repeated labels; the first decoded
    # path is densified with -1 as the padding value.
    self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(
        inputs=self.logits,
        sequence_length=self.seq_len,
        merge_repeated=False,
    )
    self.dense_decoded = tf.sparse_tensor_to_dense(self.decoded[0], default_value=-1)


def _conv2d(self, x, name, filter_size, in_channels, out_channels, strides):
    """Apply a square SAME-padded convolution plus bias inside scope ``name``.

    Creates variables ``W`` (Glorot-uniform initialised) and ``b`` (constant
    initialiser with its default value) and returns ``conv2d(x, W) + b``.
    """
    weight_shape = [filter_size, filter_size, in_channels, out_channels]
    stride_spec = [1, strides, strides, 1]

    with tf.variable_scope(name):
        weights = tf.get_variable(
            name='W',
            shape=weight_shape,
            dtype=tf.float32,
            initializer=tf.glorot_uniform_initializer(),
        )
        bias = tf.get_variable(
            name='b',
            shape=[out_channels],
            dtype=tf.float32,
            initializer=tf.constant_initializer(),
        )
        conv = tf.nn.conv2d(x, weights, stride_spec, padding='SAME')

    return tf.nn.bias_add(conv, bias)


def _batch_norm(self, name, x):
    """Batch-normalise ``x`` inside scope ``name``.

    Training-mode statistics are used only when ``self.mode == 'train'``;
    moving averages are updated in place (``updates_collections=None``).
    """
    training = self.mode == 'train'
    with tf.variable_scope(name):
        normalised = tf.contrib.layers.batch_norm(
            inputs=x,
            decay=0.9,
            center=True,
            scale=True,
            epsilon=1e-5,
            updates_collections=None,
            is_training=training,
            fused=True,
            data_format='NHWC',
            zero_debias_moving_mean=True,
            scope='BatchNorm',
        )

    return normalised


def _leaky_relu(self, x, leakiness=0.0):
    """Return ``x`` where it is non-negative and ``leakiness * x`` elsewhere."""
    is_negative = tf.less(x, 0.0)
    scaled = leakiness * x
    return tf.where(is_negative, scaled, x, name='leaky_relu')


def _max_pool(self, x, ksize, strides):
    """Apply SAME-padded max pooling with a square window and square stride."""
    window = [1, ksize, ksize, 1]
    step = [1, strides, strides, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME', name='max_pool')
"""Train or run inference for the CNN + LSTM + CTC OCR model on Ascend NPU.

``FLAGS.mode`` selects the behaviour: ``train`` runs the training loop with
periodic checkpointing and validation; ``infer`` decodes the images returned
by ``helper.load_img_path`` and appends the results to ``./result.txt``.
"""
from npu_bridge.npu_init import *

import datetime
import logging
import os
import time

import cv2
import numpy as np
import tensorflow as tf

import cnn_lstm_otc_ocr_rt
import utils
import helper

FLAGS = utils.FLAGS

logger = logging.getLogger('Traing for OCR using CNN+LSTM+CTC')
logger.setLevel(logging.INFO)


def _build_npu_session_config():
    """Return the ``tf.ConfigProto`` that enables the NPU graph optimizer."""
    global_config = tf.ConfigProto()
    rewrite_options = global_config.graph_options.rewrite_options
    custom_op = rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
    custom_op.parameter_map["jit_compile"].b = False
    rewrite_options.remapping = RewriterConfig.OFF
    rewrite_options.memory_optimization = RewriterConfig.OFF
    return global_config


def _run_validation(sess, model, val_feeder, shuffle_idx_val, num_val_samples, num_batches):
    """Evaluate ``num_batches`` validation batches.

    Returns:
        ``(accuracy, lastbatch_err, lr)`` where ``lastbatch_err`` and ``lr``
        come from the last evaluated batch (both 0 when no batch was run).
    """
    acc_batch_total = 0
    lastbatch_err = 0
    lr = 0
    for j in range(num_batches):
        indexs_val = [shuffle_idx_val[i % num_val_samples] for i in
                      range(j * FLAGS.batch_size, (j + 1) * FLAGS.batch_size)]
        val_inputs, _, val_labels = val_feeder.input_index_generate_batch(indexs_val)
        val_feed = {model.inputs: val_inputs,
                    model.labels: val_labels}

        dense_decoded, lastbatch_err, lr = sess.run(
            [model.dense_decoded, model.cost, model.lrn_rate], val_feed)

        # Print the decoded result next to the ground truth.
        ori_labels = val_feeder.the_label(indexs_val)
        acc_batch_total += utils.accuracy_calculation(
            ori_labels, dense_decoded, ignore_value=-1, isPrint=True)

    if num_val_samples == 0:
        # No validation data: report zero instead of dividing by zero.
        return 0.0, lastbatch_err, lr
    accuracy = (acc_batch_total * FLAGS.batch_size) / num_val_samples
    return accuracy, lastbatch_err, lr


def train(train_dir=None, val_dir=None, mode='train'):
    """Train the OCR model, checkpointing and validating periodically.

    Args:
        train_dir: directory handed to ``utils.DataIterator`` for training data.
        val_dir: directory handed to ``utils.DataIterator`` for validation data.
        mode: mode string forwarded to the model constructor.
    """
    # The model module is imported as ``cnn_lstm_otc_ocr_rt``; the previous
    # reference to ``cnn_lstm_otc_ocr`` raised NameError.
    model = cnn_lstm_otc_ocr_rt.LSTMOCR(mode)
    model.build_graph()

    print('loading train data')
    train_feeder = utils.DataIterator(data_dir=train_dir)
    print('size: ', train_feeder.size)

    print('loading validation data')
    val_feeder = utils.DataIterator(data_dir=val_dir)
    print('size: {}\n'.format(val_feeder.size))

    num_train_samples = train_feeder.size
    num_batches_per_epoch = int(num_train_samples / FLAGS.batch_size)

    num_val_samples = val_feeder.size
    num_batches_per_epoch_val = int(num_val_samples / FLAGS.batch_size)
    shuffle_idx_val = np.random.permutation(num_val_samples)

    with tf.Session(config=_build_npu_session_config()) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph)
        try:
            if FLAGS.restore:
                ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
                if ckpt:
                    # The global_step is restored as well.
                    saver.restore(sess, ckpt)
                    print('restore from checkpoint{0}'.format(ckpt))

            print('=============================begin training=============================')
            for cur_epoch in range(FLAGS.num_epochs):
                shuffle_idx = np.random.permutation(num_train_samples)
                train_cost = 0
                start_time = time.time()
                batch_time = time.time()

                for cur_batch in range(num_batches_per_epoch):
                    if (cur_batch + 1) % 100 == 0:
                        # The performance script parses this exact line layout.
                        print('batch', cur_batch, ': time', time.time() - batch_time)
                    batch_time = time.time()
                    indexs = [shuffle_idx[i % num_train_samples] for i in
                              range(cur_batch * FLAGS.batch_size, (cur_batch + 1) * FLAGS.batch_size)]
                    batch_inputs, _, batch_labels = \
                        train_feeder.input_index_generate_batch(indexs)
                    feed = {model.inputs: batch_inputs,
                            model.labels: batch_labels}

                    summary_str, batch_cost, step, _ = sess.run(
                        [model.merged_summay, model.cost, model.global_step, model.train_op], feed)
                    # Accumulate the cost weighted by batch size.
                    train_cost += batch_cost * FLAGS.batch_size

                    train_writer.add_summary(summary_str, step)

                    if step % FLAGS.save_steps == 1:
                        # makedirs also creates missing parents and tolerates
                        # an already existing directory.
                        os.makedirs(FLAGS.checkpoint_dir, exist_ok=True)
                        saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'ocr-model'), global_step=step)

                    if step % FLAGS.validation_steps == 0:
                        accuracy, lastbatch_err, lr = _run_validation(
                            sess, model, val_feeder, shuffle_idx_val,
                            num_val_samples, num_batches_per_epoch_val)

                        avg_train_cost = train_cost / ((cur_batch + 1) * FLAGS.batch_size)

                        now = datetime.datetime.now()
                        log = "{}/{} {}:{}:{} Epoch {}/{}, " \
                              "accuracy = {:.3f},avg_train_cost = {:.3f}, " \
                              "lastbatch_err = {:.3f}, time = {:.3f},lr={:.8f}"
                        print(log.format(now.month, now.day, now.hour, now.minute, now.second,
                                         cur_epoch + 1, FLAGS.num_epochs, accuracy, avg_train_cost,
                                         lastbatch_err, time.time() - start_time, lr))
        finally:
            # Flush pending summaries and release the event file handle.
            train_writer.close()


def _load_image(path):
    """Read ``path`` as grayscale, scale to [0, 1] and reshape to the model input."""
    raw = cv2.imread(path, 0)
    if raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError('cannot read image: {}'.format(path))
    im = raw.astype(np.float32) / 255.
    return np.reshape(im, [FLAGS.image_height, FLAGS.image_width, FLAGS.image_channel])


def infer(img_path, mode='infer'):
    """Decode the images under ``img_path`` and append results to ``./result.txt``.

    Images are processed in full batches of ``FLAGS.batch_size``; a trailing
    partial batch is skipped.

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string forwarded to the model constructor.

    Raises:
        ValueError: if an image cannot be read.
    """
    imgList = helper.load_img_path(img_path)
    print(imgList[:5])

    model = cnn_lstm_otc_ocr_rt.LSTMOCR(mode)
    model.build_graph()

    # Floor division: ``/`` yields a float on Python 3 and breaks ``range``.
    total_steps = len(imgList) // FLAGS.batch_size

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=npu_config_proto(config_proto=config)) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from ckpt{}'.format(ckpt))
        else:
            print('cannot restore')

        decoded_expression = []
        for curr_step in range(total_steps):
            batch_paths = imgList[curr_step * FLAGS.batch_size: (curr_step + 1) * FLAGS.batch_size]
            imgs_input = np.asarray([_load_image(img) for img in batch_paths])

            feed = {model.inputs: imgs_input}
            dense_decoded_code = sess.run(model.dense_decoded, feed)

            for item in dense_decoded_code:
                # -1 is the padding value of the dense decoding; skip it.
                decoded_expression.append(
                    ''.join(utils.decode_maps[i] for i in item if i != -1))

        with open('./result.txt', 'a') as f:
            f.writelines(code + '\n' for code in decoded_expression)


def main(_):
    """Validate the flags and dispatch to ``train`` or ``infer``.

    Raises:
        ValueError: if ``FLAGS.num_gpus`` is neither 0 nor 1.
    """
    if FLAGS.num_gpus not in (0, 1):
        raise ValueError('Only support 0 or 1 gpu.')

    # Graph construction is pinned to the CPU device, as before.
    with tf.device('/cpu:0'):
        if FLAGS.mode == 'train':
            train(FLAGS.train_dir, FLAGS.val_dir, FLAGS.mode)
        elif FLAGS.mode == 'infer':
            infer(FLAGS.infer_dir, FLAGS.mode)
        else:
            logger.error('Unsupported mode %r; expected "train" or "infer".', FLAGS.mode)


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run()
--help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=./imgs" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path \" must be config" + exit 1 +fi +##############执行训练########## +wait +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + +start=$(date +%s) +nohup python3 main_rt.py --train_dir=${data_path}/train/ \ + --val_dir=${data_path}/val \ + --image_height=60 \ + --image_width=180 \ + --image_channel=1 \ + --out_channels=64 \ + --num_hidden=128 \ + --batch_size=$batch_size \ + --logs_dir=./log \ + --num_gpus=1 \ + --initial_learning_rate=$learning_rate \ + --num_epochs=${train_epochs} \ + --mode=train > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait +end=$(date +%s) +e2e_time=$(( $end - $start )) + +#echo "Final Performance ms/step : $average_perf" +echo "Final Training Duration sec : $e2e_time" + +TrainingTime=`grep "batch " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk 'END {print $5}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "accuracy" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $7}'|cut -d , -f 1` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "lastbatch_err" 
$cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $12}' | cut -d , -f 1 > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log