From 0971c47d6cb302788d769febe6e52a9ffcde9453 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 10:58:37 +0800 Subject: [PATCH 1/9] =?UTF-8?q?CTPN=5FID0054=5Ffor=5FTensorFlow=E9=80=82?= =?UTF-8?q?=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/train_npu_rt.py | 249 ------------------ .../test/train_RT2_performance_1p.sh | 2 +- 2 files changed, 1 insertion(+), 250 deletions(-) delete mode 100644 TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py deleted file mode 100644 index 2fa675326..000000000 --- a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/main/train_npu_rt.py +++ /dev/null @@ -1,249 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import datetime -import os -import sys -import time - -import tensorflow as tf -import numpy as np -sys.path.append(os.getcwd()) - -cur_path = os.path.abspath(os.path.dirname(__file__)) -working_dir = os.path.join(cur_path, '../') -sys.path.append(working_dir) - -from tensorflow.contrib import slim - -tf.app.flags.DEFINE_float('learning_rate', 1e-5, '') -tf.app.flags.DEFINE_integer('max_steps', 50000, '') -tf.app.flags.DEFINE_integer('decay_steps', 30000, '') -tf.app.flags.DEFINE_float('decay_rate', 0.1, '') -tf.app.flags.DEFINE_float('moving_average_decay', 0.997, '') -tf.app.flags.DEFINE_integer('num_readers', 4, '') -tf.app.flags.DEFINE_string('gpu', '0', '') -tf.app.flags.DEFINE_string('checkpoint_path',"checkpoints_mlt/" , '') -tf.app.flags.DEFINE_string('logs_path', 'logs_mlt/', '') -tf.app.flags.DEFINE_string('pretrained_model_path', 'data/vgg_16.ckpt', '') -tf.app.flags.DEFINE_boolean('restore', False, '') -tf.app.flags.DEFINE_integer('save_checkpoint_steps', 2000, '') -tf.app.flags.DEFINE_string('dataset_dir', 'resized/', '') -tf.app.flags.DEFINE_integer('num_bbox', 256, '') -tf.app.flags.DEFINE_integer('loss_scale', 4096, '') -tf.app.flags.DEFINE_integer('inputs_height', 600, '') -tf.app.flags.DEFINE_integer('inputs_width', 900, '') -tf.app.flags.DEFINE_integer('device_id', 1, '') -tf.app.flags.DEFINE_integer('npu_nums', 1, '') -tf.app.flags.DEFINE_string('DEVICE_ID', '0', '') -#modify for NPU start -tf.app.flags.DEFINE_string('precision_mode', 'allow_fp32_to_fp16', '') -#modify for NPU end - -FLAGS = tf.app.flags.FLAGS - - -from nets import model_train as model -from utils.dataset import data_provider as data_provider -from hccl.split.api import set_split_strategy_by_size -# npu libs -from npu_bridge.estimator import npu_ops -from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig -from npu_bridge.estimator.npu.npu_estimator import NPUEstimator -from npu_bridge.estimator.npu.npu_optimizer import allreduce -from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer -from npu_bridge.hccl import hccl_ops -from npu_bridge.estimator.npu.npu_loss_scale_optimizer import NPULossScaleOptimizer -from npu_bridge.estimator.npu.npu_loss_scale_manager import FixedLossScaleManager - -from tensorflow.python.client import timeline - -# modify for NPU start -from npu_bridge.npu_init import * -# modify for NPU end - -def pad_input(inputs,target_shape=[1216,1216,3]): - - h,w = inputs.shape[:2] - out = np.zeros(target_shape).astype(np.uint8) - out[0:h,0:w,:] = inputs - - return out - - -def pad_bbox(inputs, count=256): - if len(inputs)>count: - return inputs[:count].copy() - - else: - out = inputs.copy() - num_inputs = len(out) - num_pad = count - num_inputs - - for i in range(num_pad): - out.append([0,0,0,0,1]) - return out - - -def broadcast_global_variables(root_rank, index): - op_list = [] - for var in tf.global_variables(): - if "float" in var.dtype.name: - inputs = [var] - outputs = hccl_ops.broadcast(tensor=inputs, root_rank=root_rank) - if outputs is not None: - op_list.append(outputs[0].op) - op_list.append(tf.assign(var, outputs[0])) - return tf.group(op_list) - -def main(argv=None): - os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu - now = datetime.datetime.now() - StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S") - os.makedirs(FLAGS.logs_path + FLAGS.DEVICE_ID) - if not os.path.exists(FLAGS.checkpoint_path): - os.makedirs(FLAGS.checkpoint_path) - - input_image = tf.placeholder(tf.float32, - shape=[1,FLAGS.inputs_height, FLAGS.inputs_width, 3], - 
name='input_image') - input_bbox = tf.placeholder(tf.float32, - shape=[FLAGS.num_bbox, 5], name='input_bbox') - - global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) - learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False) - tf.summary.scalar('learning_rate', learning_rate) - opt = tf.train.AdamOptimizer(learning_rate) - if FLAGS.npu_nums == 8: - opt = NPUDistributedOptimizer(opt) - - # opt = NPUDistributedOptimizer(opt) - # modify for NPU start - if FLAGS.precision_mode == "allow_mix_precision": - loss_scale_manager = ExponentialUpdateLossScaleManager( - init_loss_scale=2**32, - incr_every_n_steps=1000, - decr_every_n_nan_or_inf=2, - decr_ratio=0.5) - else: - loss_scale_manager = FixedLossScaleManager(loss_scale=FLAGS.loss_scale) - # modify for NPU end - - opt = NPULossScaleOptimizer(opt, loss_scale_manager) - - - with tf.name_scope('model' ) as scope: - bbox_pred, cls_pred, cls_prob = model.model(input_image) - - total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss_v2(bbox_pred, cls_pred, input_bbox) - - batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) - grads = opt.compute_gradients(total_loss) - - apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) - - summary_op = tf.summary.merge_all() - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, global_step) - variables_averages_op = variable_averages.apply(tf.trainable_variables()) - with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]): - train_op = tf.no_op(name='train_op') - - saver = tf.train.Saver(tf.global_variables(), max_to_keep=5) - summary_writer = tf.summary.FileWriter(FLAGS.logs_path + StyleTime, tf.get_default_graph()) - - init = tf.global_variables_initializer() - - if FLAGS.pretrained_model_path is not None: - variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, - slim.get_trainable_variables(), - ignore_missing_vars=True) - #for NPU - config = tf.ConfigProto(allow_soft_placement=True) - custom_op = config.graph_options.rewrite_options.custom_optimizers.add() - custom_op.name = "NpuOptimizer" - custom_op.parameter_map["use_off_line"].b = True - custom_op.parameter_map["hcom_parallel"].b = True - custom_op.parameter_map["jit_compile"].b = False - config.graph_options.rewrite_options.remapping = RewriterConfig.OFF - # modify for NPU start - if FLAGS.precision_mode == "allow_mix_precision": - custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") - # modify for NPU end - # for NPU - if FLAGS.npu_nums == 8: - bcast_op = broadcast_global_variables(0, 1) - with tf.Session(config=config) as sess: - if FLAGS.npu_nums == 8: - sess.run(bcast_op) - if FLAGS.restore: - ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path) - restore_step = int(ckpt.split('.')[0].split('_')[-1]) - print("continue training from previous checkpoint {}".format(restore_step)) - saver.restore(sess, ckpt) - else: - sess.run(init) - if FLAGS.npu_nums == 8: - set_split_strategy_by_size([80, 20]) - restore_step = 0 - if FLAGS.pretrained_model_path is not None: - variable_restore_op(sess) - data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers) - start = time.time() - - for step in range(restore_step, FLAGS.max_steps): - data = next(data_generator) - inputs_padded = data[0] - bbox_padded = pad_bbox(data[1],FLAGS.num_bbox) - input_image_np = inputs_padded - input_bbox_np = 
bbox_padded - - ml, tl,ce_loss, bbox_loss, _, summary_str = sess.run([model_loss, total_loss, - rpn_cross_entropy, - rpn_loss_box, - train_op, summary_op], - feed_dict={input_image: input_image_np, - input_bbox: input_bbox_np}) - summary_writer.add_summary(summary_str, global_step=step) - print('model loss :', ml, 'ce_loss: ', ce_loss, 'box_loss:',bbox_loss) - if step != 0 and step % FLAGS.decay_steps == 0: - sess.run(tf.assign(learning_rate, learning_rate.eval() * FLAGS.decay_rate)) - - if step % 10 == 0: - avg_time_per_step = (time.time() - start) / 10 - start = time.time() - print('Step {:06d}, ce_loss {:.6f}, bbox_loss {:.6f} model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'.format( - step, ce_loss, bbox_loss, ml, tl, avg_time_per_step, learning_rate.eval())) - - if (step + 1) % FLAGS.save_checkpoint_steps == 0: - filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt') - filename = os.path.join(FLAGS.checkpoint_path, filename) - saver.save(sess, filename) - print('Write model to: {:s}'.format(filename)) - -if __name__ == '__main__': - tf.app.run() diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh index 101e3c01e..b567d6169 100644 --- a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh +++ b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -113,7 +113,7 @@ do fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune - nohup python3 main/train_npu_rt.py \ + nohup python3 main/train_npu.py \ --precision_mode=$precision_mode \ --pretrained_model_path=$data_path/vgg_16.ckpt \ --dataset_dir=$data_path \ -- Gitee From 98d89398bc19b19422354dc2692989dddad08bdc Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 11:00:34 +0800 Subject: [PATCH 2/9] =?UTF-8?q?CTPN=5FID0054=5Ffor=5FTensorFlow=E9=80=82?= =?UTF-8?q?=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh index cfd98d12c..98a7658a4 100644 --- a/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh +++ b/TensorFlow/built-in/cv/detection/CTPN_ID0054_for_TensorFlow/test/train_RT2_performance_8p.sh @@ -127,7 +127,7 @@ do #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 cd $cur_path/../ - nohup python3 main/train_npu_rt.py \ + nohup python3 main/train_npu.py \ --precision_mode=$precision_mode \ --pretrained_model_path=$data_path/vgg_16.ckpt \ --dataset_dir=$data_path \ -- Gitee From bb8a96ecec2737b0239a3983b692d74c61cdb1da Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 11:09:49 +0800 Subject: [PATCH 3/9] =?UTF-8?q?SSD-Resnet50V1-FPN=5FID1463=5Ffor=5FTensorF?= =?UTF-8?q?low=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models/research/object_detection/model_lib_rt.py | 1 - 1 file changed, 1 deletion(-) diff 
--git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py
index bf6d3834c..a40980745 100644
--- a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/models/research/object_detection/model_lib_rt.py
@@ -332,7 +332,6 @@ def create_estimator_and_inputs(run_config, hparams, pipeline_config_path, eval_
   # custom_op.parameter_map["dynamic_input"].b = True
   # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile")
   custom_op.parameter_map["hcom_parallel"].b = True
-  custom_op.parameter_map["jit_compile"].b = False
   run_config = tf.estimator.RunConfig(model_dir=run_config.model_dir,
                                       session_config=run_config.session_config,
                                       save_checkpoints_steps=train_steps // eval_count)
--
Gitee
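
Patch 3 above is the code-side counterpart of the RT2.0 switch: once the launcher scripts export ENABLE_RUNTIME_V2=1, the per-session jit_compile knob is dropped. A minimal sketch of the resulting NPU session config, assembled only from config calls that already appear in this series (the helper name is illustrative, not part of the repository):

    import os
    import tensorflow as tf
    from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig

    def build_npu_session_config(enable_rt2=True):
        # Runtime v2 is selected through an environment variable set before
        # session creation; the launcher scripts export ENABLE_RUNTIME_V2=1.
        if enable_rt2:
            os.environ["ENABLE_RUNTIME_V2"] = "1"
        config = tf.ConfigProto(allow_soft_placement=True)
        custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
        custom_op.name = "NpuOptimizer"
        custom_op.parameter_map["use_off_line"].b = True
        custom_op.parameter_map["hcom_parallel"].b = True
        # Pre-RT2 variants of these files additionally set
        # custom_op.parameter_map["jit_compile"].b = False; this series removes it.
        config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
        return config
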
From 5a260c13b8c36aecd09d3a630b3414ffaa46db3c Mon Sep 17 00:00:00 2001
From: ykxia
Date: Fri, 11 Nov 2022 11:16:40 +0800
Subject: [PATCH 4/9] =?UTF-8?q?SSD-Resnet50V1-FPN=5FID1463=5Ffor=5FTensorF?=
 =?UTF-8?q?low=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_RT2_performance_1p.sh          | 194 +++++++++++++++++
 ...rain_RT2_performance_1p_mobilenetv1_fpn.sh | 174 +++++++++++++++
 .../test/train_RT2_performance_8p.sh          | 205 ++++++++++++++++++
 ...rain_RT2_performance_8p_mobilenetv1_fpn.sh | 150 +++++++++++++
 4 files changed, 723 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh
 create mode 100644 TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh

diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..01abaf5d8
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+cur_path=`pwd`
+export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH
+#集合通信
+export RANK_SIZE=1
+export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p_${ASCEND_DEVICE_ID}.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#数据集参数
+data_path="/data"
+use_conda=0
+
+#训练参数,需要根据模型修改
+Network="SSD-Resnet50V1-FPN_ID1463_for_TensorFlow"
+num_train_steps=500
+batch_size=32
+ckpt_path=/checkpoints
+pipeline_config=$cur_path/../models/research/configs/ssd320_full_1gpus.config
+
+#维测参数
+overflow_dump=False
+overflow_dump_path=$cur_path/output/overflow_dump
+step_dump=False
+step_dump_path=$cur_path/output/step_dump
+check_loss_scale=False
+
+#帮助提示,需要根据网络修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_1p.sh "
+
+    echo ""
+    echo "parameter explain:
+    --num_train_steps      training steps
+    --data_path            source data of training
+    --ckpt_path            pre-checkpoint path
+    --pipeline_config      pipeline config path
+    --overflow_dump        overflow detection,default is False
+    --overflow_dump_path   overflow dump path
+    --check_loss_scale     check whether loss scale is valid, default is False
+    --step_dump            Dump step data, default is False, can only set when overflow_dump is False
+    --step_dump_path       step_dump_path
+    --skip_eval            whether to skip eval
+    -h/--help              Show help message
+    "
+    exit 1
fi

#入参设置,需要根据网络修改
for para in $*
do
    if [[ $para == --num_train_steps* ]];then
        num_train_steps=`echo ${para#*=}`
    elif [[ $para == --data_path* ]];then
        data_path=`echo ${para#*=}`
    elif [[ $para == --ckpt_path* ]];then
        ckpt_path=`echo ${para#*=}`
    elif [[ $para == --pipeline_config* ]];then
        pipeline_config=`echo ${para#*=}`
    elif [[ $para == --overflow_dump* ]];then
        overflow_dump=`echo ${para#*=}`
        if [ -d ${overflow_dump_path} ];then
            echo "overflow dump path: ${overflow_dump_path}"
        else
            mkdir -p ${overflow_dump_path}
        fi
    elif [[ $para == --check_loss_scale* ]];then
        check_loss_scale=`echo ${para#*=}`
    elif [[ $para == --step_dump* ]];then
        step_dump=`echo ${para#*=}`
        if [ -d ${step_dump_path} ];then
            echo "step dump path: ${step_dump_path}"
        else
            mkdir -p ${step_dump_path}
        fi
    elif [[ $para == --skip_eval* ]];then
        skip_eval=`echo ${para#*=}`
    elif [[ $para == --use_conda* ]];then
        use_conda=`echo ${para#*=}`
    fi
done

if [[ $data_path == "" ]];then
    echo "[Error] para \"data_path\" must be config"
    exit 1
fi



##########################执行训练#########################
start_time=$(date +%s)
cd $cur_path/../models/research
if [ -f ${pipeline_config}.bak ];then
    cp ${pipeline_config}.bak ${pipeline_config}
else
    cp ${pipeline_config} ${pipeline_config}.bak
fi

sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config}
sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config}

for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
    do
    echo "Device ID: $ASCEND_DEVICE_ID"
    export RANK_ID=$RANK_ID
    if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
        rm -rf $cur_path/output/${ASCEND_DEVICE_ID}
        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
    else
        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
    fi

#训练执行脚本,需要根据网络修改
    nohup python3 -u ./object_detection/model_main_rt.py \
        --pipeline_config_path=${pipeline_config} \
        --model_dir=$cur_path/output/${ASCEND_DEVICE_ID} \
        --data_path=${data_path} \
        --overflow_dump_path=${overflow_dump_path} \
        --step_dump_path=${step_dump_path} \
        --alsologtostderr \
        --amp \
        --num_train_steps=${num_train_steps} \
        "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
done
wait

end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))

##########################业务日志#########################
grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/$ASCEND_DEVICE_ID/plog_err.log

################################性能结果处理#########################
echo "-----------------------Final result------------------------"
#性能FPS计算,需要根据网络修改
#FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'`
FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'NR>2{print line}{line=$0}'|awk '{print $2}'|awk '{sum+=$1} END {print sum/NR}'`

FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'}'`
echo "Final Performance images/sec : $FPS"
################################精度结果处理#########################
#精度计算,需要根据网络修改
train_accuracy=`grep Precision $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep Average |awk 'NR==1 {print $13}'`

#echo 'Final Training Accuracy mAP: $train_accuracy'
################################E2E训练时长##########################
echo "Final Training Duration sec : $e2e_time"

################################性能看护#############################
DeviceType=`uname -m`
CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf'
ActualFPS=${FPS}
TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'`

#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改
grep INFO:tensorflow:loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt

ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log

#eval版本需求开发中,精度结果临时看护最终的loss
echo "Final Training Accuracy loss: $ActualLoss"

if [[ $skip_eval == "" ]];then
##获取错误信息
#系统错误消息
#error_msg="CanonicalizeShape failed, node:Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/non_max_suppression/NonMaxSuppressionV3"
error_msg="E19999: Inner Error"
#判断错误信息是否和历史版本一致
Status=`grep "${error_msg}" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | wc -l`

#失败阶段
ModelStatus="图执行FAIL"

#DTS单号
#DTS_Number="DTS202105130LVO7FP0J00,DTS202105130O6E1SP1400"
DTS_Number="DTS202105200RLRJ1P1300"
echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "Status = ${Status}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "error_msg = ${error_msg}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
fi

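A note on the FPS block in the script above: the pipeline `awk 'NR>2{print line}{line=$0}'` prints every sample one line late, which silently discards both the first `global_step/sec` sample (graph build and warm-up) and the last, possibly truncated one before averaging. A rough Python equivalent, assuming the standard `INFO:tensorflow:global_step/sec: N` log lines (function and variable names are illustrative):

    import re

    def fps_from_log(log_path, batch_size=32):
        """Average global_step/sec samples, dropping the first and the last."""
        pat = re.compile(r"INFO:tensorflow:global_step/sec: ([0-9.]+)")
        with open(log_path) as f:
            samples = [float(m.group(1)) for m in map(pat.search, f) if m]
        kept = samples[1:-1]  # mirrors awk 'NR>2{print line}{line=$0}'
        steps_per_sec = sum(kept) / len(kept)
        fps = batch_size * steps_per_sec              # images/sec, "ActualFPS"
        train_time_ms = batch_size * 1000.0 / fps     # ms/step, "TrainingTime"
        return fps, train_time_ms
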
"%.2f\n",'${batch_size}'*'${FPS}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +#eval版本需求开发中,精度结果临时看护最终的loss +echo "Final Training Accuracy loss: $ActualLoss" + +if [[ $skip_eval == "" ]];then +##获取错误信息 +#系统错误消息 +#error_msg="CanonicalizeShape failed, node:Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/non_max_suppression/NonMaxSuppressionV3" +error_msg="E19999: Inner Error" +#判断错误信息是否和历史版本一致 +Status=`grep "${error_msg}" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | wc -l` + +#失败阶段 +ModelStatus="图执行FAIL" + +#DTS单号 +#DTS_Number="DTS202105130LVO7FP0J00,DTS202105130O6E1SP1400" +DTS_Number="DTS202105200RLRJ1P1300" +echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "Status = ${Status}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "error_msg = ${error_msg}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +fi + diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh new file mode 100644 index 000000000..a690a02b4 --- /dev/null +++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_1p_mobilenetv1_fpn.sh @@ -0,0 +1,174 @@ +#!bin/bash +cur_path=`pwd` +#临时补丁,需要根据网络修改 +#cp $ASCEND_OPP_PATH/op_impl/built-in/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json $cur_path/aic-ascend910-ops-info.json.bak -f +#python3 ops_info_patch.py + +#环境设置,需要根据网络修改 +export 
PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH + +#集合通信 +export RANK_SIZE=1 +export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p_${ASCEND_DEVICE_ID}.json +export JOB_ID=10087 +RANK_ID_START=0 + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#数据集参数 +data_path="/data" +use_conda=0 + +#训练参数,需要根据模型修改 +Network="SSD-MobilenetV1-FPN_ID1459_for_TensorFlow" +num_train_steps=1000 +batch_size=16 +ckpt_path=/checkpoints +pipeline_config=$cur_path/../models/research/configs/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_1gpus.config + +#维测参数 +overflow_dump=False +overflow_dump_path=$cur_path/output/overflow_dump +step_dump=False +step_dump_path=$cur_path/output/step_dump +check_loss_scale=Flase + +#帮助提示,需要根据网络修改 +if [[ $1 == --help || $1 == -h ]];then + echo "usage: ./train_performance_1p.sh " + + echo "" + echo "parameter explain: + --num_train_steps training steps + --data_path source data of training + --ckpt_path pre-checkpoint path + --pipeline_config pipeline config path + --overflow_dump overflow detection,default is False + --overflow_dump_path overflow dump path + --check_loss_scale check whether loss scale is valid, default is False + --step_dump Dump step data, default is False, can only set when overflow_dump is False + --step_dump_path step_dump_path + -h/--help Show help message + " + exit 1 +fi + +#入参设置,需要根据网络修改 +for para in $* +do + if [[ $para == --num_train_steps* ]];then + num_train_steps=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --pipeline_config* ]];then + pipeline_config=`echo ${para#*=}` + elif [[ $para == --overflow_dump* ]];then + overflow_dump=`echo ${para#*=}` + if [ -d ${overflow_dump_path} ];then + echo "overflow dump path: ${overflow_dump_path}" + else + mkdir -p ${overflow_dump_path} + fi + elif [[ $para == --check_loss_scale* ]];then + check_loss_scale=`echo ${para#*=}` + elif [[ $para == --step_dump* ]];then + step_dump=`echo ${para#*=}` + if [ -d ${step_dump_path} ];then + echo "step dump path: ${step_dump_path}" + else + mkdir -p ${step_dump_path} + fi + elif [[ $para == --use_conda* ]];then + use_conda=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +if [[ $use_conda == "1" ]];then + echo "======start use_conda======" + source activate python3.7.5 +fi + +##########################执行训练######################### +start_time=$(date +%s) +cd $cur_path/../models/research +if [ -f ${pipeline_config}.bak ];then + cp ${pipeline_config}.bak ${pipeline_config} +else + cp ${pipeline_config} ${pipeline_config}.bak +fi + +#sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config} +sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config} + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); + do + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/${ASCEND_DEVICE_ID} + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + else + mkdir -p $cur_path/output/${ASCEND_DEVICE_ID} + fi + +#训练执行脚本,需要根据网络修改 + nohup python3 -u ./object_detection/model_main_rt.py \ + --pipeline_config_path=${pipeline_config} \ + --model_dir=$cur_path/output/${ASCEND_DEVICE_ID}/npu_ckpt_mobilenetv1_fpn_${RANK_SIZE}p \ + --data_path=${data_path} \ + --overflow_dump_path=${overflow_dump_path} \ + 
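The 8p launcher that follows binds each rank to a contiguous slice of host CPU cores via `taskset`; its `let` arithmetic is easier to audit in this form (a sketch; `ranks=8` matches the hard-coded divisor in the script):

    def core_range(rank_id, corenum, ranks=8):
        """Cores for one rank: mirrors 'let a=RANK_ID*corenum/8; let b=RANK_ID+1;
        let c=b*corenum/8-1' in the 8p launcher below."""
        start = rank_id * corenum // ranks
        end = (rank_id + 1) * corenum // ranks - 1
        return start, end

    # 96 host cores, 8 ranks -> rank 0: (0, 11) ... rank 7: (84, 95)
    print([core_range(r, 96) for r in range(8)])
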
diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 000000000..07a8a8984
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+cur_path=`pwd`
+export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH
+#集合通信
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p.json
+export JOB_ID=10087
+RANK_ID_START=0
+ASCEND_DEVICE_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#数据集参数
+data_path=""
+use_conda=0
+#训练参数,需要根据模型修改
+Network="SSD-Resnet50V1-FPN_ID1463_for_TensorFlow"
+num_train_steps=1000
+batch_size=32
+ckpt_path=/checkpoints
+pipeline_config=$cur_path/../models/research/configs/ssd320_full_8gpus.config
+
+#维测参数
+overflow_dump=False
+overflow_dump_path=$cur_path/output/overflow_dump
+step_dump=False
+step_dump_path=$cur_path/output/step_dump
+check_loss_scale=False
+
+#帮助提示,需要根据网络修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_8p.sh "
+
+    echo ""
+    echo "parameter explain:
+    --num_train_steps      training steps
+    --data_path            source data of training
+    --ckpt_path            pre-checkpoint path
+    --pipeline_config      pipeline config path
+    --overflow_dump        overflow detection,default is False
+    --overflow_dump_path   overflow dump path
+    --check_loss_scale     check whether loss scale is valid, default is False
+    --step_dump            Dump step data, default is False, can only set when overflow_dump is False
+    --step_dump_path       step_dump_path
+    -h/--help              Show help message
+    "
+    exit 1
+fi
+
+#入参设置,需要根据网络修改
+for para in $*
+do
+    if [[ $para == --num_train_steps* ]];then
+        num_train_steps=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --bind_core* ]]; then
+        bind_core=`echo ${para#*=}`
+        name_bind="_bindcore"
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+    elif [[ $para == --pipeline_config* ]];then
+        pipeline_config=`echo ${para#*=}`
+    elif [[ $para == --overflow_dump* ]];then
+        overflow_dump=`echo ${para#*=}`
+        if [ -d ${overflow_dump_path} ];then
+            echo "overflow dump path: ${overflow_dump_path}"
+        else
+            mkdir -p ${overflow_dump_path}
+        fi
+    elif [[ $para == --check_loss_scale* ]];then
+        check_loss_scale=`echo ${para#*=}`
+    elif [[ $para == --step_dump* ]];then
+        step_dump=`echo ${para#*=}`
+        if [ -d ${step_dump_path} ];then
+            echo "step dump path: ${step_dump_path}"
+        else
+            mkdir -p ${step_dump_path}
+        fi
+    elif [[ $para == --use_conda* ]];then
+        use_conda=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+
+
+##########################执行训练#########################
+start_time=$(date +%s)
+cd $cur_path/../models/research
+if [ -f ${pipeline_config}.bak ];then
+    cp ${pipeline_config}.bak ${pipeline_config}
+else
+    cp ${pipeline_config} ${pipeline_config}.bak
+fi
+
+sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config}
+sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config}
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+    do
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$((ASCEND_DEVICE_ID_START+RANK_ID))
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf $cur_path/output/${ASCEND_DEVICE_ID}
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    else
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    fi
+
+#训练执行脚本,需要根据网络修改
+    corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+    let a=RANK_ID*${corenum}/8
+    let b=RANK_ID+1
+    let c=b*${corenum}/8-1
+    if [ "x${bind_core}" != x ];then
+        bind_core="taskset -c $a-$c"
+    fi
+    nohup ${bind_core} python3 -u ./object_detection/model_main_rt.py \
+        --pipeline_config_path=${pipeline_config} \
+        --model_dir=$cur_path/output/${ASCEND_DEVICE_ID_START} \
+        --data_path=${data_path} \
+        --overflow_dump_path=${overflow_dump_path} \
+        --step_dump_path=${step_dump_path} \
+        --alsologtostderr \
+        --amp \
+        --skip_eval=True \
+        --num_train_steps=${num_train_steps} \
+        "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait

+end_time=$(date +%s)
+e2e_time=$(( $end_time - 
$start_time )) + +ASCEND_DEVICE_ID=0 + + +##########################业务日志######################### +grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/${ASCEND_DEVICE_ID}/plog_err.log + +################################性能结果处理######################### +echo "-----------------------Final result------------------------" +#性能FPS计算,需要根据网络修改 +#FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $2}'` + +FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR>2{print line}{line=$0}'|awk '{print $2}'|awk '{sum+=$1} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'*'${RANK_SIZE}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}${name_bind}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log + +#eval版本需求开发中,精度结果临时看护最终的loss +echo "Final Training Accuracy loss: $ActualLoss" + +##获取错误信息 +#系统错误消息 +#error_msg="CanonicalizeShape failed, node:Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/non_max_suppression/NonMaxSuppressionV3" +error_msg="9999: Inner Error" + +#判断错误信息是否和历史版本一致 +Status=`grep "${error_msg}" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | wc -l` + +#失败阶段 +ModelStatus="图执行FAIL" + +#DTS单号 +#DTS_Number="DTS202105130LVO7FP0J00,DTS202105130O6E1SP1400" +DTS_Number="DTS202105200RLRJ1P1300" + +echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "Status = ${Status}" >> $cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log +echo "error_msg = ${error_msg}" >> 
$cur_path/output/${ASCEND_DEVICE_ID}/${CaseName}.log
diff --git a/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh
new file mode 100644
index 000000000..e914e7250
--- /dev/null
+++ b/TensorFlow/built-in/cv/detection/SSD-Resnet50V1-FPN_ID1463_for_TensorFlow/test/train_RT2_performance_8p_mobilenetv1_fpn.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+cur_path=`pwd`
+export PYTHONPATH=$cur_path/../models/research:$cur_path/../models/research/slim:$PYTHONPATH
+export HCCL_CONNECT_TIMEOUT=300
+#集合通信
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$cur_path/../configs/${RANK_SIZE}p.json
+export JOB_ID=10087
+RANK_ID_START=0
+ASCEND_DEVICE_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#数据集参数
+data_path="/data"
+use_conda=0
+
+#训练参数,需要根据模型修改
+Network="SSD-MobilenetV1-FPN_ID1459_for_TensorFlow"
+num_train_steps=800
+batch_size=16
+ckpt_path=/checkpoints
+pipeline_config=$cur_path/../models/research/configs/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_8gpus.config
+
+#帮助提示,需要根据网络修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_8p.sh "
+
+    echo ""
+    echo "parameter explain:
+    --num_train_steps      training steps
+    --data_path            source data of training
+    --ckpt_path            pre-checkpoint path
+    --pipeline_config      pipeline config path
+    --skip_eval            whether to skip eval
+    -h/--help              Show help message
+    "
+    exit 1
+fi
+
+#入参设置,需要根据网络修改
+for para in $*
+do
+    if [[ $para == --num_train_steps* ]];then
+        num_train_steps=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+    elif [[ $para == --pipeline_config* ]];then
+        pipeline_config=`echo ${para#*=}`
+    elif [[ $para == --use_conda* ]];then
+        use_conda=`echo ${para#*=}`
+    elif [[ $para == --skip_eval* ]];then
+        skip_eval=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+if [[ $use_conda == "1" ]];then
+    echo "======start use_conda======"
+    source activate python3.7.5
+fi
+
+
+##########################执行训练#########################
+start_time=$(date +%s)
+cd $cur_path/../models/research
+if [ -f ${pipeline_config}.bak ];then
+    cp ${pipeline_config}.bak ${pipeline_config}
+else
+    cp ${pipeline_config} ${pipeline_config}.bak
+fi
+
+#sed -i "s%/checkpoints%${ckpt_path}%p" ${pipeline_config}
+sed -i "s%/data/coco2017_tfrecords%${data_path}/coco2017_tfrecords%p" ${pipeline_config}
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+    do
+    export RANK_ID=$RANK_ID
+    export ASCEND_DEVICE_ID=$((ASCEND_DEVICE_ID_START+RANK_ID))
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf $cur_path/output/${ASCEND_DEVICE_ID}
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    else
+        mkdir -p $cur_path/output/${ASCEND_DEVICE_ID}
+    fi
+
+#训练执行脚本,需要根据网络修改
+    nohup python3 -u ./object_detection/model_main_rt.py \
+        --pipeline_config_path=${pipeline_config} \
+        --model_dir=$cur_path/output/${ASCEND_DEVICE_ID}/npu_ckpt_mobilenetv1_fpn_${RANK_SIZE}p \
+        --data_path=${data_path} \
+        --alsologtostderr \
+        --amp \
+        --num_train_steps=${num_train_steps} \
+        --skip_eval=True \
+        "${@:1}" > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+done
+wait
+
+end_time=$(date +%s)
+e2e_time=$(( $end_time - 
$start_time )) +ASCEND_DEVICE_ID=0 + + +##########################业务日志######################### +grep ERROR $HOME/ascend/log/plog/*.log > $cur_path/output/${ASCEND_DEVICE_ID}/plog_err.log + +################################性能结果处理######################### +echo "-----------------------Final result------------------------" +#性能FPS计算,需要根据网络修改 +FPS=`grep -a 'INFO:tensorflow:global_step/sec: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $2}'|tail -2|head -1` + +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'*'${RANK_SIZE}'}'` +echo "Final Performance images/sec : $FPS" +################################精度结果处理######################### +#精度计算,需要根据网络修改 +train_accuracy=`grep Precision $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep Average |awk 'NR==1 {print $13}'` + +#echo 'Final Training Accuracy mAP: $train_accuracy' +################################E2E训练时长########################## +echo "Final Training Duration sec : $e2e_time" + +################################性能看护############################# +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'RT2'_'perf' +ActualFPS=${FPS} +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型修改 +grep INFO:tensorflow:loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/7/train_${CaseName}_loss.txt + +ActualLoss=`awk 'END {print}' $cur_path/output/7/train_${CaseName}_loss.txt` +echo "Network = ${Network}" > $cur_path/output/7/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/7/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/7/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/7/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/7/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/7/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/7/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/7/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/7/${CaseName}.log + +conda deactivate -- Gitee From d08db262d68ed81fd05334396a4c5f82ab96a23d Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 11:43:25 +0800 Subject: [PATCH 5/9] =?UTF-8?q?2D=5FUnet=5FID2337=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2D_Unet_ID2337_for_TensorFlow/main_rt.py | 74 +++++++++++ .../test/train_RT2_performance_1p.sh | 115 ++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py create mode 100644 TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh diff --git a/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py new file mode 100644 index 000000000..0ec68d05f --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/main_rt.py @@ -0,0 +1,74 @@ +# +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from npu_bridge.npu_init import * +from model import * +from data import * +import argparse + +#os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +parser = argparse.ArgumentParser() +parser.add_argument('--epochs', default=5, type=int) +parser.add_argument('--train_data_path', default='./data/membrane/train') +parser.add_argument('--test_data_path', default='./data/membrane/test') +parser.add_argument('--predict_data_path', default='./data/membrane/predict') +args = parser.parse_args() + +def main(): + data_gen_args = dict(rotation_range=0.2, + width_shift_range=0.05, + height_shift_range=0.05, + shear_range=0.05, + zoom_range=0.05, + horizontal_flip=True, + fill_mode='nearest') + myGene = trainGenerator(2,args.train_data_path,'image','label',data_gen_args,save_to_dir = None) + + model = unet() + model_checkpoint = ModelCheckpoint('unet_membrane.hdf5', monitor='loss',verbose=2, save_best_only=True) + model.fit_generator(myGene,steps_per_epoch=300,epochs=args.epochs,callbacks=[model_checkpoint]) + + testGene = testGenerator(args.test_data_path) + results = model.predict_generator(testGene,30,verbose=2) + saveResult(args.predict_data_path,results) + +if __name__ == '__main__': + # ***** npu modify begin ***** + global_config = tf.ConfigProto(log_device_placement=False) + custom_op = global_config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + # custom_op.parameter_map["dynamic_input"].b = 1 + # custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") + npu_keras_sess = set_keras_session_npu_config(config=global_config) + # ***** npu modify end ***** + main() + close_session(npu_keras_sess) + diff --git a/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..73c514e23 --- /dev/null +++ b/TensorFlow/built-in/cv/image_segmentation/2D_Unet_ID2337_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,115 @@ +#!/bin/bash +cur_path=`pwd`/../ + +#基础参数,需要模型审视修改 +#Batch Size +batch_size=2 +#网络名称,同目录名称 
+Network="2D_Unet_ID2337_for_TensorFlow" +#Device数量,单卡默认为1 +RANK_SIZE=1 +export RANK_SIZE=1 +#训练epoch,可选 +train_epochs=5 +#训练step +train_steps= +#学习率 +learning_rate= + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +#参数配置 +data_path="" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=./datasets" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path \" must be config" + exit 1 +fi + +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + +mkdir -p ${cur_path}/test/predict +start=$(date +%s) +python3 main_rt.py --epochs=${train_epochs} \ + --train_data_path=${data_path}/data/membrane/train \ + --test_data_path=${data_path}/data/membrane/test \ + --predict_data_path=${cur_path}/test/predict > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +end=$(date +%s) +e2e_time=$(( $end - $start )) + +#echo "Final Performance ms/step : $average_perf" +echo "Final Training Duration sec : $e2e_time" + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +TrainingTime=`grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $5}'|cut -d 's' -f -1` +if echo "${TrainingTime}" | grep -q -E 'm$' +then + TrainingTime=`grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $5}' |cut -d 'm' -f -1` + FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'` +else + FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'}'` +fi +#FPS=`awk 'BEGIN{printf "%.2f\n",'1000'*'${batch_size}'/'${TrainingTime}'}'` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $11}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "300/300" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $8}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> 
$cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From d113ff4f3bb6d60d0067c5fcd38459333acd49a1 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 13:00:16 +0800 Subject: [PATCH 6/9] =?UTF-8?q?InceptionV2=5FID0670=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/train_RT2_performance_1p.sh | 91 +++++++++ .../test/train_RT2_performance_8p.sh | 192 ++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100644 TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh new file mode 100644 index 000000000..d56cff5f4 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_1p.sh @@ -0,0 +1,91 @@ +#!/bin/bash +cur_path=`pwd`/../ +export JOB_ID=10087 +export RANK_SIZE=1 +#基础参数,需要模型审视修改 +#Batch Size +batch_size=128 +#网络名称,同目录名称 +Network="InceptionV2_ID0670_for_TensorFlow" +#Device数量,单卡默认为1 +RankSize=1 +#参数配置 +data_path="../VCTK-Corpus" + +#使能RT2.0 +export ENABLE_RUNTIME_V2=1 + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=../VCTK-Corpus" + exit 1 +fi +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + + +start=$(date +%s) +nohup python3.7 $cur_path/train.py --rank_size=1 \ + --mode=train \ + --max_epochs=1 \ + --iterations_per_loop=10 \ + --data_dir=${data_path} \ + --batch_size=${batch_size} \ + --lr=0.045 \ + --display_every=100 \ + --log_dir=$cur_path/test/output/model \ + --eval_dir=$cur_path/test/output/model \ + --log_name=inception_v2.log > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait +end=$(date +%s) +e2e_time=$(( $end - $start )) + + +#输出性能FPS,需要模型审视修改 +FPS=`grep "epoch" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|sed "1d" |awk -F 'FPS: ' '{print $2}'|awk -F " " '{print $1}'|awk '{sum+=$1} END {print "AVG",sum/NR}'|awk -F " " '{print $2}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" +echo "E2E Training Duration sec : $e2e_time" +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' +train_accuracy="None" +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN {printf "%.2f\n",'${batch_size}'*1000/'${ActualFPS}'}'` 
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep "epoch" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss: " '{print $2}'|awk -F " " '{print $1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 000000000..07db92a79
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/InceptionV2_ID0670_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+source env.sh
+#当前路径,不需要修改
+cur_path=`pwd`
+
+#集合通信参数,不需要修改
+#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${cur_path}/8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+# 数据集路径,保持为空,不需要修改
+data_path="/npu/traindata/imagenet_TF"
+
+#设置默认日志级别,不需要修改
+export ASCEND_GLOBAL_LOG_LEVEL=3
+
+#基础参数 需要模型审视修改
+#网络名称,同目录名称
+Network="InceptionV2_ID0670_for_TensorFlow"
+#训练epoch
+train_epochs=1
+#训练batch_size
+batch_size=256
+#训练step
+train_steps=`expr 1281167 / ${batch_size}`
+#学习率
+learning_rate=""
+
+#TF2.X独有,不需要修改
+export NPU_LOOP_SIZE=${train_steps}
+
+#维测参数,precision_mode需要模型审视修改
+precision_mode="allow_mix_precision"
+#维持参数,以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_8p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode     precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump          if or not over detection, default is False
+    --data_dump_flag     data dump flag, default is 0
+    --data_dump_step     data dump step, default is 10
+    --profiling          if or not profiling for performance debug, default is False
+    --autotune           whether to enable autotune, default is False
+    --data_path          source data of training
+    -h/--help            show help message
+    "
+    exit 1
+fi
+
+#参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        
data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ + cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#autotune时,先开启autotune执行单P训练,不需要修改 +if [[ $autotune == True ]]; then + train_full_1p.sh --autotune=$autotune --data_path=$data_path + wait + autotune=False +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $RANK_ID" + export RANK_ID=$RANK_ID + export ASCEND_DEVICE_ID=$RANK_ID + ASCEND_DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3.7 $cur_path/../train.py --rank_size=8 \ + --mode=train_and_evaluate \ + --max_epochs=100 \ + --T_max=100 \ + --iterations_per_loop=10 \ + --batch_size=64 \ + --display_every=100 \ + --data_dir=${data_path} \ + --lr=0.045 \ + --log_dir=./model \ + --eval_dir=./model \ + --log_name=inception_v2.log > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $6}'| awk -F "." 
'{print $1}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`expr ${batch_size} \* ${RANK_SIZE} \* 1000 \/ ${FPS}` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v top1 | awk -F " " '{print $(NF-3)}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From c85277dc3d9656a1c5046b4a7dd1ddd9ffeae2c9 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 13:08:29 +0800 Subject: [PATCH 7/9] =?UTF-8?q?CNN-CTC=5FID0683=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py b/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py index 2e81c1ac1..9f92b8446 100644 --- a/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py +++ b/TensorFlow/built-in/nlp/CNN-CTC_ID0683_for_TensorFlow/main_rt.py @@ -79,7 +79,6 @@ def train(train_dir=None, val_dir=None, mode='train'): custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") #custom_op.parameter_map["dynamic_input"].b = 1 #custom_op.parameter_map["dynamic_graph_execute_mode"].s = tf.compat.as_bytes("lazy_recompile") - custom_op.parameter_map["jit_compile"].b = False global_config.graph_options.rewrite_options.remapping = RewriterConfig.OFF global_config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF # ***** npu modify end ****** -- Gitee From 4498957f399605951771f92832b9b4999b22aa79 Mon Sep 17 00:00:00 2001 From: ykxia Date: Fri, 11 Nov 2022 14:20:15 +0800 Subject: [PATCH 8/9] =?UTF-8?q?Siamese=5FID0506=5Ffor=5FTensorFlow?= =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../siamese_network_rt.py | 138 ++++++++ .../test/train_RT2_performance_1p.sh | 119 
+++++++ .../test/train_RT2_performance_8p.sh | 243 ++++++++++++++ .../Siamese_ID0506_for_TensorFlow/train_rt.py | 307 ++++++++++++++++++ 4 files changed, 807 insertions(+) create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh create mode 100644 TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py new file mode 100644 index 000000000..4587cee9a --- /dev/null +++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/siamese_network_rt.py @@ -0,0 +1,138 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from npu_bridge.npu_init import * +import tensorflow as tf +import numpy as np +from npu_bridge.estimator.npu.npu_dynamic_rnn import DynamicRNN + + +class SiameseLSTM(object): + """ + A LSTM based deep Siamese network for text similarity. + Uses an character embedding layer, followed by a biLSTM and Energy Loss layer. 
+ """ + + def BiRNN(self, x, dropout, scope, embedding_size, sequence_length, hidden_units): + n_hidden = hidden_units + n_layers = 3 + # Prepare data shape to match `static_rnn` function requirements + x = tf.unstack(tf.transpose(x, perm=[1, 0, 2])) + print(x) + # Define lstm cells with tensorflow + # Forward direction cell + with tf.name_scope("fw" + scope), tf.variable_scope("fw" + scope): + stacked_rnn_fw = [] + for _ in range(n_layers): + fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True) + lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(fw_cell, output_keep_prob=dropout) + stacked_rnn_fw.append(lstm_fw_cell) + lstm_fw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_fw, state_is_tuple=True) + + with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope): + stacked_rnn_bw = [] + for _ in range(n_layers): + bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True) + lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=dropout) + stacked_rnn_bw.append(lstm_bw_cell) + lstm_bw_cell_m = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn_bw, state_is_tuple=True) + # Get lstm cell output + + with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope): + outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell_m, lstm_bw_cell_m, x, dtype=tf.float32) + return outputs[-1] + + def BiRNN_npu(self, x, dropout, scope, embedding_size, sequence_length, hidden_units): + n_hidden = hidden_units + # n_layers=3 + # Prepare data shape to match `static_rnn` function requirements + x = tf.transpose(x, perm=[1, 0, 2], name="transpose_inputdata") + print(x) + + # dropout_rate is 1, so not add dropout + with tf.name_scope(scope), tf.variable_scope(scope): + fw_cell1 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + fw_y1, output_h, output_c, i, j, f, o, tanhc = fw_cell1(x) + bw_cell1 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + bw_y1, output_h, output_c, i, j, f, o, tanhc = bw_cell1(tf.reverse(x, axis=[0])) + output_rnn1 = tf.concat((fw_y1, tf.reverse(bw_y1, axis=[0])), axis=2) + + fw_cell2 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + fw_y2, output_h, output_c, i, j, f, o, tanhc = fw_cell2(output_rnn1) + bw_cell2 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + bw_y2, output_h, output_c, i, j, f, o, tanhc = bw_cell2(tf.reverse(output_rnn1, axis=[0])) + output_rnn2 = tf.concat((fw_y2, tf.reverse(bw_y2, axis=[0])), axis=2) + + fw_cell3 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + fw_y3, output_h, output_c, i, j, f, o, tanhc = fw_cell3(output_rnn2) + bw_cell3 = DynamicRNN(hidden_size=n_hidden, forget_bias=1.0, dtype=tf.float32) + bw_y3, output_h, output_c, i, j, f, o, tanhc = bw_cell3(tf.reverse(output_rnn2, axis=[0])) + output_rnn3 = tf.concat((fw_y3, tf.reverse(bw_y3, axis=[0])), axis=2) + + outputs = tf.transpose(output_rnn3, perm=[1, 0, 2], name="transpose_outdata") + print(outputs) + + return outputs[:, -1, :] + + def contrastive_loss(self, y, d, batch_size): + tmp = y * tf.square(d) + # tmp= tf.mul(y,tf.square(d)) + tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0)) + return tf.reduce_sum(tmp + tmp2) / batch_size / 2 + + def __init__( + self, sequence_length, vocab_size, embedding_size, hidden_units, l2_reg_lambda, batch_size): + + # Placeholders for input, output and dropout + self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1") + self.input_x2 = 
tf.placeholder(tf.int32, [None, sequence_length], name="input_x2") + self.input_y = tf.placeholder(tf.float32, [None], name="input_y") + self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") + + # Keeping track of l2 regularization loss (optional) + l2_loss = tf.constant(0.0, name="l2_loss") + + # Embedding layer + with tf.name_scope("embedding"): + self.W = tf.Variable( + tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), + trainable=True, name="W") + self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1) + # self.embedded_chars_expanded1 = tf.expand_dims(self.embedded_chars1, -1) + self.embedded_chars2 = tf.nn.embedding_lookup(self.W, self.input_x2) + # self.embedded_chars_expanded2 = tf.expand_dims(self.embedded_chars2, -1) + + # Create a convolution + maxpool layer for each filter size + with tf.name_scope("output"): + self.out1 = self.BiRNN_npu(self.embedded_chars1, self.dropout_keep_prob, "side1", embedding_size, + sequence_length, hidden_units) + self.out2 = self.BiRNN_npu(self.embedded_chars2, self.dropout_keep_prob, "side2", embedding_size, + sequence_length, hidden_units) + self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1, keep_dims=True)) + self.distance = tf.div(self.distance, + tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.out1), 1, keep_dims=True)), + tf.sqrt(tf.reduce_sum(tf.square(self.out2), 1, keep_dims=True)))) + self.distance = tf.reshape(self.distance, [-1], name="distance") + #self.distance = util.set_graph_exec_config(self.distance, dynamic_input=True, + #dynamic_graph_execute_mode='dynamic_execute', + #dynamic_inputs_shape_range='data:[64,15],[64,15],[64]') + with tf.name_scope("loss"): + self.loss = self.contrastive_loss(self.input_y, self.distance, batch_size) + #### Accuracy computation is outside of this class. 
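+        # The block below thresholds the normalized distance at 0.5: tf.rint
+        # rounds each distance to the nearest integer (0 or 1), so
+        # temp_sim = 1 - rint(distance) predicts "similar" (1) when
+        # distance < 0.5 and "dissimilar" (0) otherwise. Illustrative values
+        # (assumed, not from a run): distance = [0.2, 0.8] -> rint = [0., 1.]
+        # -> temp_sim = [1., 0.], compared elementwise against input_y.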
+        with tf.name_scope("accuracy"):
+            self.temp_sim = tf.subtract(tf.ones_like(self.distance), tf.rint(self.distance),
+                                        name="temp_sim")  # auto threshold 0.5
+            correct_predictions = tf.equal(self.temp_sim, self.input_y)
+            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..2a5cb1c2b
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+cur_path=`pwd`/../
+
+
+#设置默认日志级别,不需要修改
+# export ASCEND_GLOBAL_LOG_LEVEL=3
+
+#基础参数,需要模型审视修改
+#Batch Size
+batch_size=64
+#网络名称,同目录名称
+Network="Siamese_ID0506_for_TensorFlow"
+#Device数量,单卡默认为1
+RANK_SIZE=1
+#训练epoch,可选
+train_epochs=10 # init 1
+#训练step
+train_steps=
+#学习率
+learning_rate=5e-5
+#ASCEND_DEVICE_ID=0
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#参数配置
+data_path=""
+
+if [[ $1 == --help || $1 == --h ]];then
+    echo "usage:./train_RT2_performance_1p.sh"
+    exit 1
+fi
+
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+##############执行训练##########
+cd $cur_path
+
+wait
+
+if [ -d $cur_path/test/output ];then
+    rm -rf $cur_path/test/output/*
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
+fi
+wait
+
+start=$(date +%s)
+nohup python3 train_rt.py \
+    --num_epochs $train_epochs \
+    --training_files=$data_path/person_match.train2 \
+    --hidden_units=64 \
+    --embedding_dim=304 \
+    --device_size=$RANK_SIZE \
+    --device_id=$ASCEND_DEVICE_ID \
+    --evaluate_every=10000 \
+    --checkpoint_every=10000 > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+wait
+
+end=$(date +%s)
+e2e_time=$(( $end - $start ))
+
+#echo "Final Performance ms/step : $average_perf"
+echo "Final Training Duration sec : $e2e_time"
+
+#结果打印,不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS,需要模型审视修改
+TrainingTime=`grep "TRAIN " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $6}'`
+wait
+FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${TrainingTime}'*1000}'`
+#打印,不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep "TRAIN " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $10}'`
+#打印,不需要修改
+#echo "Final Train Accuracy : ${train_accuracy}"
+
+
+#性能看护结果汇总
+#训练用例信息,不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+##获取性能数据,不需要修改
+#吞吐量
+ActualFPS=${FPS}
+#单迭代训练时长
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'/'${FPS}'}'`
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep "TRAIN " $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $8}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print $1}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log

diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh
new file mode 100644
index 000000000..1d56056d7
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/test/train_RT2_performance_8p.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+
+#当前路径,不需要修改
+cur_path=`pwd`/../
+
+#集合通信参数,不需要修改
+
+export RANK_SIZE=8
+export JOB_ID=10087
+export RANK_ID=8p
+RANK_ID_START=0
+export RANK_TABLE_FILE=${cur_path}/test/8p.json
+export HCCL_CONNECT_TIMEOUT=600
+RANK_SIZE=8
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+#基础参数,需要模型审视修改
+#网络名称,同目录名称
+Network="Siamese_ID0506_for_TensorFlow"
+#训练epoch
+train_epochs=10 #init1
+#训练batch_size
+batch_size=64
+# 训练step
+train_steps=
+# 学习率
+learning_rate=4e-4
+
+
+#维测参数,precision_mode需要模型审视修改
+#precision_mode="allow_mix_precision"
+#维持参数,以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_8p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode           precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump                if or not over detection, default is False
+    --data_dump_flag           data dump flag, default is False
+    --data_dump_step           data dump step, default is 10
+    --profiling                if or not profiling for performance debug, default is False
+    --data_path                source data of training
+    -h/--help                  show help message
+    "
+    exit 1
+fi
+
+#参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#训练开始时间,不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录,需要模型审视修改
+cd $cur_path
+#sed -i "s|/scratch/shiyichu/dataset/FaceDatabases/CASIA-Webface/casia_mtcnncaffe_aligned|${data_path}|g" ./data/list_casia_mtcnncaffe_aligned_nooverlap.txt
+#for i in 0 1 2 3 4 5 6 7
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量,不需要修改
+    echo "Device ID: $RANK_ID"
+    export RANK_ID=$RANK_ID
+    export DEVICE_INDEX=$RANK_ID
+    export ASCEND_DEVICE_ID=$RANK_ID
+    ASCEND_DEVICE_ID=$RANK_ID
+    export DEVICE_ID=$ASCEND_DEVICE_ID
+    echo "Device ID: $ASCEND_DEVICE_ID"
+
+
+    #创建DeviceID输出目录,不需要修改
+    if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # corenum=`cat /proc/cpuinfo |grep 'processor' | wc -l`
+    # let a=RANK_ID*${corenum}/8
+    # let b=RANK_ID+1
+    # let c=b*${corenum}/8-1
+    # if [ "x${bind_core}" != x];then
+    #     bind_core="taskset -c $a-$c"
+    # fi
+
+    # sed -i "s|ind_start = 0 * part_int|ind_start = ${i} * part_int|g" $cur_path/../train.py
+
+    #执行训练脚本,以下传参不需要修改,其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    # timeout -s SIGINT 3600 nohup python3 $cur_path/../train_8p.py \
+    nohup python3 ${cur_path}/train_rt.py \
+        --num_epochs $train_epochs \
+        --training_files=$data_path/person_match.train2 \
+        --hidden_units=64 \
+        --embedding_dim=304 \
+        --device_size=8 \
+        --device_id=$RANK_ID \
+        --evaluate_every=10000 \
+        --checkpoint_every=10000 > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+
+    # sed -i "s|ind_start = ${i} * part_int|ind_start = 0 * part_int|g" $cur_path/../train.py
+
+
+    # sleep 60
+    # num=`grep 'ERROR' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "out of bounds"| wc -l`
+    # while [ $num -eq 0 ]
+    # do
+    #     num=`grep 'ERROR' ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "out of bounds"| wc -l`
+    #     echo "${num}"
+    #     sleep 5
+    # done
+    # ps -ef | grep python3 |grep max_epoch |grep config |awk '{system("kill -9 "$2)}'
+    # echo "killed Yolov4"
+
+done
+wait
+#sed -i "s|${data_path}|/scratch/shiyichu/dataset/FaceDatabases/CASIA-Webface/casia_mtcnncaffe_aligned|g" ./data/list_casia_mtcnncaffe_aligned_nooverlap.txt
+
+#训练结束时间,不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+echo "E2E Training Duration sec : $e2e_time"
+
+# 结果打印,不需要修改
+TrainingTime=`grep "TRAIN " ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${TrainingTime}'*1000*'${RANK_SIZE}'}'`
+# 打印,不需要修改
+echo "Final Performance images/sec: $FPS"
+
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep "TRAIN " ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $10}'`
+
+# 训练用例信息,不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+#吞吐量
+ActualFPS=${FPS}
+#单迭代训练时长
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${BatchSize}'/'${FPS}' }'`
+
+grep "TRAIN " ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $8}' > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt
+
+# 最后一个loss值
+ActualLoss=`awk 'END {print $1}' ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt`
+
+
+
+#关键信息打印到CaseName.log中,此处无需修改
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# ###下面字段用于冒烟看护
+# BatchSize=${batch_size}
+# #设备类型,自动获取
+# DeviceType=`uname -m`
+# # #用例名称,自动获取
+# # CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'

+# ##获取错误信息
+# #系统错误信息
+# error_msg="of dimension 1 out of bounds"
+# #判断错误信息是否和历史状态一致,此处无需修改
+# Status=`grep "${error_msg}" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | wc -l`
+# #失败阶段,枚举值图准备FAIL/图拆分FAIL/图优化FAIL/图编译FAIL/图执行FAIL/流程OK
+# ModelStatus="图执行FAIL"
+# #DTS单号或者issue链接
+# DTS_Number="DTS20211112715497"

+# #关键信息打印到CaseName.log中,此处无需修改
+# echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "ModelStatus = ${ModelStatus}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "DTS_Number = ${DTS_Number}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "Status = ${Status}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "error_msg = ${error_msg}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log

diff --git a/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py
new file mode 100644
index 000000000..82f1a942e
--- /dev/null
+++ b/TensorFlow/built-in/nlp/Siamese_ID0506_for_TensorFlow/train_rt.py
@@ -0,0 +1,307 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+#! /usr/bin/env python
+
+from npu_bridge.npu_init import *
+import tensorflow as tf
+import numpy as np
+import re
+import os
+import time
+import datetime
+import gc
+from input_helpers import InputHelper
+from siamese_network_rt import SiameseLSTM
+from siamese_network_semantic import SiameseLSTMw2v
+from tensorflow.contrib import learn
+import gzip
+from random import random
+# Parameters
+# ==================================================
+
+tf.flags.DEFINE_boolean("is_char_based", True, "is character based syntactic similarity. "
+                        "if false then word embedding based semantic similarity is used."
+                        "(default: True)")
+
+tf.flags.DEFINE_string("word2vec_model", "wiki.simple.vec", "word2vec pre-trained embeddings file (default: None)")
+tf.flags.DEFINE_string("word2vec_format", "text", "word2vec pre-trained embeddings file format (bin/text/textgz)(default: None)")
+
+tf.flags.DEFINE_integer("embedding_dim", 300, "Dimensionality of character embedding (default: 300)")
+tf.flags.DEFINE_float("dropout_keep_prob", 1.0, "Dropout keep probability (default: 1.0)")
+tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda (default: 0.0)")
+tf.flags.DEFINE_string("training_files", "person_match.train2", "training file (default: None)") #for sentence semantic similarity use "train_snli.txt"
+tf.flags.DEFINE_integer("hidden_units", 50, "Number of hidden units (default:50)")
+
+# Training parameters
+tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
+tf.flags.DEFINE_integer("num_epochs", 300, "Number of training epochs (default: 200)")
+tf.flags.DEFINE_integer("evaluate_every", 1000, "Evaluate model on dev set after this many steps (default: 100)")
+tf.flags.DEFINE_integer("checkpoint_every", 1000, "Save model after this many steps (default: 100)")
+# Misc Parameters
+tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
+tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
+
+
+# rankid
+tf.flags.DEFINE_integer("device_size", 1, "device_size: 1p/8p")
+tf.flags.DEFINE_integer("device_id", 0, "device_id")
+
+FLAGS = tf.flags.FLAGS
+
+print("\nParameters:")
+for attr, value in sorted(FLAGS.__flags.items()):
+    print("{}={}".format(attr.upper(), value))
+print("")
+
+if FLAGS.training_files==None:
+    print("Input Files List is empty. Use --training_files argument.")
+    exit()
+
+
+max_document_length=15
+inpH = InputHelper()
+train_set, dev_set, vocab_processor,sum_no_of_batches = inpH.getDataSets(FLAGS.training_files,max_document_length, 10,
+                                                                         FLAGS.batch_size, FLAGS.is_char_based)
+trainableEmbeddings=False
+if FLAGS.is_char_based==True:
+    FLAGS.word2vec_model = False
+else:
+    if FLAGS.word2vec_model==None:
+        trainableEmbeddings=True
+        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
+              "You are using word embedding based semantic similarity but "
+              "word2vec model path is empty. It is recommended to use --word2vec_model argument. "
+              "Otherwise now the code is automatically trying to learn embedding values (may not help in accuracy)"
+              "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
+    else:
+        inpH.loadW2V(FLAGS.word2vec_model, FLAGS.word2vec_format)
+
+# Training
+# ==================================================
+print("starting graph def")
+with tf.Graph().as_default():
+    session_conf = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement,log_device_placement=FLAGS.log_device_placement)
+    custom_op = session_conf.graph_options.rewrite_options.custom_optimizers.add()
+    custom_op.name = 'NpuOptimizer'
+    custom_op.parameter_map['precision_mode'].s = tf.compat.as_bytes('allow_mix_precision')
+    custom_op.parameter_map['use_off_line'].b = True
+    #custom_op.parameter_map['dynamic_input'].b = True
+    session_conf.graph_options.rewrite_options.remapping = RewriterConfig.OFF
+    sess = tf.Session(config=npu_config_proto(config_proto=session_conf))
+
+    print("started session")
+    with sess.as_default():
+        if FLAGS.is_char_based:
+            siameseModel = SiameseLSTM(
+                sequence_length=max_document_length,
+                vocab_size=len(vocab_processor.vocabulary_),
+                embedding_size=FLAGS.embedding_dim,
+                hidden_units=FLAGS.hidden_units,
+                l2_reg_lambda=FLAGS.l2_reg_lambda,
+                batch_size=FLAGS.batch_size
+            )
+        else:
+            siameseModel = SiameseLSTMw2v(
+                sequence_length=max_document_length,
+                vocab_size=len(vocab_processor.vocabulary_),
+                embedding_size=FLAGS.embedding_dim,
+                hidden_units=FLAGS.hidden_units,
+                l2_reg_lambda=FLAGS.l2_reg_lambda,
+                batch_size=FLAGS.batch_size,
+                trainableEmbeddings=trainableEmbeddings
+            )
+        # Define Training procedure
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        #############self add############
+        if FLAGS.device_size>1:
+            optimizer = npu_tf_optimizer(tf.train.AdamOptimizer(8e-3))
+        else:
+            optimizer = npu_tf_optimizer(tf.train.AdamOptimizer(1e-3))
+        #################################
+        print("initialized siameseModel object")
+
+        grads_and_vars=optimizer.compute_gradients(siameseModel.loss)
+        tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
+        print("defined training_ops")
+        # Keep track of gradient values and sparsity (optional)
+        grad_summaries = []
+        for g, v in grads_and_vars:
+            if g is not None:
+                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
+                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
+                grad_summaries.append(grad_hist_summary)
+                grad_summaries.append(sparsity_summary)
+        grad_summaries_merged = tf.summary.merge(grad_summaries)
+        print("defined gradient summaries")
+        # Output directory for models and summaries
+        timestamp = str(int(time.time()))
+        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
+        print("Writing to {}\n".format(out_dir))
+
+        # Summaries for loss and accuracy
+        loss_summary = tf.summary.scalar("loss", siameseModel.loss)
+        acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy)
+
+        # Train Summaries
+        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
+        train_summary_dir = os.path.join(out_dir, "summaries", "train")
+        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
+
+        # Dev summaries
+        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
+        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
+        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)
+
+        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
+        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
+        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
+        if not os.path.exists(checkpoint_dir):
+            os.makedirs(checkpoint_dir)
+        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
+
+        # Write vocabulary
+        vocab_processor.save(os.path.join(checkpoint_dir, "vocab"))
+
+        # Initialize all variables
+        sess.run(tf.global_variables_initializer())
+
+        print("init all variables")
+        graph_def = tf.get_default_graph().as_graph_def()
+        graphpb_txt = str(graph_def)
+        with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
+            f.write(graphpb_txt)
+
+        if FLAGS.word2vec_model :
+            # initial matrix with random uniform
+            initW = np.random.uniform(-0.25,0.25,(len(vocab_processor.vocabulary_), FLAGS.embedding_dim))
+            #initW = np.zeros(shape=(len(vocab_processor.vocabulary_), FLAGS.embedding_dim))
+            # load any vectors from the word2vec
+            print("initializing initW with pre-trained word2vec embeddings")
+            for w in vocab_processor.vocabulary_._mapping:
+                arr=[]
+                s = re.sub('[^0-9a-zA-Z]+', '', w)
+                if w in inpH.pre_emb:
+                    arr=inpH.pre_emb[w]
+                elif w.lower() in inpH.pre_emb:
+                    arr=inpH.pre_emb[w.lower()]
+                elif s in inpH.pre_emb:
+                    arr=inpH.pre_emb[s]
+                elif s.isdigit():
+                    arr=inpH.pre_emb["zero"]
+                if len(arr)>0:
+                    idx = vocab_processor.vocabulary_.get(w)
+                    initW[idx]=np.asarray(arr).astype(np.float32)
+            print("Done assigning intiW. len="+str(len(initW)))
+            inpH.deletePreEmb()
+            gc.collect()
+            sess.run(siameseModel.W.assign(initW))
+
+        def train_step(x1_batch, x2_batch, y_batch):
+            """
+            A single training step
+            """
+            if random()>0.5:
+                feed_dict = {
+                    siameseModel.input_x1: x1_batch,
+                    siameseModel.input_x2: x2_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: FLAGS.dropout_keep_prob,
+                }
+            else:
+                feed_dict = {
+                    siameseModel.input_x1: x2_batch,
+                    siameseModel.input_x2: x1_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: FLAGS.dropout_keep_prob,
+                }
+            import time
+            begin = time.time()
+            _, step, loss, accuracy, dist, sim, summaries = sess.run([tr_op_set, global_step, siameseModel.loss, siameseModel.accuracy, siameseModel.distance, siameseModel.temp_sim, train_summary_op], feed_dict)
+            end = time.time()
+            costtime = (end - begin) * 1000
+            time_str = datetime.datetime.now().isoformat()
+            print("TRAIN {}: step {} time(ms) {:g} loss {:g} acc {:g}".format(time_str, step, costtime, loss, accuracy))
+            train_summary_writer.add_summary(summaries, step)
+            #print(y_batch, dist, sim)
+
+        def dev_step(x1_batch, x2_batch, y_batch):
+            """
+            A single evaluation step
+            """
+            if random()>0.5:
+                feed_dict = {
+                    siameseModel.input_x1: x1_batch,
+                    siameseModel.input_x2: x2_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: 1.0,
+                }
+            else:
+                feed_dict = {
+                    siameseModel.input_x1: x2_batch,
+                    siameseModel.input_x2: x1_batch,
+                    siameseModel.input_y: y_batch,
+                    siameseModel.dropout_keep_prob: 1.0,
+                }
+            import time
+            begin = time.time()
+            step, loss, accuracy, sim, summaries = sess.run([global_step, siameseModel.loss, siameseModel.accuracy, siameseModel.temp_sim, dev_summary_op], feed_dict)
+            end = time.time()
+            costtime = (end - begin) * 1000
+            time_str = datetime.datetime.now().isoformat()
+            print("DEV {}: step {} time(ms) {:g} loss {:g} acc {:g}".format(time_str, step, costtime, loss, accuracy))
+            dev_summary_writer.add_summary(summaries, step)
+            #print (y_batch, sim)
+            return accuracy
+
+        # Generate batches
+        batches=inpH.batch_iter(
+            list(zip(train_set[0], train_set[1], train_set[2])), FLAGS.batch_size, FLAGS.num_epochs,device_size=FLAGS.device_size,device_id=FLAGS.device_id)
+        ##############8p#################
+        if FLAGS.device_size > 1:
+            sum_no_of_batches = sum_no_of_batches//8
+        #################################
+
+        ptr=0
+        max_validation_acc=0.0
+        for nn in range(sum_no_of_batches*FLAGS.num_epochs):
+            batch = next(batches)
+            if len(batch)<1:
+                continue
+            x1_batch,x2_batch, y_batch = zip(*batch)
+            if len(y_batch)<1:
+                continue
+            train_step(x1_batch, x2_batch, y_batch)
+            current_step = tf.train.global_step(sess, global_step)
+            sum_acc=0.0
+            if current_step % FLAGS.evaluate_every == 0:
+                print("\nEvaluation:")
+                dev_batches = inpH.batch_iter(list(zip(dev_set[0],dev_set[1],dev_set[2])), FLAGS.batch_size, 1)
+                for db in dev_batches:
+                    if len(db)<1:
+                        continue
+                    x1_dev_b,x2_dev_b,y_dev_b = zip(*db)
+                    if len(y_dev_b)<1:
+                        continue
+                    acc = dev_step(x1_dev_b, x2_dev_b, y_dev_b)
+                    sum_acc = sum_acc + acc
+                print("")
+            if current_step % FLAGS.checkpoint_every == 0:
+                if sum_acc >= max_validation_acc:
+                    max_validation_acc = sum_acc
+                    saver.save(sess, checkpoint_prefix, global_step=current_step)
+                    tf.train.write_graph(sess.graph.as_graph_def(), checkpoint_prefix, "graph"+str(nn)+".pb", as_text=False)
+                    print("Saved model {} with sum_accuracy={} checkpoint to {}\n".format(nn, max_validation_acc, checkpoint_prefix))
--
Gitee

From b4071d7bc0afd686cd62c55972fb1202771bbafc Mon Sep 17 00:00:00 2001
From: ykxia
Date: Fri, 11 Nov 2022 14:31:02 +0800
Subject: [PATCH 9/9] =?UTF-8?q?PixelLink=5FID3056=5Ffor=5FTensorFlow?=
 =?UTF-8?q?=E9=80=82=E9=85=8DRT2.0+=E4=BA=8C=E8=BF=9B=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test/train_RT2_performance_1p.sh          | 171 ++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh

diff --git a/TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh
new file mode 100644
index 000000000..e6ec771ef
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/PixelLink_ID3056_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+set -x
+#当前路径,不需要修改
+cur_path=`pwd`
+export PYTHONPATH=${cur_path}/../pylib/src:$PYTHONPATH
+#集合通信参数,不需要修改
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+RankSize=1
+
+#使能RT2.0
+export ENABLE_RUNTIME_V2=1
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+
+#基础参数,需要模型审视修改
+#网络名称,同目录名称
+Network="PixelLink_ID3056_for_TensorFlow"
+#训练epoch
+train_epochs=
+#训练batch_size
+batch_size=24
+#训练step
+train_steps=200
+#学习率
+learning_rate=
+
+#维测参数,precision_mode需要模型审视修改
+precision_mode="allow_fp32_to_fp16"
+#维持参数,以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# 帮助信息,不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_RT2_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode           precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump                if or not over detection, default is False
+    --data_dump_flag           data dump flag, default is False
+    --data_dump_step           data dump step, default is 10
+    --profiling                if or not profiling for performance debug, default is False
+    --data_path                source data of training
+    -h/--help                  show help message
+    "
+    exit 1
+fi
+
+#参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#训练开始时间,不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录,需要模型审视修改
+cd $cur_path/..
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量,不需要修改
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    #创建DeviceID输出目录,不需要修改
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    #执行训练脚本,以下传参不需要修改,其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    nohup python3 train_pixel_link.py \
+        --train_dir=./models/pixel_link \
+        --num_gpus=1 \
+        --learning_rate=1e-3 \
+        --gpu_memory_fraction=-1 \
+        --train_image_width=512 \
+        --train_image_height=512 \
+        --batch_size=${batch_size} \
+        --dataset_dir=${data_path} \
+        --dataset_name=icdar2015 \
+        --dataset_split_name=train \
+        --max_number_of_steps=${train_steps} \
+        --checkpoint_path=${CKPT_PATH} \
+        --using_moving_average=1 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#训练结束时间,不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印,不需要修改
+echo "------------------ Final result ------------------"
+
+#单迭代训练时长
+TrainingTime=`grep 'loss =' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "(" '{print $2}' |awk -F " " '{print $1}' |tail -10|awk '{sum+=$1}END {print"",sum/NR}'|sed s/[[:space:]]//g`
+# #输出性能FPS,需要模型审视修改
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${TrainingTime}'}'`
+#打印,不需要修改
+echo "Final Performance item/sec : $FPS"
+
+
+# #输出训练精度,需要模型审视修改
+#train_accuracy=`grep "test AUC" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'`
+# #打印,不需要修改
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息,不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'RT2'_'perf'
+
+#吞吐量
+ActualFPS=${FPS}
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep 'loss =' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F " " '{print $6}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
--
Gitee