diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_1p.sh index a6f75350e7f68483c98d3dfaf71262203c22d307..566d9265cf5f5aafc2c4161abd6e217a8340b748 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_1p.sh @@ -38,6 +38,8 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` elif [[ $para == --dynamic_input* ]];then dynamic_input=`echo ${para#*=}` fi @@ -48,6 +50,11 @@ if [[ $data_path == "" ]];then exit 1 fi +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" $cur_path/../dual_net.py +fi + + ##############执行训练########## cd $cur_path @@ -65,7 +72,7 @@ wait start=$(date +%s) #(Step3)训练 -python3 train.py --training_data_path=$data_path --steps_to_train=$train_steps --train_batch_size=$batch_size --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000001-first_generation --dynamic_input=${dynamic_input}> $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +python3 train.py --training_data_path=$data_path --steps_to_train=$train_steps --train_batch_size=$batch_size --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000001-first_generation --dynamic_input=${dynamic_input} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & wait end=$(date +%s) e2etime=$(( $end - $start )) @@ -79,7 +86,11 @@ BatchSize=${batch_size} #设备类型,自动获取 DeviceType=`uname -m` #用例名称,自动获取 -CaseName=${Network}_bs${BatchSize}_${RankSize}'p'_'acc' +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi #获取性能 TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` @@ -101,4 +112,4 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${Cas echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh index 8cd78e2f2b804957f26e967acf45a7359b05c13e..b8ad2e3b0cc4a501e30824fa4b05a8b8833ee424 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh @@ -38,10 +38,12 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` - elif [[ $para == --bind_core* ]]; then - bind_core=`echo ${para#*=}` - name_bind="_bindcore" - elif [[ $para == --dynamic_input* ]];then + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" + elif [[ $para == --dynamic_input* ]];then dynamic_input=`echo ${para#*=}` fi done @@ -51,6 +53,11 @@ if [[ $data_path == "" ]];then exit 1 fi +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" $cur_path/../dual_net.py +fi + + ##############执行训练########## cd $cur_path @@ -105,8 +112,11 @@ BatchSize=${batch_size} #设备类型,自动获取 DeviceType=`uname -m` #用例名称,自动获取 -CaseName=${Network}_bs${BatchSize}_${RankSize}'p'_'acc' - +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi #获取性能 TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` wait @@ -127,4 +137,4 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${Cas echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh index 2b9da3177a38d234fd38f1bee12f16deffdc2815..212394cd378e1e07ad01283d128398fe0bd05fbb 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh @@ -38,6 +38,8 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` elif [[ $para == --dynamic_input* ]];then dynamic_input=`echo ${para#*=}` fi @@ -48,6 +50,10 @@ if [[ $data_path == "" ]];then exit 1 fi +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" $cur_path/../dual_net.py +fi + ##############执行训练########## cd $cur_path @@ -86,8 +92,11 @@ BatchSize=${batch_size} #设备类型,自动获取 DeviceType=`uname -m` #用例名称,自动获取 -CaseName=${Network}_bs${BatchSize}_${RankSize}'p'_'perf' - +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi #获取性能 TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` wait @@ -108,4 +117,4 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${Cas echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log