From f12e7c7f275b8c6351e56ec2bb36eed279b2f98d Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Wed, 19 Apr 2023 10:17:47 +0800 Subject: [PATCH 1/3] update --- .../MiniGo_ID0629_for_TensorFlow/dual_net.py | 2 +- .../test/.train_full_1p.sh.swp | Bin 0 -> 16384 bytes .../test/train_full_1p.sh | 17 ++++++++++--- .../test/train_full_8p.sh | 24 +++++++++++++----- .../test/train_performance_1p.sh | 8 +++++- 5 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/.train_full_1p.sh.swp diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py index 8f2e173a1..d89f77a1b 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py @@ -683,7 +683,7 @@ def _get_nontpu_estimator(): session_config = tf.ConfigProto(allow_soft_placement=True) custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = 'NpuOptimizer' - custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("must_keep_origin_dtype") #session_config = npu_config_proto(config_proto=tf.ConfigProto()) session_config.gpu_options.allow_growth = True diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/.train_full_1p.sh.swp b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/.train_full_1p.sh.swp new file mode 100644 index 0000000000000000000000000000000000000000..390faf4fd7fa56ebb9e0e01052578c815667576f GIT binary patch literal 16384 zcmeI2Ur^k}9mh9L|D}nOrX9cZr5oKj7@Ffa=-80w6$a7lHkbOvis}F^&5I0{NQauOkKCx z<^Oi5x+JP;-Lh-le@Q(fD%uK};%Pl2$JI=4e2<=#)faU=)+TF~Ztha`o_NLI&a9%^ zF-40@2jfac?2-jZ6)j6iDUxU_x)$G~Xv(v?(A-$Jx&Es{N;idy+_r4laodCsLMp4O zLc*xE(kr+VUJ4bs&lFfs8tM~utk4ne3HpW4KYyQ2PO`={Ygs6ePds6ePds6ePds6ePds6ePd zs6ePds6eQ||Dgh6l90{tYaM$5fbajo`u~H+2>A#26PO3*z*&$1BG?D&!NcIsj}r1n z@N+N=u7V5THEDT_$jyqE`nb0{v(9^0{jfz0q=q9U=(}{ zJOeg>;=}L{+yK|XJKz*}3ABQ};G19r_{nDpxeeyPEpQGT2gg7X>;R8~hrsV1BILK= z*Wg#+0@w&B_!3wT{`ndBg%=_}2A9Dwcp2;iQ4j$fAfWguLVgL}1+Rmbz!$*=@P|+0 zdEk9;2Rs9|fWLl%kiUTIU>*1&Ucy`kUEpyrgF4#}SlvCq>TgwtETwhI?G!D!Ma;;Q zr_ui4WRRoVw$Z4RH3dVo({Xoag`TyItR0VTZ{69{(kL`Gz0ka~30pDZ75T6cq>Ffl z8|8zFB)9b$p6T-GS{UCaYTd2Mw>{Ir$yyk0k?kJc>~_`g|1-0z)*|8zzq5Ecw=h4E zKXf&J?36Qh%{h84(3#1#Ff;x4Tci2uNoVrq{JBeu=Z6;NbNSOZ2-z;#SyA1sTNd9T z_H@%K$WHqWQ_*Z1O$=78#wkICxmvr|?jT*JY*4P9vMbzO+w%6;e5etyk_@;`1`ck4 zg9B35rm-Y_ipEkjvC1}8a+yRHW>0v}ukn@3kDqkL&oYUiYDGIN<$=0^kh}J_`a14t zdbYX6^D{-cjkWcu0~+@ft%~;V5N#>V8rEP{4NlwGW+FxFYEv?}GFa6?dPK!0m6!Pr zPehIn_p}itTL*EDp8WEh^$)jsx739hWfyelA``vB9&t{N6{fDCV-%0P z?Hs@26_b>M2;Y&_Sk#JTm$IWfQy8GNLBQG-JhCTVRRBgR;rCwHdh0;Q?M=KDucW3~5OkqfyHiO`BF%6P)8l zQnY3I<&9Rg*Yu1&JFROC)a?efMjwsE+`nj~0+Nbuyyl~&zqrqyZ3(t6xC!~F`{TAw zE66RBmEoi1|G3S9p^#KezC0LUmmT)z+1Pu>Zn_UvOeB?i4c)YT2TDVSH)>dM)&Nv1 zUdN6^ETx!e23@iSIoQTNWKZ^?LKR6+G>kH+tR?4z^kc+euYfCtC|bYm$pn3pL^icT zm4@nKZn`jgn7EoR5qqiHo!Ag*!K~paeosWHiJlrGE+Il1o_ z-#X+BA1%(GEKZCT=f8)G`RO-_=d9QHGu@~y8Z+)TpqKjm$IggAV`gZ~O#O#P)esg1 zSokluor;MB^*cO~DiW}6S#ji#O|gQyl~lfrp6QjfP09Wdm2ROufcp;ETjKN)ozJ`@?1l(yU-}X}0A?OQl-bUZsb=i$OFiTT1E#LDiE|(P*?L zXb5H1J<+okg6vjW>+c3;(bf;f-lkx>BtVwBGgoe?D+W1kS(K|te|KSyD^+sBD`Z+$ z4LUG@>709>F4Y7n>%~QD|zTWvF5w1`xy4Q;Lch%fiZJcgLLlpls8!^}z&muJ&%lx~tp-X8C*}OaMDIVgCUtJtJMuIf~pCA!$*1$YJ!rXAi z&NyQei?gp6bCU}*HxtC4Bk|lv?Jh^53UjX(X3v*qCeF#x!VhxJ+!^l*DUX|cqW)E?lba|`y%Nm%SwyW2s4KR^cnw*eE@&YBFk3@{ z0hsSFdXgP+1UjrspNMy2bA6|XZSidVzXj`U9kBI3TduNW66^YJfWKj_eiGDy-}vb? zmNq;>1wsWv1wsWv1wsWv1wsWv1wsWv1wsWrN&)Y^M5DJ*Zf37I-A_zQR?chj{i> $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +python3 train.py --training_data_path=$data_path --steps_to_train=$train_steps --train_batch_size=$batch_size --work_dir=$cur_path/estimator_working_dir --export_path=$cur_path/outputs/models/000001-first_generation --dynamic_input=${dynamic_input} > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & wait end=$(date +%s) e2etime=$(( $end - $start )) @@ -79,7 +86,11 @@ BatchSize=${batch_size} #设备类型,自动获取 DeviceType=`uname -m` #用例名称,自动获取 -CaseName=${Network}_bs${BatchSize}_${RankSize}'p'_'acc' +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi #获取性能 TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` @@ -101,4 +112,4 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${Cas echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh index 8cd78e2f2..b8ad2e3b0 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_full_8p.sh @@ -38,10 +38,12 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` - elif [[ $para == --bind_core* ]]; then - bind_core=`echo ${para#*=}` - name_bind="_bindcore" - elif [[ $para == --dynamic_input* ]];then + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" + elif [[ $para == --dynamic_input* ]];then dynamic_input=`echo ${para#*=}` fi done @@ -51,6 +53,11 @@ if [[ $data_path == "" ]];then exit 1 fi +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" $cur_path/../dual_net.py +fi + + ##############执行训练########## cd $cur_path @@ -105,8 +112,11 @@ BatchSize=${batch_size} #设备类型,自动获取 DeviceType=`uname -m` #用例名称,自动获取 -CaseName=${Network}_bs${BatchSize}_${RankSize}'p'_'acc' - +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi #获取性能 TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` wait @@ -127,4 +137,4 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${Cas echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh index 2b9da3177..073205e5e 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh @@ -38,6 +38,8 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` elif [[ $para == --dynamic_input* ]];then dynamic_input=`echo ${para#*=}` fi @@ -48,6 +50,10 @@ if [[ $data_path == "" ]];then exit 1 fi +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" $cur_path/../dual_net.py +fi + ##############执行训练########## cd $cur_path @@ -108,4 +114,4 @@ echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${Cas echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2etime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "DynamicInput = ${dynamic_input}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 1f0f413c9db0d7adaa533ed961c3e8f0d8aadf0d Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Wed, 19 Apr 2023 10:18:34 +0800 Subject: [PATCH 2/3] update --- .../test/.train_full_1p.sh.swp | Bin 16384 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/.train_full_1p.sh.swp diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/.train_full_1p.sh.swp b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/.train_full_1p.sh.swp deleted file mode 100644 index 390faf4fd7fa56ebb9e0e01052578c815667576f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeI2Ur^k}9mh9L|D}nOrX9cZr5oKj7@Ffa=-80w6$a7lHkbOvis}F^&5I0{NQauOkKCx z<^Oi5x+JP;-Lh-le@Q(fD%uK};%Pl2$JI=4e2<=#)faU=)+TF~Ztha`o_NLI&a9%^ zF-40@2jfac?2-jZ6)j6iDUxU_x)$G~Xv(v?(A-$Jx&Es{N;idy+_r4laodCsLMp4O zLc*xE(kr+VUJ4bs&lFfs8tM~utk4ne3HpW4KYyQ2PO`={Ygs6ePds6ePds6ePds6ePds6ePd zs6ePds6eQ||Dgh6l90{tYaM$5fbajo`u~H+2>A#26PO3*z*&$1BG?D&!NcIsj}r1n z@N+N=u7V5THEDT_$jyqE`nb0{v(9^0{jfz0q=q9U=(}{ zJOeg>;=}L{+yK|XJKz*}3ABQ};G19r_{nDpxeeyPEpQGT2gg7X>;R8~hrsV1BILK= z*Wg#+0@w&B_!3wT{`ndBg%=_}2A9Dwcp2;iQ4j$fAfWguLVgL}1+Rmbz!$*=@P|+0 zdEk9;2Rs9|fWLl%kiUTIU>*1&Ucy`kUEpyrgF4#}SlvCq>TgwtETwhI?G!D!Ma;;Q zr_ui4WRRoVw$Z4RH3dVo({Xoag`TyItR0VTZ{69{(kL`Gz0ka~30pDZ75T6cq>Ffl z8|8zFB)9b$p6T-GS{UCaYTd2Mw>{Ir$yyk0k?kJc>~_`g|1-0z)*|8zzq5Ecw=h4E zKXf&J?36Qh%{h84(3#1#Ff;x4Tci2uNoVrq{JBeu=Z6;NbNSOZ2-z;#SyA1sTNd9T z_H@%K$WHqWQ_*Z1O$=78#wkICxmvr|?jT*JY*4P9vMbzO+w%6;e5etyk_@;`1`ck4 zg9B35rm-Y_ipEkjvC1}8a+yRHW>0v}ukn@3kDqkL&oYUiYDGIN<$=0^kh}J_`a14t zdbYX6^D{-cjkWcu0~+@ft%~;V5N#>V8rEP{4NlwGW+FxFYEv?}GFa6?dPK!0m6!Pr zPehIn_p}itTL*EDp8WEh^$)jsx739hWfyelA``vB9&t{N6{fDCV-%0P z?Hs@26_b>M2;Y&_Sk#JTm$IWfQy8GNLBQG-JhCTVRRBgR;rCwHdh0;Q?M=KDucW3~5OkqfyHiO`BF%6P)8l zQnY3I<&9Rg*Yu1&JFROC)a?efMjwsE+`nj~0+Nbuyyl~&zqrqyZ3(t6xC!~F`{TAw zE66RBmEoi1|G3S9p^#KezC0LUmmT)z+1Pu>Zn_UvOeB?i4c)YT2TDVSH)>dM)&Nv1 zUdN6^ETx!e23@iSIoQTNWKZ^?LKR6+G>kH+tR?4z^kc+euYfCtC|bYm$pn3pL^icT zm4@nKZn`jgn7EoR5qqiHo!Ag*!K~paeosWHiJlrGE+Il1o_ z-#X+BA1%(GEKZCT=f8)G`RO-_=d9QHGu@~y8Z+)TpqKjm$IggAV`gZ~O#O#P)esg1 zSokluor;MB^*cO~DiW}6S#ji#O|gQyl~lfrp6QjfP09Wdm2ROufcp;ETjKN)ozJ`@?1l(yU-}X}0A?OQl-bUZsb=i$OFiTT1E#LDiE|(P*?L zXb5H1J<+okg6vjW>+c3;(bf;f-lkx>BtVwBGgoe?D+W1kS(K|te|KSyD^+sBD`Z+$ z4LUG@>709>F4Y7n>%~QD|zTWvF5w1`xy4Q;Lch%fiZJcgLLlpls8!^}z&muJ&%lx~tp-X8C*}OaMDIVgCUtJtJMuIf~pCA!$*1$YJ!rXAi z&NyQei?gp6bCU}*HxtC4Bk|lv?Jh^53UjX(X3v*qCeF#x!VhxJ+!^l*DUX|cqW)E?lba|`y%Nm%SwyW2s4KR^cnw*eE@&YBFk3@{ z0hsSFdXgP+1UjrspNMy2bA6|XZSidVzXj`U9kBI3TduNW66^YJfWKj_eiGDy-}vb? zmNq;>1wsWv1wsWv1wsWv1wsWv1wsWv1wsWrN&)Y^M5DJ*Zf37I-A_zQR?chj{i> Date: Wed, 19 Apr 2023 10:24:10 +0800 Subject: [PATCH 3/3] update --- .../MiniGo_ID0629_for_TensorFlow/dual_net.py | 2 +- .../test/train_performance_1p.sh | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py index d89f77a1b..8f2e173a1 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/dual_net.py @@ -683,7 +683,7 @@ def _get_nontpu_estimator(): session_config = tf.ConfigProto(allow_soft_placement=True) custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = 'NpuOptimizer' - custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("must_keep_origin_dtype") + custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") #session_config = npu_config_proto(config_proto=tf.ConfigProto()) session_config.gpu_options.allow_growth = True diff --git a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh index 073205e5e..212394cd3 100644 --- a/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/MiniGo_ID0629_for_TensorFlow/test/train_performance_1p.sh @@ -92,8 +92,11 @@ BatchSize=${batch_size} #设备类型,自动获取 DeviceType=`uname -m` #用例名称,自动获取 -CaseName=${Network}_bs${BatchSize}_${RankSize}'p'_'perf' - +if [[ $precision_mode == "must_keep_origin_dtype" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi #获取性能 TrainingTime=`grep "tensorflow:global_step/sec" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $2}'` wait -- Gitee