diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_dcnmix.py b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_dcnmix.py index 8ff0a1ec174d36837560a43fac49d1dd5f5a89ee..4f5831e4ae987a083605ba61f41d5dcd48fc5677 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_dcnmix.py +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/examples/run_dcnmix.py @@ -177,8 +177,8 @@ def main(): tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) model.train(train_model_input, max_steps=args.max_steps) - # if rank_id == 0: - # model.evaluate(test_model_input) + if rank_id == 0: + model.evaluate(test_model_input) if __name__ == "__main__": main() diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_1p.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_1p.sh index 76697629f2ddee9c42bcf119d6a99a58907d80b4..1aa57c53a0be380aaed13a7cc97638b55ea396cd 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_1p.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_1p.sh @@ -16,7 +16,7 @@ train_epochs=4 #训练batch_size batch_size=10240 #训练step -train_steps= +train_steps=16115 #学习率 learning_rate= @@ -101,6 +101,7 @@ do --train_batch_size=${batch_size} \ --eval_batch_size=${batch_size} \ --num_epochs=${train_epochs} \ + --max_steps=${train_steps} \ --output_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -113,7 +114,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" # #输出性能FPS,需要模型审视修改 -fps=`grep "examples\/sec" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $2}'|tail -n +5 |awk '{sum+=$1} END {print sum/NR}'` +fps=`grep "examples\/sec" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $2}'|tail -n +5 | awk 'NR>1{print p}{p=$0}'|awk '{sum+=$1} END {print sum/NR}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${fps}'}'` # #打印,不需要修改 echo "Final Performance item/sec : $FPS" @@ -134,7 +135,7 @@ CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'accu' TrainingTime=`awk 'BEGIN{printf "%.6f\n",'${BatchSize}'/'${FPS}'}'` ActualFPS=${FPS} -grep ":loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk '{print $3}' | sed 's/,//g' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep ":loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk '{print $3}' | sed 's/,//g'| sed -n '1~2p' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_8p.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_8p.sh index 9cef9288e025cbfc3c8f371d26f5266d7e1b301d..60f518698cc75e0fdf89c98d7ba8ad3d1141883c 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_8p.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_full_8p.sh @@ -13,7 +13,7 @@ data_path="" #网络名称,同目录名称 Network="DCNMix_ID4032_for_TensorFlow" #训练epoch -train_epochs=5 +train_epochs=4 #训练batch_size batch_size=10240 #训练step diff --git a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_performance_1p.sh b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_performance_1p.sh index 199f2d05d8819132dd419c1071b777493a829f4f..667aefaa699837f592349fa9e49ba4319ab73812 100644 --- a/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_performance_1p.sh +++ b/TensorFlow/built-in/recommendation/DeepCTR_Series_for_TensorFlow/test/train_ID4032_DCNMix_performance_1p.sh @@ -114,7 +114,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" # #输出性能FPS,需要模型审视修改 -fps=`grep "examples\/sec" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $2}'|tail -n +5 |awk '{sum+=$1} END {print sum/NR}'` +fps=`grep "examples\/sec" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $2}'|tail -n +5 | awk 'NR>1{print p}{p=$0}'|awk '{sum+=$1} END {print sum/NR}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${fps}'}'` # #打印,不需要修改 echo "Final Performance item/sec : $FPS" @@ -132,7 +132,7 @@ CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' TrainingTime=`awk 'BEGIN{printf "%.6f\n",'${BatchSize}'/'${FPS}'}'` ActualFPS=${FPS} -grep ":loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk '{print $3}' | sed 's/,//g' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep ":loss =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk '{print $3}' | sed 's/,//g' | sed -n '1~2p' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`