From 8b2ea8430ddd66abbeca1115c998c649118555c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=B4=8B=E6=B4=8B?= <584244991@qq.com> Date: Mon, 12 Dec 2022 11:25:59 +0000 Subject: [PATCH 1/4] update TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张洋洋 <584244991@qq.com> --- .../test/train_full_1p.sh | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh index 1242fb044..7cd68cd4a 100644 --- a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh @@ -90,7 +90,7 @@ function get_casename() cd ${cur_path}/../ rm -rf ./test/output/${ASCEND_DEVICE_ID} mkdir -p ./test/output/${ASCEND_DEVICE_ID} - +sed -i "40s#1000#1#g" config.py # 训练开始时间记录,不需要修改 start_time=$(date +%s) ########################################################## @@ -114,16 +114,16 @@ if [ x"${modelarts_flag}" != x ]; then python3 ./train.py else - python3.7 ./train.py --data_path=${data_path} --output_path=${output_path} 1>${print_log} 2>&1 + python3.7 ./train.py --data_dir=${data_path}/dataset/DIV2K_train_HR --output_path=${output_path} 1>${print_log} 2>&1 fi # 性能相关数据计算 -StepTime=`((cat ${print_log} | grep "time" | head -n 1) && (cat ${print_log} | grep "time" | tail -n 1)) | awk -F ':' '{print $5 $6 }' | awk -F ',' '{print $1 $2}' | awk -F ' ' '{print $1;print $3}' | awk '{if (NR == 1){a=$1} else if (NR == 2){b=$1} else if (NR == 3){c=$1} else if (NR == 4){d=$1}} END {print (d-b)/(c-a)}'` +StepTime=`((cat ${print_log} | grep "time" | tail -n -5 | head -n 1) && (cat ${print_log} | grep "time" | tail -n 1)) | awk -F ':' '{print $5 $6 }' | awk -F ',' '{print $1 $2}' | awk -F ' ' '{print $1;print $3}' | awk '{if (NR == 1){a=$1} else if (NR == 2){b=$1} else if (NR == 3){c=$1} else if (NR == 4){d=$1}} END {print (d-b)/(c-a)}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` #PSNR值计算 -PSNR=`cat ${print_log} | grep "time" | tail -n 10 | awk -F ',' '{print $8}' | awk -F ':' '{sum+=$2} END {print sum/NR}'` +train_accuracy=`grep "PSNR:" ${print_log} | awk -F "PSNR:" '{print $2}' | awk -F "," '{print $1}' | awk '{sum+=$1} END {print sum/NR}'` # 提取所有loss打印信息 -grep "loss:" ${print_log} | awk -F "," '{print $6}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt +grep "l2_loss:" ${print_log} | awk -F "l2_loss:" '{print $2}' | awk -F ', PSNR:' '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ########################################################### @@ -164,12 +164,10 @@ echo "------------------ Final result ------------------" echo "Final Performance images/sec : $FPS" echo "Final Performance sec/step : $StepTime" echo "E2E Training Duration sec : $e2e_time" -echo "PSNR : $PSNR" # 输出训练精度 -#echo "Final Train Accuracy : ${train_accuracy}" - +echo "Final Train Accuracy : ${train_accuracy}" # 最后一个迭代loss值,不需要修改 -ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt`) +ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt`) #关键信息打印到${CaseName}.log中,不需要修改 echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log @@ -180,5 +178,5 @@ echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}. echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From b89c5fc553252be699a80c289df4187fa583c1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=B4=8B=E6=B4=8B?= <584244991@qq.com> Date: Mon, 12 Dec 2022 11:26:35 +0000 Subject: [PATCH 2/4] update TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_performance_1p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张洋洋 <584244991@qq.com> --- .../test/train_performance_1p.sh | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_performance_1p.sh index 708a9d41c..d310bbef7 100644 --- a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_performance_1p.sh @@ -30,7 +30,7 @@ if [[ $1 == --help || $1 == -h ]];then --data_path # dataset of training --output_path # output of training --train_steps # max_step for training - --train_epochs # max_epoch for training + --train_epochs # max_epoch for training --batch_size # batch size -h/--help show help message " @@ -64,6 +64,17 @@ if [[ $output_path == "" ]];then output_path="./output/${ASCEND_DEVICE_ID}" fi +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=`cat /etc/passwd |grep ma-user` +if [ x"${modelarts_flag}" != x ]; +then + echo "running with modelarts_flag..." + print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + CaseName="" function get_casename() { @@ -79,13 +90,13 @@ function get_casename() cd ${cur_path}/../ rm -rf ./test/output/${ASCEND_DEVICE_ID} mkdir -p ./test/output/${ASCEND_DEVICE_ID} - +sed -i "40s#1000#1#g" config.py # 训练开始时间记录,不需要修改 start_time=$(date +%s) ########################################################## -#########第3行 至 90行,请一定不要、不要、不要修改########## -#########第3行 至 90行,请一定不要、不要、不要修改########## -#########第3行 至 90行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## ########################################################## #========================================================= @@ -97,16 +108,22 @@ start_time=$(date +%s) # 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 # 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 # 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 -train_epochs=1 batch_size=1 -print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" -python3.7 ./train.py --data_path=${data_path} --output_path=${output_path} 1>${print_log} 2>&1 -StepTime=`((cat ${print_log} | grep "time" | head -n 1) && (cat ${print_log} | grep "time" | tail -n 1)) | awk -F ':' '{print $5 $6 }' | awk -F ',' '{print $1 $2}' | awk -F ' ' '{print $1;print $3}' | awk '{if (NR == 1){a=$1} else if (NR == 2){b=$1} else if (NR == 3){c=$1} else if (NR == 4){d=$1}} END {print (d-b)/(c-a)}'` + +if [ x"${modelarts_flag}" != x ]; +then + python3 ./train.py +else + python3.7 ./train.py --data_dir=${data_path}/dataset/DIV2K_train_HR --output_path=${output_path} 1>${print_log} 2>&1 +fi + +# 性能相关数据计算 +StepTime=`((cat ${print_log} | grep "time" | tail -n -5 | head -n 1) && (cat ${print_log} | grep "time" | tail -n 1)) | awk -F ':' '{print $5 $6 }' | awk -F ',' '{print $1 $2}' | awk -F ' ' '{print $1;print $3}' | awk '{if (NR == 1){a=$1} else if (NR == 2){b=$1} else if (NR == 3){c=$1} else if (NR == 4){d=$1}} END {print (d-b)/(c-a)}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` #PSNR值计算 -PSNR=`cat ${print_log} | grep "time" | tail -n 10 | awk -F ',' '{print $8}' | awk -F ':' '{sum+=$2} END {print sum/NR}'` +train_accuracy=`grep "PSNR:" ${print_log} | awk -F "PSNR:" '{print $2}' | awk -F "," '{print $1}' | awk '{sum+=$1} END {print sum/NR}'` # 提取所有loss打印信息 -grep "loss:" ${print_log} | awk -F "," '{print $6}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt +grep "l2_loss:" ${print_log} | awk -F "l2_loss:" '{print $2}' | awk -F ', PSNR:' '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ########################################################### @@ -115,6 +132,20 @@ grep "loss:" ${print_log} | awk -F "," '{print $6}' > ./test/output/${ASCEND_DE #########后面的所有内容请不要修改########################### ########################################################### +# 判断本次执行是否正确使用Ascend NPU +tf_flag=`echo ${Network} | grep TensorFlow | wc -l` +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 -a x"${tf_flag}" == x1 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." + echo "------------------ INFO NOTICE END------------------" +fi + # 获取最终的casename,请保留,case文件名为${CaseName} get_casename @@ -133,10 +164,8 @@ echo "------------------ Final result ------------------" echo "Final Performance images/sec : $FPS" echo "Final Performance sec/step : $StepTime" echo "E2E Training Duration sec : $e2e_time" -echo "PSNR : $PSNR" # 输出训练精度 -#echo "Final Train Accuracy : ${train_accuracy}" - +echo "Final Train Accuracy : ${train_accuracy}" # 最后一个迭代loss值,不需要修改 ActualLoss=(`awk 'END {print $NF}' ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt`) @@ -149,4 +178,5 @@ echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}. echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From 67be009ed110b9f398eb3b4ac382f4d65931b222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=B4=8B=E6=B4=8B?= <584244991@qq.com> Date: Mon, 12 Dec 2022 11:27:27 +0000 Subject: [PATCH 3/4] update TensorFlow/contrib/cv/SRFBN_for_TensorFlow/train.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张洋洋 <584244991@qq.com> --- TensorFlow/contrib/cv/SRFBN_for_TensorFlow/train.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/train.py b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/train.py index 46f8808e3..eb89452f1 100644 --- a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/train.py +++ b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/train.py @@ -34,6 +34,13 @@ import time from PreProcess import * from skimage.metrics import peak_signal_noise_ratio as comparepsnr from skimage.metrics import _structural_similarity +import os +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--output_path', type=str, default="./test/output_path") +parser.add_argument("--data_dir", type=str, default="./Resolution_2K/DIV2K/DIV2K_train_HR") +args = parser.parse_args() def train_SRFBN(dataset, sess, cfg): # start put data in queue @@ -132,7 +139,6 @@ def train(*args, **kwargs): imgs = [os.path.join(data_dir,data) for data in os.listdir(data_dir)] - sess = tf.compat.v1.Session(config=npu_config_proto()) ## build NetWork @@ -144,7 +150,4 @@ def train(*args, **kwargs): if __name__ == '__main__': - import os - data_dir = "/home/TestUser08/BUAA/output_npu_20221021153629/SRFBN-tensorflow_npu_20221021153629/Resolution_2K/DIV2K/DIV2K_train_HR" - train(data_dir=data_dir) - + train(data_dir=args.data_dir) \ No newline at end of file -- Gitee From 554ade910d79f650b63231c0086ca057f343fbf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=B4=8B=E6=B4=8B?= <584244991@qq.com> Date: Mon, 12 Dec 2022 11:28:26 +0000 Subject: [PATCH 4/4] update TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张洋洋 <584244991@qq.com> --- .../contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh index 7cd68cd4a..defa49885 100644 --- a/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/contrib/cv/SRFBN_for_TensorFlow/test/train_full_1p.sh @@ -30,7 +30,7 @@ if [[ $1 == --help || $1 == -h ]];then --data_path # dataset of training --output_path # output of training --train_steps # max_step for training - --train_epochs # max_epoch for training + --train_epochs # max_epoch for training --batch_size # batch size -h/--help show help message " -- Gitee