From 07c1e78bd06acb776d2dcd6fe2219635e8649650 Mon Sep 17 00:00:00 2001
From: Zn <dongxueying6@h-partners.com>
Date: Tue, 10 May 2022 15:03:27 +0800
Subject: [PATCH 1/2] =?UTF-8?q?[=E8=87=AA=E7=A0=94][PyTorch]EfficientNet-B?=
 =?UTF-8?q?1=5FID1713=E6=A8=A1=E5=9E=8B=E8=AE=AD=E7=BB=83=E5=90=AF?=
 =?UTF-8?q?=E5=8A=A8=E8=84=9A=E6=9C=AC=E5=8F=AA=E7=95=99test=E4=B8=8Bshell?=
 =?UTF-8?q?=E8=84=9A=E6=9C=AC=EF=BC=8C=20=E5=85=B6=E4=BD=99=E5=88=A0?=
 =?UTF-8?q?=E9=99=A4=EF=BC=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Zn <dongxueying6@h-partners.com>
---
 .../README.md                                 | 11 +--
 .../run_1p.sh                                 | 25 -----
 .../run_8p.sh                                 | 31 ------
 .../test/train_full_1p.sh                     | 95 ++++++++++---------
 .../test/train_full_8p.sh                     | 11 ++-
 .../test/train_performance_1p.sh              | 70 +++++++++-----
 .../test/train_performance_8p.sh              | 12 ++-
 7 files changed, 118 insertions(+), 137 deletions(-)
 delete mode 100644 PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_1p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_8p.sh

diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/README.md
index c96358198e..19fbe569a6 100644
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/README.md
+++ b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/README.md
@@ -4,9 +4,9 @@
 
 ```
 	1.安装环境
-	2.修改run_1p.sh字段"data"为当前磁盘的数据集路径
+	2.修改train_performance_1p.sh字段"data"为当前磁盘的数据集路径
 	3.修改字段device_id（单卡训练所使用的device id），为训练配置device_id，比如device_id=0
-	4.cd到run_1p.sh文件的目录，执行bash run_1p.sh单卡脚本， 进行单卡训练
+	4.cd到train_performance_1p.sh文件的目录(也可直接在模型目录下)，执行bash train_performance_1p.sh单卡脚本， 进行单卡训练
 ```
 
 	
@@ -15,8 +15,8 @@
 ```
 	1.安装环境
 	2.修改多P脚本中字段"data"为当前磁盘的数据集路径
-	3.修改run_8p.sh字段"addr"为当前主机ip地址
-	4.cd到run_8p.sh文件的目录，执行bash run_8p.sh等多卡脚本， 进行多卡训练	
+	3.修改train_performance_8p.sh字段"addr"为当前主机ip地址
+	4.cd到train_performance_8p.sh文件的目录(也可直接在模型目录下)，执行bash train_performance_8p.sh等多卡脚本， 进行多卡训练	
 ```
 
 
@@ -37,8 +37,7 @@
 三、测试结果
     
 训练日志路径：在训练脚本的同目录下result文件夹里，如：
-
-        /home/Efficientnet/result/training_8p_job_20201121023601
+        /home/Efficientnet/test/output/0/train_0.log
         
 	
diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_1p.sh b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_1p.sh
deleted file mode 100644
index a7bec33616..0000000000
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_1p.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_1p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/examples/imagenet/main.py \
-    --data=/data/imagenet \
-    --arch=efficientnet-b0 \
-    --batch-size=512 \
-    --lr=0.2 \
-    --momentum=0.9 \
-    --epochs=100 \
-    --autoaug \
-    --amp \
-    --pm=O1 \
-    --loss_scale=32 \
-    --val_feq=10 \
-    --npu=0 > ${train_log_dir}/train_1p.log 2>&1 &
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_8p.sh b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_8p.sh
deleted file mode 100644
index 63e06c3410..0000000000
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/run_8p.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_8p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/examples/imagenet/main.py \
-    --data=/data/imagenet \
-    --arch=efficientnet-b0 \
-    --batch-size=4096 \
-    --lr=1.6 \
-    --momentum=0.9 \
-    --epochs=100 \
-    --autoaug \
-    --amp \
-    --pm=O1 \
-    --loss_scale=32 \
-    --val_feq=10 \
-    --addr=$(hostname -I |awk '{print $1}') \
-    --dist-backend=hccl \
-    --multiprocessing-distributed \
-    --world-size 1 \
-    --rank 0 \
-    --device_list '0,1,2,3,4,5,6,7' > ${train_log_dir}/train_8p.log 2>&1 &
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_1p.sh
index dedbfdfdd5..a6b3f11d1c 100644
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_1p.sh
+++ b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_1p.sh
@@ -2,9 +2,10 @@
 
 #当前路径,不需要修改
 cur_path=`pwd`
+# 指定训练所使用的npu device卡id
+device_id=0
 
 #集合通信参数,不需要修改
-
 export RANK_SIZE=1
 export JOB_ID=10087
 RANK_ID_START=0
@@ -55,41 +56,42 @@ if [[ $1 == --help || $1 == -h ]];then
     exit 1
 fi
 
-#参数校验，不需要修改
-for para in $*
-do
-    if [[ $para == --precision_mode* ]];then
-        precision_mode=`echo ${para#*=}`
-    elif [[ $para == --over_dump* ]];then
-        over_dump=`echo ${para#*=}`
-        over_dump_path=${cur_path}/output/overflow_dump
-        mkdir -p ${over_dump_path}
-    elif [[ $para == --data_dump_flag* ]];then
-        data_dump_flag=`echo ${para#*=}`
-        data_dump_path=${cur_path}/output/data_dump
-        mkdir -p ${data_dump_path}
-    elif [[ $para == --data_dump_step* ]];then
-        data_dump_step=`echo ${para#*=}`
-    elif [[ $para == --profiling* ]];then
-        profiling=`echo ${para#*=}`
-        profiling_dump_path=${cur_path}/output/profiling
-        mkdir -p ${profiling_dump_path}
-    elif [[ $para == --data_path* ]];then
-        data_path=`echo ${para#*=}`
-    fi
-done
-
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
     exit 1
 fi
+# Resolve the NPU device id: a pre-set ASCEND_DEVICE_ID (dynamic allocation) wins, otherwise fall back to the device_id set in this script; abort if neither is available.
+if [ -n "${ASCEND_DEVICE_ID}" ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ -n "${device_id}" ];then
+    export ASCEND_DEVICE_ID=${device_id}
+    echo "device id is ${ASCEND_DEVICE_ID}"
+else
+    echo "[Error] device id must be config"
+    exit 1
+fi
+
+#进入训练脚本目录，需要模型审视修改
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
 
 #训练开始时间，不需要修改
 start_time=$(date +%s)
+# 非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
 
-#进入训练脚本目录，需要模型审视修改
-cd $cur_path/..
 for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
 do
     #设置环境变量，不需要修改
@@ -99,16 +101,16 @@ do
 
 
     #创建DeviceID输出目录，不需要修改
-    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
-        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
-        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
     else
-        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+        mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
     fi
 
     #执行训练脚本，以下传参不需要修改，其他需要模型审视修改
     #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path，--data_dump_flag，--data_dump_step，--data_dump_path，--profiling，--profiling_dump_path
-    python3 ${cur_path}/../examples/imagenet/main.py \
+    nohup python3.7 ${cur_path}/examples/imagenet/main.py \
         --data=${data_path} \
         --arch=efficientnet-b1 \
         --batch-size=${batch_size} \
@@ -120,7 +122,7 @@ do
         --pm=O1 \
         --loss_scale=32 \
         --val_feq=10 \
-        --npu=$ASCEND_DEVICE_ID > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+        --npu=$ASCEND_DEVICE_ID > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
 
@@ -131,12 +133,12 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-FPS=`grep -a 'FPS'  $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'`
+FPS=`grep -a 'FPS'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'`
 #打印，不需要修改
 echo "Final Performance images/sec : $FPS"
 
 #输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
+train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
 #打印，不需要修改
 echo "Final Train Accuracy : ${train_accuracy}"
 echo "E2E Training Duration sec : $e2e_time"
@@ -154,19 +156,18 @@ ActualFPS=${FPS}
 TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
 
 #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
 
 #最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
 
 #关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainAcuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_8p.sh
index 78ca564ede..f4701d634b 100644
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_full_8p.sh
@@ -98,9 +98,14 @@ fi
 ##################启动训练脚本##################
 #训练开始时间，不需要修改
 start_time=$(date +%s)
-# source 环境变量
-#source ${test_path_dir}/env.sh
-python3 ${test_path_dir}/../examples/imagenet/main.py \
+# Source the NPU environment variables unless etp_running_flag is "true" (i.e. only for non-platform runs)
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
+
+nohup python3.7 ${test_path_dir}/../examples/imagenet/main.py \
         --data=${data_path} \
         --arch=efficientnet-b1 \
         --batch-size=${batch_size} \
diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_1p.sh
index 3257adb89e..9934573e46 100644
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_1p.sh
@@ -2,9 +2,10 @@
 #source env_npu.sh
 #当前路径,不需要修改
 cur_path=`pwd`
+# 指定训练所使用的npu device卡id
+device_id=0
 
 #集合通信参数,不需要修改
-
 export RANK_SIZE=1
 export JOB_ID=10087
 RANK_ID_START=0
@@ -84,12 +85,37 @@ if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
     exit 1
 fi
+# Resolve the NPU device id: a pre-set ASCEND_DEVICE_ID (dynamic allocation) wins, otherwise fall back to the device_id set in this script; abort if neither is available.
+if [ -n "${ASCEND_DEVICE_ID}" ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ -n "${device_id}" ];then
+    export ASCEND_DEVICE_ID=${device_id}
+    echo "device id is ${ASCEND_DEVICE_ID}"
+else
+    echo "[Error] device id must be config"
+    exit 1
+fi
+
+#进入训练脚本目录，需要模型审视修改
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
 
 #训练开始时间，不需要修改
 start_time=$(date +%s)
+# 非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
 
-#进入训练脚本目录，需要模型审视修改
-cd $cur_path/..
 for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
 do
     #设置环境变量，不需要修改
@@ -99,16 +125,16 @@ do
 
 
     #创建DeviceID输出目录，不需要修改
-    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
-        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
-        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
     else
-        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+        mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
     fi
 
     #执行训练脚本，以下传参不需要修改，其他需要模型审视修改
     #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path，--data_dump_flag，--data_dump_step，--data_dump_path，--profiling，--profiling_dump_path
-    python3 ${cur_path}/../examples/imagenet/main.py \
+    nohup python3.7 ${cur_path}/examples/imagenet/main.py \
         --data=${data_path} \
         --arch=efficientnet-b1 \
         --batch-size=${batch_size} \
@@ -121,7 +147,7 @@ do
         --loss_scale=32 \
         --val_feq=10 \
 		--stop-step-num=128 \
-        --npu=$ASCEND_DEVICE_ID > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+        --npu=$ASCEND_DEVICE_ID > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
 
@@ -132,12 +158,12 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-FPS=`grep -a 'FPS'  $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'`
+FPS=`grep -a 'FPS'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'`
 #打印，不需要修改
 echo "Final Performance images/sec : $FPS"
 
 #输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
+train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
 #打印，不需要修改
 echo "Final Train Accuracy : ${train_accuracy}"
 echo "E2E Training Duration sec : $e2e_time"
@@ -155,18 +181,18 @@ ActualFPS=${FPS}
 TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
 
 #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
 
 #最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
 
 #关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_8p.sh
index 84cdda3c2d..b3ec488f67 100644
--- a/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch/test/train_performance_8p.sh
@@ -106,9 +106,14 @@ fi
 ##################启动训练脚本##################
 #训练开始时间，不需要修改
 start_time=$(date +%s)
-# source 环境变量
-# source ${test_path_dir}/env.sh
-python3 ${test_path_dir}/../examples/imagenet/main.py \
+# Source the NPU environment variables unless etp_running_flag is "true" (i.e. only for non-platform runs)
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
+
+nohup python3.7 ${test_path_dir}/../examples/imagenet/main.py \
         --data=${data_path} \
         --arch=efficientnet-b1 \
         --batch-size=${batch_size} \
@@ -175,6 +180,7 @@ echo "BatchSize = ${BatchSize}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${Cas
 echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "CaseName = ${CaseName}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "TrainingTime = ${TrainingTime}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "ActualLoss = ${ActualLoss}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
-- 
Gitee


From 9bb993bac48d9f992031974dd0629fa9254a6826 Mon Sep 17 00:00:00 2001
From: Zn <dongxueying6@h-partners.com>
Date: Thu, 12 May 2022 14:36:54 +0800
Subject: [PATCH 2/2] =?UTF-8?q?[=E8=87=AA=E7=A0=94][PyTorch]ResNet50=5Ffor?=
 =?UTF-8?q?=5FPyTorch=E6=A8=A1=E5=9E=8B=E8=AE=AD=E7=BB=83=E5=90=AF?=
 =?UTF-8?q?=E5=8A=A8=E8=84=9A=E6=9C=AC=E5=8F=AA=E7=95=99test=E4=B8=8Bshell?=
 =?UTF-8?q?=E8=84=9A=E6=9C=AC=EF=BC=8C=20=E5=85=B6=E4=BD=99=E5=88=A0?=
 =?UTF-8?q?=E9=99=A4=EF=BC=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Zn <dongxueying6@h-partners.com>
---
 .../ResNet50_for_PyTorch/README.md            | 10 ++---
 .../ResNet50_for_PyTorch/run_1p.sh            | 31 -------------
 .../ResNet50_for_PyTorch/run_2p.sh            | 44 -------------------
 .../ResNet50_for_PyTorch/run_4p.sh            | 43 ------------------
 .../ResNet50_for_PyTorch/run_8p.sh            | 41 -----------------
 .../train_ID3071_ResNet50_performance_8p.sh   |  2 +-
 .../test/train_ID3071_performance_1p.sh       |  2 +-
 .../test/train_eval_1p.sh                     |  2 +-
 .../test/train_full_16p.sh                    |  2 +-
 .../test/train_full_1p.sh                     |  2 +-
 .../test/train_full_8p.sh                     |  2 +-
 .../test/train_performance_16p.sh             |  2 +-
 .../test/train_performance_1p.sh              |  2 +-
 .../test/train_performance_8p.sh              |  2 +-
 14 files changed, 14 insertions(+), 173 deletions(-)
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md
index 291e2e2c31..1de5116b9a 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md
@@ -13,9 +13,9 @@
 
 ```
 	1.安装环境
-	2.修改run_1p.sh字段"data"为当前磁盘的数据集路径
-	3.修改字段device_id（单卡训练所使用的device id），为训练配置device_id，比如device_id=0
-	4.cd到run_1p.sh文件的目录，执行bash run_1p.sh单卡脚本， 进行单卡训练
+	2.修改train_performance_1p.sh字段"data"为当前磁盘的数据集路径；
+	3.修改字段device_id（单卡训练所使用的device id），为训练配置device_id，比如device_id=0；
+	4.执行bash train_performance_1p.sh单卡脚本， 进行单卡训练；
 ```
 
 	
@@ -25,7 +25,7 @@
 	1.安装环境
 	2.修改多P脚本中字段"data"为当前磁盘的数据集路径
 	3.修改字段device_id_list（多卡训练所使用的device id列表），为训练配置device_id，比如4p,device_id_list=0,1,2,3；8P默认使用0，1，2，3，4，5，6，7卡不用配置
-	4.cd到run_8p.sh文件的目录，执行bash run_8p.sh等多卡脚本， 进行多卡训练	
+	4.执行bash train_performance_8p.sh等多卡脚本， 进行多卡训练;
 ```
 
 
@@ -48,6 +48,6 @@
     
 训练日志路径：在训练脚本的同目录下result文件夹里，如：
 
-        /home/ResNet50/result/training_8p_job_20201121023601
+        /home/ResNet50/test/output/device_id/training_8p_job_20201121023601
 	
 
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh
deleted file mode 100644
index e542152b60..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-device_id=0
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_1p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/pytorch_resnet50_apex.py \
-        --data /data/imagenet \
-        --npu ${device_id} \
-        -j64 \
-        -b512 \
-        --lr 0.2 \
-        --warmup 5 \
-        --label-smoothing=0.1 \
-        --epochs 90 \
-        --num_classes=1000 \
-        --optimizer-batch-size 512 > ./resnet50_1p.log 2>&1 &
-
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh
deleted file mode 100644
index 047849d5f8..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-ip=$(hostname -I |awk '{print $1}')
-device_id_list=0,1
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_2p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \
-        --data /data/imagenet \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=128 \
-        --learning-rate=0.4 \
-        --warmup=8 \
-        --label-smoothing=0.1 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04 \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --device-list=${device_id_list} \
-        --benchmark=0 \
-        --device='npu' \
-        --epochs=90 \
-        --num_classes=1000 \
-        --batch-size=1024 > ./resnet50_2p.log 2>&1 &
-
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh
deleted file mode 100644
index 2b29adfe64..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-ip=$(hostname -I |awk '{print $1}')
-device_id_list=0,1,2,3
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_4p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \
-        --data /data/imagenet \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=128 \
-        --learning-rate=0.8 \
-        --warmup=8 \
-        --label-smoothing=0.1 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04 \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --device-list=${device_id_list} \
-        --benchmark=0 \
-        --device='npu' \
-        --epochs=90 \
-        --num_classes=1000 \
-        --batch-size=2048 > ./resnet50_4p.log 2>&1 &
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh
deleted file mode 100644
index e3b0a5b523..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error
-
-ip=$(hostname -I |awk '{print $1}')
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_8p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \
-        --data /data/imagenet \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=128 \
-        --learning-rate=1.6 \
-        --warmup=8 \
-        --label-smoothing=0.1 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04  \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --benchmark=0 \
-        --device='npu' \
-        --epochs=90 \
-        --num_classes=1000 \
-        --batch-size=4096 > ./resnet50_8p.log 2>&1 &
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
index 0013d69590..f8d134d5bd 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
@@ -64,7 +64,7 @@ if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
 
-python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
+nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
         --data ${data_path} \
         --addr=$(hostname -I |awk '{print $1}') \
         --seed=49 \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
index f2f584cd46..d116b996ad 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
@@ -81,7 +81,7 @@ if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
 
-python3.7 ./pytorch_resnet50_apex.py \
+nohup python3.7 ./pytorch_resnet50_apex.py \
     --data ${data_path} \
     --npu ${ASCEND_DEVICE_ID} \
     -j ${workers} \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh
index e89e5332b6..ea11306d36 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh
@@ -79,7 +79,7 @@ etp_flag=`echo ${check_etp_flag#*=}`
 if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
-python3.7 ./pytorch_resnet50_apex.py \
+nohup python3.7 ./pytorch_resnet50_apex.py \
     --data ${data_path} \
     --npu ${ASCEND_DEVICE_ID} \
     -j ${workers} \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh
index dccd7239c2..e355a2471f 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh
@@ -96,7 +96,7 @@ fi
 export NODE_RANK=${server_index}
 export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'`
 
-python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
+nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
         --data ${data_path} \
         --addr=$one_node_ip \
         --seed=49 \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh
index 37fd0fd4b8..e754979b9f 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh
@@ -77,7 +77,7 @@ etp_flag=`echo ${check_etp_flag#*=}`
 if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
-python3.7 ./pytorch_resnet50_apex.py \
+nohup python3.7 ./pytorch_resnet50_apex.py \
     --data ${data_path} \
     --npu ${ASCEND_DEVICE_ID} \
     -j ${workers} \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh
index 1cc50890bd..490324f5e5 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh
@@ -79,7 +79,7 @@ if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
 
-python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
+nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
         --data ${data_path} \
         --addr=$(hostname -I |awk '{print $1}') \
         --seed=49 \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh
index cea3033577..d047e2847e 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh
@@ -81,7 +81,7 @@ fi
 export NODE_RANK=${server_index}
 export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'`
 
-python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
+nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
         --data ${data_path} \
         --addr=$one_node_ip \
         --seed=49 \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh
index 96226ecf32..7b20d0af05 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh
@@ -79,7 +79,7 @@ etp_flag=`echo ${check_etp_flag#*=}`
 if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
-python3.7 ./pytorch_resnet50_apex.py \
+nohup python3.7 ./pytorch_resnet50_apex.py \
     --data ${data_path} \
     --npu ${ASCEND_DEVICE_ID} \
     -j ${workers} \
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh
index 84c999e576..d7969e07f3 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh
@@ -65,7 +65,7 @@ fi
 
 export NODE_RANK=0
 
-python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
+nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
         --data ${data_path} \
         --addr=$(hostname -I |awk '{print $1}') \
         --seed=49 \
-- 
Gitee