From 4bbca5e45613464ac237b660fcda57b5e4c067a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CBailang=E2=80=9D?= <“bailang12@h-partners.com”>
Date: Sat, 16 Apr 2022 16:19:00 +0800
Subject: [PATCH] =?UTF-8?q?[=E4=BC=97=E6=99=BA][PyTorchInceptionV3=5FID159?=
 =?UTF-8?q?6=5Ffor=5FPyTorch=E6=A8=A1=E5=9E=8B=E7=BB=91=E6=A0=B8=E5=A4=84?=
 =?UTF-8?q?=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: “Bailang” <“bailang12@h-partners.com”>
---
 .../test/train_full_8p.sh                     | 77 +++++++++++++------
 .../test/train_performance_8p.sh              | 73 +++++++++++++-----
 .../FCN8s/test/train_full_8p.sh               |  1 +
 .../FCN8s/test/train_performance_8p.sh        |  1 +
 4 files changed, 108 insertions(+), 44 deletions(-)

diff --git a/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_full_8p.sh
index e80116c341..1c846c0576 100644
--- a/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_full_8p.sh
@@ -66,29 +66,60 @@ if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
 
-python3 ./main-8p.py \
-    -a inception_v3 \
-    --amp \
-    --loss-scale 128 \
-    --data ${data_path} \
-    --addr=$(hostname -I |awk '{print $1}') \
-    --seed=49 \
-    --workers=128 \
-    --learning-rate=${learning_rate} \
-    --mom=0.9 \
-    --weight-decay=1.0e-04  \
-    --print-freq=30 \
-    --dist-url='tcp://127.0.0.1:50000' \
-    --dist-backend='hccl' \
-    --multiprocessing-distributed \
-    --world-size=1 \
-    --rank=0 \
-    --device='npu' \
-    --epochs=${train_epochs} \
-    --checkpoint-freq=-1 \
-    --label-smoothing=0.1 \
-    --batch-size=${batch_size} > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
-
+KERNEL_NUM=$(($(nproc)/8))  # CPUs per launch: nproc divided by 8 (integer division). NOTE(review): this is 0 when nproc < 8, which makes PID_END smaller than PID_START
+for i in $(seq 0 7)  # eight launches, i = 0..7. NOTE(review): every iteration passes identical arguments (--rank=0, same --dist-url, --multiprocessing-distributed) - confirm main-8p.py is meant to be started 8 times
+do
+  if [ $(uname -m) = "aarch64" ]  # CPU pinning is applied only when the machine reports aarch64
+  then
+    PID_START=$((KERNEL_NUM*i))  # first CPU index of the slice for launch i (a CPU index, not a process id)
+    PID_END=$((PID_START + KERNEL_NUM - 1))  # last CPU index of the slice, inclusive
+    taskset -c $PID_START-$PID_END python3 ./main-8p.py \
+      -a inception_v3 \
+      --amp \
+      --loss-scale 128 \
+      --data ${data_path} \
+      --addr=$(hostname -I |awk '{print $1}') \
+      --seed=49 \
+      --workers=128 \
+      --learning-rate=${learning_rate} \
+      --mom=0.9 \
+      --weight-decay=1.0e-04  \
+      --print-freq=30 \
+      --dist-url='tcp://127.0.0.1:50000' \
+      --dist-backend='hccl' \
+      --multiprocessing-distributed \
+      --world-size=1 \
+      --rank=0 \
+      --device='npu' \
+      --epochs=${train_epochs} \
+      --checkpoint-freq=-1 \
+      --label-smoothing=0.1 \
+      --batch-size=${batch_size} > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &  # backgrounded. NOTE(review): the log path does not depend on i, so each iteration truncates and writes the same file - confirm intended
+  else  # any other architecture: the same command without taskset pinning
+    python3 ./main-8p.py \
+      -a inception_v3 \
+      --amp \
+      --loss-scale 128 \
+      --data ${data_path} \
+      --addr=$(hostname -I |awk '{print $1}') \
+      --seed=49 \
+      --workers=128 \
+      --learning-rate=${learning_rate} \
+      --mom=0.9 \
+      --weight-decay=1.0e-04  \
+      --print-freq=30 \
+      --dist-url='tcp://127.0.0.1:50000' \
+      --dist-backend='hccl' \
+      --multiprocessing-distributed \
+      --world-size=1 \
+      --rank=0 \
+      --device='npu' \
+      --epochs=${train_epochs} \
+      --checkpoint-freq=-1 \
+      --label-smoothing=0.1 \
+      --batch-size=${batch_size} > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &  # backgrounded. NOTE(review): this branch also runs once per loop iteration, i.e. 8 identical unpinned launches - confirm intended
+  fi
+done  # all launches are in the background; the wait that follows blocks until they exit
 wait
 
 # 训练结束时间，不需要修改
diff --git a/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_performance_8p.sh
index 3d1080d971..b59f642204 100644
--- a/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/test/train_performance_8p.sh
@@ -66,27 +66,58 @@ if [ x"${etp_flag}" != x"true" ];then
     source ${test_path_dir}/env_npu.sh
 fi
 
-python3 ./main-8p.py \
-    -a inception_v3 \
-    --amp \
-    --loss-scale 128 \
-    --data ${data_path} \
-    --addr=$(hostname -I |awk '{print $1}') \
-    --seed=49 \
-    --workers=128 \
-    --learning-rate=${learning_rate} \
-    --mom=0.9 \
-    --weight-decay=1.0e-04  \
-    --print-freq=30 \
-    --dist-url='tcp://127.0.0.1:50000' \
-    --dist-backend='hccl' \
-    --multiprocessing-distributed \
-    --world-size=1 \
-    --rank=0 \
-    --device='npu' \
-    --epochs=${train_epochs} \
-    --label-smoothing=0.1 \
-    --batch-size=${batch_size} > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+KERNEL_NUM=$(($(nproc)/8))  # CPUs per launch: nproc divided by 8 (integer division). NOTE(review): this is 0 when nproc < 8, which makes PID_END smaller than PID_START
+for i in $(seq 0 7)  # eight launches, i = 0..7. NOTE(review): every iteration passes identical arguments (--rank=0, same --dist-url, --multiprocessing-distributed) - confirm main-8p.py is meant to be started 8 times
+do
+  if [ $(uname -m) = "aarch64" ]  # CPU pinning is applied only when the machine reports aarch64
+  then
+    PID_START=$((KERNEL_NUM*i))  # first CPU index of the slice for launch i (a CPU index, not a process id)
+    PID_END=$((PID_START + KERNEL_NUM - 1))  # last CPU index of the slice, inclusive
+    taskset -c $PID_START-$PID_END python3 ./main-8p.py \
+      -a inception_v3 \
+      --amp \
+      --loss-scale 128 \
+      --data ${data_path} \
+      --addr=$(hostname -I |awk '{print $1}') \
+      --seed=49 \
+      --workers=128 \
+      --learning-rate=${learning_rate} \
+      --mom=0.9 \
+      --weight-decay=1.0e-04  \
+      --print-freq=30 \
+      --dist-url='tcp://127.0.0.1:50000' \
+      --dist-backend='hccl' \
+      --multiprocessing-distributed \
+      --world-size=1 \
+      --rank=0 \
+      --device='npu' \
+      --epochs=${train_epochs} \
+      --label-smoothing=0.1 \
+      --batch-size=${batch_size} > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &  # backgrounded. NOTE(review): the log path does not depend on i, so each iteration truncates and writes the same file - confirm intended
+  else  # any other architecture: the same command without taskset pinning
+    python3 ./main-8p.py \
+      -a inception_v3 \
+      --amp \
+      --loss-scale 128 \
+      --data ${data_path} \
+      --addr=$(hostname -I |awk '{print $1}') \
+      --seed=49 \
+      --workers=128 \
+      --learning-rate=${learning_rate} \
+      --mom=0.9 \
+      --weight-decay=1.0e-04  \
+      --print-freq=30 \
+      --dist-url='tcp://127.0.0.1:50000' \
+      --dist-backend='hccl' \
+      --multiprocessing-distributed \
+      --world-size=1 \
+      --rank=0 \
+      --device='npu' \
+      --epochs=${train_epochs} \
+      --label-smoothing=0.1 \
+      --batch-size=${batch_size} > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &  # backgrounded. NOTE(review): this branch also runs once per loop iteration, i.e. 8 identical unpinned launches - confirm intended
+  fi
+done  # all launches are in the background; the wait that follows blocks until they exit
 wait
 
 # 训练结束时间，不需要修改
diff --git a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_full_8p.sh b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_full_8p.sh
index 4a924972ff..bc86be9a23 100644
--- a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_full_8p.sh
+++ b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_full_8p.sh
@@ -81,6 +81,7 @@ echo "end_time: ${end_time}"
 e2e_time=$(( $end_time - $start_time ))
 
 FPS=`grep -a 'fps: '  $cur_path/output/${Network}/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "fps: " '{print $NF}'|awk -F "," '{print $1}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'`
+FPS=$(awk -v fps="${FPS}" 'BEGIN{printf "%.2f\n", fps*8}')  # multiply the max fps parsed above by 8; awk is used because the value may be fractional and shell arithmetic is integer-only
 echo "Final Performance images/sec : $FPS"
 
 #输出训练精度,需要模型审视修改
diff --git a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_performance_8p.sh b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_performance_8p.sh
index 900e7652b8..ee843b84b7 100644
--- a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/test/train_performance_8p.sh
@@ -81,6 +81,7 @@ echo "end_time: ${end_time}"
 e2e_time=$(( $end_time - $start_time ))
 
 FPS=`grep -a 'fps: '  $cur_path/output/${Network}/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "fps: " '{print $NF}'|awk -F "," '{print $1}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'`
+FPS=$(awk -v fps="${FPS}" 'BEGIN{printf "%.2f\n", fps*8}')  # multiply the max fps parsed above by 8; awk is used because the value may be fractional and shell arithmetic is integer-only
 echo "Final Performance images/sec : $FPS"
 
 #输出训练精度,需要模型审视修改
-- 
Gitee