diff --git a/PyTorch/contrib/cv/detection/CenterFace/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/CenterFace/test/train_full_8p.sh index 9be4b6cd61040c997c99f318f2d8e3a0e345d40e..f5e213d6cc58c35570d241ce88b86eeb6e5732c9 100644 --- a/PyTorch/contrib/cv/detection/CenterFace/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/CenterFace/test/train_full_8p.sh @@ -87,7 +87,26 @@ wait #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 cd $cur_path/src -python3 -m torch.distributed.launch --nproc_per_node=8 main.py $PREC --device_list='0,1,2,3,4,5,6,7' --world_size=8 --batch_size=$batch_size --lr=2.5e-3 --lr_step='85,120' --num_epochs=160 --distributed_launch > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +RANK_ID_START=0 + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)) +do + echo ${RANK_ID} + KERNEL_NUM=$(($(nproc)/8)) + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + taskset -c $PID_START-$PID_END python3 main.py $PREC \ + --device_list='0,1,2,3,4,5,6,7' \ + --world_size=8 \ + --batch_size=$batch_size \ + --local_rank ${RANK_ID} \ + --lr=2.5e-3 \ + --lr_step='85,120' \ + --num_epochs=160 \ + ${fp32} \ + ${hf32} \ + --distributed_launch > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done wait python3 test_wider_face.py >> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_ID4115_solov2_RT2_full_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_ID4115_solov2_RT2_full_1p.sh index e9cb498b5d3f9c85c3b1244aaf8d3585398af298..61ed666b2f5d155fca1b318e69a26c68c308510a 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_ID4115_solov2_RT2_full_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_ID4115_solov2_RT2_full_1p.sh @@ -114,7 +114,7 @@ python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $a --data_root=$data_path --gpu-ids $device_id --fps_lag $fps_lag --batch_size=$batch_size --interval=$interval --stop_step=$stop_step\ --start_step=$start_step --profiling=$profiling --rt2_bin=$rt2_bin > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py ${cur_path}/work_dirs/solov2_release_r50_fpn_8gpu_1x/epoch_${epochs}.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait