diff --git a/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh index 71ba419cf1a0f053cf05dc514c6db53743c5c40d..e4f872acb18ec72f0ed0cfc4a4c139a1dcdb5a60 100644 --- a/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh @@ -124,7 +124,7 @@ do if [ "x${bind_core}" != x ];then bind_core="taskset -c $a-$c" fi -python3.7 ${cur_path}/../train.py --rank_size=8 \ +nohup ${bind_core} python3.7 ${cur_path}/../train.py --rank_size=8 \ --epochs_between_evals=1 \ --mode=train \ --max_epochs=150 \ diff --git a/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh index f258be9af5015718ed3e3a82e6e078e97651918d..e1280f0f66668216e7770b12a15ece2615bae498 100644 --- a/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh @@ -109,14 +109,20 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #let a=RANK_ID*12 #let b=RANK_ID+1 #let c=b*12-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - - python3.7 ./train.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 ./train.py \ --data_dir=${data_path} \ --rank_size=${RANK_SIZE} \ --iterations_per_loop=1000 \ diff --git a/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh index 77c6e25a6546443f9ff0a1b1011bd687946c3e51..e23728d377b1bde542b4110901b1e8114db47b95 100644 --- a/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh @@ -137,7 +137,7 @@ do if [ "x${bind_core}" != x ];then bind_core="taskset -c $a-$c" fi - nohup python3.7 efficientnet/main_npu.py \ + nohup ${bind_core} python3.7 efficientnet/main_npu.py \ --data_dir=${data_path} \ --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ --mode=train_and_eval \ diff --git a/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh index f4506c7dc852c45bfdd40b167971412b078cebfc..12b149eaada512e24dd9465fbd5c6c4cb8695f06 100644 --- a/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh @@ -83,6 +83,9 @@ do mkdir -p ${autotune_dump_path}/rl elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -115,9 +118,18 @@ do else mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,需要模型审视修改 - nohup python3.7 train.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 train.py \ --rank_size=$RANK_SIZE \ --mode=train_and_evaluate \ --max_epochs=200 \ diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh index efc2118f78c22fea52bb594dcd0f1eabe30c5531..a1f83e0683f74530356e7d6b6b45827c614ae379 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh @@ -78,6 +78,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done #校验是否传入data_path,不需要修改 @@ -107,9 +110,17 @@ do else mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi - + + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - python3 Incetpion_V3.py --dataset_dir=$data_path --epoch_num=$train_epochs --NPU_DEVICE_INDEX=$ASCEND_DEVICE_ID --npu_nums=8 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3 Incetpion_V3.py --dataset_dir=$data_path --epoch_num=$train_epochs --NPU_DEVICE_INDEX=$ASCEND_DEVICE_ID --npu_nums=8 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh index f99f547886f8f6235b34de7df95fb1e80e03e655..f39f15a6cc1d7f58c1606b3db9615bdc363ee805 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh @@ -85,6 +85,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -130,10 +133,18 @@ do fi + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - python3.7 train.py --rank_size=8 \ + nohup ${bind_core} python3.7 train.py --rank_size=8 \ --mode=train_and_evaluate \ --max_epochs=$train_epochs \ --T_max=100 \ diff --git a/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh index 4eca347461ad6f910fb8f140594611af55bb2af2..d36ce3b7ddca899331d10ed83a6bee66b0b6843d 100644 --- a/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh @@ -123,6 +123,7 @@ do DEVICE_INDEX=$DEVICE_ID export DEVICE_INDEX=${DEVICE_INDEX} + #创建DeviceID输出目录,不需要修改 if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} @@ -134,17 +135,18 @@ do mkdir -p results/$ASCEND_DEVICE_ID sed -i 's|results|results/'$ASCEND_DEVICE_ID'|g' train.py - + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - #corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` - #let a=RANK_ID*${corenum}/8 - #let b=RANK_ID+1 - #let c=b*${corenum}/8-1 - #if [ "x${bind_core}" != x ];then - # bind_core="taskset -c $a-$c" - #fi - python3.7 train.py \ + corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` + let a=RANK_ID*${corenum}/8 + let b=RANK_ID+1 + let c=b*${corenum}/8-1 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 train.py \ --dataset_dir=$data_path \ --max_epoch=$train_epochs \ --model_name="mobilenet_v2" \ diff --git a/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh index b341f39267b23d5e890922c6f11d0bbbc0222301..7b585c7ddb819c85e1d94669d2c4351ffb977de7 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh @@ -121,18 +121,18 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi - # 绑核,不需要的绑核的模型删除,需要模型审视修改 - #corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` - #let a=RANK_ID*${corenum}/${RANK_SIZE} - #let b=RANK_ID+1 - #let c=b*${corenum}/${RANK_SIZE}-1 + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - #if [ "x${bind_core}" != x ];then - # bind_core="taskset -c $a-$c" - #fi - python3.7 r1/resnet/imagenet_main.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 r1/resnet/imagenet_main.py \ --resnet_size=101 \ --batch_size=${batch_size} \ --num_gpus=1 \ diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh index 7f44f2c59ace7e60f8ce5179051d674e20c5b1ed..b385ad12ae12206edbdb1da0c37f91e372ea037a 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh @@ -128,14 +128,14 @@ do #执行训练脚本,需要模型审视修改 corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` - #let a=RANK_ID*${corenum}/8 - #let b=RANK_ID+1 - #let c=b*${corenum}/8-1 - #if [ "x${bind_core}" != x ];then - # bind_core="taskset -c $a-$c" - #fi + let a=RANK_ID*${corenum}/8 + let b=RANK_ID+1 + let c=b*${corenum}/8-1 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi #--max_train_steps=$max_train_steps \ - python3.7 ${cur_path}/../src/mains/res50.py \ + nohup ${bind_core} python3.7 ${cur_path}/../src/mains/res50.py \ --config_file=$config_file \ --iterations_per_loop=$iterations_per_loop \ --debug=$debug \ diff --git a/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh index 10c84b674caf4b040bb269ee1c65c41b049e91e5..528ea03cf7f03661a1a3ce96852b8c50a68b2d7b 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh @@ -150,7 +150,7 @@ do if [ "x${bind_core}" != x ];then bind_core="taskset -c $a-$c" fi - ${bind_core} python3.7 res50.py \ + nohup ${bind_core} python3.7 res50.py \ --config_file=$config_file \ --max_train_steps=$max_train_steps \ --iterations_per_loop=$iterations_per_loop \ diff --git a/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh index 16ac5783db51b43409f2b2f3aab039194ac2ccd5..b01f752e28afc74636498c71b8cd13ddb54b577a 100644 --- a/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh @@ -76,6 +76,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -112,11 +115,19 @@ do else mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune - - nohup python3 imagenet_main.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3 imagenet_main.py \ --resnet_size=50 \ --resnet_version=1 \ --max_train_steps=$train_steps \ diff --git a/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh index 31afa627e6bb6cf9a5463b9576491e16fbb38eb9..fd7d65e81c12568680462c38bfb7c3a536011262 100644 --- a/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh @@ -118,11 +118,18 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - - python3.7 $cur_path/../train.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 $cur_path/../train.py \ --rank_size=8 \ --mode=train_and_evaluate \ --max_epochs=150 \