From 6692c7a32c00e11aadcda7e0eef27bba691365f0 Mon Sep 17 00:00:00 2001 From: Zn Date: Fri, 20 May 2022 10:20:45 +0800 Subject: [PATCH] =?UTF-8?q?[=E8=87=AA=E7=A0=94][PyTorch]ResNet50=5Ffor=5FP?= =?UTF-8?q?yTorch=E6=A8=A1=E5=9E=8B=E8=AE=AD=E7=BB=83=E5=90=AF=E5=8A=A8?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=E5=8F=AA=E7=95=99test=E4=B8=8Bshell=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=EF=BC=8C=20=E5=85=B6=E4=BD=99=E5=88=A0=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Zn --- .../DistributedResnet50/main_apex_d76_npu.py | 2 - .../ResNet50_for_PyTorch/README.md | 10 +-- .../ResNet50_for_PyTorch/env_npu.sh | 71 ------------------- .../pytorch_resnet50_apex.py | 2 - .../ResNet50_for_PyTorch/run_1p.sh | 31 -------- .../ResNet50_for_PyTorch/run_2p.sh | 44 ------------ .../ResNet50_for_PyTorch/run_4p.sh | 43 ----------- .../ResNet50_for_PyTorch/run_8p.sh | 41 ----------- .../train_ID3071_ResNet50_performance_8p.sh | 2 +- .../test/train_ID3071_performance_1p.sh | 2 +- .../test/train_eval_1p.sh | 2 +- .../test/train_full_16p.sh | 2 +- .../test/train_full_1p.sh | 2 +- .../test/train_full_8p.sh | 2 +- .../test/train_performance_16p.sh | 2 +- .../test/train_performance_1p.sh | 2 +- .../test/train_performance_8p.sh | 2 +- 17 files changed, 14 insertions(+), 248 deletions(-) delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py index b036e66529..83a52095b5 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py @@ -25,8 +25,6 @@ import time import warnings import torch -if torch.__version__ >= "1.8.1": - import torch_npu import torch.nn as nn import torch.nn.parallel import torch.backends.cudnn as cudnn diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md index 291e2e2c31..1de5116b9a 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md @@ -13,9 +13,9 @@ ``` 1.安装环境 - 2.修改run_1p.sh字段"data"为当前磁盘的数据集路径 - 3.修改字段device_id(单卡训练所使用的device id),为训练配置device_id,比如device_id=0 - 4.cd到run_1p.sh文件的目录,执行bash run_1p.sh单卡脚本, 进行单卡训练 + 2.修改train_performance_1p.sh字段"data"为当前磁盘的数据集路径; + 3.修改字段device_id(单卡训练所使用的device id),为训练配置device_id,比如device_id=0; + 4.执行bash train_performance_1p.sh单卡脚本, 进行单卡训练; ``` @@ -25,7 +25,7 @@ 1.安装环境 2.修改多P脚本中字段"data"为当前磁盘的数据集路径 3.修改字段device_id_list(多卡训练所使用的device id列表),为训练配置device_id,比如4p,device_id_list=0,1,2,3;8P默认使用0,1,2,3,4,5,6,7卡不用配置 - 4.cd到run_8p.sh文件的目录,执行bash run_8p.sh等多卡脚本, 进行多卡训练 + 4.执行bash train_performance_8p.sh等多卡脚本, 进行多卡训练; ``` @@ -48,6 +48,6 @@ 训练日志路径:在训练脚本的同目录下result文件夹里,如: - /home/ResNet50/result/training_8p_job_20201121023601 + /home/ResNet50/test/output/device_id/training_8p_job_20201121023601 diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh deleted file mode 100644 index 84d83feb94..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -export install_path=/usr/local/Ascend - -if [ -d ${install_path}/toolkit ]; then - export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} - export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH - export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH - export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH - export ASCEND_OPP_PATH=${install_path}/opp -else - if [ -d ${install_path}/nnae/latest ];then - export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/nnae/latest - else - export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest - fi -fi - - -#将Host日志输出到串口,0-关闭/1-开启 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#设置默认日志级别,0-debug/1-info/2-warning/3-error -export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#设置是否开启taskque,0-关闭/1-开启 -export TASK_QUEUE_ENABLE=1 -#HCCL白名单开关,1-关闭/0-开启 -export HCCL_WHITELIST_DISABLE=1 - -#设置device侧日志登记为error -${install_path}/driver/tools/msnpureport -g error -d 0 -${install_path}/driver/tools/msnpureport -g error -d 1 -${install_path}/driver/tools/msnpureport -g error -d 2 -${install_path}/driver/tools/msnpureport -g error -d 3 -${install_path}/driver/tools/msnpureport -g error -d 4 -${install_path}/driver/tools/msnpureport -g error -d 5 -${install_path}/driver/tools/msnpureport -g error -d 6 -${install_path}/driver/tools/msnpureport -g error -d 7 -#关闭Device侧Event日志 -${install_path}/driver/tools/msnpureport -e disable - -path_lib=$(python3.7 -c """ -import sys -import re -result='' -for index in range(len(sys.path)): - match_sit = re.search('-packages', sys.path[index]) - if match_sit is not None: - match_lib = re.search('lib', sys.path[index]) - - if match_lib is not None: - end=match_lib.span()[1] - result += sys.path[index][0:end] + ':' - - result+=sys.path[index] + '/torch/lib:' -print(result)""" -) - -echo ${path_lib} - -export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index bd91d5be81..2932965ffd 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -23,8 +23,6 @@ import math import numpy as np import torch -if torch.__version__ >= "1.8.1": - import torch_npu import torch.nn as nn import torch.nn.parallel import torch.backends.cudnn as cudnn diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh deleted file mode 100644 index e542152b60..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -device_id=0 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_1p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/pytorch_resnet50_apex.py \ - --data /data/imagenet \ - --npu ${device_id} \ - -j64 \ - -b512 \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs 90 \ - --num_classes=1000 \ - --optimizer-batch-size 512 > ./resnet50_1p.log 2>&1 & - - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh deleted file mode 100644 index 047849d5f8..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -ip=$(hostname -I |awk '{print $1}') -device_id_list=0,1 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_2p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \ - --data /data/imagenet \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=0.4 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --device-list=${device_id_list} \ - --benchmark=0 \ - --device='npu' \ - --epochs=90 \ - --num_classes=1000 \ - --batch-size=1024 > ./resnet50_2p.log 2>&1 & - - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh deleted file mode 100644 index 2b29adfe64..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -ip=$(hostname -I |awk '{print $1}') -device_id_list=0,1,2,3 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_4p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \ - --data /data/imagenet \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=0.8 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --device-list=${device_id_list} \ - --benchmark=0 \ - --device='npu' \ - --epochs=90 \ - --num_classes=1000 \ - --batch-size=2048 > ./resnet50_4p.log 2>&1 & - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh deleted file mode 100644 index e3b0a5b523..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error -/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error - -ip=$(hostname -I |awk '{print $1}') -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_8p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \ - --data /data/imagenet \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=1.6 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --benchmark=0 \ - --device='npu' \ - --epochs=90 \ - --num_classes=1000 \ - --batch-size=4096 > ./resnet50_8p.log 2>&1 & - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh index 0013d69590..f8d134d5bd 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh @@ -64,7 +64,7 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ +nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ --data ${data_path} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh index f2f584cd46..d116b996ad 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh @@ -81,7 +81,7 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7 ./pytorch_resnet50_apex.py \ +nohup python3.7 ./pytorch_resnet50_apex.py \ --data ${data_path} \ --npu ${ASCEND_DEVICE_ID} \ -j ${workers} \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh index e89e5332b6..ea11306d36 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh @@ -79,7 +79,7 @@ etp_flag=`echo ${check_etp_flag#*=}` if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7 ./pytorch_resnet50_apex.py \ +nohup python3.7 ./pytorch_resnet50_apex.py \ --data ${data_path} \ --npu ${ASCEND_DEVICE_ID} \ -j ${workers} \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh index dccd7239c2..e355a2471f 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh @@ -96,7 +96,7 @@ fi export NODE_RANK=${server_index} export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ +nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ --data ${data_path} \ --addr=$one_node_ip \ --seed=49 \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh index 37fd0fd4b8..e754979b9f 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh @@ -77,7 +77,7 @@ etp_flag=`echo ${check_etp_flag#*=}` if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7 ./pytorch_resnet50_apex.py \ +nohup python3.7 ./pytorch_resnet50_apex.py \ --data ${data_path} \ --npu ${ASCEND_DEVICE_ID} \ -j ${workers} \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh index 1cc50890bd..490324f5e5 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_8p.sh @@ -79,7 +79,7 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ +nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ --data ${data_path} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh index cea3033577..d047e2847e 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh @@ -81,7 +81,7 @@ fi export NODE_RANK=${server_index} export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ +nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ --data ${data_path} \ --addr=$one_node_ip \ --seed=49 \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh index 96226ecf32..7b20d0af05 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh @@ -79,7 +79,7 @@ etp_flag=`echo ${check_etp_flag#*=}` if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7 ./pytorch_resnet50_apex.py \ +nohup python3.7 ./pytorch_resnet50_apex.py \ --data ${data_path} \ --npu ${ASCEND_DEVICE_ID} \ -j ${workers} \ diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh index 84c999e576..d7969e07f3 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_8p.sh @@ -65,7 +65,7 @@ fi export NODE_RANK=0 -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ +nohup python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ --data ${data_path} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ -- Gitee