diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md index 3a09b50b40d753d58f86cc14dc7a9ac75e2c6df5..53d619c91b063ef97659cc52cf784c28491cf2b3 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md @@ -51,9 +51,9 @@ All bash instructions output log files correctly. ## GENET training result -| Acc@1 | FPS | Device Type| Device Nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: |:------: -| 94.73 | 1894.827 | NPU | 1 | 300 | O2 | -| 95.23 | 7858.025 |NPU |8 | 300 | O2 | -| 94.76 | 1350.074 |GPU |1 | 300 | O2 | -| 94.81 | 6536.289 |GPU |8 | 300 | O2 | \ No newline at end of file +| Acc@1 | FPS | Device Type | Device Nums | Epochs | AMP_Type | +|:-----:|:---------:|:-----------:|:-----------:|:------:|:--------:| +| 94.73 | 2900.000 | NPU | 1 | 300 | O2 | +| 95.23 | 16912.437 | NPU | 8 | 300 | O2 | +| 94.76 | 1350.074 | GPU | 1 | 300 | O2 | +| 94.81 | 6536.289 | GPU | 8 | 300 | O2 | diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/requirements.txt b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/requirements.txt index a6e282f9395b6d611a258d927e3a530a0376d8cf..57ad0ceb1253f5d1ade22e1b4f946c3437c06fed 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/requirements.txt +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/requirements.txt @@ -1 +1,6 @@ -torchvision==0.5.0 \ No newline at end of file +torchvision==0.9.1 +torch==1.8.1+ascend.rc2.20220601 +apex==0.1+ascend.20220601 +numpy==1.21.6 +decorator==5.1.1 +sympy==1.10.1 \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/env_npu.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/env_npu.sh index e8ca2d2c0da539d05fa6e57a5ac24a367dff1369..c34a5c85b4734e4568170bb9236224f5a1446dc1 100644 --- 
a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/env_npu.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/env_npu.sh @@ -34,23 +34,24 @@ ${install_path}/driver/tools/msnpureport -g error -d 5 ${install_path}/driver/tools/msnpureport -g error -d 6 ${install_path}/driver/tools/msnpureport -g error -d 7 -#将Host日志输出到串口,0-关闭/1-开启 +#Print Host logs to the serial console (stdout), 0-off/1-on export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#设置默认日志级别,0-debug/1-info/2-warning/3-error -export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 +#Set the default log level, 0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#Set the Event log switch, 0-off/1-on export ASCEND_GLOBAL_EVENT_ENABLE=0 -#设置是否开启taskque,0-关闭/1-开启 +#Enable taskque, 0-off/1-on export TASK_QUEUE_ENABLE=1 -#设置是否开启PTCopy,0-关闭/1-开启 +#Enable PTCopy, 0-off/1-on export PTCOPY_ENABLE=1 -#设置是否开启combined标志,0-关闭/1-开启 -export COMBINED_ENABLE=0 -#设置特殊场景是否需要重新编译,不需要修改 +#Enable the combined flag, 0-off/1-on +export COMBINED_ENABLE=1 +#Whether special scenarios need recompilation; do not modify export DYNAMIC_OP="ADD#MUL" -#HCCL白名单开关,1-关闭/0-开启 +# HCCL whitelist switch, 1-off/0-on export HCCL_WHITELIST_DISABLE=1 -export HCCL_IF_IP=$(hostname -I |awk '{print $1}') +# The HCCL default connect timeout of 120s is too short; raise it to 1800s to match the PyTorch default +export HCCL_CONNECT_TIMEOUT=1800 ulimit -SHn 512000 @@ -74,4 +75,3 @@ print(result)""" echo ${path_lib} export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH -export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_eval_8p.sh index 936b28289d12371bbfedd974f02699e5dd9f6d91..046a39e76fca115d16eadef6c267e559337fe524 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_eval_8p.sh @@ -100,7 +100,7 @@ echo "------------------ Final result ------------------" # 输出训练精度,需要模型审视修改 train_err=`grep -a '* Err@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END 
{print}'|awk -F "Err@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final Train Accuracy: `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" echo "E2E Training Duration sec : $e2e_time" # 训练用例信息,不需要修改 @@ -118,6 +118,6 @@ echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${C echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainErr = ${train_err}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAcc = `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_finetune_1p.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_finetune_1p.sh index 9a808cd46f6498d28c01a9fc8d0c1ff3df1cda4e..12f492afde9b183bf464f82296cab4764095a764 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_finetune_1p.sh @@ -111,7 +111,7 @@ echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 train_err=`grep -a '* Err@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Err@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_Err}" +echo "Final Train Accuracy: `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 @@ -140,6 +140,6 @@ echo "DeviceType = ${DeviceType}" 
>> ${test_path_dir}/output/$ASCEND_DEVICE_ID/ echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainErr = ${train_err}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAcc = `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_1p.sh index 755e9c108fbb8205b06657189c1fe25ff48f65df..695dae8527f991f2937c669577dc073511bb0752 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_1p.sh @@ -111,8 +111,9 @@ echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 train_err=`grep -a '* Err@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Err@1" '{print $NF}'|awk -F " " '{print $1}'` + #打印,不需要修改 -echo "Final Train Error : ${train_err}" +echo "Final Train Accuracy: `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 @@ -141,6 +142,6 @@ echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/ echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log 
-echo "TrainErr = ${train_err}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAcc = `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_8p.sh index 6c1dde371ed4bda73e0d4e3e680cd24bb200c8b1..ca1702dec1089eb8d84dbed018ecc8028d397ca3 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_full_8p.sh @@ -99,7 +99,7 @@ echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 train_err=`grep -a '* Err@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Err@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Err : ${train_err}" +echo "Final Train Accuracy: `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 @@ -128,6 +128,6 @@ echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/ echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainErr = ${train_err}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAcc = `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo 
"E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_1p.sh index 9121a82e1002003c1f3ac2132be301d337cc3abc..4e38ab94651036acfed62191606d604cf2c74cc2 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_1p.sh @@ -107,10 +107,11 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D #打印,不需要修改 echo "Final Performance images/sec : $FPS" + #输出训练精度,需要模型审视修改 train_err=`grep -a '* Err@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Err@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final Train Accuracy: `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_8p.sh index 451d35b5ac7be810fb0bacb165b228a9efa20a54..1994c2d057c93303aeb1f26b63a3f229178542cf 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/test/train_performance_8p.sh @@ -104,7 +104,7 @@ echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 train_err=`grep -a '* Err@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Err@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final Train Accuracy: `awk 'BEGIN{printf "%.2f\n", '100'-'${train_err}'}'`" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 diff --git 
a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/train.py b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/train.py index 783fe250a92b7b2197c4dd6b8c27a725b1a4acce..2d459e44e02eae1ba109f17a341f342d1e722363 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/train.py +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/train.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ + +import sys +import torch +if torch.__version__ >= '1.8.1': + import torch_npu + import argparse import json import os @@ -25,12 +31,13 @@ import torchvision import torchvision.transforms as transforms import torch.multiprocessing as mp import random - +from torch import nn from models import * +from models.wideresnet import WideResNet from utils import * - import apex from apex import amp + import warnings #Basic @@ -267,7 +274,7 @@ def main_worker(gpu, ngpus_per_node, args): #amp if args.amp: print('Using amp!') - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale, combine_grad=True) #DDP if args.distributed: @@ -325,7 +332,7 @@ def main_worker(gpu, ngpus_per_node, args): trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, - pin_memory=False, + pin_memory=True, sampler=train_sampler if not args.eval else None, drop_last=True) else: @@ -425,34 +432,34 @@ def train(trainloader, model, criterion, optimizer, epoch, args, ngpus_per_node) # measure data loading time data_time.update(time.time() - end) - with torch.autograd.profiler.profile(use_cuda=True) as prof: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - input = input.to(loc, 
non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - input = input.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) - output = model(input) - loss = criterion(output, target) + output = model(input) + loss = criterion(output, target) # measure accuracy and record loss - err1, err5 = get_error(output.detach(), target, topk=(1, 5)) - losses.update(loss.item(), input.size(0)) - top1.update(err1.item(), input.size(0)) - top5.update(err5.item(), input.size(0)) + err1, err5 = get_error(output.detach(), target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(err1.item(), input.size(0)) + top5.update(err5.item(), input.size(0)) - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - optimizer.zero_grad() - scaled_loss.backward() - else: + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: optimizer.zero_grad() - loss.backward() - optimizer.step() - if args.device == 'npu': - torch.npu.synchronize() + scaled_loss.backward() + else: + optimizer.zero_grad() + loss.backward() + optimizer.step() + if args.device == 'npu': + torch.npu.synchronize() # measure elapsed time if i == 9: