From bf4a6c6ad609dad606843806462817f0f3186ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 07:56:13 +0000 Subject: [PATCH 01/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/main.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/main.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/main.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/main.py index c7bdf20029..301ce9d5d9 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/main.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/main.py @@ -15,6 +15,8 @@ import os import json import torch +if torch.__version__.startswith('1.8'): + import torch_npu from torch import nn from torch import optim from torch.optim import lr_scheduler @@ -112,11 +114,12 @@ if __name__ == '__main__': if opt.use_apex == 1: from apex import amp - if opt.loss_scale < 0: - model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=None) - else: - # model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=opt.loss_scale, combine_grad=True) - model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=opt.loss_scale) + #if opt.loss_scale < 0: + # model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=None) + #else: + # # model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=opt.loss_scale, combine_grad=True) + # model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=opt.loss_scale) + model,optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level,loss_scale=opt.loss_scale) # resume model best_pre1 = 0 -- Gitee From 9cfe2eb0f4d402b729ffd076a1255bd344c0fd5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 07:56:39 +0000 Subject: [PATCH 02/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py index e4583c32e6..5165ec11c2 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py @@ -17,14 +17,14 @@ import argparse def parse_opts(): parser = argparse.ArgumentParser() - parser.add_argument('--root_path', default='/root/ennengyang_space/Efficient-3DCNNs-master/', type=str, help='Root directory path of data') + parser.add_argument('--root_path', default='/home/jsz/Efficient-3DCNNs_ID1230_for_PyTorch/', type=str, help='Root directory path of data') parser.add_argument('--video_path', default='annotation_UCF101/UCF-101-image', type=str, help='Directory path of Videos') parser.add_argument('--annotation_path', default='annotation_UCF101/ucf101_01.json', type=str, help='Annotation file path') # parser.add_argument('--resume_path', default='results/ucf101_mobilenetv2_1.0x_RGB_16_checkpoint.pth', type=str, help='Save data (.pth) of previous training') parser.add_argument('--resume_path', default='', type=str, help='Save data (.pth) of previous training') # parser.add_argument('--pretrain_path', default='pretrain/kinetics_mobilenetv2_1.0x_RGB_16_best_dp.pth', type=str, help='Pretrained model (.pth)') parser.add_argument('--pretrain_path', default='', type=str, help='Pretrained model (.pth)') - parser.add_argument('--ft_portion', default='complete', type=str, help='The portion of the model to apply fine tuning, either complete or last_layer') + parser.add_argument('--ft_portion', default='last_layer', type=str, help='The portion of the model to apply fine tuning, either complete or last_layer') parser.add_argument('--no_drive', action='store_true', help='If true, cuda or npu is not used.') parser.set_defaults(no_drive=False) @@ -41,14 +41,14 @@ def parse_opts(): parser.add_argument('--use_prof', default=1, type=int, help='use_prof') parser.add_argument('--use_apex', default=0, type=int, help='use_apex') parser.add_argument('--opt_level', default='O2', type=str, help='Initial opt_level') - parser.add_argument('--loss_scale', default=128, type=float, help='Initial loss_scale') + parser.add_argument('--loss_scale', default='dynamic', help='Initial loss_scale') - parser.add_argument('--batch_size', default=8, type=int, help='Batch Size') + parser.add_argument('--batch_size', default=80, type=int, help='Batch Size') parser.add_argument('--n_epochs', default=2, type=int, help='Number of total epochs to run') - parser.add_argument('--learning_rate', default=0.1, type=float, help='Initial learning rate') + parser.add_argument('--learning_rate', default=0.01, type=float, help='Initial learning rate') parser.add_argument('--droupout_rate', default=0.9, type=float, help='Droupout rate') parser.add_argument('--lr_steps', default=[15, 25, 35, 40, 45], type=float, nargs="+", metavar='LRSteps', help='epochs to decay learning rate by 10') - parser.add_argument('--n_threads', default=0, type=int, help='Number of threads for multi-thread loading') + parser.add_argument('--n_threads', default=16, type=int, help='Number of threads for multi-thread loading') parser.add_argument('--no_train', default=0, type=int, help='If true, training is not performed.') parser.add_argument('--no_val', default=0, type=int, help='If true, validation is not performed.') -- Gitee From e2c255d48171638ed0e5f5bb961581b1da5e821a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 07:58:52 +0000 Subject: [PATCH 03/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/getmodel.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/getmodel.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/getmodel.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/getmodel.py index 2e6b18ed23..6a4db1bd28 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/getmodel.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/getmodel.py @@ -15,6 +15,9 @@ import os import json import torch +if torch.__version__.startswith('1.8'): + import torch_npu + from torch import nn from torch import optim from torch.optim import lr_scheduler -- Gitee From 0da926a7d11d3291729f0306d7467fdc1c948795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 08:00:32 +0000 Subject: [PATCH 04/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/spatial_tran --- .../run/spatial_transforms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/spatial_transforms.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/spatial_transforms.py index bc532e2093..5bb23484f4 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/spatial_transforms.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/spatial_transforms.py @@ -18,6 +18,9 @@ import numbers import collections import numpy as np import torch +if torch.__version__.startswith('1.8'): + import torch_npu + import cv2 import scipy.ndimage from PIL import Image, ImageOps -- Gitee From 695c88d5b0403540cd8c1b98f047e27626a58591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 08:01:11 +0000 Subject: [PATCH 05/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/test.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/test.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/test.py index 4a1abb3f5d..b780dd1557 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/test.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/test.py @@ -19,6 +19,8 @@ import time import os import sys import json +if torch.__version__.startswith('1.8'): + import torch_npu from run.utils import AverageMeter -- Gitee From 77d3b5856a7ee9764693289c4c982a466bf45f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 08:01:46 +0000 Subject: [PATCH 06/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/train.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/train.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/train.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/train.py index 312ff58055..14225a0476 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/train.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/train.py @@ -17,6 +17,8 @@ from torch.autograd import Variable import time import os import sys +if torch.__version__.startswith('1.8'): + import torch_npu from run.utils import AverageMeter, calculate_accuracy @@ -126,7 +128,7 @@ def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, if i >= 2: tot_time.update(current_batch_time) - if device_ids == 0: # distributed master or 1p + if (device_ids == 0) or (opt.device_num == 1): # distributed master or 1p batch_logger.log({ 'date': time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())), 'epoch': epoch, @@ -159,7 +161,7 @@ def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, epoch_fps = opt.batch_size * opt.device_num / tot_time.avg - if device_ids == 0: # distributed master or 1p + if (device_ids == 0)or (opt.device_num == 1): # distributed master or 1p epochlog = { 'date': time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())), 'epoch': epoch, -- Gitee From c58a29b9266321f033597f0d3db54c1b56f24e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 08:02:09 +0000 Subject: [PATCH 07/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/utils.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/utils.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/utils.py index 70357d2254..40c1216f99 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/utils.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/utils.py @@ -18,6 +18,9 @@ import shutil import numpy as np import json import os +if torch.__version__.startswith('1.8'): + import torch_npu + from run.mean import get_mean, get_std class AverageMeter(object): -- Gitee From 7a1aa853a756a7b407e61a61665d1a692bf39c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 08:02:33 +0000 Subject: [PATCH 08/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/validation.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/validation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/validation.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/validation.py index 61827e7415..795e555bc1 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/validation.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/validation.py @@ -13,6 +13,9 @@ # limitations under the License. # ============================================================================ import torch +if torch.__version__.startswith('1.8'): + import torch_npu + from torch.autograd import Variable import time import sys -- Gitee From fa836d3b955d4325ad49b1a77c9063a891d09dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 24 May 2022 08:36:50 +0000 Subject: [PATCH 09/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py index 5165ec11c2..ceb6e6d09d 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/run/opts.py @@ -17,14 +17,14 @@ import argparse def parse_opts(): parser = argparse.ArgumentParser() - parser.add_argument('--root_path', default='/home/jsz/Efficient-3DCNNs_ID1230_for_PyTorch/', type=str, help='Root directory path of data') + parser.add_argument('--root_path', default='/root/ennengyang_space/Efficient-3DCNNs-master/', type=str, help='Root directory path of data') parser.add_argument('--video_path', default='annotation_UCF101/UCF-101-image', type=str, help='Directory path of Videos') parser.add_argument('--annotation_path', default='annotation_UCF101/ucf101_01.json', type=str, help='Annotation file path') # parser.add_argument('--resume_path', default='results/ucf101_mobilenetv2_1.0x_RGB_16_checkpoint.pth', type=str, help='Save data (.pth) of previous training') parser.add_argument('--resume_path', default='', type=str, help='Save data (.pth) of previous training') # parser.add_argument('--pretrain_path', default='pretrain/kinetics_mobilenetv2_1.0x_RGB_16_best_dp.pth', type=str, help='Pretrained model (.pth)') parser.add_argument('--pretrain_path', default='', type=str, help='Pretrained model (.pth)') - parser.add_argument('--ft_portion', default='last_layer', type=str, help='The portion of the model to apply fine tuning, either complete or last_layer') + parser.add_argument('--ft_portion', default='complete', type=str, help='The portion of the model to apply fine tuning, either complete or last_layer') parser.add_argument('--no_drive', action='store_true', help='If true, cuda or npu is not used.') parser.set_defaults(no_drive=False) @@ -41,14 +41,14 @@ def parse_opts(): parser.add_argument('--use_prof', default=1, type=int, help='use_prof') parser.add_argument('--use_apex', default=0, type=int, help='use_apex') parser.add_argument('--opt_level', default='O2', type=str, help='Initial opt_level') - parser.add_argument('--loss_scale', default='dynamic', help='Initial loss_scale') + parser.add_argument('--loss_scale', default="dynamic", help='Initial loss_scale') - parser.add_argument('--batch_size', default=80, type=int, help='Batch Size') + parser.add_argument('--batch_size', default=8, type=int, help='Batch Size') parser.add_argument('--n_epochs', default=2, type=int, help='Number of total epochs to run') - parser.add_argument('--learning_rate', default=0.01, type=float, help='Initial learning rate') + parser.add_argument('--learning_rate', default=0.1, type=float, help='Initial learning rate') parser.add_argument('--droupout_rate', default=0.9, type=float, help='Droupout rate') parser.add_argument('--lr_steps', default=[15, 25, 35, 40, 45], type=float, nargs="+", metavar='LRSteps', help='epochs to decay learning rate by 10') - parser.add_argument('--n_threads', default=16, type=int, help='Number of threads for multi-thread loading') + parser.add_argument('--n_threads', default=0, type=int, help='Number of threads for multi-thread loading') parser.add_argument('--no_train', default=0, type=int, help='If true, training is not performed.') parser.add_argument('--no_val', default=0, type=int, help='If true, validation is not performed.') -- Gitee From 90154a5eba4fc13027836f29493a723b728facb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 7 Jun 2022 01:42:30 +0000 Subject: [PATCH 10/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt index 39b92c7dba..1e7b5bc790 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt @@ -1,4 +1,3 @@ -Python==3.7.0 -torch==1.5.0 +Numpy>=1.17.3 torchvision==0.6.0 opencv-python==4.5.3.56 -- Gitee From 83dc3846541fd69c990013c1d05ac5eff6fc88f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 7 Jun 2022 01:42:53 +0000 Subject: [PATCH 11/15] update PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt. --- .../Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt index 1e7b5bc790..105f194e33 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/requirements.txt @@ -1,3 +1,3 @@ -Numpy>=1.17.3 +numpy>=1.17.3 torchvision==0.6.0 opencv-python==4.5.3.56 -- Gitee From 5ed924bbb1105a310e82af52ac78e82fc833388c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 7 Jun 2022 09:53:09 +0000 Subject: [PATCH 12/15] pytorch 1.5 -1.8 --- .../test/npu_train_full_1p.sh | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_1p.sh b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_1p.sh index 17b06a3892..8e898916bf 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_1p.sh +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_1p.sh @@ -1,6 +1,31 @@ #!/usr/bin/env bash + +source env_npu.sh + +# 网络名称,同目录名称 +Network="Efficient-3DCNNs" +# 训练batch_size +batch_size=80 + root_path=$1 +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${root_path}/results/${ASCEND_DEVICE_ID} ];then + rm -rf ${root_path}/results/${ASCEND_DEVICE_ID} + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +else + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) + + +KERNEL_NUM=$(($(nproc)/8)) +PID_START=$((KERNEL_NUM * RANK_ID)) +PID_END=$((PID_START + KERNEL_NUM - 1)) nohup python3.7 ../main.py \ --root_path ${root_path} \ --gpu_or_npu npu \ @@ -16,7 +41,64 @@ nohup python3.7 ../main.py \ --batch_size 80 \ --n_threads 16 \ --ft_portion complete \ - > ${root_path}/results/npu_train_full_1p.log 2>&1 & + > ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_1p.log 2>&1 & + + + + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +FPS=`grep 'fps' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_1p.log| tail -n 1 |awk '{print $6}'|awk 'END {print}'` +FPS=${FPS%,*} +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +top1_acc=`grep 'test top1 acc' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_1p.log|awk '{print $4}'|awk 'END {print}'` +top1_acc=`echo ${top1_acc%,*}` +# 打印,不需要修改 +echo "Final Train Accuracy : ${top1_acc}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'date' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_1p.log|awk '{print $8}' >> ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt +# 删除loss值后逗号 +sed -i 's/,//g' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${top1_acc}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From cee28d8390297ab4c56b37ef7d8a3fc5fd2a8d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 7 Jun 2022 09:54:00 +0000 Subject: [PATCH 13/15] pytorch 1.5 -1.8 --- .../test/npu_train_full_8p.sh | 84 ++++++++++++++++++- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_8p.sh b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_8p.sh index cc344cf943..c658476d0c 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_full_8p.sh @@ -1,8 +1,30 @@ #!/usr/bin/env bash -RANK_ID_START=0 + +source env_npu.sh + +# 网络名称,同目录名称 +Network="Efficient-3DCNNs" +# 训练batch_size +batch_size=640 + RANK_SIZE=8 root_path=$1 +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${root_path}/results/${ASCEND_DEVICE_ID} ];then + rm -rf ${root_path}/results/${ASCEND_DEVICE_ID} + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +else + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) + +RANK_ID_START=0 + for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); do @@ -25,5 +47,61 @@ nohup taskset -c $PID_START-$PID_END python3.7 ../main.py --local_rank $RANK_ID --batch_size 640 \ --n_threads 64 \ --ft_portion complete \ - > ${root_path}/results/npu_train_full_8p.log 2>&1 & -done \ No newline at end of file + > ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_8p.log 2>&1 & +done + + + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +FPS=`grep 'fps' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_8p.log| tail -n 1 |awk '{print $6}'|awk 'END {print}'` +FPS=${FPS%,*} +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +top1_acc=`grep 'test top1 acc' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_8p.log|awk '{print $4}'|awk 'END {print}'` +top1_acc=`echo ${top1_acc%,*}` +# 打印,不需要修改 +echo "Final Train Accuracy : ${top1_acc}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'date' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_full_8p.log|awk '{print $8}' >> ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt +# 删除loss值后逗号 +sed -i 's/,//g' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${top1_acc}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 570efe8b405ae3e6722b2846cf6165d79959e921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 7 Jun 2022 09:54:53 +0000 Subject: [PATCH 14/15] pytorch 1.5 -1.8 --- .../test/npu_train_performance_1p.sh | 79 ++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_1p.sh b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_1p.sh index 49773c1153..881ce6da08 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_1p.sh +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_1p.sh @@ -1,6 +1,31 @@ #!/usr/bin/env bash + +source env_npu.sh + +# 网络名称,同目录名称 +Network="Efficient-3DCNNs" +# 训练batch_size +batch_size=80 + + root_path=$1 +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${root_path}/results/${ASCEND_DEVICE_ID} ];then + rm -rf ${root_path}/results/${ASCEND_DEVICE_ID} + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +else + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) + + + + nohup python3.7 ../main.py \ --root_path ${root_path} \ --gpu_or_npu npu \ @@ -16,7 +41,59 @@ nohup python3.7 ../main.py \ --batch_size 80 \ --n_threads 16 \ --ft_portion complete \ - > ${root_path}/results/npu_train_performance_1p.log 2>&1 & + > ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_1p.log 2>&1 & + + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +FPS=`grep 'fps' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_1p.log| tail -n 1 |awk '{print $6}'|awk 'END {print}'` +FPS=${FPS%,*} +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +top1_acc=`grep 'test top1 acc' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_1p.log|awk '{print $4}'|awk 'END {print}'` +top1_acc=`echo ${top1_acc%,*}` +# 打印,不需要修改 +echo "Final Train Accuracy : ${top1_acc}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'date' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_1p.log|awk '{print $8}' >> ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt +# 删除loss值后逗号 +sed -i 's/,//g' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${top1_acc}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 91de99f293793d47736b3c52ed62fe13aa7d46ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=A6=E5=AE=88=E5=8D=93?= <7704525+drogo@user.noreply.gitee.com> Date: Tue, 7 Jun 2022 09:55:28 +0000 Subject: [PATCH 15/15] pytorch 1.5 -1.8 --- .../test/npu_train_performance_8p.sh | 84 ++++++++++++++++++- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_8p.sh b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_8p.sh index ad27280e18..5df7b95cde 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/test/npu_train_performance_8p.sh @@ -1,8 +1,30 @@ #!/usr/bin/env bash -RANK_ID_START=0 + +source env_npu.sh + +# 网络名称,同目录名称 +Network="Efficient-3DCNNs" +# 训练batch_size +batch_size=640 + RANK_SIZE=8 root_path=$1 +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${root_path}/results/${ASCEND_DEVICE_ID} ];then + rm -rf ${root_path}/results/${ASCEND_DEVICE_ID} + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +else + mkdir -p ${root_path}/results/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) + +RANK_ID_START=0 + for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); do @@ -22,8 +44,62 @@ nohup taskset -c $PID_START-$PID_END python3.7 ../main.py --local_rank $RANK_ID --learning_rate 0.04 \ --droupout_rate 0.2 \ --n_epochs 2 \ - --batch_size 640 \ + --batch_size ${batch_size} \ --n_threads 64 \ --ft_portion complete \ - > ${root_path}/results/npu_train_performance_8p.log 2>&1 & -done \ No newline at end of file + > ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_8p.log 2>&1 & +done + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +FPS=`grep 'fps' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_8p.log| tail -n 1 |awk '{print $6}'|awk 'END {print}'` +FPS=${FPS%,*} +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +top1_acc=`grep 'test top1 acc' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_8p.log|awk '{print $4}'|awk 'END {print}'` +top1_acc=`echo ${top1_acc%,*}` +# 打印,不需要修改 +echo "Final Train Accuracy : ${top1_acc}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'date' ${root_path}/results/${ASCEND_DEVICE_ID}/npu_train_performance_8p.log|awk '{print $8}' >> ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt +# 删除loss值后逗号 +sed -i 's/,//g' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${root_path}/results/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${top1_acc}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${root_path}/results/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee