From 81b315ba358a822fdc19fe87646a9850a50ba051 Mon Sep 17 00:00:00 2001 From: hyp <834613101@qq.com> Date: Fri, 20 May 2022 11:53:47 +0000 Subject: [PATCH 1/3] =?UTF-8?q?=E8=BF=81=E7=A7=BB=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../SimCLR_for_Pytorch/data_aug/gaussian_blur.py | 2 ++ .../contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py | 7 ++++++- .../contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py | 8 +++++++- .../SimCLR_for_Pytorch/multi_epochs_dataloader.py | 2 ++ .../SimCLR_for_Pytorch/test/train_finetune_1p.sh | 3 ++- .../cv/detection/SimCLR_for_Pytorch/test/train_full_1p.sh | 3 ++- .../cv/detection/SimCLR_for_Pytorch/test/train_full_8p.sh | 3 ++- .../SimCLR_for_Pytorch/test/train_performance_1p.sh | 3 ++- .../SimCLR_for_Pytorch/test/train_performance_8p.sh | 3 ++- PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/utils.py | 2 ++ 10 files changed, 29 insertions(+), 7 deletions(-) diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/data_aug/gaussian_blur.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/data_aug/gaussian_blur.py index 1d0113d7ec..ab63c8b9ff 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/data_aug/gaussian_blur.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/data_aug/gaussian_blur.py @@ -21,6 +21,8 @@ # ============================================================================ import numpy as np import torch +if torch.__version__ >= "1.8.1": + import torch_npu from torch import nn from torchvision.transforms import transforms diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py index 1152709ee9..ac87757c9d 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py @@ -23,6 +23,8 @@ import time import argparse import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.npu import torch.nn.functional as F import torch.utils.data @@ -87,6 +89,9 @@ parser.add_argument('--pretrained', dest='pretrained', action='store_true', parser.add_argument('--pth_path', default='', type=str, metavar='PATH', help='path to pretrained checkpoint (default: none)') parser.add_argument('--device_id', type=int, default=0, help="device id") +parser.add_argument('--loss-scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt_level', type=str, default="O2", help="opt level") def main(): @@ -208,7 +213,7 @@ def main_worker(npu, npus_per_node, args): weight_decay=args.weight_decay ) - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale, combine_grad=True) criterion = torch.nn.CrossEntropyLoss().to(device) print('Part2 : Load Network <==> Done') diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py index 3f93c0d9ef..a90b74f14f 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py @@ -24,6 +24,8 @@ import os import time import argparse import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.npu import torch.nn.functional as F import torch.multiprocessing as mp @@ -91,6 +93,10 @@ parser.add_argument('--distributed', action='store_true', parser.add_argument('--nodes', type=int, default=1) parser.add_argument('--device_id', type=int, default=0, help="device id") parser.add_argument('--device_list', type=str, default="0,1,2,3,4,5,6,7", help="device id list") +parser.add_argument('--loss-scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt_level', type=str, default="O2", help="opt level") + def get_host_ip(): @@ -189,7 +195,7 @@ def main_worker(npu, npus_per_node, args): args.lr, weight_decay=args.weight_decay ) - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale, combine_grad=True) if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank], broadcast_buffers=False) diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py index 49b9ae910a..3bb3fd2345 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py @@ -14,6 +14,8 @@ # ============================================================================ import torch +if torch.__version__ >= "1.8.1": + import torch_npu class MultiEpochsDataLoader(torch.utils.data.DataLoader): diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_finetune_1p.sh b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_finetune_1p.sh index 2683e63373..989c7ac129 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_finetune_1p.sh @@ -102,7 +102,8 @@ python3.7 ./main_1p.py \ --npus_per_node=1 \ --pretrained \ --pth_path=${pth_path} \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --batch_size=${batch_size} \ + --loss-scale=-1 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_1p.sh index 8eee249513..a7ed37cee7 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_1p.sh @@ -92,7 +92,8 @@ python3.7 ./main_1p.py \ --epochs=${train_epochs} \ --npu=0 \ --npus_per_node=1 \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --batch_size=${batch_size} \ + --loss-scale=-1 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_8p.sh index 14af2fa821..e37ecce7bd 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_full_8p.sh @@ -92,7 +92,8 @@ python3.7 ./main_8p.py \ --epochs=${train_epochs} \ --npu=0 \ --distributed \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --batch_size=${batch_size} \ + --loss-scale=-1 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_1p.sh index 728fa8c17b..c359d1ffdd 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_1p.sh @@ -92,7 +92,8 @@ python3.7 ./main_1p.py \ --epochs=${train_epochs} \ --npu=0 \ --npus_per_node=1 \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --batch_size=${batch_size} \ + --loss-scale=-1 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_8p.sh index 52c8b94262..699020d1ba 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/test/train_performance_8p.sh @@ -92,7 +92,8 @@ python3.7 ./main_8p.py \ --epochs=${train_epochs} \ --npu=0 \ --distributed \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --batch_size=${batch_size} \ + --loss-scale=-1 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/utils.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/utils.py index 6caf4cc67a..4aca7431da 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/utils.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/utils.py @@ -21,6 +21,8 @@ # ============================================================================ import os import torch +if torch.__version__ >= "1.8.1": + import torch_npu import yaml -- Gitee From d0ea64388c79b6a8992790c345ff0d9b16ddb9c4 Mon Sep 17 00:00:00 2001 From: hyp <834613101@qq.com> Date: Fri, 20 May 2022 12:03:28 +0000 Subject: [PATCH 2/3] =?UTF-8?q?[=E8=8B=8F=E5=B7=9E=E5=A4=A7=E5=AD=A6?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E6=9C=BA=E5=AD=A6=E9=99=A2][=E9=AB=98?= =?UTF-8?q?=E6=A0=A1=E8=B4=A1=E7=8C=AE][Pytorch][SimCLR]-=E5=88=9D?= =?UTF-8?q?=E6=AC=A1=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/detection/SimCLR_for_Pytorch/README_raw.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md index e04900eef2..46e262100b 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md @@ -44,9 +44,11 @@ Log path: ## SimCLR Training Results -| Acc@1 | FPS | # of NPU/GPU | Epochs | Opt-Level | Loss Scale | -| :------: | :------: | :------: | :------: | :------: | :------: | -| ------ | 1767.030 | 1P GPU | 100 | O2 | 128.0 | -| 60.352 | 2098.001 | 1P NPU | 100 | O2 | 128.0 | -| 55.859 | 5227.504 | 8P GPU | 100 | O2 | 128.0 | -| 58.594 | 9747.414 | 8P NPU | 100 | O2 | 128.0 | +| Acc@1 | FPS | # of NPU/GPU | Epochs | Opt-Level | Loss Scale | Torch | +| :------: | :------: | :------: | :------: | :------: | :------: | :------: | +| ------ | 1767.030 | 1P GPU | 100 | O2 | 128.0 | 1.5 | +| 60.352 | 2098.001 | 1P NPU | 100 | O2 | 128.0 | 1.5 | +| 55.859 | 5227.504 | 8P GPU | 100 | O2 | 128.0 | 1.5 | +| 58.594 | 9747.414 | 8P NPU | 100 | O2 | 128.0 | 1.5 | +| 69.141 | 5002.128 | 1P NPU | 100 | O2 | dynamic | 1.8 | +| 58.789 | 12455.228 | 8P NPU | 100 | O2 | dynamic | 1.8 | -- Gitee From b67e5427f7b8a8c3065f4f8db1e4034b9bf5fe25 Mon Sep 17 00:00:00 2001 From: hyp <834613101@qq.com> Date: Fri, 20 May 2022 17:24:10 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=C2=A0[=E8=8B=8F=E5=B7=9E=E5=A4=A7=E5=AD=A6?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E6=9C=BA=E5=AD=A6=E9=99=A2][=E9=AB=98?= =?UTF-8?q?=E6=A0=A1=E8=B4=A1=E7=8C=AE][Pytorch][SimCLR]-=E5=88=9D?= =?UTF-8?q?=E6=AC=A1=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md index 46e262100b..6c0430781a 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md @@ -51,4 +51,4 @@ Log path: | 55.859 | 5227.504 | 8P GPU | 100 | O2 | 128.0 | 1.5 | | 58.594 | 9747.414 | 8P NPU | 100 | O2 | 128.0 | 1.5 | | 69.141 | 5002.128 | 1P NPU | 100 | O2 | dynamic | 1.8 | -| 58.789 | 12455.228 | 8P NPU | 100 | O2 | dynamic | 1.8 | +| 60.352 | 12455.228 | 8P NPU | 100 | O2 | dynamic | 1.8 | -- Gitee