From 7bf147d759a6ee7964b58bc0edd5d9390dcf0e15 Mon Sep 17 00:00:00 2001 From: zhang-zhibin-123 Date: Tue, 25 Apr 2023 09:44:38 +0800 Subject: [PATCH 1/8] my first commit --- .../cv/detection/YoloV2-640/.gitignore | 6 + .../contrib/cv/detection/YoloV2-640/README.md | 331 +++++++++++ .../cv/detection/YoloV2-640/benchmark.py | 139 +++++ .../contrib/cv/detection/YoloV2-640/coco.py | 209 +++++++ .../YoloV2-640/config/yolo_config.py | 227 ++++++++ .../cv/detection/YoloV2-640/data/__init__.py | 0 .../cv/detection/YoloV2-640/data/coco.py | 336 +++++++++++ .../YoloV2-640/data/scripts/COCO2017.sh | 20 + .../YoloV2-640/data/scripts/VOC2007.sh | 42 ++ .../YoloV2-640/data/scripts/VOC2012.sh | 38 ++ .../detection/YoloV2-640/data/transforms.py | 423 ++++++++++++++ .../cv/detection/YoloV2-640/data/voc.py | 342 +++++++++++ .../contrib/cv/detection/YoloV2-640/demo.py | 249 ++++++++ .../cv/detection/YoloV2-640/env_npu.sh | 79 +++ .../contrib/cv/detection/YoloV2-640/eval.py | 134 +++++ .../YoloV2-640/evaluator/cocoapi_evaluator.py | 135 +++++ .../YoloV2-640/evaluator/vocapi_evaluator.py | 347 +++++++++++ .../detection/YoloV2-640/models/__init__.py | 0 .../YoloV2-640/models/backbone/__init__.py | 84 +++ .../models/backbone/cspdarknet53.py | 296 ++++++++++ .../models/backbone/cspdarknet_tiny.py | 128 ++++ .../YoloV2-640/models/backbone/darknet.py | 102 ++++ .../YoloV2-640/models/backbone/resnet.py | 227 ++++++++ .../models/backbone/shufflenetv2.py | 194 +++++++ .../YoloV2-640/models/backbone/vit.py | 378 ++++++++++++ .../models/backbone/weights/README.md | 15 + .../models/backbone/yolox_backbone.py | 409 +++++++++++++ .../YoloV2-640/models/basic/__init__.py | 0 .../YoloV2-640/models/basic/bottleneck_csp.py | 30 + .../detection/YoloV2-640/models/basic/conv.py | 59 ++ .../YoloV2-640/models/basic/upsample.py | 20 + .../YoloV2-640/models/head/__init__.py | 0 .../YoloV2-640/models/head/coupled_head.py | 100 ++++ .../YoloV2-640/models/head/decoupled_head.py | 120 ++++ .../YoloV2-640/models/neck/__init__.py | 23 + .../YoloV2-640/models/neck/dilated_encoder.py | 39 ++ .../detection/YoloV2-640/models/neck/fpn.py | 120 ++++ .../detection/YoloV2-640/models/neck/spp.py | 95 +++ .../YoloV2-640/models/yolo/__init__.py | 92 +++ .../YoloV2-640/models/yolo/yolo_nano.py | 340 +++++++++++ .../YoloV2-640/models/yolo/yolo_tiny.py | 335 +++++++++++ .../YoloV2-640/models/yolo/yolov1.py | 260 +++++++++ .../YoloV2-640/models/yolo/yolov2.py | 271 +++++++++ .../YoloV2-640/models/yolo/yolov3.py | 327 +++++++++++ .../YoloV2-640/models/yolo/yolov4.py | 345 +++++++++++ .../cv/detection/YoloV2-640/requirements.txt | 19 + .../contrib/cv/detection/YoloV2-640/test.py | 233 ++++++++ .../cv/detection/YoloV2-640/train-1p.sh | 13 + .../cv/detection/YoloV2-640/train-8p.sh | 96 +++ .../cv/detection/YoloV2-640/train1p.py | 545 ++++++++++++++++++ .../cv/detection/YoloV2-640/train8p.py | 545 ++++++++++++++++++ .../cv/detection/YoloV2-640/train_yolonano.sh | 15 + .../cv/detection/YoloV2-640/train_yolov1.sh | 16 + .../cv/detection/YoloV2-640/train_yolov3.sh | 15 + .../detection/YoloV2-640/train_yolov3_de.sh | 15 + .../detection/YoloV2-640/train_yolov3_spp.sh | 15 + .../cv/detection/YoloV2-640/train_yolov4.sh | 19 + .../cv/detection/YoloV2-640/utils/__init__.py | 0 .../cv/detection/YoloV2-640/utils/box_ops.py | 101 ++++ .../YoloV2-640/utils/com_flops_params.py | 17 + .../YoloV2-640/utils/create_labels.py | 240 ++++++++ .../detection/YoloV2-640/utils/criterion.py | 192 ++++++ .../YoloV2-640/utils/distributed_utils.py | 77 +++ 
.../YoloV2-640/utils/fuse_conv_bn.py | 55 ++ .../YoloV2-640/utils/kmeans_anchor.py | 230 ++++++++ .../cv/detection/YoloV2-640/utils/misc.py | 149 +++++ .../cv/detection/YoloV2-640/utils/vis.py | 106 ++++ .../cv/detection/YoloV2-640/weights/README.md | 15 + 68 files changed, 10194 insertions(+) create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/.gitignore create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/README.md create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/benchmark.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/coco.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/config/yolo_config.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/coco.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/COCO2017.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2007.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2012.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/transforms.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/data/voc.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/demo.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/env_npu.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/eval.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/evaluator/cocoapi_evaluator.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/evaluator/vocapi_evaluator.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet53.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet_tiny.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/darknet.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/resnet.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/shufflenetv2.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/vit.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/weights/README.md create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/yolox_backbone.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/basic/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/basic/bottleneck_csp.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/basic/conv.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/basic/upsample.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/head/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/head/coupled_head.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/head/decoupled_head.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/neck/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/neck/dilated_encoder.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/neck/fpn.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/neck/spp.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/__init__.py create mode 100644 
PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_nano.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_tiny.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov1.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov2.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov3.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov4.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/requirements.txt create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/test.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train1p.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train8p.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train_yolonano.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train_yolov1.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_de.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_spp.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train_yolov4.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/__init__.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/box_ops.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/com_flops_params.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/create_labels.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/criterion.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/distributed_utils.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/fuse_conv_bn.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/kmeans_anchor.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/misc.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/utils/vis.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/weights/README.md diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/.gitignore b/PyTorch/contrib/cv/detection/YoloV2-640/.gitignore new file mode 100644 index 0000000000..00eaf9d2bd --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/.gitignore @@ -0,0 +1,6 @@ +*.pt +*.pth +*.pkl +__pycache__ +.vscode +det_results \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/README.md b/PyTorch/contrib/cv/detection/YoloV2-640/README.md new file mode 100644 index 0000000000..5cf9a9f4ad --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/README.md @@ -0,0 +1,331 @@ +# Update: 2022-05-31 +Recently, I have released an anchor-free YOLO: + +https://github.com/yjh0410/FreeYOLO + +# A new and strong YOLO family +Recently, I rebuild my YOLO-Family project !! 
+
+# Requirements
+- We recommend using Anaconda to create a conda environment:
+```Shell
+conda create -n yolo python=3.6
+```
+
+- Then, activate the environment:
+```Shell
+conda activate yolo
+```
+
+- Install the requirements:
+```Shell
+pip install -r requirements.txt
+```
+PyTorch >= 1.1.0 and Torchvision >= 0.3.0
+
+# Visualize positive samples
+You can run the following command to visualize positive samples:
+```Shell
+python train.py \
+        -d voc \
+        --root path/to/your/dataset \
+        -m yolov2 \
+        --batch_size 2 \
+        --vis_targets
+```
+
+# Coming soon
+A better YOLO family.
+
+
+# This project
+In this project, you can enjoy:
+- a new and stronger YOLOv1
+- a new and stronger YOLOv2
+- a stronger YOLOv3
+- a stronger YOLOv3 with SPP
+- a stronger YOLOv3 with DilatedEncoder
+- YOLOv4 (I'm trying to make it better)
+- YOLO-Tiny
+- YOLO-Nano
+
+
+# Future work
+- Try to make my YOLOv4 better.
+- Train my YOLOv1/YOLOv2 with ViT-Base (pretrained with Masked Autoencoder, MAE)
+
+# Weights
+You can download all weights, including my DarkNet-53, CSPDarkNet-53, MAE-ViT and YOLO weights, from the following links.
+
+## Backbone
+My Backbone:
+- DarkNet53: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/darknet53.pth
+- CSPDarkNet-53: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/cspdarknet53.pth
+- CSPDarkNet-Tiny: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/cspdarknet_tiny.pth
+
+YOLOX-Backbone:
+- CSPDarkNet-S: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_s.pth
+- CSPDarkNet-M: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_m.pth
+- CSPDarkNet-L: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_l.pth
+- CSPDarkNet-X: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_x.pth
+- CSPDarkNet-Tiny: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_tiny.pth
+- CSPDarkNet-Nano: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_nano.pth
+
+## YOLO
+- YOLOv1: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov1_35.22_54.7.pth
+- YOLOv2: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov2_36.4_56.6.pth
+- YOLOv3: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov3_36.9_59.0.pth
+- YOLOv3-SPP: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov3_spp_38.2_60.1.pth
+- YOLOv3-DE: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov3_de_38.7_60.2.pth
+- YOLOv4: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov4_exp_43.0_63.4.pth
+- YOLO-Tiny: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolo_tiny_28.8_48.6.pth
+- YOLO-Nano: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolo_nano_22.4_40.7.pth
+
+
+# Experiments
+## Tricks
+Tricks in this project:
+- [x] Augmentations: Flip + Color jitter + RandomCrop
+- [x] Model EMA
+- [x] Mosaic Augmentation
+- [x] Multi-scale training
+- [ ] Gradient accumulation
+- [ ] MixUp Augmentation
+- [ ] Cosine annealing learning rate schedule
+- [ ] AdamW
+- [ ] Scale loss by number of positive samples
+
+
+# Experiments
+All experiment results are evaluated on COCO val.
+All FPS results except YOLO-Nano's are measured on a 2080ti GPU.
+We will measure the speed of YOLO-Nano on a CPU.
+
+## YOLOv1
+
+| Model      | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLOv1-320 | 151 | 25.4 | 41.5 | 26.0 | 4.2  | 25.0 | 49.8 | 10.49  | 44.54M |
+| YOLOv1-416 | 128 | 30.1 | 47.8 | 30.9 | 7.8  | 31.9 | 53.3 | 17.73  | 44.54M |
+| YOLOv1-512 | 114 | 33.1 | 52.2 | 34.0 | 10.8 | 35.9 | 54.9 | 26.85  | 44.54M |
+| YOLOv1-640 | 75  | 35.2 | 54.7 | 37.1 | 14.3 | 39.5 | 53.4 | 41.96  | 44.54M |
+| YOLOv1-800 | -   | -    | -    | -    | -    | -    | -    | 65.56  | 44.54M |
+
+## YOLOv2
+
+| Model      | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLOv2-320 | 147 | 26.8 | 44.1 | 27.1 | 4.7  | 27.6 | 50.8 | 10.53  | 44.89M |
+| YOLOv2-416 | 123 | 31.6 | 50.3 | 32.4 | 9.1  | 33.8 | 54.0 | 17.79  | 44.89M |
+| YOLOv2-512 | 108 | 34.3 | 54.0 | 35.4 | 12.3 | 37.8 | 55.2 | 26.94  | 44.89M |
+| YOLOv2-640 | 73  | 36.3 | 56.6 | 37.7 | 15.1 | 41.1 | 54.0 | 42.10  | 44.89M |
+| YOLOv2-800 | -   | -    | -    | -    | -    | -    | -    | 65.78  | 44.89M |
+
+## YOLOv3
+
+| Model      | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLOv3-320 | 111 | 30.8 | 50.3 | 31.8 | 10.0 | 33.1 | 50.0 | 19.57  | 61.97M |
+| YOLOv3-416 | 89  | 34.8 | 55.8 | 36.1 | 14.6 | 37.5 | 52.9 | 33.08  | 61.97M |
+| YOLOv3-512 | 77  | 36.9 | 58.1 | 39.3 | 18.0 | 40.3 | 52.2 | 50.11  | 61.97M |
+| YOLOv3-608 | 51  | 37.0 | 58.9 | 39.3 | 20.5 | 41.2 | 49.0 | 70.66  | 61.97M |
+| YOLOv3-640 | 49  | 36.9 | 59.0 | 39.7 | 21.6 | 41.6 | 47.7 | 78.30  | 61.97M |
+
+## YOLOv3 with SPP
+
+| Model          | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|----------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLOv3-SPP-320 | 110 | 31.0 | 50.8 | 32.0 | 10.5 | 33.0 | 50.4 | 19.68  | 63.02M |
+| YOLOv3-SPP-416 | 88  | 35.0 | 56.1 | 36.4 | 14.9 | 37.7 | 52.8 | 33.26  | 63.02M |
+| YOLOv3-SPP-512 | 75  | 37.2 | 58.7 | 39.1 | 19.1 | 40.0 | 53.0 | 50.38  | 63.02M |
+| YOLOv3-SPP-608 | 50  | 38.3 | 60.1 | 40.7 | 20.9 | 41.1 | 51.2 | 71.04  | 63.02M |
+| YOLOv3-SPP-640 | 48  | 38.2 | 60.1 | 40.4 | 21.6 | 41.1 | 50.5 | 78.72  | 63.02M |
+
+## YOLOv3 with Dilated Encoder
+The DilatedEncoder was proposed in YOLOF.
+
+| Model         | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|---------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLOv3-DE-320 | 109 | 31.1 | 51.1 | 31.7 | 10.2 | 32.6 | 51.2 | 19.10  | 57.25M |
+| YOLOv3-DE-416 | 88  | 35.0 | 56.1 | 36.3 | 14.6 | 37.4 | 53.7 | 32.28  | 57.25M |
+| YOLOv3-DE-512 | 74  | 37.7 | 59.3 | 39.6 | 17.9 | 40.4 | 54.4 | 48.90  | 57.25M |
+| YOLOv3-DE-608 | 50  | 38.7 | 60.5 | 40.8 | 20.6 | 41.7 | 53.1 | 68.96  | 57.25M |
+| YOLOv3-DE-640 | 48  | 38.7 | 60.2 | 40.7 | 21.3 | 41.7 | 51.7 | 76.41  | 57.25M |
+
+## YOLOv4
+I'm still trying to make it better.
+
+| Model      | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLOv4-320 | 89  | 39.2 | 58.6 | 40.9 | 16.9 | 44.1 | 59.2 | 16.38  | 58.14M |
+| YOLOv4-416 | 84  | 41.7 | 61.6 | 44.2 | 22.0 | 46.6 | 57.7 | 27.69  | 58.14M |
+| YOLOv4-512 | 70  | 42.9 | 63.1 | 46.1 | 24.5 | 48.3 | 56.5 | 41.94  | 58.14M |
+| YOLOv4-608 | 51  | 43.0 | 63.4 | 46.1 | 26.7 | 48.6 | 53.9 | 59.14  | 58.14M |
+
+## YOLO-Tiny
+
+| Model         | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|---------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLO-Tiny-320 | 143 | 26.4 | 44.5 | 26.8 | 8.8  | 28.2 | 42.4 | 2.17   | 7.66M  |
+| YOLO-Tiny-416 | 130 | 28.2 | 47.6 | 28.8 | 11.6 | 31.5 | 41.4 | 3.67   | 7.82M  |
+| YOLO-Tiny-512 | 118 | 28.8 | 48.6 | 29.4 | 13.3 | 33.4 | 38.3 | 5.57   | 7.82M  |
+
+## YOLO-Nano
+The FPS is measured on an i5-1135G7 CPU, without any deployment-side acceleration to speed up detection.
+
+| Model         | FPS | AP   | AP50 | AP75 | APs  | APm  | APl  | GFLOPs | Params |
+|---------------|-----|------|------|------|------|------|------|--------|--------|
+| YOLO-Nano-320 | 25  | 18.4 | 33.7 | 17.8 | 3.9  | 17.5 | 33.1 | 0.64   | 1.86M  |
+| YOLO-Nano-416 | 15  | 21.4 | 38.5 | 20.9 | 6.5  | 21.4 | 34.8 | 0.99   | 1.86M  |
+| YOLO-Nano-512 | 10  | 22.4 | 40.7 | 22.1 | 8.0  | 24.0 | 33.2 | 1.65   | 1.86M  |
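+
+For reference, FPS, GFLOPs and Params can also be measured with the bundled `benchmark.py`. This is a minimal sketch: the flags come from the script's own argparse options, and the weight/dataset paths are placeholders.
+
+```Shell
+python benchmark.py --cuda \
+        -m yolov2 \
+        --weight path/to/weight \
+        --img_size 640 \
+        --root path/to/dataset/
+```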
+
+
+# Dataset
+
+## VOC Dataset
+### My BaiduYunDisk
+- BaiduYunDisk: https://pan.baidu.com/s/1tYPGCYGyC0wjpC97H-zzMQ Password: 4la9
+
+### Download VOC2007 trainval & test
+
+```Shell
+# specify a directory for the dataset to be downloaded into, else the default is ~/data/
+sh data/scripts/VOC2007.sh # <directory>
+```
+
+### Download VOC2012 trainval
+```Shell
+# specify a directory for the dataset to be downloaded into, else the default is ~/data/
+sh data/scripts/VOC2012.sh # <directory>
+```
+
+## MSCOCO Dataset
+### My BaiduYunDisk
+- BaiduYunDisk: https://pan.baidu.com/s/1xAPk8fnaWMMov1VEjr8-zA Password: 6vhp
+
+On Ubuntu, you may need the command `jar xvf xxx.zip` to extract `train2017.zip` and `test2017.zip`,
+since they are larger than 2 GB and (as far as I know) `unzip` cannot handle zip files of that size.
+
+### Download MSCOCO 2017 dataset
+Just run ```sh data/scripts/COCO2017.sh```. You will get COCO train2017, val2017 and test2017.
+
+
+# Train
+For example:
+
+```Shell
+python train.py --cuda \
+        -d coco \
+        -m yolov2 \
+        -ms \
+        --ema \
+        --batch_size 16 \
+        --root path/to/dataset/
+```
+
+You can run ```python train.py -h``` to check all optional arguments, or just run one of the shell scripts, for example:
+```Shell
+sh train_yolov1.sh
+```
+
+If you have multiple GPUs, e.g. 8, and put 4 images on each GPU:
+```Shell
+python -m torch.distributed.launch --nproc_per_node=8 train.py -d coco \
+        --cuda \
+        -m yolov1 \
+        -ms \
+        --ema \
+        -dist \
+        --sybn \
+        --num_gpu 8 \
+        --batch_size 4 \
+        --root path/to/dataset/
+```
+Note that `--batch_size` is the batch size per GPU, not the total across all GPUs.
+
+I have uploaded all training log files. For example, `1-v1.txt` contains the full output of training YOLOv1.
+
+It is strongly recommended that you open the training shell files to check how each YOLO detector is trained.
+
+# Test
+For example:
+
+```Shell
+python test.py -d coco \
+        --cuda \
+        -m yolov2 \
+        --weight path/to/weight \
+        --img_size 640 \
+        --root path/to/dataset/ \
+        --show
+```
+
+# Evaluation
+For example:
+
+```Shell
+python eval.py -d coco-val \
+        --cuda \
+        -m yolov1 \
+        --weight path/to/weight \
+        --img_size 640 \
+        --root path/to/dataset/
+```
+
+# Evaluation on COCO-test-dev
+To run on COCO test-dev (make sure you have downloaded test2017):
+```Shell
+python eval.py -d coco-test \
+        --cuda \
+        -m yolov1 \
+        --weight path/to/weight \
+        --img_size 640 \
+        --root path/to/dataset/
+```
+You will get a `coco_test-dev.json` file.
+Then you should follow the official requirements to compress it into zip format
+and upload it to the official evaluation server.
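+
+A minimal packaging sketch, assuming the `coco_test-dev.json` produced above. The exact result-file naming required by the evaluation server should be checked against the official COCO upload guidelines, so the names below are only placeholders:
+
+```Shell
+# rename the result file to whatever pattern the evaluation server expects (placeholder name)
+cp coco_test-dev.json detections_test-dev2017_yolo_results.json
+zip yolo_test-dev2017.zip detections_test-dev2017_yolo_results.json
+```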
diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/benchmark.py b/PyTorch/contrib/cv/detection/YoloV2-640/benchmark.py new file mode 100644 index 0000000000..0d6c755e66 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/benchmark.py @@ -0,0 +1,139 @@ +import argparse +import numpy as np +import time +import os +import torch + +from config.yolo_config import yolo_config +from data.transforms import ValTransforms +from data.coco import COCODataset, coco_class_index, coco_class_labels +from utils.com_flops_params import FLOPs_and_Params +from utils import fuse_conv_bn + +from models.yolo import build_model +import torch_npu + + +parser = argparse.ArgumentParser(description='Benchmark') +# Model +parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') +parser.add_argument('--fuse_conv_bn', action='store_true', default=False, + help='fuse conv and bn') +parser.add_argument('--conf_thresh', default=0.1, type=float, + help='confidence threshold') +parser.add_argument('--nms_thresh', default=0.45, type=float, + help='NMS threshold') +parser.add_argument('--center_sample', action='store_true', default=False, + help='center sample trick.') +# data root +parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') +# basic +parser.add_argument('-size', '--img_size', default=640, type=int or list, + help='img_size') +parser.add_argument('--weight', default=None, + type=str, help='Trained state_dict file path to open') +# cuda +parser.add_argument('--cuda', action='store_true', default=False, + help='use cuda.') + +args = parser.parse_args() + + +def test(net, device, img_size, testset, transform): + # Step-1: Compute FLOPs and Params + FLOPs_and_Params(net, img_size) + + # Step-2: Compute FPS + num_images = 2002 + total_time = 0 + count = 0 + with torch.no_grad(): + for index in range(num_images): + if index % 500 == 0: + print('Testing image {:d}/{:d}....'.format(index+1, num_images)) + image, _ = testset.pull_image(index) + + h, w, _ = image.shape + size = np.array([[w, h, w, h]]) + + # prepare + x, _, _, scale, offset = transform(image) + x = x.unsqueeze(0).to(device) + + # star time + torch_npu.npu.synchronize() + start_time = time.perf_counter() + + # inference + bboxes, scores, cls_inds = net(x) + + # rescale + bboxes -= offset + bboxes /= scale + bboxes *= size + + # end time + torch_npu.npu.synchronize() + elapsed = time.perf_counter() - start_time + + # print("detection time used ", elapsed, "s") + if index > 1: + total_time += elapsed + count += 1 + + print('- FPS :', 1.0 / (total_time / count)) + + + +if __name__ == '__main__': + # get device + if args.cuda: + print('use cuda') + device = torch.device("npu") + else: + device = torch.device("cpu") + + # dataset + print('test on coco-val ...') + data_dir = os.path.join(args.root, 'COCO') + class_names = coco_class_labels + class_indexs = coco_class_index + num_classes = 80 + dataset = COCODataset( + data_dir=data_dir, + image_set='val2017', + img_size=args.img_size) + + # YOLO Config + cfg = yolo_config[args.model] + # build model + model = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=False) + + # load weight + if args.weight: + model.load_state_dict(torch.load(args.weight, map_location='cpu'), strict=False) + print('Finished loading model!') + else: + print('The path to weight file is None !') + exit(0) + model = model.to(device).eval() + + # fuse conv bn + if 
args.fuse_conv_bn: + print('fuse conv and bn ...') + model = fuse_conv_bn(model) + + # run + test(net=model, + img_size=args.img_size, + device=device, + testset=dataset, + transform=ValTransforms(args.img_size) + ) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/coco.py b/PyTorch/contrib/cv/detection/YoloV2-640/coco.py new file mode 100644 index 0000000000..3260ebe1e3 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/coco.py @@ -0,0 +1,209 @@ +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +# +""" +@author: Wenbo Li +@contact: fenglinglwb@gmail.com +""" + +import cv2 +import json +import numpy as np +import os + +from dataset.JointsDataset import JointsDataset +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + + +class COCODataset(JointsDataset): + + def __init__(self, DATASET, stage, transform=None): + super().__init__(DATASET, stage, transform) + self.cur_dir = os.path.split(os.path.realpath(__file__))[0] + + self.train_gt_file = 'train_val_minus_minival_2014.json' + self.train_gt_path = os.path.join(self.cur_dir, 'gt_json', + self.train_gt_file) + + self.val_gt_file = 'minival_2014.json' + self.val_gt_path = os.path.join(self.cur_dir, 'gt_json', + self.val_gt_file) + self.val_det_file = 'minival_2014_det.json' + self.val_det_path = os.path.join(self.cur_dir, 'det_json', + self.val_det_file) + + self.test_det_file = '' + self.test_det_path = os.path.join(self.cur_dir, 'det_json', + self.test_det_file) + + self._exception_ids = ['366379'] + + self.data = self._get_data() + self.data_num = len(self.data) + + def _get_data(self): + data = list() + + if self.stage == 'train': + coco = COCO(self.train_gt_path) + elif self.stage == 'val': + coco = COCO(self.val_gt_path) + self.val_gt = coco + else: + pass + + if self.stage == 'train': + for aid, ann in coco.anns.items(): + img_id = ann['image_id'] + if img_id not in coco.imgs \ + or img_id in self._exception_ids: + continue + + if ann['iscrowd']: + continue + + img_name = coco.imgs[img_id]['file_name'] + prefix = 'val2014' if 'val' in img_name else 'train2014' + img_path = os.path.join(self.cur_dir, 'images', prefix, + img_name) + + bbox = np.array(ann['bbox']) + area = ann['area'] + joints = np.array(ann['keypoints']).reshape((-1, 3)) + headRect = np.array([0, 0, 1, 1], np.int32) + + center, scale = self._bbox_to_center_and_scale(bbox) + + if np.sum(joints[:, -1] > 0) < self.kp_load_min_num or \ + ann['num_keypoints'] == 0: + continue + + d = dict(aid=aid, + area=area, + bbox=bbox, + center=center, + headRect=headRect, + img_id=img_id, + img_name=img_name, + img_path=img_path, + joints=joints, + scale=scale) + + data.append(d) + + else: + if self.stage == 'val': + det_path = self.val_det_path + else: + det_path = self.test_det_path + dets = json.load(open(det_path)) + + for det in dets: + if det['image_id'] not in coco.imgs or det['category_id'] != 1: + continue + + img_id = det['image_id'] + img_name = 'COCO_val2014_000000%06d.jpg' % img_id + img_path = os.path.join(self.cur_dir, 'images', 'val2014', + img_name) + + bbox = np.array(det['bbox']) + center, scale = self._bbox_to_center_and_scale(bbox) + joints = np.zeros((self.keypoint_num, 3)) + score = det['score'] + headRect = np.array([0, 0, 1, 1], np.int32) + + d = dict(bbox=bbox, + center=center, + headRect=headRect, + img_id=img_id, + img_name=img_name, + img_path=img_path, + joints=joints, + scale=scale, + score=score) + + data.append(d) + + return data + + def _bbox_to_center_and_scale(self, bbox): + x, y, w, h = bbox + + center = np.zeros(2, dtype=np.float32) + center[0] = x + w / 2.0 + center[1] = y + h / 2.0 + + scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], + dtype=np.float32) + + return center, scale + + def evaluate(self, pred_path): + pred = self.val_gt.loadRes(pred_path) + coco_eval = COCOeval(self.val_gt, pred, iouType='keypoints') + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + def visualize(self, img, 
joints, score=None): + pairs = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], + [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], + [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] + color = np.random.randint(0, 256, (self.keypoint_num, 3)).tolist() + + for i in range(self.keypoint_num): + if joints[i, 0] > 0 and joints[i, 1] > 0: + cv2.circle(img, tuple(joints[i, :2]), 2, tuple(color[i]), 2) + if score: + cv2.putText(img, score, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, + (128, 255, 0), 2) + + def draw_line(img, p1, p2): + c = (0, 0, 255) + if p1[0] > 0 and p1[1] > 0 and p2[0] > 0 and p2[1] > 0: + cv2.line(img, tuple(p1), tuple(p2), c, 2) + + for pair in pairs: + draw_line(img, joints[pair[0] - 1], joints[pair[1] - 1]) + + return img + + +if __name__ == '__main__': + from dataset.attribute import load_dataset + + dataset = load_dataset('COCO') + coco = COCODataset(dataset, 'val') + print(coco.data_num) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/config/yolo_config.py b/PyTorch/contrib/cv/detection/YoloV2-640/config/yolo_config.py new file mode 100644 index 0000000000..5af1106830 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/config/yolo_config.py @@ -0,0 +1,227 @@ +# YOLO config + + +yolo_config = { + 'yolov1': { + # backbone + 'backbone': 'r50', + # neck + 'neck': 'dilated_encoder', + # anchor size + 'anchor_size': None + }, + 'yolov2': { + # backbone + 'backbone': 'r50', + # neck + 'neck': 'dilated_encoder', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + }, + 'yolov3': { + # backbone + 'backbone': 'd53', + # neck + 'neck': 'conv_blocks', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + }, + 'yolov3_spp': { + # backbone + 'backbone': 'd53', + # neck + 'neck': 'spp', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + }, + 'yolov3_de': { + # backbone + 'backbone': 'd53', + # neck + 'neck': 'dilated_encoder', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + }, + 'yolov4': { + # backbone + 'backbone': 'cspd53', + # neck + 'neck': 'spp', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, qfl + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolov5_s': { + # backbone + 'backbone': 'csp_s', + 'width': 0.5, + 'depth': 0.33, + 'depthwise': False, + 'freeze': False, + # neck + 'neck': 'yolopafpn', + # head + 'head_dim': 256, + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, bce + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolov5_m': { + # backbone + 'backbone': 'csp_m', + 'width': 0.75, + 'depth': 0.67, + 'depthwise': False, + 'freeze': False, + # neck + 'neck': 'yolopafpn', + # head + 'head_dim': 256, + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, bce + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolov5_l': { + # backbone + 'backbone': 
'csp_l', + 'width': 1.0, + 'depth': 1.0, + 'depthwise': False, + 'freeze': False, + # neck + 'neck': 'yolopafpn', + # head + 'head_dim': 256, + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, bce + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolov5_x': { + # backbone + 'backbone': 'csp_x', + 'width': 1.25, + 'depth': 1.33, + 'depthwise': False, + 'freeze': False, + # neck + 'neck': 'yolopafpn', + # head + 'head_dim': 256, + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, bce + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolov5_t': { + # backbone + 'backbone': 'csp_t', + 'width': 0.375, + 'depth': 0.33, + 'depthwise': False, + 'freeze': False, + # neck + 'neck': 'yolopafpn', + # head + 'head_dim': 256, + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, bce + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolov5_n': { + # backbone + 'backbone': 'csp_n', + 'width': 0.25, + 'depth': 0.33, + 'depthwise': True, + 'freeze': False, + # neck + 'neck': 'yolopafpn', + # head + 'head_dim': 256, + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, bce + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolo_tiny': { + # backbone + 'backbone': 'cspd_tiny', + # neck + 'neck': 'spp-csp', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + }, + 'yolo_nano': { + # backbone + 'backbone': 'sfnet_v2', + # neck + 'neck': 'spp-dw', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, qfl + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolo_nano_plus': { + # backbone + 'backbone': 'csp_n', + 'depthwise': True, + # neck + 'neck': 'yolopafpn', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + # loss + 'loss_obj': 'mse', # optional: mse, qfl + 'loss_box': 'giou' # optional: iou, giou, ciou + }, + 'yolotr': { + # backbone + 'backbone': 'vit_b', + # neck + 'neck': 'dilated_encoder', + # anchor size: P5-640 + 'anchor_size': [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + } +} \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/data/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/coco.py b/PyTorch/contrib/cv/detection/YoloV2-640/data/coco.py new file mode 100644 index 0000000000..5648bc821e --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/data/coco.py @@ -0,0 +1,336 @@ +import os +import numpy as np +import random + +from torch.utils.data import Dataset +import cv2 +import torch_npu + +try: + from pycocotools.coco import COCO +except: + print("It seems that the COCOAPI is not installed.") + + 
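+# COCO category ids range from 1 to 90 with gaps; coco_class_labels lists every name
+# (plus 'background'), and coco_class_index maps the 80 classes predicted by the model
+# back to their original category ids.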
+ +coco_class_labels = ('background', + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', + 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign', + 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', + 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', + 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', + 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', + 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', + 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk', + 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', + 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') + +coco_class_index = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, + 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + +class COCODataset(Dataset): + """ + COCO dataset class. + """ + def __init__(self, + data_dir=None, + image_set='train2017', + img_size=640, + transform=None, + color_augment=None, + mosaic=False, + mixup=False): + """ + COCO dataset initialization. Annotation data are read into memory by COCO API. + Args: + data_dir (str): dataset root directory + json_file (str): COCO json file name + name (str): COCO data name (e.g. 
'train2017' or 'val2017') + img_size (int): target image size after pre-processing + debug (bool): if True, only one data id is selected from the dataset + """ + if image_set == 'train2017': + self.json_file='instances_train2017.json' + elif image_set == 'val2017': + self.json_file='instances_val2017.json' + elif image_set == 'test2017': + self.json_file='image_info_test-dev2017.json' + self.image_set = image_set + self.data_dir = data_dir + + self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file)) + self.ids = self.coco.getImgIds() + self.img_size = img_size + self.class_ids = sorted(self.coco.getCatIds()) + # augmentation + self.transform = transform + self.mosaic = mosaic + self.mixup = mixup + self.color_augment = color_augment + if self.mosaic: + print('use Mosaic Augmentation ...') + if self.mixup: + print('use MixUp Augmentation ...') + + def __len__(self): + return len(self.ids) + + + def __getitem__(self, index): + im, gt, h, w, scale, offset = self.pull_item(index) + return im, gt + + + def load_img_targets(self, index): + anno_ids = self.coco.getAnnIds(imgIds=[int(index)], iscrowd=None) + annotations = self.coco.loadAnns(anno_ids) + + # load an image + img_file = os.path.join(self.data_dir, self.image_set, + '{:012}'.format(index) + '.jpg') + img = cv2.imread(img_file) + + if self.json_file == 'instances_val5k.json' and img is None: + img_file = os.path.join(self.data_dir, 'train2017', + '{:012}'.format(index) + '.jpg') + img = cv2.imread(img_file) + + assert img is not None + + height, width, channels = img.shape + + #load a target + target = [] + for anno in annotations: + if 'bbox' in anno and anno['area'] > 0: + xmin = np.max((0, anno['bbox'][0])) + ymin = np.max((0, anno['bbox'][1])) + xmax = np.min((width - 1, xmin + np.max((0, anno['bbox'][2] - 1)))) + ymax = np.min((height - 1, ymin + np.max((0, anno['bbox'][3] - 1)))) + if xmax > xmin and ymax > ymin: + label_ind = anno['category_id'] + cls_id = self.class_ids.index(label_ind) + xmin /= width + ymin /= height + xmax /= width + ymax /= height + + target.append([xmin, ymin, xmax, ymax, cls_id]) # [xmin, ymin, xmax, ymax, label_ind] + else: + print('No bbox !!!') + + return img, target, height, width + + + def load_mosaic(self, index): + ids_list_ = self.ids[:index] + self.ids[index+1:] + # random sample other indexs + id1 = self.ids[index] + id2, id3, id4 = random.sample(ids_list_, 3) + ids = [id1, id2, id3, id4] + + img_lists = [] + tg_lists = [] + # load image and target + for id_ in ids: + img_i, target_i, _, _ = self.load_img_targets(id_) + img_lists.append(img_i) + tg_lists.append(target_i) + + mean = np.array([v*255 for v in self.transform.mean]) + mosaic_img = np.ones([self.img_size*2, self.img_size*2, img_i.shape[2]], dtype=np.uint8) * mean + # mosaic center + yc, xc = [int(random.uniform(-x, 2*self.img_size + x)) for x in [-self.img_size // 2, -self.img_size // 2]] + # yc = xc = self.img_size + + mosaic_tg = [] + for i in range(4): + img_i, target_i = img_lists[i], tg_lists[i] + target_i = np.array(target_i) + h0, w0, _ = img_i.shape + + # resize + scale_range = np.arange(50, 210, 10) + s = np.random.choice(scale_range) / 100. 
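+            # randomly rescale each tile to 0.5x-2.0x before it is placed on the 2x-size mosaic canvas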
+ + if np.random.randint(2): + # keep aspect ratio + r = self.img_size / max(h0, w0) + if r != 1: + img_i = cv2.resize(img_i, (int(w0 * r * s), int(h0 * r * s))) + else: + img_i = cv2.resize(img_i, (int(self.img_size * s), int(self.img_size * s))) + h, w, _ = img_i.shape + + # place img in img4 + if i == 0: # top left + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, self.img_size * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(self.img_size * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, self.img_size * 2), min(self.img_size * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + mosaic_img[y1a:y2a, x1a:x2a] = img_i[y1b:y2b, x1b:x2b] + padw = x1a - x1b + padh = y1a - y1b + + # labels + target_i_ = target_i.copy() + if len(target_i) > 0: + # a valid target, and modify it. + target_i_[:, 0] = (w * (target_i[:, 0]) + padw) + target_i_[:, 1] = (h * (target_i[:, 1]) + padh) + target_i_[:, 2] = (w * (target_i[:, 2]) + padw) + target_i_[:, 3] = (h * (target_i[:, 3]) + padh) + # check boxes + valid_tgt = [] + for tgt in target_i_: + x1, y1, x2, y2, label = tgt + bw, bh = x2 - x1, y2 - y1 + if bw > 5. and bh > 5.: + valid_tgt.append([x1, y1, x2, y2, label]) + if len(valid_tgt) == 0: + valid_tgt.append([0., 0., 0., 0., 0.]) + + mosaic_tg.append(target_i_) + # check target + if len(mosaic_tg) == 0: + mosaic_tg = np.zeros([1, 5]) + else: + mosaic_tg = np.concatenate(mosaic_tg, axis=0) + # Cutout/Clip targets + np.clip(mosaic_tg[:, :4], 0, 2 * self.img_size, out=mosaic_tg[:, :4]) + # normalize + mosaic_tg[:, :4] /= (self.img_size * 2) + + return mosaic_img, mosaic_tg, self.img_size, self.img_size + + + def pull_item(self, index): + # load a mosaic image + if self.mosaic and np.random.randint(2): + # mosaic + img, target, height, width = self.load_mosaic(index) + + # MixUp https://arxiv.org/pdf/1710.09412.pdf + if self.mixup and np.random.randint(2): + img2, target2, height, width = self.load_mosaic(np.random.randint(0, len(self.ids))) + r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 + img = (img * r + img2 * (1 - r)).astype(np.uint8) + target = np.concatenate((target, target2), 0) + + # augment + img, boxes, labels, scale, offset = self.color_augment(img, target[:, :4], target[:, 4]) + + # load an image and target + else: + id_ = self.ids[index] + img, target, height, width = self.load_img_targets(id_) + if len(target) == 0: + target = np.zeros([1, 5]) + else: + target = np.array(target) + # augment + img, boxes, labels, scale, offset = self.transform(img, target[:, :4], target[:, 4]) + + target = np.hstack((boxes, np.expand_dims(labels, axis=1))) + + return img, target, height, width, scale, offset + + + def pull_image(self, index): + id_ = self.ids[index] + img_file = os.path.join(self.data_dir, self.image_set, + '{:012}'.format(id_) + '.jpg') + img = cv2.imread(img_file) + + if self.json_file == 'instances_val5k.json' and img is None: + img_file = os.path.join(self.data_dir, 'train2017', + '{:012}'.format(id_) + '.jpg') + img = cv2.imread(img_file) + + return img, id_ + + + def pull_anno(self, index): + id_ = self.ids[index] + 
+ anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None) + annotations = self.coco.loadAnns(anno_ids) + + target = [] + for anno in annotations: + if 'bbox' in anno: + xmin = np.max((0, anno['bbox'][0])) + ymin = np.max((0, anno['bbox'][1])) + xmax = xmin + anno['bbox'][2] + ymax = ymin + anno['bbox'][3] + + if anno['area'] > 0 and xmax >= xmin and ymax >= ymin: + label_ind = anno['category_id'] + cls_id = self.class_ids.index(label_ind) + + target.append([xmin, ymin, xmax, ymax, cls_id]) # [xmin, ymin, xmax, ymax, label_ind] + else: + print('No bbox !!') + return target + + +if __name__ == "__main__": + from transforms import TrainTransforms, ColorTransforms, ValTransforms + + mean=(0.406, 0.456, 0.485) + std=(0.225, 0.224, 0.229) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + + img_size = 640 + dataset = COCODataset( + data_dir='/mnt/share/ssd2/dataset/COCO/', + img_size=img_size, + image_set='train2017', + transform=TrainTransforms(img_size), + color_augment=ColorTransforms(img_size), + mosaic=True, + mixup=True) + + np.random.seed(0) + class_colors = [(np.random.randint(255), + np.random.randint(255), + np.random.randint(255)) for _ in range(80)] + print('Data length: ', len(dataset)) + for i in range(1000): + image, target, _, _, _, _ = dataset.pull_item(i) + image = image.permute(1, 2, 0).numpy()[:, :, (2, 1, 0)] + image = ((image * std + mean)*255).astype(np.uint8) + image = image.copy() + + for box in target: + x1, y1, x2, y2, cls_id = box + cls_id = int(cls_id.item()) + color = class_colors[cls_id] + # class name + label = coco_class_labels[coco_class_index[cls_id]] + # bbox + x1 *= img_size + y1 *= img_size + x2 *= img_size + y2 *= img_size + image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2) + # put the test on the bbox + cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA) + cv2.imshow('gt', image) + # cv2.imwrite(str(i)+'.jpg', img) + cv2.waitKey(0) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/COCO2017.sh b/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/COCO2017.sh new file mode 100644 index 0000000000..6adddfcb36 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/COCO2017.sh @@ -0,0 +1,20 @@ +mkdir COCO +cd COCO + +wget http://images.cocodataset.org/zips/train2017.zip +wget http://images.cocodataset.org/zips/val2017.zip +wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip +wget http://images.cocodataset.org/zips/test2017.zip +wget http://images.cocodataset.org/annotations/image_info_test2017.zip  + +unzip train2017.zip +unzip val2017.zip +unzip annotations_trainval2017.zip +unzip test2017.zip +unzip image_info_test2017.zip + +# rm -f train2017.zip +# rm -f val2017.zip +# rm -f annotations_trainval2017.zip +# rm -f test2017.zip +# rm -f image_info_test2017.zip diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2007.sh b/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2007.sh new file mode 100644 index 0000000000..9d53c8e990 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2007.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Ellis Brown + +start=`date +%s` + +# handle optional download dir +if [ -z "$1" ] + then + # navigate to ~/data + echo "navigating to ~/data/ ..." + mkdir -p ~/data + cd ~/data/ + else + # check if is valid directory + if [ ! -d $1 ]; then + echo $1 "is not a valid directory" + exit 0 + fi + echo "navigating to" $1 "..." 
+ cd $1 +fi + +echo "Downloading VOC2007 trainval ..." +# Download the data. +curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar +echo "Downloading VOC2007 test data ..." +curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar +echo "Done downloading." + +# Extract data +echo "Extracting trainval ..." +tar -xvf VOCtrainval_06-Nov-2007.tar +echo "Extracting test ..." +tar -xvf VOCtest_06-Nov-2007.tar +echo "removing tars ..." +rm VOCtrainval_06-Nov-2007.tar +rm VOCtest_06-Nov-2007.tar + +end=`date +%s` +runtime=$((end-start)) + +echo "Completed in" $runtime "seconds" \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2012.sh b/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2012.sh new file mode 100644 index 0000000000..0a00f38969 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/data/scripts/VOC2012.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# Ellis Brown + +start=`date +%s` + +# handle optional download dir +if [ -z "$1" ] + then + # navigate to ~/data + echo "navigating to ~/data/ ..." + mkdir -p ~/data + cd ~/data/ + else + # check if is valid directory + if [ ! -d $1 ]; then + echo $1 "is not a valid directory" + exit 0 + fi + echo "navigating to" $1 "..." + cd $1 +fi + +echo "Downloading VOC2012 trainval ..." +# Download the data. +curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar +echo "Done downloading." + + +# Extract data +echo "Extracting trainval ..." +tar -xvf VOCtrainval_11-May-2012.tar +echo "removing tar ..." +rm VOCtrainval_11-May-2012.tar + +end=`date +%s` +runtime=$((end-start)) + +echo "Completed in" $runtime "seconds" \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/transforms.py b/PyTorch/contrib/cv/detection/YoloV2-640/data/transforms.py new file mode 100644 index 0000000000..09e2bd2c9f --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/data/transforms.py @@ -0,0 +1,423 @@ +import cv2 +import torch +import numpy as np +from numpy import random +import torch_npu + + +def intersect(box_a, box_b): + max_xy = np.minimum(box_a[:, 2:], box_b[2:]) + min_xy = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: Multiple bounding boxes, Shape: [num_boxes,4] + box_b: Single bounding box, Shape: [4] + Return: + jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])) # [A,B] + area_b = ((box_b[2]-box_b[0]) * + (box_b[3]-box_b[1])) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +class Compose(object): + """Composes several augmentations together. + Args: + transforms (List[Transform]): list of transforms to compose. 
+ Example: + >>> augmentations.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, boxes=None, labels=None, scale=None, offset=None): + for t in self.transforms: + img, boxes, labels, scale, offset = t(img, boxes, labels, scale, offset) + return img, boxes, labels, scale, offset + + +class ConvertFromInts(object): + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + return image.astype(np.float32), boxes, labels, scale, offset + + +class ToAbsoluteCoords(object): + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + height, width, channels = image.shape + boxes[:, 0] *= width + boxes[:, 2] *= width + boxes[:, 1] *= height + boxes[:, 3] *= height + + return image, boxes, labels, scale, offset + + +class ToPercentCoords(object): + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + height, width, channels = image.shape + boxes[:, 0] /= width + boxes[:, 2] /= width + boxes[:, 1] /= height + boxes[:, 3] /= height + + return image, boxes, labels, scale, offset + + +# ColorJitter +class ColorJitter(object): + def __init__(self): + self.pd = [ + RandomContrast(), + ConvertColor(transform='HSV'), + RandomSaturation(), + RandomHue(), + ConvertColor(current='HSV', transform='BGR'), + RandomContrast() + ] + self.rand_brightness = RandomBrightness() + + def __call__(self, image, boxes, labels, scale=None, offset=None): + im = image.copy() + im, boxes, labels, scale, offset = self.rand_brightness(im, boxes, labels, scale, offset) + if random.randint(2): + distort = Compose(self.pd[:-1]) + else: + distort = Compose(self.pd[1:]) + im, boxes, labels, scale, offset = distort(im, boxes, labels, scale, offset) + return im, boxes, labels, scale, offset + + +class RandomSaturation(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + if random.randint(2): + image[:, :, 1] *= random.uniform(self.lower, self.upper) + + return image, boxes, labels, scale, offset + + +class RandomHue(object): + def __init__(self, delta=18.0): + assert delta >= 0.0 and delta <= 360.0 + self.delta = delta + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + if random.randint(2): + image[:, :, 0] += random.uniform(-self.delta, self.delta) + image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 + image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 + return image, boxes, labels, scale, offset + + +class ConvertColor(object): + def __init__(self, current='BGR', transform='HSV'): + self.transform = transform + self.current = current + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + if self.current == 'BGR' and self.transform == 'HSV': + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + elif self.current == 'HSV' and self.transform == 'BGR': + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + else: + raise NotImplementedError + return image, boxes, labels, scale, offset + + +class RandomContrast(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." 
+ + # expects float image + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + image *= alpha + return image, boxes, labels, scale, offset + + +class RandomBrightness(object): + def __init__(self, delta=32): + assert delta >= 0.0 + assert delta <= 255.0 + self.delta = delta + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + if random.randint(2): + delta = random.uniform(-self.delta, self.delta) + image += delta + return image, boxes, labels, scale, offset + + +# RandomCrop +class RandomSampleCrop(object): + """Crop + Arguments: + img (Image): the image being input during training + boxes (Tensor): the original bounding boxes in pt form + labels (Tensor): the class labels for each bbox + mode (float tuple): the min and max jaccard overlaps + Return: + (img, boxes, classes) + img (Image): the cropped image + boxes (Tensor): the adjusted bounding boxes in pt form + labels (Tensor): the class labels for each bbox + """ + def __init__(self): + self.sample_options = ( + # using entire original input image + None, + # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9 + (0.1, None), + (0.3, None), + (0.7, None), + (0.9, None), + # randomly sample a patch + (None, None), + ) + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + height, width, _ = image.shape + while True: + # randomly choose a mode + sample_id = np.random.randint(len(self.sample_options)) + mode = self.sample_options[sample_id] + if mode is None: + return image, boxes, labels, scale, offset + + min_iou, max_iou = mode + if min_iou is None: + min_iou = float('-inf') + if max_iou is None: + max_iou = float('inf') + + # max trails (50) + for _ in range(50): + current_image = image + + w = random.uniform(0.3 * width, width) + h = random.uniform(0.3 * height, height) + + # aspect ratio constraint b/t .5 & 2 + if h / w < 0.5 or h / w > 2: + continue + + left = random.uniform(width - w) + top = random.uniform(height - h) + + # convert to integer rect x1,y1,x2,y2 + rect = np.array([int(left), int(top), int(left+w), int(top+h)]) + + # calculate IoU (jaccard overlap) b/t the cropped and gt boxes + overlap = jaccard_numpy(boxes, rect) + + # is min and max overlap constraint satisfied? if not try again + if overlap.min() < min_iou and max_iou < overlap.max(): + continue + + # cut the crop from the image + current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], + :] + + # keep overlap with gt box IF center in sampled patch + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + + # mask in all gt boxes that above and to the left of centers + m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) + + # mask in all gt boxes that under and to the right of centers + m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) + + # mask in that both m1 and m2 are true + mask = m1 * m2 + + # have any valid boxes? 
try again if not + if not mask.any(): + continue + + # take only matching gt boxes + current_boxes = boxes[mask, :].copy() + + # take only matching gt labels + current_labels = labels[mask] + + # should we use the box left and top corner or the crop's + current_boxes[:, :2] = np.maximum(current_boxes[:, :2], + rect[:2]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, :2] -= rect[:2] + + current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], + rect[2:]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, 2:] -= rect[:2] + + return current_image, current_boxes, current_labels, scale, offset + + +# RandomHFlip +class RandomHFlip(object): + def __call__(self, image, boxes, classes, scale=None, offset=None): + _, width, _ = image.shape + if random.randint(2): + image = image[:, ::-1] + boxes = boxes.copy() + boxes[:, 0::2] = width - boxes[:, 2::-2] + return image, boxes, classes, scale, offset + + +# Normalize image +class Normalize(object): + def __init__(self, mean=None, std=None): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + image = image.astype(np.float32) + image /= 255. + image -= self.mean + image /= self.std + + return image, boxes, labels, scale, offset + + +# Resize +class Resize(object): + def __init__(self, size=640, mean=None): + self.size = size + self.mean = np.array([v*255 for v in mean]) + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + h0, w0, _ = image.shape + + if h0 > w0: + # resize + r = w0 / h0 + image = cv2.resize(image, (int(r * self.size), self.size)).astype(np.float32) + # zero padding + h, w, _ = image.shape + image_ = np.ones([h, h, 3]) * self.mean + dw = h - w + left = dw // 2 + image_[:, left:left+w, :] = image + offset = np.array([[ left / h, 0., left / h, 0.]]) + scale = np.array([[w / h, 1., w / h, 1.]]) + + elif h0 < w0: + # resize + r = h0 / w0 + image = cv2.resize(image, (self.size, int(r * self.size))).astype(np.float32) + # zero padding + h, w, _ = image.shape + image_ = np.ones([w, w, 3]) * self.mean + dh = w - h + top = dh // 2 + image_[top:top+h, :, :] = image + offset = np.array([[0., top / w, 0., top / w]]) + scale = np.array([1., h / w, 1., h / w]) + + else: + # resize + if h0 == self.size: + image_ = image + else: + image_ = cv2.resize(image, (self.size, self.size)).astype(np.float32) + offset = np.zeros([1, 4]) + scale = 1. 
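+ # Worked example (illustrative, not part of the original code): for a 480x640
+ # (h0 < w0) input with size=640, the image is resized to 640x480 and padded to
+ # 640x640 with top=80, giving offset = [0., 0.125, 0., 0.125] and
+ # scale = [1., 0.75, 1., 0.75]; the normalized boxes below are then mapped
+ # into the padded square via boxes * scale + offset (a y of 0.5 stays at 0.5).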
+ + if boxes is not None: + boxes = boxes * scale + offset + + return image_, boxes, labels, scale, offset + + +# convert ndarray image to tensor type +class ToTensor(object): + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + # to rgb + image = image[..., (2, 1, 0)] + return torch.from_numpy(image).permute(2, 0, 1).float(), boxes, labels, scale, offset + + +# TrainTransform +class TrainTransforms(object): + def __init__(self, size=640, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): + self.mean = mean + self.size = size + self.std = std + self.augment = Compose([ + ConvertFromInts(), + ToAbsoluteCoords(), + ColorJitter(), + RandomSampleCrop(), + RandomHFlip(), + ToPercentCoords(), + Resize(self.size, self.mean), + Normalize(self.mean, self.std), + ToTensor() + ]) + + def __call__(self, image, boxes, labels, scale=None, offset=None): + return self.augment(image, boxes, labels, scale, offset) + + +# ColorTransform +class ColorTransforms(object): + def __init__(self, size=640, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): + self.mean = mean + self.size = size + self.std = std + self.augment = Compose([ + ConvertFromInts(), + ToAbsoluteCoords(), + ColorJitter(), + RandomHFlip(), + ToPercentCoords(), + Resize(self.size, self.mean), + Normalize(self.mean, self.std), + ToTensor() + ]) + + def __call__(self, image, boxes, labels, scale=None, offset=None): + return self.augment(image, boxes, labels, scale, offset) + + +# ValTransform +class ValTransforms(object): + def __init__(self, size=640, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): + self.size = size + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.augment = Compose([ + Resize(self.size, self.mean), + Normalize(self.mean, self.std), + ToTensor() + ]) + + + def __call__(self, image, boxes=None, labels=None, scale=None, offset=None): + return self.augment(image, boxes, labels, scale, offset) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/data/voc.py b/PyTorch/contrib/cv/detection/YoloV2-640/data/voc.py new file mode 100644 index 0000000000..f98abe232d --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/data/voc.py @@ -0,0 +1,342 @@ +"""VOC Dataset Classes + +Original author: Francisco Massa +https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py + +Updated by: Ellis Brown, Max deGroot +""" +import os.path as osp +import torch.utils.data as data +import cv2 +import random +import numpy as np +import xml.etree.ElementTree as ET +import torch_npu + + +VOC_CLASSES = ( # always index 0 + 'aeroplane', 'bicycle', 'bird', 'boat', + 'bottle', 'bus', 'car', 'cat', 'chair', + 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', + 'sheep', 'sofa', 'train', 'tvmonitor') + + +class VOCAnnotationTransform(object): + """Transforms a VOC annotation into a Tensor of bbox coords and label index + Initilized with a dictionary lookup of classnames to indexes + + Arguments: + class_to_ind (dict, optional): dictionary lookup of classnames -> indexes + (default: alphabetic indexing of VOC's 20 classes) + keep_difficult (bool, optional): keep difficult instances or not + (default: False) + height (int): height + width (int): width + """ + + def __init__(self, class_to_ind=None, keep_difficult=False): + self.class_to_ind = class_to_ind or dict( + zip(VOC_CLASSES, range(len(VOC_CLASSES)))) + self.keep_difficult = keep_difficult + + def __call__(self, target, width, height): + """ + Arguments: + target 
(annotation) : the target annotation to be made usable + will be an ET.Element + Returns: + a list containing lists of bounding boxes [bbox coords, class name] + """ + res = [] + for obj in target.iter('object'): + difficult = int(obj.find('difficult').text) == 1 + if not self.keep_difficult and difficult: + continue + name = obj.find('name').text.lower().strip() + bbox = obj.find('bndbox') + + pts = ['xmin', 'ymin', 'xmax', 'ymax'] + bndbox = [] + for i, pt in enumerate(pts): + cur_pt = int(bbox.find(pt).text) - 1 + # scale height or width + cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height + bndbox.append(cur_pt) + label_idx = self.class_to_ind[name] + bndbox.append(label_idx) + res += [bndbox] # [x1, y1, x2, y2, label_ind] + # img_id = target.find('filename').text[:-4] + + return res # [[x1, y1, x2, y2, label_ind], ... ] + + +class VOCDetection(data.Dataset): + """VOC Detection Dataset Object + + input is image, target is annotation + + Arguments: + root (string): filepath to VOCdevkit folder. + image_set (string): imageset to use (eg. 'train', 'val', 'test') + transform (callable, optional): transformation to perform on the + input image + target_transform (callable, optional): transformation to perform on the + target `annotation` + (eg: take in caption string, return tensor of word indices) + dataset_name (string, optional): which dataset to load + (default: 'VOC2007') + """ + + def __init__(self, + data_dir=None, + img_size=640, + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + transform=None, + color_augment=None, + target_transform=VOCAnnotationTransform(), + mosaic=False, + mixup=False): + self.root = data_dir + self.img_size = img_size + self.image_set = image_sets + self.target_transform = target_transform + self._annopath = osp.join('%s', 'Annotations', '%s.xml') + self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg') + self.ids = list() + for (year, name) in image_sets: + rootpath = osp.join(self.root, 'VOC' + year) + for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')): + self.ids.append((rootpath, line.strip())) + # augmentation + self.transform = transform + self.mosaic = mosaic + self.mixup = mixup + self.color_augment = color_augment + if self.mosaic: + print('use Mosaic Augmentation ...') + if self.mixup: + print('use MixUp Augmentation ...') + + + def __getitem__(self, index): + im, gt, h, w, scale, offset = self.pull_item(index) + return im, gt + + + def __len__(self): + return len(self.ids) + + + def load_img_targets(self, img_id): + # load an image + img = cv2.imread(self._imgpath % img_id) + height, width, channels = img.shape + + # laod a target + target = ET.parse(self._annopath % img_id).getroot() + if self.target_transform is not None: + target = self.target_transform(target, width, height) + + return img, target, height, width + + + def load_mosaic(self, index): + ids_list_ = self.ids[:index] + self.ids[index+1:] + # random sample other indexs + id1 = self.ids[index] + id2, id3, id4 = random.sample(ids_list_, 3) + ids = [id1, id2, id3, id4] + + img_lists = [] + tg_lists = [] + # load image and target + for id_ in ids: + img_i, target_i, _, _ = self.load_img_targets(id_) + img_lists.append(img_i) + tg_lists.append(target_i) + + mean = np.array([v*255 for v in self.transform.mean]) + mosaic_img = np.ones([self.img_size*2, self.img_size*2, img_i.shape[2]], dtype=np.uint8) * mean + # mosaic center + yc, xc = [int(random.uniform(-x, 2*self.img_size + x)) for x in [-self.img_size // 2, -self.img_size // 2]] + # yc = xc = 
self.img_size + + mosaic_tg = [] + for i in range(4): + img_i, target_i = img_lists[i], tg_lists[i] + target_i = np.array(target_i) + h0, w0, _ = img_i.shape + + # resize + scale_range = np.arange(50, 210, 10) + s = np.random.choice(scale_range) / 100. + + if np.random.randint(2): + # keep aspect ratio + r = self.img_size / max(h0, w0) + if r != 1: + img_i = cv2.resize(img_i, (int(w0 * r * s), int(h0 * r * s))) + else: + img_i = cv2.resize(img_i, (int(self.img_size * s), int(self.img_size * s))) + h, w, _ = img_i.shape + + # place img in img4 + if i == 0: # top left + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, self.img_size * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(self.img_size * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, self.img_size * 2), min(self.img_size * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + mosaic_img[y1a:y2a, x1a:x2a] = img_i[y1b:y2b, x1b:x2b] + padw = x1a - x1b + padh = y1a - y1b + + # labels + target_i_ = target_i.copy() + if len(target_i) > 0: + # a valid target, and modify it. + target_i_[:, 0] = (w * (target_i[:, 0]) + padw) + target_i_[:, 1] = (h * (target_i[:, 1]) + padh) + target_i_[:, 2] = (w * (target_i[:, 2]) + padw) + target_i_[:, 3] = (h * (target_i[:, 3]) + padh) + # check boxes + valid_tgt = [] + for tgt in target_i_: + x1, y1, x2, y2, label = tgt + bw, bh = x2 - x1, y2 - y1 + if bw > 5. 
and bh > 5.: + valid_tgt.append([x1, y1, x2, y2, label]) + if len(valid_tgt) == 0: + valid_tgt.append([0., 0., 0., 0., 0.]) + + mosaic_tg.append(target_i_) + # check target + if len(mosaic_tg) == 0: + mosaic_tg = np.zeros([1, 5]) + else: + mosaic_tg = np.concatenate(mosaic_tg, axis=0) + # Cutout/Clip targets + np.clip(mosaic_tg[:, :4], 0, 2 * self.img_size, out=mosaic_tg[:, :4]) + # normalize + mosaic_tg[:, :4] /= (self.img_size * 2) + + return mosaic_img, mosaic_tg, self.img_size, self.img_size + + + def pull_item(self, index): + # load a mosaic image + if self.mosaic and np.random.randint(2): + # mosaic + img, target, height, width = self.load_mosaic(index) + + # MixUp https://arxiv.org/pdf/1710.09412.pdf + if self.mixup and np.random.randint(2): + img2, target2, height, width = self.load_mosaic(np.random.randint(0, len(self.ids))) + r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 + img = (img * r + img2 * (1 - r)).astype(np.uint8) + target = np.concatenate((target, target2), 0) + + # augment + img, boxes, labels, scale, offset = self.color_augment(img, target[:, :4], target[:, 4]) + + # load an image and target + else: + img_id = self.ids[index] + img, target, height, width = self.load_img_targets(img_id) + if len(target) == 0: + target = np.zeros([1, 5]) + else: + target = np.array(target) + # augment + img, boxes, labels, scale, offset = self.transform(img, target[:, :4], target[:, 4]) + + target = np.hstack((boxes, np.expand_dims(labels, axis=1))) + + return img, target, height, width, scale, offset + + + def pull_image(self, index): + '''Returns the original image object at index in PIL form + + Note: not using self.__getitem__(), as any transformations passed in + could mess up this functionality. + + Argument: + index (int): index of img to show + Return: + PIL img + ''' + img_id = self.ids[index] + return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR), img_id + + + def pull_anno(self, index): + '''Returns the original annotation of image at index + + Note: not using self.__getitem__(), as any transformations passed in + could mess up this functionality. 
+ + Argument: + index (int): index of img to get annotation of + Return: + list: [img_id, [(label, bbox coords),...]] + eg: ('001718', [('dog', (96, 13, 438, 332))]) + ''' + img_id = self.ids[index] + anno = ET.parse(self._annopath % img_id).getroot() + gt = self.target_transform(anno, 1, 1) + return img_id[1], gt + + +if __name__ == "__main__": + from transforms import TrainTransforms, ColorTransforms, ValTransforms + + mean=(0.406, 0.456, 0.485) + std=(0.225, 0.224, 0.229) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + + img_size = 640 + dataset = VOCDetection( + data_dir='d:/datasets/VOCdevkit/', + img_size=img_size, + transform=ValTransforms(img_size), + color_augment=ColorTransforms(img_size), + mosaic=True, + mixup=True) + + np.random.seed(0) + class_colors = [(np.random.randint(255), + np.random.randint(255), + np.random.randint(255)) for _ in range(20)] + print('Data length: ', len(dataset)) + for i in range(len(dataset)): + image, target, _, _, _, _ = dataset.pull_item(i) + image = image.permute(1, 2, 0).numpy()[:, :, (2, 1, 0)] + image = ((image * std + mean)*255).astype(np.uint8) + image = image.copy() + + for box in target: + x1, y1, x2, y2, cls_id = box + cls_id = int(cls_id) + color = class_colors[cls_id] + # class name + label = VOC_CLASSES[cls_id] + x1 *= img_size + y1 *= img_size + x2 *= img_size + y2 *= img_size + image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2) + # put the test on the bbox + cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA) + cv2.imshow('gt', image) + # cv2.imwrite(str(i)+'.jpg', img) + cv2.waitKey(0) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/demo.py b/PyTorch/contrib/cv/detection/YoloV2-640/demo.py new file mode 100644 index 0000000000..0481695818 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/demo.py @@ -0,0 +1,249 @@ +import argparse +import os +import cv2 +import time +import numpy as np +import torch + +from config.yolo_config import yolo_config +from data.coco import coco_class_labels, coco_class_index +from data.transforms import ValTransforms +from models.yolo import build_model +import torch_npu + + + +def parse_args(): + parser = argparse.ArgumentParser(description='YOLO Demo Detection') + + # basic + parser.add_argument('--mode', default='image', + type=str, help='Use the data from image, video or camera') + parser.add_argument('--cuda', action='store_true', default=False, + help='Use cuda') + parser.add_argument('--path_to_img', default='data/demo/images/', + type=str, help='The path to image files') + parser.add_argument('--path_to_vid', default='data/demo/videos/', + type=str, help='The path to video files') + parser.add_argument('--path_to_save', default='det_results/images/', + type=str, help='The path to save the detection results') + parser.add_argument('--path_to_saveVid', default='data/video/result.avi', + type=str, help='The path to save the detection results video') + parser.add_argument('-vs', '--visual_threshold', default=0.3, + type=float, help='visual threshold') + + # model + parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') + parser.add_argument('--num_queries', type=int, default=4, + help='number of queris of YOLOQ') + parser.add_argument('--weight', default='weights/', + type=str, help='Trained state_dict file path to open') + parser.add_argument('-size', '--img_size', default=640, type=int, + 
help='img_size') + parser.add_argument('--conf_thresh', default=0.1, type=float, + help='NMS threshold') + parser.add_argument('--nms_thresh', default=0.45, type=float, + help='NMS threshold') + parser.add_argument('--center_sample', action='store_true', default=False, + help='center sample trick.') + + return parser.parse_args() + + +def plot_bbox_labels(img, bbox, label, cls_color, test_scale=0.4): + x1, y1, x2, y2 = bbox + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + t_size = cv2.getTextSize(label, 0, fontScale=1, thickness=2)[0] + # plot bbox + cv2.rectangle(img, (x1, y1), (x2, y2), cls_color, 2) + # plot title bbox + cv2.rectangle(img, (x1, y1-t_size[1]), (int(x1 + t_size[0] * test_scale), y1), cls_color, -1) + # put the test on the title bbox + cv2.putText(img, label, (int(x1), int(y1 - 5)), 0, test_scale, (0, 0, 0), 1, lineType=cv2.LINE_AA) + + return img + + +def visualize(img, bboxes, scores, cls_inds, class_colors, vis_thresh=0.3): + ts = 0.4 + for i, bbox in enumerate(bboxes): + if scores[i] > vis_thresh: + cls_color = class_colors[int(cls_inds[i])] + cls_id = coco_class_index[int(cls_inds[i])] + mess = '%s: %.2f' % (coco_class_labels[cls_id], scores[i]) + img = plot_bbox_labels(img, bbox, mess, cls_color, test_scale=ts) + + return img + + +def detect(net, + device, + transform, + vis_thresh, + mode='image', + path_to_img=None, + path_to_vid=None, + path_to_save=None): + # class color + class_colors = [(np.random.randint(255), + np.random.randint(255), + np.random.randint(255)) for _ in range(80)] + save_path = os.path.join(path_to_save, mode) + os.makedirs(save_path, exist_ok=True) + + # ------------------------- Camera ---------------------------- + if mode == 'camera': + print('use camera !!!') + cap = cv2.VideoCapture(0, cv2.CAP_DSHOW) + while True: + ret, frame = cap.read() + if ret: + if cv2.waitKey(1) == ord('q'): + break + img_h, img_w = frame.shape[:2] + size = np.array([[img_w, img_h, img_w, img_h]]) + # prepare + x, _, _, scale, offset = transform(frame) + x = x.unsqueeze(0).to(device) + # inference + t0 = time.time() + bboxes, scores, cls_inds = net(x) + t1 = time.time() + print("detection time used ", t1-t0, "s") + + # rescale + bboxes -= offset + bboxes /= scale + bboxes *= size + + frame_processed = visualize(img=frame, + bboxes=bboxes, + scores=scores, + cls_inds=cls_inds, + class_colors=class_colors, + vis_thresh=vis_thresh) + cv2.imshow('detection result', frame_processed) + cv2.waitKey(1) + else: + break + cap.release() + cv2.destroyAllWindows() + + # ------------------------- Image ---------------------------- + elif mode == 'image': + for i, img_id in enumerate(os.listdir(path_to_img)): + img = cv2.imread(path_to_img + '/' + img_id, cv2.IMREAD_COLOR) + img_h, img_w = img.shape[:2] + size = np.array([[img_w, img_h, img_w, img_h]]) + + # prepare + x, _, _, scale, offset = transform(img) + x = x.unsqueeze(0).to(device) + # inference + t0 = time.time() + bboxes, scores, cls_inds = net(x) + t1 = time.time() + print("detection time used ", t1-t0, "s") + + # rescale + bboxes -= offset + bboxes /= scale + bboxes *= size + + img_processed = visualize(img=img, + bboxes=bboxes, + scores=scores, + cls_inds=cls_inds, + class_colors=class_colors, + vis_thresh=vis_thresh) + + cv2.imshow('detection', img_processed) + cv2.imwrite(os.path.join(save_path, str(i).zfill(6)+'.jpg'), img_processed) + cv2.waitKey(0) + + # ------------------------- Video --------------------------- + elif mode == 'video': + video = cv2.VideoCapture(path_to_vid) + fourcc = 
cv2.VideoWriter_fourcc(*'XVID') + save_size = (640, 480) + save_path = os.path.join(save_path, 'det.avi') + fps = 15.0 + out = cv2.VideoWriter(save_path, fourcc, fps, save_size) + + while(True): + ret, frame = video.read() + + if ret: + # ------------------------- Detection --------------------------- + img_h, img_w = frame.shape[:2] + size = np.array([[img_w, img_h, img_w, img_h]]) + # prepare + x, _, _, scale, offset = transform(frame) + x = x.unsqueeze(0).to(device) + # inference + t0 = time.time() + bboxes, scores, cls_inds = net(x) + t1 = time.time() + print("detection time used ", t1-t0, "s") + + # rescale + bboxes -= offset + bboxes /= scale + bboxes *= size + + frame_processed = visualize(img=frame, + bboxes=bboxes, + scores=scores, + cls_inds=cls_inds, + class_colors=class_colors, + vis_thresh=vis_thresh) + + frame_processed_resize = cv2.resize(frame_processed, save_size) + out.write(frame_processed_resize) + cv2.imshow('detection', frame_processed) + cv2.waitKey(1) + else: + break + video.release() + out.release() + cv2.destroyAllWindows() + + +def run(): + args = parse_args() + + # use cuda + if args.cuda: + device = torch.device("npu") + else: + device = torch.device("cpu") + + # YOLO Config + cfg = yolo_config[args.model] + # build model + model = build_model(args=args, + cfg=cfg, + device=device, + num_classes=80, + trainable=False) + + # load weight + model.load_state_dict(torch.load(args.weight, map_location='cpu'), strict=False) + model = model.to(device).eval() + print('Finished loading model!') + + # run + detect(net=model, + device=device, + transform=ValTransforms(args.img_size), + mode=args.mode, + path_to_img=args.path_to_img, + path_to_vid=args.path_to_vid, + path_to_save=args.path_to_save, + vis_thresh=args.visual_threshold) + + +if __name__ == '__main__': + run() diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/env_npu.sh b/PyTorch/contrib/cv/detection/YoloV2-640/env_npu.sh new file mode 100644 index 0000000000..0c12c76322 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/env_npu.sh @@ -0,0 +1,79 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export 
OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=1 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |gawk '{print $1}') +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 +# HCCL默认超时时间120s较少,修改为1800s对齐PyTorch默认设置 +export HCCL_CONNECT_TIMEOUT=5400 +export HCCL_EXEC_TIMEOUT=5400 + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/eval.py b/PyTorch/contrib/cv/detection/YoloV2-640/eval.py new file mode 100644 index 
0000000000..e6e47646e4 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/eval.py @@ -0,0 +1,134 @@ +import argparse +import os + +import torch + +from config.yolo_config import yolo_config +from data.transforms import ValTransforms +from models.yolo import build_model +from utils.misc import TestTimeAugmentation + +from evaluator.vocapi_evaluator import VOCAPIEvaluator +from evaluator.cocoapi_evaluator import COCOAPIEvaluator +import torch_npu + + +parser = argparse.ArgumentParser(description='YOLO Detection') +# basic +parser.add_argument('-size', '--img_size', default=640, type=int, + help='img_size') +parser.add_argument('--cuda', action='store_true', default=False, + help='Use cuda') +# model +parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') +parser.add_argument('--weight', type=str, + default='weights/', + help='Trained state_dict file path to open') +parser.add_argument('--conf_thresh', default=0.001, type=float, + help='NMS threshold') +parser.add_argument('--nms_thresh', default=0.6, type=float, + help='NMS threshold') +parser.add_argument('--center_sample', action='store_true', default=False, + help='center sample trick.') +# dataset +parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') +parser.add_argument('-d', '--dataset', default='coco-val', + help='voc, coco-val, coco-test.') +# TTA +parser.add_argument('-tta', '--test_aug', action='store_true', default=False, + help='use test augmentation.') + +args = parser.parse_args() + + +def voc_test(model, data_dir, device, img_size): + evaluator = VOCAPIEvaluator(data_root=data_dir, + img_size=img_size, + device=device, + transform=ValTransforms(img_size), + display=True + ) + + # VOC evaluation + evaluator.evaluate(model) + + +def coco_test(model, data_dir, device, img_size, test=False): + if test: + # test-dev + print('test on test-dev 2017') + evaluator = COCOAPIEvaluator( + data_dir=data_dir, + img_size=img_size, + device=device, + testset=True, + transform=ValTransforms(img_size) + ) + + else: + # eval + evaluator = COCOAPIEvaluator( + data_dir=data_dir, + img_size=img_size, + device=device, + testset=False, + transform=ValTransforms(img_size) + ) + + # COCO evaluation + evaluator.evaluate(model) + + +if __name__ == '__main__': + # dataset + if args.dataset == 'voc': + print('eval on voc ...') + num_classes = 20 + data_dir = os.path.join(args.root, 'VOCdevkit') + elif args.dataset == 'coco-val': + print('eval on coco-val ...') + num_classes = 80 + data_dir = os.path.join(args.root, 'COCO') + elif args.dataset == 'coco-test': + print('eval on coco-test-dev ...') + num_classes = 80 + data_dir = os.path.join(args.root, 'COCO') + else: + print('unknow dataset !! 
we only support voc, coco-val, coco-test !!!') + exit(0) + + # cuda + if args.cuda: + print('use cuda') + device = torch.device("npu") + else: + device = torch.device("cpu") + + # YOLO Config + cfg = yolo_config[args.model] + # build model + model = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=False) + + # load weight + model.load_state_dict(torch.load(args.weight, map_location='cpu'), strict=False) + model = model.to(device).eval() + print('Finished loading model!') + + # TTA + test_aug = TestTimeAugmentation(num_classes=num_classes) if args.test_aug else None + + # evaluation + with torch.no_grad(): + if args.dataset == 'voc': + voc_test(model, data_dir, device, args.img_size) + elif args.dataset == 'coco-val': + coco_test(model, data_dir, device, args.img_size, test=False) + elif args.dataset == 'coco-test': + coco_test(model, data_dir, device, args.img_size, test=True) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/evaluator/cocoapi_evaluator.py b/PyTorch/contrib/cv/detection/YoloV2-640/evaluator/cocoapi_evaluator.py new file mode 100644 index 0000000000..9480ce3b59 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/evaluator/cocoapi_evaluator.py @@ -0,0 +1,135 @@ +import torch.nn.functional as F +import json +import tempfile +import torch +from data.coco import * +import torch_npu +try: + from pycocotools.cocoeval import COCOeval +except: + print("It seems that the COCOAPI is not installed.") + + +class COCOAPIEvaluator(): + """ + COCO AP Evaluation class. + All the data in the val2017 dataset are processed \ + and evaluated by COCO API. + """ + def __init__(self, data_dir, img_size, device, testset=False, transform=None): + """ + Args: + data_dir (str): dataset root directory + img_size (int): image size after preprocess. images are resized \ + to squares whose shape is (img_size, img_size). + confthre (float): + confidence threshold ranging from 0 to 1, \ + which is defined in the config file. + nmsthre (float): + IoU threshold of non-max supression ranging from 0 to 1. + """ + self.testset = testset + if self.testset: + image_set = 'test2017' + else: + image_set = 'val2017' + + self.dataset = COCODataset( + data_dir=data_dir, + image_set=image_set, + img_size=img_size, + transform=None) + self.img_size = img_size + self.transform = transform + self.device = device + + self.map = 0. + self.ap50_95 = 0. + self.ap50 = 0. + + def evaluate(self, model): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. 
+ Args: + model : model object + Returns: + ap50_95 (float) : calculated COCO AP for IoU=50:95 + ap50 (float) : calculated COCO AP for IoU=50 + """ + model.eval() + ids = [] + data_dict = [] + num_images = len(self.dataset) + print('total number of images: %d' % (num_images)) + + # start testing + for index in range(num_images): # all the data in val2017 + if index % 500 == 0: + print('[Eval: %d / %d]'%(index, num_images)) + + # load an image + img, id_ = self.dataset.pull_image(index) + h, w, _ = img.shape + size = np.array([[w, h, w, h]]) + + # preprocess + x, _, _, scale, offset = self.transform(img) + x = x.unsqueeze(0).to(self.device) + + id_ = int(id_) + ids.append(id_) + # inference + with torch.no_grad(): + outputs = model(x) + bboxes, scores, cls_inds = outputs + # map the boxes to original image + bboxes -= offset + bboxes /= scale + bboxes *= size + + for i, box in enumerate(bboxes): + x1 = float(box[0]) + y1 = float(box[1]) + x2 = float(box[2]) + y2 = float(box[3]) + label = self.dataset.class_ids[int(cls_inds[i])] + + bbox = [x1, y1, x2 - x1, y2 - y1] + score = float(scores[i]) # object score * class score + A = {"image_id": id_, "category_id": label, "bbox": bbox, + "score": score} # COCO json format + data_dict.append(A) + + annType = ['segm', 'bbox', 'keypoints'] + + # Evaluate the Dt (detection) json comparing with the ground truth + if len(data_dict) > 0: + print('evaluating ......') + cocoGt = self.dataset.coco + # workaround: temporarily write data to json file because pycocotools can't process dict in py36. + if self.testset: + json.dump(data_dict, open('coco_test-dev.json', 'w')) + cocoDt = cocoGt.loadRes('coco_test-dev.json') + return -1, -1 + else: + _, tmp = tempfile.mkstemp() + json.dump(data_dict, open(tmp, 'w')) + cocoDt = cocoGt.loadRes(tmp) + cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) + cocoEval.params.imgIds = ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] + print('ap50_95 : ', ap50_95) + print('ap50 : ', ap50) + self.map = ap50_95 + self.ap50_95 = ap50_95 + self.ap50 = ap50 + + return ap50, ap50_95 + else: + return 0, 0 + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/evaluator/vocapi_evaluator.py b/PyTorch/contrib/cv/detection/YoloV2-640/evaluator/vocapi_evaluator.py new file mode 100644 index 0000000000..43f09e93a8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/evaluator/vocapi_evaluator.py @@ -0,0 +1,347 @@ +"""Adapted from: + @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch + @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn + Licensed under The MIT License [see LICENSE for details] +""" + +from data.voc import VOCDetection, VOC_CLASSES +import os +import time +import numpy as np +import pickle +import xml.etree.ElementTree as ET + + +class VOCAPIEvaluator(): + """ VOC AP Evaluation class """ + def __init__(self, + data_dir, + img_size, + device, + transform, + set_type='test', + year='2007', + display=False): + self.data_dir = data_dir + self.img_size = img_size + self.device = device + self.transform = transform + self.labelmap = VOC_CLASSES + self.set_type = set_type + self.year = year + self.display = display + + # path + self.devkit_path = os.path.join(data_dir, 'VOC' + year) + self.annopath = os.path.join(data_dir, 'VOC2007', 'Annotations', '%s.xml') + self.imgpath = os.path.join(data_dir, 'VOC2007', 'JPEGImages', '%s.jpg') + self.imgsetpath = os.path.join(data_dir, 'VOC2007', 
'ImageSets', 'Main', set_type+'.txt') + self.output_dir = self.get_output_dir('voc_eval/', self.set_type) + + # dataset + self.dataset = VOCDetection(data_dir=data_dir, + image_sets=[('2007', set_type)], + transform=transform) + + def evaluate(self, net): + net.eval() + num_images = len(self.dataset) + # all detections are collected into: + # all_boxes[cls][image] = N x 5 array of detections in + # (x1, y1, x2, y2, score) + self.all_boxes = [[[] for _ in range(num_images)] + for _ in range(len(self.labelmap))] + + # timers + det_file = os.path.join(self.output_dir, 'detections.pkl') + + for i in range(num_images): + im, _ = self.dataset.pull_image(i) + h, w, _ = im.shape + size = np.array([[w, h, w, h]]) + + # preprocess + x, _, _, scale, offset = self.transform(im) + x = x.unsqueeze(0).to(self.device) + + t0 = time.time() + # forward + bboxes, scores, cls_inds = net(x) + detect_time = time.time() - t0 + # map the boxes to original image + bboxes -= offset + bboxes /= scale + bboxes *= size + + for j in range(len(self.labelmap)): + inds = np.where(cls_inds == j)[0] + if len(inds) == 0: + self.all_boxes[j][i] = np.empty([0, 5], dtype=np.float32) + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_dets = np.hstack((c_bboxes, + c_scores[:, np.newaxis])).astype(np.float32, + copy=False) + self.all_boxes[j][i] = c_dets + + if i % 500 == 0: + print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images, detect_time)) + + with open(det_file, 'wb') as f: + pickle.dump(self.all_boxes, f, pickle.HIGHEST_PROTOCOL) + + print('Evaluating detections') + self.evaluate_detections(self.all_boxes) + + print('Mean AP: ', self.map) + + + def parse_rec(self, filename): + """ Parse a PASCAL VOC xml file """ + tree = ET.parse(filename) + objects = [] + for obj in tree.findall('object'): + obj_struct = {} + obj_struct['name'] = obj.find('name').text + obj_struct['pose'] = obj.find('pose').text + obj_struct['truncated'] = int(obj.find('truncated').text) + obj_struct['difficult'] = int(obj.find('difficult').text) + bbox = obj.find('bndbox') + obj_struct['bbox'] = [int(bbox.find('xmin').text), + int(bbox.find('ymin').text), + int(bbox.find('xmax').text), + int(bbox.find('ymax').text)] + objects.append(obj_struct) + + return objects + + + def get_output_dir(self, name, phase): + """Return the directory where experimental artifacts are placed. + If the directory does not exist, it is created. + A canonical path is built using the name from an imdb and a network + (if not None). + """ + filedir = os.path.join(name, phase) + if not os.path.exists(filedir): + os.makedirs(filedir) + return filedir + + + def get_voc_results_file_template(self, cls): + # VOCdevkit/VOC2007/results/det_test_aeroplane.txt + filename = 'det_' + self.set_type + '_%s.txt' % (cls) + filedir = os.path.join(self.devkit_path, 'results') + if not os.path.exists(filedir): + os.makedirs(filedir) + path = os.path.join(filedir, filename) + return path + + + def write_voc_results_file(self, all_boxes): + for cls_ind, cls in enumerate(self.labelmap): + if self.display: + print('Writing {:s} VOC results file'.format(cls)) + filename = self.get_voc_results_file_template(cls) + with open(filename, 'wt') as f: + for im_ind, index in enumerate(self.dataset.ids): + dets = all_boxes[cls_ind][im_ind] + if dets == []: + continue + # the VOCdevkit expects 1-based indices + for k in range(dets.shape[0]): + f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
+ format(index[1], dets[k, -1], + dets[k, 0] + 1, dets[k, 1] + 1, + dets[k, 2] + 1, dets[k, 3] + 1)) + + + def do_python_eval(self, use_07=True): + cachedir = os.path.join(self.devkit_path, 'annotations_cache') + aps = [] + # The PASCAL VOC metric changed in 2010 + use_07_metric = use_07 + print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) + if not os.path.isdir(self.output_dir): + os.mkdir(self.output_dir) + for i, cls in enumerate(self.labelmap): + filename = self.get_voc_results_file_template(cls) + rec, prec, ap = self.voc_eval(detpath=filename, + classname=cls, + cachedir=cachedir, + ovthresh=0.5, + use_07_metric=use_07_metric + ) + aps += [ap] + print('AP for {} = {:.4f}'.format(cls, ap)) + with open(os.path.join(self.output_dir, cls + '_pr.pkl'), 'wb') as f: + pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) + if self.display: + self.map = np.mean(aps) + print('Mean AP = {:.4f}'.format(np.mean(aps))) + print('~~~~~~~~') + print('Results:') + for ap in aps: + print('{:.3f}'.format(ap)) + print('{:.3f}'.format(np.mean(aps))) + print('~~~~~~~~') + print('') + print('--------------------------------------------------------------') + print('Results computed with the **unofficial** Python eval code.') + print('Results should be very close to the official MATLAB eval code.') + print('--------------------------------------------------------------') + else: + self.map = np.mean(aps) + print('Mean AP = {:.4f}'.format(np.mean(aps))) + + + def voc_ap(self, rec, prec, use_07_metric=True): + """ ap = voc_ap(rec, prec, [use_07_metric]) + Compute VOC AP given precision and recall. + If use_07_metric is true, uses the + VOC 07 11 point method (default:True). + """ + if use_07_metric: + # 11 point metric + ap = 0. + for t in np.arange(0., 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11. 
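+ # Illustrative check (not part of the original code): with rec = [0.5, 1.0]
+ # and prec = [1.0, 0.5], the max precision is 1.0 at recall thresholds
+ # 0.0-0.5 (six points) and 0.5 at 0.6-1.0 (five points), so
+ # ap = (6*1.0 + 5*0.5)/11 ~= 0.773.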
+ else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + + def voc_eval(self, detpath, classname, cachedir, ovthresh=0.5, use_07_metric=True): + if not os.path.isdir(cachedir): + os.mkdir(cachedir) + cachefile = os.path.join(cachedir, 'annots.pkl') + # read list of images + with open(self.imgsetpath, 'r') as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + if not os.path.isfile(cachefile): + # load annots + recs = {} + for i, imagename in enumerate(imagenames): + recs[imagename] = self.parse_rec(self.annopath % (imagename)) + if i % 100 == 0 and self.display: + print('Reading annotation for {:d}/{:d}'.format( + i + 1, len(imagenames))) + # save + if self.display: + print('Saving cached annotations to {:s}'.format(cachefile)) + with open(cachefile, 'wb') as f: + pickle.dump(recs, f) + else: + # load + with open(cachefile, 'rb') as f: + recs = pickle.load(f) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj['name'] == classname] + bbox = np.array([x['bbox'] for x in R]) + difficult = np.array([x['difficult'] for x in R]).astype(np.bool) + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {'bbox': bbox, + 'difficult': difficult, + 'det': det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, 'r') as f: + lines = f.readlines() + if any(lines) == 1: + + splitlines = [x.strip().split(' ') for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + sorted_scores = np.sort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R['bbox'].astype(float) + if BBGT.size > 0: + # compute overlaps + # intersection + ixmin = np.maximum(BBGT[:, 0], bb[0]) + iymin = np.maximum(BBGT[:, 1], bb[1]) + ixmax = np.minimum(BBGT[:, 2], bb[2]) + iymax = np.minimum(BBGT[:, 3], bb[3]) + iw = np.maximum(ixmax - ixmin, 0.) + ih = np.maximum(iymax - iymin, 0.) + inters = iw * ih + uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) + + (BBGT[:, 2] - BBGT[:, 0]) * + (BBGT[:, 3] - BBGT[:, 1]) - inters) + overlaps = inters / uni + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + + if ovmax > ovthresh: + if not R['difficult'][jmax]: + if not R['det'][jmax]: + tp[d] = 1. + R['det'][jmax] = 1 + else: + fp[d] = 1. + else: + fp[d] = 1. + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = self.voc_ap(rec, prec, use_07_metric) + else: + rec = -1. + prec = -1. + ap = -1. 
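+ # No detection file entries exist for this class, so rec/prec/ap are returned
+ # as -1 sentinels; note that do_python_eval above still averages this value
+ # into the reported mean AP.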
+ + return rec, prec, ap + + + def evaluate_detections(self, box_list): + self.write_voc_results_file(box_list) + self.do_python_eval() + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/__init__.py new file mode 100644 index 0000000000..270c40cb74 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/__init__.py @@ -0,0 +1,84 @@ +from .resnet import resnet18, resnet50, resnet101 +from .darknet import darknet53 +from .cspdarknet_tiny import cspdarknet_tiny +from .cspdarknet53 import cspdarknet53 +from .yolox_backbone import yolox_cspdarknet_s, yolox_cspdarknet_m, yolox_cspdarknet_l, \ + yolox_cspdarknet_x, yolox_cspdarknet_tiny, yolox_cspdarknet_nano +from .shufflenetv2 import shufflenetv2 +from .vit import vit_base_patch16_224 + + +def build_backbone(model_name='r18', pretrained=False, freeze=None, img_size=224): + if model_name == 'r18': + print('Backbone: ResNet-18 ...') + model = resnet18(pretrained=pretrained) + feature_channels = [128, 256, 512] + strides = [8, 16, 32] + elif model_name == 'r50': + print('Backbone: ResNet-50 ...') + model = resnet50(pretrained=pretrained) + feature_channels = [512, 1024, 2048] + strides = [8, 16, 32] + elif model_name == 'r101': + print('Backbone: ResNet-101 ...') + model = resnet101(pretrained=pretrained) + feature_channels = [512, 1024, 2048] + strides = [8, 16, 32] + elif model_name == 'd53': + print('Backbone: DarkNet-53 ...') + model = darknet53(pretrained=pretrained) + feature_channels = [256, 512, 1024] + strides = [8, 16, 32] + elif model_name == 'cspd53': + print('Backbone: CSPDarkNet-53 ...') + model = cspdarknet53(pretrained=pretrained) + feature_channels = [256, 512, 1024] + strides = [8, 16, 32] + elif model_name == 'cspd_tiny': + print('Backbone: CSPDarkNet-Tiny ...') + model = cspdarknet_tiny(pretrained=pretrained) + feature_channels = [128, 256, 512] + strides = [8, 16, 32] + elif model_name == 'sfnet_v2': + print('Backbone: ShuffleNet-V2 ...') + model = shufflenetv2(pretrained=pretrained) + feature_channels = [116, 232, 464] + strides = [8, 16, 32] + elif model_name == 'vit_base_16': + print('Backbone: ViT_Base_16 ...') + model = vit_base_patch16_224(img_size=img_size, pretrained=pretrained) + feature_channels = [None, None, 768] + strides = [None, None, 16] + # YOLOX backbone + elif model_name == 'csp_s': + print('Backbone: YOLOX-CSPDarkNet-S ...') + model = yolox_cspdarknet_s(pretrained=pretrained, freeze=freeze) + feature_channels = [128, 256, 512] + strides = [8, 16, 32] + elif model_name == 'csp_m': + print('Backbone: YOLOX-CSPDarkNet-M ...') + model = yolox_cspdarknet_m(pretrained=pretrained, freeze=freeze) + feature_channels = [192, 384, 768] + strides = [8, 16, 32] + elif model_name == 'csp_l': + print('Backbone: YOLOX-CSPDarkNet-L ...') + model = yolox_cspdarknet_l(pretrained=pretrained, freeze=freeze) + feature_channels = [256, 512, 1024] + strides = [8, 16, 32] + elif model_name == 'csp_x': + print('Backbone: YOLOX-CSPDarkNet-X ...') + model = yolox_cspdarknet_x(pretrained=pretrained, freeze=freeze) + feature_channels = [320, 640, 1280] + strides = [8, 16, 32] + elif model_name == 'csp_t': + print('Backbone: YOLOX-CSPDarkNet-Tiny ...') + model = 
yolox_cspdarknet_tiny(pretrained=pretrained, freeze=freeze) + feature_channels = [96, 192, 384] + strides = [8, 16, 32] + elif model_name == 'csp_n': + print('Backbone: YOLOX-CSPDarkNet-Nano ...') + model = yolox_cspdarknet_nano(pretrained=pretrained, freeze=freeze) + feature_channels = [64, 128, 256] + strides = [8, 16, 32] + + return model, feature_channels, strides diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet53.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet53.py new file mode 100644 index 0000000000..40dcf833de --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet53.py @@ -0,0 +1,296 @@ +""" + This is a CSPDarkNet-53 with Mish. +""" +import os +import torch +import torch.nn as nn +import torch_npu + + +def ConvNormActivation(inplanes, + planes, + kernel_size=3, + stride=1, + padding=0, + dilation=1, + groups=1): + """ + A help function to build a 'conv-bn-activation' module + """ + layers = [] + layers.append(nn.Conv2d(inplanes, + planes, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=False)) + layers.append(nn.BatchNorm2d(planes, eps=1e-4, momentum=0.03)) + layers.append(nn.Mish(inplace=True)) + return nn.Sequential(*layers) + + +def make_cspdark_layer(block, + inplanes, + planes, + num_blocks, + is_csp_first_stage, + dilation=1): + downsample = ConvNormActivation( + inplanes=planes, + planes=planes if is_csp_first_stage else inplanes, + kernel_size=1, + stride=1, + padding=0 + ) + + layers = [] + for i in range(0, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes if is_csp_first_stage else inplanes, + downsample=downsample if i == 0 else None, + dilation=dilation + ) + ) + return nn.Sequential(*layers) + + +class DarkBlock(nn.Module): + + def __init__(self, + inplanes, + planes, + dilation=1, + downsample=None): + """Residual Block for DarkNet. + This module has the dowsample layer (optional), + 1x1 conv layer and 3x3 conv layer. + """ + super(DarkBlock, self).__init__() + + self.downsample = downsample + + self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-4, momentum=0.03) + self.bn2 = nn.BatchNorm2d(planes, eps=1e-4, momentum=0.03) + + self.conv1 = nn.Conv2d( + planes, + inplanes, + kernel_size=1, + stride=1, + padding=0, + bias=False + ) + + self.conv2 = nn.Conv2d( + inplanes, + planes, + kernel_size=3, + stride=1, + padding=dilation, + dilation=dilation, + bias=False + ) + + self.activation = nn.Mish(inplace=True) + + def forward(self, x): + if self.downsample is not None: + x = self.downsample(x) + + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.activation(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.activation(out) + + out += identity + + return out + + +class CrossStagePartialBlock(nn.Module): + """CSPNet: A New Backbone that can Enhance Learning Capability of CNN. + Refer to the paper for more details: https://arxiv.org/abs/1911.11929. + In this module, the inputs go throuth the base conv layer at the first, + and then pass the two partial transition layers. + 1. go throuth basic block (like DarkBlock) + and one partial transition layer. + 2. go throuth the other partial transition layer. + At last, They are concat into fuse transition layer. + Args: + inplanes (int): number of input channels. + planes (int): number of output channels + stage_layers (nn.Module): the basic block which applying CSPNet. + is_csp_first_stage (bool): Is the first stage or not. 
+ The number of input and output channels in the first stage of + CSPNet is different from other stages. + dilation (int): conv dilation + stride (int): stride for the base layer + """ + + def __init__(self, + inplanes, + planes, + stage_layers, + is_csp_first_stage, + dilation=1, + stride=2): + super(CrossStagePartialBlock, self).__init__() + + self.base_layer = ConvNormActivation( + inplanes, + planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation + ) + self.partial_transition1 = ConvNormActivation( + inplanes=planes, + planes=inplanes if not is_csp_first_stage else planes, + kernel_size=1, + stride=1, + padding=0 + ) + self.stage_layers = stage_layers + + self.partial_transition2 = ConvNormActivation( + inplanes=inplanes if not is_csp_first_stage else planes, + planes=inplanes if not is_csp_first_stage else planes, + kernel_size=1, + stride=1, + padding=0 + ) + self.fuse_transition = ConvNormActivation( + inplanes=planes if not is_csp_first_stage else planes * 2, + planes=planes, + kernel_size=1, + stride=1, + padding=0 + ) + + def forward(self, x): + x = self.base_layer(x) + + out1 = self.partial_transition1(x) + + out2 = self.stage_layers(x) + out2 = self.partial_transition2(out2) + + out = torch.cat([out2, out1], dim=1) + out = self.fuse_transition(out) + + return out + + +class CSPDarkNet53(nn.Module): + """CSPDarkNet backbone. + Refer to the paper for more details: https://arxiv.org/pdf/1804.02767 + Args: + depth (int): Depth of Darknet, from {53}. + num_stages (int): Darknet stages, normally 5. + with_csp (bool): Use cross stage partial connection or not. + out_features (List[str]): Output features. + norm_type (str): type of normalization layer. + res5_dilation (int): dilation for the last stage + """ + + def __init__(self): + super(CSPDarkNet53, self).__init__() + + self.block = DarkBlock + self.stage_blocks = (1, 2, 8, 8, 4) + self.with_csp = True + self.inplanes = 32 + + self.backbone = nn.ModuleDict() + self.layer_names = [] + # First stem layer + self.backbone["conv1"] = nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1, bias=False) + self.backbone["bn1"] = nn.BatchNorm2d(self.inplanes, eps=1e-4, momentum=0.03) + self.backbone["act1"] = nn.Mish(inplace=True) + + for i, num_blocks in enumerate(self.stage_blocks): + planes = 64 * 2 ** i + dilation = 1 + stride = 2 + layer = make_cspdark_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + is_csp_first_stage=True if i == 0 else False, + dilation=dilation + ) + layer = CrossStagePartialBlock( + self.inplanes, + planes, + stage_layers=layer, + is_csp_first_stage=True if i == 0 else False, + dilation=dilation, + stride=stride + ) + self.inplanes = planes + layer_name = 'layer{}'.format(i + 1) + self.backbone[layer_name]=layer + self.layer_names.append(layer_name) + + + def forward(self, x): + outputs = [] + x = self.backbone["conv1"](x) + x = self.backbone["bn1"](x) + x = self.backbone["act1"](x) + + for i, layer_name in enumerate(self.layer_names): + layer = self.backbone[layer_name] + x = layer(x) + outputs.append(x) + return outputs[-3:] # C3, C4, C5 + + +def cspdarknet53(pretrained=False): + """ + Create a CSPDarkNet. 
+ """ + model = CSPDarkNet53() + if pretrained: + print('Loading the pretrained model ...') + path_to_weight = os.path.dirname(os.path.abspath(__file__)) + '/weights/cspdarknet53/cspdarknet53.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') + # checkpoint state dict + checkpoint_state_dict = checkpoint.pop("model") + # model state dict + model_state_dict = model.state_dict() + # check + for k in list(checkpoint_state_dict.keys()): + if k in model_state_dict: + shape_model = tuple(model_state_dict[k].shape) + shape_checkpoint = tuple(checkpoint_state_dict[k].shape) + if shape_model != shape_checkpoint: + checkpoint_state_dict.pop(k) + else: + print(k) + + model.load_state_dict(checkpoint_state_dict, strict=False) + return model + + +if __name__=='__main__': + img_size = 512 + input = torch.ones(1, 3, img_size, img_size) + + model = cspdarknet53(pretrained=True) + output = model(input) + for y in output: + print(y.size()) + print(output[-1]) + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet_tiny.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet_tiny.py new file mode 100644 index 0000000000..1d0ae5e8f1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/cspdarknet_tiny.py @@ -0,0 +1,128 @@ +""" + This is a CSPDarkNet-53 with LaekyReLU. +""" +import os +import torch +import torch.nn as nn +import torch_npu + + +__all__ = ['cspdarkner53'] + + +class Conv(nn.Module): + def __init__(self, c1, c2, k, s=1, p=0, d=1, g=1, act=True): + super(Conv, self).__init__() + if act: + self.convs = nn.Sequential( + nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=False), + nn.BatchNorm2d(c2), + nn.LeakyReLU(0.1, inplace=True) + ) + else: + self.convs = nn.Sequential( + nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=False), + nn.BatchNorm2d(c2) + ) + + def forward(self, x): + return self.convs(x) + + +class ResidualBlock(nn.Module): + """ + basic residual block for CSP-Darknet + """ + def __init__(self, in_ch): + super(ResidualBlock, self).__init__() + self.conv1 = Conv(in_ch, in_ch, k=1) + self.conv2 = Conv(in_ch, in_ch, k=3, p=1, act=False) + self.act = nn.LeakyReLU(0.1, inplace=True) + + def forward(self, x): + h = self.conv2(self.conv1(x)) + out = self.act(x + h) + + return out + + +class CSPStage(nn.Module): + def __init__(self, c1, n=1): + super(CSPStage, self).__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, k=1) + self.cv2 = Conv(c1, c_, k=1) + self.res_blocks = nn.Sequential(*[ResidualBlock(in_ch=c_) for _ in range(n)]) + self.cv3 = Conv(2 * c_, c1, k=1) + + def forward(self, x): + y1 = self.cv1(x) + y2 = self.res_blocks(self.cv2(x)) + + return self.cv3(torch.cat([y1, y2], dim=1)) + + +# CSPDarkNet-Tiny +class CSPDarknetTiny(nn.Module): + """ + CSPDarknet_Tiny. 
+ """ + def __init__(self): + super(CSPDarknetTiny, self).__init__() + + self.layer_1 = nn.Sequential( + Conv(3, 16, k=3, p=1), + Conv(16, 32, k=3, p=1, s=2), + CSPStage(c1=32, n=1) # p1/2 + ) + self.layer_2 = nn.Sequential( + Conv(32, 64, k=3, p=1, s=2), + CSPStage(c1=64, n=1) # P2/4 + ) + self.layer_3 = nn.Sequential( + Conv(64, 128, k=3, p=1, s=2), + CSPStage(c1=128, n=1) # P3/8 + ) + self.layer_4 = nn.Sequential( + Conv(128, 256, k=3, p=1, s=2), + CSPStage(c1=256, n=1) # P4/16 + ) + self.layer_5 = nn.Sequential( + Conv(256, 512, k=3, p=1, s=2), + CSPStage(c1=512, n=1) # P5/32 + ) + + + def forward(self, x): + c1 = self.layer_1(x) + c2 = self.layer_2(c1) + c3 = self.layer_3(c2) + c4 = self.layer_4(c3) + c5 = self.layer_5(c4) + + return c3, c4, c5 + + +def cspdarknet_tiny(pretrained=False, **kwargs): + """Constructs a CSPDarknet53 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = CSPDarknetTiny() + if pretrained: + print('Loading the pretrained model ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + checkpoint = torch.load(path_to_dir + '/weights/cspdarknet_tiny/cspdarknet_tiny.pth', map_location='cpu') + model.load_state_dict(checkpoint, strict=False) + return model + + +if __name__ == '__main__': + import time + net = cspdarknet_tiny(pretrained=True) + x = torch.randn(1, 3, 224, 224) + t0 = time.time() + y = net(x) + t1 = time.time() + print('Time: ', t1 - t0) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/darknet.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/darknet.py new file mode 100644 index 0000000000..5ce99fd305 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/darknet.py @@ -0,0 +1,102 @@ +import torch +import torch.nn as nn +import os +import torch_npu + + +__all__ = ['darknet53'] + + +class Conv_BN_LeakyReLU(nn.Module): + def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): + super(Conv_BN_LeakyReLU, self).__init__() + self.convs = nn.Sequential( + nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), + nn.BatchNorm2d(out_channels), + nn.LeakyReLU(0.1, inplace=True) + ) + + def forward(self, x): + return self.convs(x) + + +class resblock(nn.Module): + def __init__(self, ch, nblocks=1): + super().__init__() + self.module_list = nn.ModuleList() + for _ in range(nblocks): + resblock_one = nn.Sequential( + Conv_BN_LeakyReLU(ch, ch//2, 1), + Conv_BN_LeakyReLU(ch//2, ch, 3, padding=1) + ) + self.module_list.append(resblock_one) + + def forward(self, x): + for module in self.module_list: + x = module(x) + x + return x + + +class DarkNet_53(nn.Module): + """ + DarkNet-53. 
+ """ + def __init__(self, num_classes=1000): + super(DarkNet_53, self).__init__() + # stride = 2 + self.layer_1 = nn.Sequential( + Conv_BN_LeakyReLU(3, 32, 3, padding=1), + Conv_BN_LeakyReLU(32, 64, 3, padding=1, stride=2), + resblock(64, nblocks=1) + ) + # stride = 4 + self.layer_2 = nn.Sequential( + Conv_BN_LeakyReLU(64, 128, 3, padding=1, stride=2), + resblock(128, nblocks=2) + ) + # stride = 8 + self.layer_3 = nn.Sequential( + Conv_BN_LeakyReLU(128, 256, 3, padding=1, stride=2), + resblock(256, nblocks=8) + ) + # stride = 16 + self.layer_4 = nn.Sequential( + Conv_BN_LeakyReLU(256, 512, 3, padding=1, stride=2), + resblock(512, nblocks=8) + ) + # stride = 32 + self.layer_5 = nn.Sequential( + Conv_BN_LeakyReLU(512, 1024, 3, padding=1, stride=2), + resblock(1024, nblocks=4) + ) + + # self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + # self.fc = nn.Linear(1024, num_classes) + + def forward(self, x, targets=None): + c1 = self.layer_1(x) + c2 = self.layer_2(c1) + c3 = self.layer_3(c2) + c4 = self.layer_4(c3) + c5 = self.layer_5(c4) + + return c3, c4, c5 + + +def darknet53(pretrained=False, **kwargs): + """Constructs a darknet-53 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = DarkNet_53() + if pretrained: + try: + print('Loading the pretrained model ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + checkpoint = torch.load(path_to_dir + '/weights/darknet53/darknet53.pth', map_location='cpu') + model.load_state_dict(checkpoint, strict=False) + except: + print('The pretrained weight can not be found ...') + pass + return model diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/resnet.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/resnet.py new file mode 100644 index 0000000000..1f4df9b046 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/resnet.py @@ -0,0 +1,227 @@ +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +import torch_npu +import torch.nn.functional as F + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + 
+ return out + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + +class ResNet(nn.Module): + + def __init__(self, block, layers, zero_init_residual=False): + super(ResNet, self).__init__() + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + c2 = self.layer1(x) + c3 = self.layer2(c2) + c4 = self.layer3(c3) + c5 = self.layer4(c4) + + return c3, c4, c5 + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + print('Loading the pretrained model ...') + # strict = False as we don't need fc layer params. + model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), strict=False) + return model + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + print('Loading the pretrained model ...') + model.load_state_dict(model_zoo.load_url(model_urls['resnet34']), strict=False) + return model + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + print('Loading the pretrained model ...') + model.load_state_dict(model_zoo.load_url(model_urls['resnet50']), strict=False) + return model + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + print('Loading the pretrained model ...') + model.load_state_dict(model_zoo.load_url(model_urls['resnet101']), strict=False) + return model + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + print('Loading the pretrained model ...') + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model + +if __name__=='__main__': + #model = torchvision.models.resnet50() + print("found ", torch_npu.npu.device_count(), " GPU(s)") + device = torch.device("npu") + model = resnet101(detection=True).to(device) + print(model) + + input = torch.randn(1, 3, 512, 512).to(device) + output = model(input) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/shufflenetv2.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/shufflenetv2.py new file mode 100644 index 0000000000..f7637cb60e --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/shufflenetv2.py @@ -0,0 +1,194 @@ +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +import torch_npu + + +model_urls = { + 'shufflenetv2_0.5x': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', + 'shufflenetv2_1.0x': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', + 'shufflenetv2_1.5x': None, + 'shufflenetv2_2.0x': None, +} + + +def channel_shuffle(x, groups): + # type: (torch.Tensor, int) -> torch.Tensor + batchsize, num_channels, height, width = x.data.size() + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, + channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x + + +class ShuffleV2Block(nn.Module): + def __init__(self, inp, oup, stride): + super(ShuffleV2Block, self).__init__() + + if not (1 <= stride <= 3): + raise ValueError('illegal stride value') + self.stride = stride + + branch_features = oup // 2 + assert (self.stride != 1) or (inp == branch_features << 1) + + if self.stride > 1: + self.branch1 = nn.Sequential( + self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(inp), + nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + else: + self.branch1 = nn.Sequential() + + self.branch2 = nn.Sequential( + nn.Conv2d(inp if (self.stride > 1) else 
branch_features, + branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(branch_features), + nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + + @staticmethod + def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): + return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) + + def forward(self, x): + if self.stride == 1: + x1, x2 = x.chunk(2, dim=1) + out = torch.cat((x1, self.branch2(x2)), dim=1) + else: + out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) + + out = channel_shuffle(out, 2) + + return out + + +class ShuffleNetV2(nn.Module): + def __init__(self, + model_size='1.0x', + out_stages=(2, 3, 4), + with_last_conv=False, + kernal_size=3): + super(ShuffleNetV2, self).__init__() + print('model size is ', model_size) + + self.stage_repeats = [4, 8, 4] + self.model_size = model_size + self.out_stages = out_stages + self.with_last_conv = with_last_conv + self.kernal_size = kernal_size + if model_size == '0.5x': + self._stage_out_channels = [24, 48, 96, 192, 1024] + elif model_size == '1.0x': + self._stage_out_channels = [24, 116, 232, 464, 1024] + elif model_size == '1.5x': + self._stage_out_channels = [24, 176, 352, 704, 1024] + elif model_size == '2.0x': + self._stage_out_channels = [24, 244, 488, 976, 2048] + else: + raise NotImplementedError + + # building first layer + input_channels = 3 + output_channels = self._stage_out_channels[0] + self.conv1 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + input_channels = output_channels + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] + for name, repeats, output_channels in zip( + stage_names, self.stage_repeats, self._stage_out_channels[1:]): + seq = [ShuffleV2Block(input_channels, output_channels, 2)] + for i in range(repeats - 1): + seq.append(ShuffleV2Block(output_channels, output_channels, 1)) + setattr(self, name, nn.Sequential(*seq)) + input_channels = output_channels + output_channels = self._stage_out_channels[-1] + + self._initialize_weights() + + + def _initialize_weights(self, pretrain=True): + print('init weights...') + for name, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + if 'first' in name: + nn.init.normal_(m.weight, 0, 0.01) + else: + nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + + def forward(self, x): + x = self.conv1(x) + x = self.maxpool(x) + output = [] + for i in range(2, 5): + stage = getattr(self, 'stage{}'.format(i)) + x = stage(x) + if i in self.out_stages: + output.append(x) + + return tuple(output) + + +def shufflenetv2(model_size='1.0x', pretrained=False, 
**kwargs): + """Constructs a shufflenetv2 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ShuffleNetV2(model_size=model_size) + if pretrained: + print('Loading the pretrained model ...') + url = model_urls['shufflenetv2_{}'.format(model_size)] + print('=> loading pretrained model {}'.format(url)) + model.load_state_dict(model_zoo.load_url(url), strict=False) + + return model + + +if __name__ == "__main__": + model = shufflenetv2(model_size='0.5x', pretrained=True) + print(model) + test_data = torch.rand(5, 3, 320, 320) + c3, c4, c5 = model(test_data) + print(c3.size()) + print(c4.size()) + print(c5.size()) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/vit.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/vit.py new file mode 100644 index 0000000000..73eb7383b8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/vit.py @@ -0,0 +1,378 @@ +# -------------------------------------------------------- +# Based on BEiT, timm, DINO and DeiT code bases +# https://github.com/microsoft/unilm/tree/master/beit +# https://github.com/rwightman/pytorch-image-models/tree/master/timm +# https://github.com/facebookresearch/deit +# https://github.com/facebookresearch/dino +# --------------------------------------------------------' +import os +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from functools import partial + +from timm.models.layers import drop_path, to_2tuple +from timm.models.registry import register_model +from timm.models.layers import trunc_normal_ as __call_trunc_normal_ +import torch_npu + + +def _cfg(url='', **kwargs): + return { + 'url': url, + 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None, + 'crop_pct': .9, 'interpolation': 'bicubic', + 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), + **kwargs + } + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
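+    With the timm `drop_path` used here, the residual branch of each sample is
+    zeroed with probability drop_prob during training (survivors are rescaled by
+    1 / (1 - drop_prob)); at inference time the module is a no-op.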
+ """ + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + def extra_repr(self) -> str: + return 'p={}'.format(self.drop_prob) + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + # x = self.drop(x) + # commit this for the orignal BERT implement + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__( + self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., + proj_drop=0., attn_head_dim=None): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + if attn_head_dim is not None: + head_dim = attn_head_dim + all_head_dim = head_dim * self.num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(all_head_dim)) + self.v_bias = nn.Parameter(torch.zeros(all_head_dim)) + else: + self.q_bias = None + self.v_bias = None + + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(all_head_dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv_bias = None + if self.q_bias is not None: + qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) + # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, -1) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., init_values=None, act_layer=nn.GELU, norm_layer=nn.LayerNorm, + attn_head_dim=None): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if init_values > 0: + self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True) + self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True) + else: + self.gamma_1, self.gamma_2 = None, None + + def forward(self, x): + if self.gamma_1 is None: + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + else: + x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x))) + x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) + self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x, **kwargs): + B, C, H, W = x.shape + # FIXME look at relaxing size constraints + assert H == self.img_size[0] and W == self.img_size[1], \ + f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." + x = self.proj(x).flatten(2).transpose(1, 2) + return x + +# sin-cos position encoding +# https://github.com/jadore801120/attention-is-all-you-need-pytorch/blob/master/transformer/Models.py#L31 +def get_sinusoid_encoding_table(n_position, d_hid): + ''' Sinusoid position encoding table ''' + # TODO: make it with torch instead of numpy + def get_position_angle_vec(position): + return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)] + + sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)]) + sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i + sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 + + return torch.FloatTensor(sinusoid_table).unsqueeze(0) + + +def trunc_normal_(tensor, mean=0., std=1.): + __call_trunc_normal_(tensor, mean=mean, std=std, a=-std, b=std) + + +__all__ = [ + 'pretrain_mae_base_patch16_224', + 'pretrain_mae_large_patch16_224', +] + + +class PretrainVisionTransformerEncoder(nn.Module): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=0, embed_dim=768, depth=12, + num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., + drop_path_rate=0., norm_layer=nn.LayerNorm, init_values=None, + use_learnable_pos_emb=False): + super().__init__() + self.num_classes = num_classes + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) + num_patches = self.patch_embed.num_patches + + # TODO: Add the cls token + # self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) + if use_learnable_pos_emb: + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) + else: + # sine-cosine positional embeddings + self.pos_embed = 
get_sinusoid_encoding_table(num_patches, embed_dim) + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule + self.blocks = nn.ModuleList([ + Block( + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, + init_values=init_values) + for i in range(depth)]) + self.norm = norm_layer(embed_dim) + self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + if use_learnable_pos_emb: + trunc_normal_(self.pos_embed, std=.02) + + # trunc_normal_(self.cls_token, std=.02) + self.apply(self._init_weights) + + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def get_num_layers(self): + return len(self.blocks) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token'} + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + x = self.patch_embed(x) + + x = x + self.pos_embed.type_as(x).to(x.device).clone().detach() + + B, _, C = x.shape + x = x.reshape(B, -1, C) + + for blk in self.blocks: + x = blk(x) + + x = self.norm(x) + return x + + def forward(self, x): + x = self.forward_features(x) + x = self.head(x) + return x + +class PretrainVisionTransformer(nn.Module): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + def __init__(self, + img_size=224, + patch_size=16, + encoder_in_chans=3, + encoder_num_classes=0, + encoder_embed_dim=768, + encoder_depth=12, + encoder_num_heads=12, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + init_values=0., + use_learnable_pos_emb=False, + num_classes=0, # avoid the error from create_fn in timm + in_chans=0, # avoid the error from create_fn in timm + ): + super().__init__() + self.encoder = PretrainVisionTransformerEncoder( + img_size=img_size, + patch_size=patch_size, + in_chans=encoder_in_chans, + num_classes=encoder_num_classes, + embed_dim=encoder_embed_dim, + depth=encoder_depth, + num_heads=encoder_num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rate, + norm_layer=norm_layer, + init_values=init_values, + use_learnable_pos_emb=use_learnable_pos_emb) + + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def get_num_layers(self): + return len(self.blocks) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token', 'mask_token'} + + def forward(self, x): + fmap_list = [] + x = self.encoder(x) # [B, N, C] + + fmp_h = self.encoder.patch_embed.img_size[0] // self.encoder.patch_embed.patch_size[0] + fmp_w = self.encoder.patch_embed.img_size[1] // self.encoder.patch_embed.patch_size[1] + # [B, N, C] -> [B, C, N] -> 
[B, C, H, W] + x = x.permute(0, 2, 1).contiguous().view(x.size(0), x.size(-1), fmp_h, fmp_w) + fmap_list.append(x) + + return fmap_list + + +@register_model +def vit_base_patch16_224(img_size=224, pretrained=False, **kwargs): + model = PretrainVisionTransformer( + img_size=img_size, + patch_size=16, + encoder_embed_dim=768, + encoder_depth=12, + encoder_num_heads=12, + encoder_num_classes=0, + mlp_ratio=4, + qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + **kwargs) + model.default_cfg = _cfg() + if pretrained: + try: + print('Loading the pretrained weights ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + checkpoint = torch.load(path_to_dir + '/weights/vit/pretrain_mae_vit_base_mask_0.75_400e.pth', map_location='cpu') + model.load_state_dict(checkpoint['model'], strict=False) + except: + print('The pretrained weight can not be found ...') + pass + return model + +if __name__ == '__main__': + x = torch.ones(2, 3, 224, 224) + model = vit_base_patch16_224(pretrained=True) + outputs = model(x) + for y in outputs: + print(y.size()) + print(y) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/weights/README.md b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/weights/README.md new file mode 100644 index 0000000000..ce687a1c95 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/weights/README.md @@ -0,0 +1,15 @@ +# darknet19, darknet53, darknet-tiny, darknet-light +darknet-tiny is designed by myself. It is a very simple and lightweight backbone. + +darknet-light is same to the backbone used in official TinyYOLOv3. + +For researchers in China, you can download them from BaiduYunDisk: + +link:https://pan.baidu.com/s/1Rm87Fcj1RXZFmeTUrDWANA + +password:qgzn + + +Also, you can download them from Google Drive: + +link: https://drive.google.com/drive/folders/15saMtvYiz3yfFNu5EnC7GSltEAvTImMB?usp=sharing diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/yolox_backbone.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/yolox_backbone.py new file mode 100644 index 0000000000..28f9ac719d --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/backbone/yolox_backbone.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
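+# CSPDarknet as used by YOLOX: a Focus stem followed by four "dark" stages.
+# dep_mul / wid_mul scale depth and width per model size; e.g. the "s" variant
+# (dep_mul=0.33, wid_mul=0.5) gives base_channels=32 and base_depth=1, so the
+# returned C3/C4/C5 maps carry 128/256/512 channels at strides 8/16/32.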
+import os +import torch +import torch.nn as nn +import torch_npu + + +class SiLU(nn.Module): + """export-friendly version of nn.SiLU()""" + + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + + +def get_activation(name="silu", inplace=True): + if name == "silu": + module = nn.SiLU(inplace=inplace) + elif name == "relu": + module = nn.ReLU(inplace=inplace) + elif name == "lrelu": + module = nn.LeakyReLU(0.1, inplace=inplace) + else: + raise AttributeError("Unsupported act type: {}".format(name)) + return module + + +class BaseConv(nn.Module): + """A Conv2d -> Batchnorm -> silu/leaky relu block""" + + def __init__( + self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu" + ): + super().__init__() + # same padding + pad = (ksize - 1) // 2 + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=ksize, + stride=stride, + padding=pad, + groups=groups, + bias=bias, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.act = get_activation(act, inplace=True) + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class DWConv(nn.Module): + """Depthwise Conv + Conv""" + + def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"): + super().__init__() + self.dconv = BaseConv( + in_channels, + in_channels, + ksize=ksize, + stride=stride, + groups=in_channels, + act=act, + ) + self.pconv = BaseConv( + in_channels, out_channels, ksize=1, stride=1, groups=1, act=act + ) + + def forward(self, x): + x = self.dconv(x) + return self.pconv(x) + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__( + self, + in_channels, + out_channels, + shortcut=True, + expansion=0.5, + depthwise=False, + act="silu", + ): + super().__init__() + hidden_channels = int(out_channels * expansion) + Conv = DWConv if depthwise else BaseConv + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act) + self.use_add = shortcut and in_channels == out_channels + + def forward(self, x): + y = self.conv2(self.conv1(x)) + if self.use_add: + y = y + x + return y + + +class SPPBottleneck(nn.Module): + """Spatial pyramid pooling layer used in YOLOv3-SPP""" + + def __init__( + self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu" + ): + super().__init__() + hidden_channels = in_channels // 2 + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation) + self.m = nn.ModuleList( + [ + nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) + for ks in kernel_sizes + ] + ) + conv2_channels = hidden_channels * (len(kernel_sizes) + 1) + self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation) + + def forward(self, x): + x = self.conv1(x) + x = torch.cat([x] + [m(x) for m in self.m], dim=1) + x = self.conv2(x) + return x + + +class CSPLayer(nn.Module): + """C3 in yolov5, CSP Bottleneck with 3 convolutions""" + + def __init__( + self, + in_channels, + out_channels, + n=1, + shortcut=True, + expansion=0.5, + depthwise=False, + act="silu", + ): + """ + Args: + in_channels (int): input channels. + out_channels (int): output channels. + n (int): number of Bottlenecks. Default value: 1. 
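+            shortcut (bool): add the residual connection inside each Bottleneck.
+            expansion (float): ratio of hidden channels to out_channels. Default value: 0.5.
+            depthwise (bool): use depthwise separable 3x3 convs in the Bottlenecks.
+            act (str): activation name, one of "silu", "relu" or "lrelu".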
+ """ + # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + hidden_channels = int(out_channels * expansion) # hidden channels + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act) + module_list = [ + Bottleneck( + hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act + ) + for _ in range(n) + ] + self.m = nn.Sequential(*module_list) + + def forward(self, x): + x_1 = self.conv1(x) + x_2 = self.conv2(x) + x_1 = self.m(x_1) + x = torch.cat((x_1, x_2), dim=1) + return self.conv3(x) + + +class Focus(nn.Module): + """Focus width and height information into channel space.""" + + def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"): + super().__init__() + self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act) + + def forward(self, x): + # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2) + patch_top_left = x[..., ::2, ::2] + patch_top_right = x[..., ::2, 1::2] + patch_bot_left = x[..., 1::2, ::2] + patch_bot_right = x[..., 1::2, 1::2] + x = torch.cat( + ( + patch_top_left, + patch_bot_left, + patch_top_right, + patch_bot_right, + ), + dim=1, + ) + return self.conv(x) + + +# CSPDarkNet +class CSPDarknet(nn.Module): + def __init__( + self, + dep_mul, + wid_mul, + out_features=("dark3", "dark4", "dark5"), + depthwise=False, + act="silu", + ): + super().__init__() + assert out_features, "please provide output features of Darknet" + self.out_features = out_features + Conv = DWConv if depthwise else BaseConv + + base_channels = int(wid_mul * 64) # 64 + base_depth = max(round(dep_mul * 3), 1) # 3 + + # stem + self.stem = Focus(3, base_channels, ksize=3, act=act) + + # dark2 + self.dark2 = nn.Sequential( + Conv(base_channels, base_channels * 2, 3, 2, act=act), + CSPLayer( + base_channels * 2, + base_channels * 2, + n=base_depth, + depthwise=depthwise, + act=act, + ), + ) + + # dark3 + self.dark3 = nn.Sequential( + Conv(base_channels * 2, base_channels * 4, 3, 2, act=act), + CSPLayer( + base_channels * 4, + base_channels * 4, + n=base_depth * 3, + depthwise=depthwise, + act=act, + ), + ) + + # dark4 + self.dark4 = nn.Sequential( + Conv(base_channels * 4, base_channels * 8, 3, 2, act=act), + CSPLayer( + base_channels * 8, + base_channels * 8, + n=base_depth * 3, + depthwise=depthwise, + act=act, + ), + ) + + # dark5 + self.dark5 = nn.Sequential( + Conv(base_channels * 8, base_channels * 16, 3, 2, act=act), + SPPBottleneck(base_channels * 16, base_channels * 16, activation=act), + CSPLayer( + base_channels * 16, + base_channels * 16, + n=base_depth, + shortcut=False, + depthwise=depthwise, + act=act, + ), + ) + + + def freeze_stage(self): + # Because the YOLOX-Backbone has been trained on COCO, we freeze all stages to save computation. 
+ print('freeze all stage of YOLOX-Backbone ...') + for m in self.parameters(): + m.requires_grad = False + + + def forward(self, x): + outputs = {} + c1 = self.stem(x) + c2 = self.dark2(c1) + c3 = self.dark3(c2) + c4 = self.dark4(c3) + c5 = self.dark5(c4) + + return c3, c4, c5 + + +def yolox_cspdarknet_s(pretrained=False, freeze=False): + # build backbone + backbone = CSPDarknet(dep_mul=0.33, wid_mul=0.5, depthwise=False, act='silu') + + # load weight + if pretrained: + print('Loading pretrained cspdarknet_s ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + path_to_weight = path_to_dir + '/weights/yolox_backbone/yolox_cspdarknet_s.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') + backbone.load_state_dict(checkpoint) + + # freeze stage + if freeze: + backbone.freeze_stage() + + return backbone + + +def yolox_cspdarknet_m(pretrained=False, freeze=False): + # build backbone + backbone = CSPDarknet(dep_mul=0.67, wid_mul=0.75, depthwise=False, act='silu') + + # load weight + if pretrained: + print('Loading pretrained cspdarknet_m ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + path_to_weight = path_to_dir + '/weights/yolox_backbone/yolox_cspdarknet_m.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') + backbone.load_state_dict(checkpoint) + + # freeze stage + if freeze: + backbone.freeze_stage() + + return backbone + + +def yolox_cspdarknet_l(pretrained=False, freeze=False): + # build backbone + backbone = CSPDarknet(dep_mul=1.0, wid_mul=1.0, depthwise=False, act='silu') + + # load weight + if pretrained: + print('Loading pretrained cspdarknet_l ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + path_to_weight = path_to_dir + '/weights/yolox_backbone/yolox_cspdarknet_l.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') + backbone.load_state_dict(checkpoint) + + # freeze stage + if freeze: + backbone.freeze_stage() + + return backbone + + +def yolox_cspdarknet_x(pretrained=False, freeze=False): + # build backbone + backbone = CSPDarknet(dep_mul=1.33, wid_mul=1.25, depthwise=False, act='silu') + + # load weight + if pretrained: + print('Loading pretrained cspdarknet_x ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + path_to_weight = path_to_dir + '/weights/yolox_backbone/yolox_cspdarknet_x.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') + backbone.load_state_dict(checkpoint) + + # freeze stage + if freeze: + backbone.freeze_stage() + + return backbone + + +def yolox_cspdarknet_tiny(pretrained=False, freeze=False): + # build backbone + backbone = CSPDarknet(dep_mul=0.33, wid_mul=0.375, depthwise=False, act='silu') + + # load weight + if pretrained: + print('Loading pretrained cspdarknet_tiny ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + path_to_weight = path_to_dir + '/weights/yolox_backbone/yolox_cspdarknet_tiny.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') + backbone.load_state_dict(checkpoint) + + # freeze stage + if freeze: + backbone.freeze_stage() + + return backbone + + +def yolox_cspdarknet_nano(pretrained=False, freeze=False): + # build backbone + backbone = CSPDarknet(dep_mul=0.33, wid_mul=0.25, depthwise=True, act='silu') + + # load weight + if pretrained: + print('Loading pretrained cspdarknet_nano ...') + path_to_dir = os.path.dirname(os.path.abspath(__file__)) + path_to_weight = path_to_dir + '/weights/yolox_backbone/yolox_cspdarknet_nano.pth' + checkpoint = torch.load(path_to_weight, map_location='cpu') 
+ backbone.load_state_dict(checkpoint) + + # freeze stage + if freeze: + backbone.freeze_stage() + + return backbone + + +if __name__ == '__main__': + import time + net = yolox_cspdarknet_nano(pretrained=True) + x = torch.randn(1, 3, 224, 224) + t0 = time.time() + outputs = net(x) + t1 = time.time() + print('Time: ', t1 - t0) + for y in outputs: + print(y.shape) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/bottleneck_csp.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/bottleneck_csp.py new file mode 100644 index 0000000000..b246d68e83 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/bottleneck_csp.py @@ -0,0 +1,30 @@ +import torch +import torch.nn as nn +from .conv import Conv +import torch_npu + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, d=1, e=0.5, depthwise=False, act='lrelu'): # ch_in, ch_out, shortcut, groups, expansion + super(Bottleneck, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, k=1, act=act) + self.cv2 = Conv(c_, c2, k=3, p=d, d=d, act=act, depthwise=depthwise) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class BottleneckCSP(nn.Module): + def __init__(self, c1, c2, n=1, shortcut=True, e=0.5, depthwise=False, act='lrelu'): + super(BottleneckCSP, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, k=1, act=act) + self.cv2 = Conv(c1, c_, k=1, act=act) + self.cv3 = Conv(2 * c_, c2, k=1, act=act) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, e=1.0, depthwise=depthwise, act=act) for _ in range(n)]) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/conv.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/conv.py new file mode 100644 index 0000000000..4811d455f9 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/conv.py @@ -0,0 +1,59 @@ +import torch +import torch.nn as nn +import torch_npu + + +def get_activation(name="lrelu", inplace=True): + if name == "silu": + module = nn.SiLU(inplace=inplace) + elif name == "relu": + module = nn.ReLU(inplace=inplace) + elif name == "lrelu": + module = nn.LeakyReLU(0.1, inplace=inplace) + elif name is None: + module = nn.Identity() + else: + raise AttributeError("Unsupported act type: {}".format(name)) + return module + + +# Basic conv layer +class Conv(nn.Module): + def __init__(self, c1, c2, k=1, p=0, s=1, d=1, g=1, act='lrelu', depthwise=False, bias=False): + super(Conv, self).__init__() + if depthwise: + assert c1 == c2 + self.convs = nn.Sequential( + nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=c1, bias=bias), + nn.BatchNorm2d(c2), + get_activation(name=act), + nn.Conv2d(c2, c2, kernel_size=1, bias=bias), + nn.BatchNorm2d(c2), + get_activation(name=act) + ) + else: + self.convs = nn.Sequential( + nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias), + nn.BatchNorm2d(c2), + get_activation(name=act) + ) + + def forward(self, x): + return self.convs(x) + + +# ConvBlocks +class ConvBlocks(nn.Module): + def __init__(self, c1, c2, act='lrelu'): # in_channels, inner_channels + super().__init__() + c_ = c2 *2 
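+        # YOLOv3-style 5-conv stack: alternate 1x1 convs (c2 channels) and
+        # 3x3 convs (c_ = 2 * c2 channels) before the detection head.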
+ self.convs = nn.Sequential( + Conv(c1, c2, k=1, act=act), + Conv(c2, c_, k=3, p=1, act=act), + Conv(c_, c2, k=1, act=act), + Conv(c2, c_, k=3, p=1, act=act), + Conv(c_, c2, k=1, act=act) + ) + + def forward(self, x): + return self.convs(x) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/upsample.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/upsample.py new file mode 100644 index 0000000000..3e78bc8407 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/basic/upsample.py @@ -0,0 +1,20 @@ +import torch +import torch.nn as nn +import torch_npu + + +class UpSample(nn.Module): + def __init__(self, size=None, scale_factor=None, mode='nearest', align_corner=None): + super(UpSample, self).__init__() + self.size = size + self.scale_factor = scale_factor + self.mode = mode + self.align_corner = align_corner + + def forward(self, x): + return torch.nn.functional.interpolate(input=x, + size=self.size, + scale_factor=self.scale_factor, + mode=self.mode, + align_corners=self.align_corner + ) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/head/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/head/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/head/coupled_head.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/head/coupled_head.py new file mode 100644 index 0000000000..777a5721b3 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/head/coupled_head.py @@ -0,0 +1,100 @@ +import torch +import torch.nn as nn + +from ..basic.conv import Conv +import torch_npu + + +class CoupledHead(nn.Module): + def __init__(self, + in_dim=[256, 512, 1024], + stride=[8, 16, 32], + kernel_size=3, + padding=1, + width=1.0, + num_classes=80, + num_anchors=3, + depthwise=False, + act='silu', + init_bias=True, + center_sample=False): + super().__init__() + self.num_classes = num_classes + self.num_anchors = num_anchors + self.width = width + self.stride = stride + self.center_sample = center_sample + + self.head_feat = nn.ModuleList() + self.head_pred = nn.ModuleList() + + for c in in_dim: + head_dim = int(c * width) + self.head_feat.append( + nn.Sequential( + Conv(head_dim, head_dim, k=kernel_size, p=padding, act=act, depthwise=depthwise), + Conv(head_dim, head_dim, k=kernel_size, p=padding, act=act, depthwise=depthwise), + ) + ) + self.head_pred.append( + nn.Conv2d(head_dim, num_anchors * (1 + num_classes + 4), kernel_size=1) + ) + + if init_bias: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) + for head_pred in self.head_pred: + nn.init.constant_(head_pred.bias[..., :self.num_anchors], bias_value) + + + def forward(self, features, grid_cell=None, anchors_wh=None): + """ + features: (List of Tensor) of multiple feature maps + """ + B = features[0].size(0) + obj_preds = [] + cls_preds = [] + box_preds = [] + for i in range(len(features)): + feat = features[i] + head_feat = self.head_feat[i](feat) + head_pred = self.head_pred[i](head_feat) + # obj_pred / cls_pred / reg_pred + obj_pred = head_pred[:, :self.num_anchors, :, :] + cls_pred = head_pred[:, self.num_anchors:self.num_anchors*(1+self.num_classes), :, :] + reg_pred = head_pred[:, self.num_anchors*(1+self.num_classes):, :, :] + + # [B, KA, H, W] -> [B, H, W, KA] -> [B, HW*KA, 1] + obj_preds.append(obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)) + # [[B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_preds.append(cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)) + # [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_anchors, 4) + + # decode box + ## txty -> xy + if self.center_sample: + xy_pred = (grid_cell[i] + reg_pred[..., :2].sigmoid() * 2.0 - 1.0) * self.stride[i] + else: + xy_pred = (grid_cell[i] + reg_pred[..., :2].sigmoid()) * self.stride[i] + ## twth -> wh + if anchors_wh is not None: + wh_pred = reg_pred[..., 2:].exp() * anchors_wh[i] + else: + wh_pred = reg_pred[..., 2:].exp() * self.stride[i] + ## xywh -> x1y1x2y2 + x1y1_pred = xy_pred - wh_pred * 0.5 + x2y2_pred = xy_pred + wh_pred * 0.5 + box_preds.append(torch.cat([x1y1_pred, x2y2_pred], dim=-1).view(B, -1, 4)) + + obj_preds = torch.cat(obj_preds, dim=1) # [B, N, 1] + cls_preds = torch.cat(cls_preds, dim=1) # [B, N, C] + box_preds = torch.cat(box_preds, dim=1) # [B, N, 4] + + return obj_preds, cls_preds, box_preds diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/head/decoupled_head.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/head/decoupled_head.py new file mode 100644 index 0000000000..0e0ace6a70 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/head/decoupled_head.py @@ -0,0 +1,120 @@ +import torch +import torch.nn as nn + +from ..basic.conv import Conv +import torch_npu + + +class DecoupledHead(nn.Module): + def __init__(self, + in_dim=[256, 512, 1024], + stride=[8, 16, 32], + head_dim=256, + kernel_size=3, + padding=1, + width=1.0, + num_classes=80, + num_anchors=3, + depthwise=False, + act='silu', + init_bias=True, + center_sample=False): + super().__init__() + self.num_classes = num_classes + self.num_anchors = num_anchors + self.head_dim = int(head_dim * width) + self.width = width + self.stride = stride + self.center_sample = center_sample + + self.input_proj = nn.ModuleList() + self.cls_feat = nn.ModuleList() + self.reg_feat = nn.ModuleList() + self.obj_pred = nn.ModuleList() + self.cls_pred = nn.ModuleList() + self.reg_pred = nn.ModuleList() + + for c in in_dim: + self.input_proj.append( + Conv(c, self.head_dim, k=1, act=act) + ) + self.cls_feat.append( + nn.Sequential( + Conv(self.head_dim, self.head_dim, k=kernel_size, p=padding, act=act, depthwise=depthwise), + Conv(self.head_dim, self.head_dim, k=kernel_size, p=padding, act=act, depthwise=depthwise) + ) + ) + self.reg_feat.append( + nn.Sequential( + Conv(self.head_dim, self.head_dim, k=kernel_size, p=padding, act=act, depthwise=depthwise), + Conv(self.head_dim, self.head_dim, k=kernel_size, p=padding, act=act, 
depthwise=depthwise) + ) + ) + self.obj_pred.append( + nn.Conv2d(self.head_dim, num_anchors * 1, kernel_size=1) + ) + self.cls_pred.append( + nn.Conv2d(self.head_dim, num_anchors * num_classes, kernel_size=1) + ) + self.reg_pred.append( + nn.Conv2d(self.head_dim, num_anchors * 4, kernel_size=1) + ) + + if init_bias: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) + for obj_pred in self.obj_pred: + nn.init.constant_(obj_pred.bias, bias_value) + + + def forward(self, features, grid_cell=None, anchors_wh=None): + """ + features: (List of Tensor) of multiple feature maps + """ + B = features[0].size(0) + obj_preds = [] + cls_preds = [] + box_preds = [] + for i in range(len(features)): + feat = features[i] + feat = self.input_proj[i](feat) + cls_feat = self.cls_feat[i](feat) + reg_feat = self.reg_feat[i](feat) + obj_pred = self.obj_pred[i](reg_feat) + cls_pred = self.cls_pred[i](cls_feat) + reg_pred = self.reg_pred[i](reg_feat) + + # [B, KA, H, W] -> [B, H, W, KA] -> [B, HW*KA, 1] + obj_preds.append(obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)) + # [[B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_preds.append(cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)) + # [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_anchors, 4) + + # decode box + ## txty -> xy + if self.center_sample: + xy_pred = (grid_cell[i] + reg_pred[..., :2].sigmoid() * 2.0 - 1.0) * self.stride[i] + else: + xy_pred = (grid_cell[i] + reg_pred[..., :2].sigmoid()) * self.stride[i] + ## twth -> wh + if anchors_wh is not None: + wh_pred = reg_pred[..., 2:].exp() * anchors_wh[i] + else: + wh_pred = reg_pred[..., 2:].exp() * self.stride[i] + ## xywh -> x1y1x2y2 + x1y1_pred = xy_pred - wh_pred * 0.5 + x2y2_pred = xy_pred + wh_pred * 0.5 + box_preds.append(torch.cat([x1y1_pred, x2y2_pred], dim=-1).view(B, -1, 4)) + + obj_preds = torch.cat(obj_preds, dim=1) # [B, N, 1] + cls_preds = torch.cat(cls_preds, dim=1) # [B, N, C] + box_preds = torch.cat(box_preds, dim=1) # [B, N, 4] + + return obj_preds, cls_preds, box_preds diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/__init__.py new file mode 100644 index 0000000000..39c4c1a379 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/__init__.py @@ -0,0 +1,23 @@ +from .spp import SPPBlock, SPPBlockCSP, SPPBlockDW +from .dilated_encoder import DilatedEncoder +from ..basic.conv import ConvBlocks + + +def build_neck(model, in_ch, out_ch, act='lrelu'): + if model == 'conv_blocks': + print("Neck: ConvBlocks") + neck = ConvBlocks(c1=in_ch, c2=out_ch, act=act) + elif model == 'spp': + print("Neck: SPP") + neck = SPPBlock(c1=in_ch, c2=out_ch, act=act) + elif model == 'spp-csp': + print("Neck: SPP-CSP") + neck = SPPBlockCSP(c1=in_ch, c2=out_ch, act=act) + elif model == 'spp-dw': + print("Neck: SPP-DW") + neck = SPPBlockDW(c1=in_ch, c2=out_ch, act=act) + elif model == 'dilated_encoder': + print("Neck: Dilated Encoder") + neck = DilatedEncoder(c1=in_ch, c2=out_ch, act=act) + + return neck diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/dilated_encoder.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/dilated_encoder.py new file mode 100644 index 0000000000..e544a997d2 --- /dev/null +++ 
b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/dilated_encoder.py @@ -0,0 +1,39 @@ +import torch +import torch.nn as nn +from ..basic.conv import Conv +import torch_npu + + +# Dilated Encoder +class DilatedBottleneck(nn.Module): + def __init__(self, c, d=1, e=0.5, act='lrelu'): + super(DilatedBottleneck, self).__init__() + c_ = int(c * e) + self.branch = nn.Sequential( + Conv(c, c_, k=1, act=act), + Conv(c_, c_, k=3, p=d, d=d, act=act), + Conv(c_, c, k=1, act=act) + ) + + def forward(self, x): + return x + self.branch(x) + + +class DilatedEncoder(nn.Module): + """ DilateEncoder """ + def __init__(self, c1, c2, act='lrelu', dilation_list=[2, 4, 6, 8]): + super(DilatedEncoder, self).__init__() + self.projector = nn.Sequential( + Conv(c1, c2, k=1, act=None), + Conv(c2, c2, k=3, p=1, act=None) + ) + encoders = [] + for d in dilation_list: + encoders.append(DilatedBottleneck(c=c2, d=d, act=act)) + self.encoders = nn.Sequential(*encoders) + + def forward(self, x): + x = self.projector(x) + x = self.encoders(x) + + return x diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/fpn.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/fpn.py new file mode 100644 index 0000000000..437d6c8980 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/fpn.py @@ -0,0 +1,120 @@ +import torch +import torch.nn as nn +from ..basic.conv import Conv, ConvBlocks +from ..basic.upsample import UpSample +from ..basic.bottleneck_csp import BottleneckCSP +import torch_npu + + +# YoloFPN +class YoloFPN(nn.Module): + def __init__(self, in_dim=[512, 1024, 2048]): + super(YoloFPN, self).__init__() + c3, c4, c5 = in_dim + # head + # P3/8-small + self.head_convblock_0 = ConvBlocks(c5, c5//2) + self.head_conv_0 = Conv(c5//2, c4//2, k=1) + self.head_upsample_0 = UpSample(scale_factor=2) + self.head_conv_1 = Conv(c5//2, c5, k=3, p=1) + + # P4/16-medium + self.head_convblock_1 = ConvBlocks(c4 + c4//2, c4//2) + self.head_conv_2 = Conv(c4//2, c3//2, k=1) + self.head_upsample_1 = UpSample(scale_factor=2) + self.head_conv_3 = Conv(c4//2, c4, k=3, p=1) + + # P8/32-large + self.head_convblock_2 = ConvBlocks(c3 + c3//2, c3//2) + self.head_conv_4 = Conv(c3//2, c3, k=3, p=1) + + + def forward(self, features): + c3, c4, c5 = features + + # p5/32 + p5 = self.head_convblock_0(c5) + p5_up = self.head_upsample_0(self.head_conv_0(p5)) + p5 = self.head_conv_1(p5) + + # p4/16 + p4 = self.head_convblock_1(torch.cat([c4, p5_up], dim=1)) + p4_up = self.head_upsample_1(self.head_conv_2(p4)) + p4 = self.head_conv_3(p4) + + # P3/8 + p3 = self.head_convblock_2(torch.cat([c3, p4_up], dim=1)) + p3 = self.head_conv_4(p3) + + return [p3, p4, p5] + + +# YoloPaFPN +class YoloPaFPN(nn.Module): + def __init__(self, + in_dim=[256, 512, 1024], + depth=1.0, + depthwise=False, + act='silu'): + super(YoloPaFPN, self).__init__() + c3, c4, c5 = in_dim + nblocks = int(3 * depth) + self.head_conv_0 = Conv(c5, c5//2, k=1, act=act) # 10 + self.head_upsample_0 = UpSample(scale_factor=2) + self.head_csp_0 = BottleneckCSP(c4 + c5//2, c4, n=nblocks, shortcut=False, depthwise=depthwise, act=act) + + # P3/8-small + self.head_conv_1 = Conv(c4, c4//2, k=1, act=act) # 14 + self.head_upsample_1 = UpSample(scale_factor=2) + self.head_csp_1 = BottleneckCSP(c3 + c4//2, c3, n=nblocks, shortcut=False, depthwise=depthwise, act=act) + + # P4/16-medium + self.head_conv_2 = Conv(c3, c3, k=3, p=1, s=2, depthwise=depthwise, act=act) + self.head_csp_2 = BottleneckCSP(c3 + c4//2, c4, n=nblocks, shortcut=False, depthwise=depthwise, act=act) + + # 
P8/32-large + self.head_conv_3 = Conv(c4, c4, k=3, p=1, s=2, depthwise=depthwise, act=act) + self.head_csp_3 = BottleneckCSP(c4 + c5//2, c5, n=nblocks, shortcut=False, depthwise=depthwise) + + + def forward(self, features): + c3, c4, c5 = features + + c6 = self.head_conv_0(c5) + c7 = self.head_upsample_0(c6) # s32->s16 + c8 = torch.cat([c7, c4], dim=1) + c9 = self.head_csp_0(c8) + # P3/8 + c10 = self.head_conv_1(c9) + c11 = self.head_upsample_1(c10) # s16->s8 + c12 = torch.cat([c11, c3], dim=1) + c13 = self.head_csp_1(c12) # to det + # p4/16 + c14 = self.head_conv_2(c13) + c15 = torch.cat([c14, c10], dim=1) + c16 = self.head_csp_2(c15) # to det + # p5/32 + c17 = self.head_conv_3(c16) + c18 = torch.cat([c17, c6], dim=1) + c19 = self.head_csp_3(c18) # to det + + return [c13, c16, c19] # [P3, P4, P5] + + +# build Head +def build_fpn(model_name='yolofpn', + in_dim=[256, 512, 1024], + depth=1.0, + depthwise=False, + act='silu'): + if model_name == 'yolofpn': + print("Head: YoloFPN ...") + return YoloFPN(in_dim) + + elif model_name == 'yolopafpn': + print('Head: YoloPaFPN ...') + return YoloPaFPN(in_dim, depth, depthwise, act) + + else: + print("Unknown FPN version ...") + exit() diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/spp.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/spp.py new file mode 100644 index 0000000000..652124089c --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/neck/spp.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn + +from ..basic.conv import Conv +import torch_npu + + +# Spatial Pyramid Pooling +class SPP(nn.Module): + """ + Spatial Pyramid Pooling + """ + def __init__(self, c1, c2, e=0.5, kernel_sizes=[5, 9, 13], act='lrelu'): + super(SPP, self).__init__() + c_ = int(c1 * e) + self.cv1 = Conv(c1, c_, k=1, act=act) + self.m = nn.ModuleList( + [ + nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + for k in kernel_sizes + ] + ) + + self.cv2 = Conv(c_*(len(kernel_sizes) + 1), c2, k=1, act=act) + + def forward(self, x): + x = self.cv1(x) + x = torch.cat([x] + [m(x) for m in self.m], dim=1) + x = self.cv2(x) + + return x + + +class SPPBlock(nn.Module): + """ + Spatial Pyramid Pooling Block + """ + def __init__(self, c1, c2, e=0.5, kernel_sizes=[5, 9, 13], act='lrelu'): + super(SPPBlock, self).__init__() + self.m = nn.Sequential( + Conv(c1, c1//2, k=1, act=act), + Conv(c1//2, c1, k=3, p=1, act=act), + SPP(c1, c1//2, e=e, kernel_sizes=kernel_sizes, act=act), + Conv(c1//2, c1, k=3, p=1, act=act), + Conv(c1, c2, k=1, act=act) + ) + + + def forward(self, x): + x = self.m(x) + + return x + + +class SPPBlockCSP(nn.Module): + """ + CSP Spatial Pyramid Pooling Block + """ + def __init__(self, c1, c2, e=0.5, kernel_sizes=[5, 9, 13], act='lrelu'): + super(SPPBlockCSP, self).__init__() + self.cv1 = Conv(c1, c1//2, k=1, act=act) + self.cv2 = Conv(c1, c1//2, k=1, act=act) + self.m = nn.Sequential( + Conv(c1//2, c1//2, k=3, p=1, act=act), + SPP(c1//2, c1//2, e=e, kernel_sizes=kernel_sizes, act=act), + Conv(c1//2, c1//2, k=3, p=1, act=act) + ) + self.cv3 = Conv(c1, c2, k=1, act=act) + + + def forward(self, x): + x1 = self.cv1(x) + x2 = self.cv2(x) + x3 = self.m(x2) + y = self.cv3(torch.cat([x1, x3], dim=1)) + + return y + + +class SPPBlockDW(nn.Module): + """ + Depth-wise Spatial Pyramid Pooling Block + """ + def __init__(self, c1, c2, e=0.5, kernel_sizes=[5, 9, 13], act='lrelu'): + super(SPPBlockDW, self).__init__() + self.m = nn.Sequential( + Conv(c1, c1//2, k=1, act=act), + Conv(c1//2, c1//2, k=3, p=1, g=c1//2, act=act), + 
SPP(c1//2, c1//2, e=e, kernel_sizes=kernel_sizes, act=act), + Conv(c1//2, c1//2, k=3, p=1, g=c1//2, act=act), + Conv(c1//2, c2, k=1, act=act) + ) + + def forward(self, x): + return self.m(x) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/__init__.py new file mode 100644 index 0000000000..6a154046dc --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/__init__.py @@ -0,0 +1,92 @@ +from .yolov1 import YOLOv1 +from .yolov2 import YOLOv2 +from .yolov3 import YOLOv3 +from .yolov4 import YOLOv4 +from .yolo_tiny import YOLOTiny +from .yolo_nano import YOLONano + + +# build YOLO detector +def build_model(args, cfg, device, num_classes=80, trainable=False): + + if args.model == 'yolov1': + print('Build YOLOv1 ...') + model = YOLOv1(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolov2': + print('Build YOLOv2 ...') + model = YOLOv2(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolov3': + print('Build YOLOv3 ...') + model = YOLOv3(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolov3_spp': + print('Build YOLOv3 with SPP ...') + model = YOLOv3(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolov3_de': + print('Build YOLOv3 with DilatedEncoder ...') + model = YOLOv3(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolov4': + print('Build YOLOv4 ...') + model = YOLOv4(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolo_tiny': + print('Build YOLO-Tiny ...') + model = YOLOTiny(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + elif args.model == 'yolo_nano': + print('Build YOLO-Nano ...') + model = YOLONano(cfg=cfg, + device=device, + img_size=args.img_size, + num_classes=num_classes, + trainable=trainable, + conf_thresh=args.conf_thresh, + nms_thresh=args.nms_thresh, + center_sample=args.center_sample) + return model diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_nano.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_nano.py new file mode 100644 index 0000000000..fbe8fb99ab --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_nano.py @@ -0,0 +1,340 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from ..backbone import build_backbone +from ..neck.spp import SPP +from ..basic.conv import Conv +from utils import box_ops 
+import torch_npu + + +class YOLONano(nn.Module): + def __init__(self, + cfg=None, + device=None, + img_size=640, + num_classes=80, + trainable=False, + conf_thresh=0.001, + nms_thresh=0.60, + center_sample=False): + super(YOLONano, self).__init__() + self.cfg = cfg + self.device = device + self.img_size = img_size + self.num_classes = num_classes + self.trainable = trainable + self.conf_thresh = conf_thresh + self.nms_thresh = nms_thresh + self.center_sample = center_sample + + # backbone + self.backbone, feature_channels, strides = build_backbone(model_name=cfg["backbone"], + pretrained=trainable) + self.stride = strides + anchor_size = cfg["anchor_size"] + self.anchor_size = torch.tensor(anchor_size).reshape(len(self.stride), len(anchor_size) // 3, 2).float() + self.num_anchors = self.anchor_size.size(1) + c3, c4, c5 = feature_channels + + # build grid cell + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + # neck + self.neck = SPP(c5, c5) + + # FPN+PAN + self.conv1x1_0 = Conv(c3, 96, k=1) + self.conv1x1_1 = Conv(c4, 96, k=1) + self.conv1x1_2 = Conv(c5, 96, k=1) + + self.smooth_0 = Conv(96, 96, k=3, p=1) + self.smooth_1 = Conv(96, 96, k=3, p=1) + self.smooth_2 = Conv(96, 96, k=3, p=1) + self.smooth_3 = Conv(96, 96, k=3, p=1) + + # det head + self.head_conv_1 = nn.Sequential( + Conv(96, 96, k=3, p=1, g=96), + Conv(96, 96, k=1), + Conv(96, 96, k=3, p=1, g=96), + Conv(96, 96, k=1) + ) + self.head_conv_2 = nn.Sequential( + Conv(96, 96, k=3, p=1, g=96), + Conv(96, 96, k=1), + Conv(96, 96, k=3, p=1, g=96), + Conv(96, 96, k=1) + ) + self.head_conv_3 = nn.Sequential( + Conv(96, 96, k=3, p=1, g=96), + Conv(96, 96, k=1), + Conv(96, 96, k=3, p=1, g=96), + Conv(96, 96, k=1) + ) + + # det conv + self.head_det_1 = nn.Conv2d(96, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_2 = nn.Conv2d(96, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_3 = nn.Conv2d(96, self.num_anchors * (1 + self.num_classes + 4), 1) + + if self.trainable: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) + nn.init.constant_(self.head_det_1.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_2.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_3.bias[..., :self.num_anchors], bias_value) + + + def create_grid(self, img_size): + total_grid_xy = [] + total_anchor_wh = [] + w, h = img_size, img_size + for ind, s in enumerate(self.stride): + # generate grid cells + fmp_w, fmp_h = w // s, h // s + grid_y, grid_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)]) + # [H, W, 2] -> [HW, 2] + grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) + # [HW, 2] -> [1, HW, 1, 2] + grid_xy = grid_xy[None, :, None, :].to(self.device) + # [1, HW, 1, 2] + anchor_wh = self.anchor_size[ind].repeat(fmp_h*fmp_w, 1, 1).unsqueeze(0).to(self.device) + + total_grid_xy.append(grid_xy) + total_anchor_wh.append(anchor_wh) + + return total_grid_xy, total_anchor_wh + + + def set_grid(self, img_size): + self.img_size = img_size + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + + def nms(self, dets, scores): + """"Pure Python NMS YOLOv4.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) # the size of bbox + order = scores.argsort()[::-1] # sort bounding boxes by decreasing order + + keep = [] # store the final bounding boxes + while order.size > 0: + i = order[0] #the index of the bbox with highest confidence + keep.append(i) #save it to keep + # compute iou + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-14) + #reserve all the boundingbox whose ovr less than thresh + inds = np.where(ovr <= self.nms_thresh)[0] + order = order[inds + 1] + + return keep + + + def postprocess(self, bboxes, scores): + """ + bboxes: (N, 4), bsize = 1 + scores: (N, C), bsize = 1 + """ + + cls_inds = np.argmax(scores, axis=1) + scores = scores[(np.arange(scores.shape[0]), cls_inds)] + + # threshold + keep = np.where(scores >= self.conf_thresh) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + # NMS + keep = np.zeros(len(bboxes), dtype=np.int) + for i in range(self.num_classes): + inds = np.where(cls_inds == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + return bboxes, scores, cls_inds + + + @torch.no_grad() + def inference_single_image(self, x): + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # neck + c5 = self.neck(c5) + + # head + p3 = self.conv1x1_0(c3) + p4 = self.conv1x1_1(c4) + p5 = self.conv1x1_2(c5) + + # top-down + p4 = self.smooth_0(p4 + F.interpolate(p5, scale_factor=2.0)) + p3 = self.smooth_1(p3 + F.interpolate(p4, scale_factor=2.0)) + + # bottom-up + p4 = self.smooth_2(p4 + F.interpolate(p3, scale_factor=0.5)) + p5 = self.smooth_3(p5 + F.interpolate(p4, scale_factor=0.5)) + + # det head + pred_s = self.head_det_1(self.head_conv_1(p3))[0] + pred_m = self.head_det_2(self.head_conv_2(p4))[0] + pred_l = self.head_det_3(self.head_conv_3(p5))[0] + + preds = [pred_s, pred_m, pred_l] + 
obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in enumerate(preds): + # [KA*(1 + C + 4), H, W] -> [KA*1, H, W] -> [H, W, KA*1] -> [HW*KA, 1] + obj_pred_i = pred[:KA, :, :].permute(1, 2, 0).contiguous().view(-1, 1) + # [KA*(1 + C + 4), H, W] -> [KA*C, H, W] -> [H, W, KA*C] -> [HW*KA, C] + cls_pred_i = pred[KA:KA*(1+C), :, :].permute(1, 2, 0).contiguous().view(-1, C) + # [KA*(1 + C + 4), H, W] -> [KA*4, H, W] -> [H, W, KA*4] -> [HW, KA, 4] + reg_pred_i = pred[KA*(1+C):, :, :].permute(1, 2, 0).contiguous().view(-1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[None, ..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1)[0].view(-1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=0) + cls_pred = torch.cat(cls_pred_list, dim=0) + box_pred = torch.cat(box_pred_list, dim=0) + + # normalize bbox + bboxes = torch.clamp(box_pred / self.img_size, 0., 1.) + + # scores + scores = torch.sigmoid(obj_pred) * torch.softmax(cls_pred, dim=-1) + + # to cpu + scores = scores.to('cpu').numpy() + bboxes = bboxes.to('cpu').numpy() + + # post-process + bboxes, scores, cls_inds = self.postprocess(bboxes, scores) + + return bboxes, scores, cls_inds + + + def forward(self, x, targets=None): + if not self.trainable: + return self.inference_single_image(x) + else: + B = x.size(0) + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # neck + c5 = self.neck(c5) + + p3 = self.conv1x1_0(c3) + p4 = self.conv1x1_1(c4) + p5 = self.conv1x1_2(c5) + + # top-down + p4 = self.smooth_0(p4 + F.interpolate(p5, scale_factor=2.0)) + p3 = self.smooth_1(p3 + F.interpolate(p4, scale_factor=2.0)) + + # bottom-up + p4 = self.smooth_2(p4 + F.interpolate(p3, scale_factor=0.5)) + p5 = self.smooth_3(p5 + F.interpolate(p4, scale_factor=0.5)) + + # det head + pred_s = self.head_det_1(self.head_conv_1(p3)) + pred_m = self.head_det_2(self.head_conv_2(p4)) + pred_l = self.head_det_3(self.head_conv_3(p5)) + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in enumerate(preds): + # [B, KA*(1 + C + 4), H, W] -> [B, KA, H, W] -> [B, H, W, KA] -> [B, HW*KA, 1] + obj_pred_i = pred[:, :KA, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, 1) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_pred_i = pred[:, KA:KA*(1+C), :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, C) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred_i = pred[:, KA*(1+C):, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = torch.cat([x1y1_pred_i, 
x2y2_pred_i], dim=-1).view(B, -1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=1) + cls_pred = torch.cat(cls_pred_list, dim=1) + box_pred = torch.cat(box_pred_list, dim=1) + + # normalize bbox + box_pred = box_pred / self.img_size + + # compute giou between prediction bbox and target bbox + x1y1x2y2_pred = box_pred.view(-1, 4) + x1y1x2y2_gt = targets[..., 2:6].view(-1, 4) + + # giou: [B, HW,] + giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + + # we set giou as the target of the objectness + targets = torch.cat([0.5 * (giou_pred[..., None].clone().detach() + 1.0), targets], dim=-1) + + return obj_pred, cls_pred, giou_pred, targets diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_tiny.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_tiny.py new file mode 100644 index 0000000000..42bec87756 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolo_tiny.py @@ -0,0 +1,335 @@ +import numpy as np +import torch +import torch.nn as nn + +from utils import box_ops + +from ..backbone import build_backbone +from ..neck import build_neck +from ..basic.conv import Conv +from ..basic.upsample import UpSample +from ..basic.bottleneck_csp import BottleneckCSP +import torch_npu + + +class YOLOTiny(nn.Module): + def __init__(self, + cfg=None, + device=None, + img_size=640, + num_classes=80, + trainable=False, + conf_thresh=0.001, + nms_thresh=0.60, + center_sample=False): + super(YOLOTiny, self).__init__() + self.cfg = cfg + self.device = device + self.img_size = img_size + self.num_classes = num_classes + self.trainable = trainable + self.conf_thresh = conf_thresh + self.nms_thresh = nms_thresh + self.center_sample = center_sample + + # backbone + self.backbone, feature_channels, strides = build_backbone(model_name=cfg['backbone'], pretrained=trainable) + self.stride = strides + anchor_size = cfg["anchor_size"] + self.anchor_size = torch.tensor(anchor_size).reshape(len(self.stride), len(anchor_size) // 3, 2).float() + self.num_anchors = self.anchor_size.size(1) + c3, c4, c5 = feature_channels + + # build grid cell + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + # head + self.head_conv_0 = build_neck(model=cfg["neck"], in_ch=c5, out_ch=c5//2) # 10 + self.head_upsample_0 = UpSample(scale_factor=2) + self.head_csp_0 = BottleneckCSP(c4 + c5//2, c4, n=1, shortcut=False) + + # P3/8-small + self.head_conv_1 = Conv(c4, c4//2, k=1) # 14 + self.head_upsample_1 = UpSample(scale_factor=2) + self.head_csp_1 = BottleneckCSP(c3 + c4//2, c3, n=1, shortcut=False) + + # P4/16-medium + self.head_conv_2 = Conv(c3, c3, k=3, p=1, s=2) + self.head_csp_2 = BottleneckCSP(c3 + c4//2, c4, n=1, shortcut=False) + + # P8/32-large + self.head_conv_3 = Conv(c4, c4, k=3, p=1, s=2) + self.head_csp_3 = BottleneckCSP(c4 + c5//2, c5, n=1, shortcut=False) + + # det conv + self.head_det_1 = nn.Conv2d(c3, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_2 = nn.Conv2d(c4, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_3 = nn.Conv2d(c5, self.num_anchors * (1 + self.num_classes + 4), 1) + + if self.trainable: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) + nn.init.constant_(self.head_det_1.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_2.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_3.bias[..., :self.num_anchors], bias_value) + + + def create_grid(self, img_size): + total_grid_xy = [] + total_anchor_wh = [] + w, h = img_size, img_size + for ind, s in enumerate(self.stride): + # generate grid cells + fmp_w, fmp_h = w // s, h // s + grid_y, grid_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)]) + # [H, W, 2] -> [HW, 2] + grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) + # [HW, 2] -> [1, HW, 1, 2] + grid_xy = grid_xy[None, :, None, :].to(self.device) + # [1, HW, 1, 2] + anchor_wh = self.anchor_size[ind].repeat(fmp_h*fmp_w, 1, 1).unsqueeze(0).to(self.device) + + total_grid_xy.append(grid_xy) + total_anchor_wh.append(anchor_wh) + + return total_grid_xy, total_anchor_wh + + + def set_grid(self, img_size): + self.img_size = img_size + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + + def nms(self, dets, scores): + """"Pure Python NMS YOLOv4.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + # compute iou + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-14) + #reserve all the boundingbox whose ovr less than thresh + inds = np.where(ovr <= self.nms_thresh)[0] + order = order[inds + 1] + + return keep + + + def postprocess(self, bboxes, scores): + """ + bboxes: (HxW, 4), bsize = 1 + scores: (HxW, num_classes), bsize = 1 + """ + + cls_inds = np.argmax(scores, axis=1) + scores = scores[(np.arange(scores.shape[0]), cls_inds)] + + # threshold + keep = np.where(scores >= self.conf_thresh) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + # NMS + keep = np.zeros(len(bboxes), dtype=np.int) + for i in range(self.num_classes): + inds = np.where(cls_inds == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + return bboxes, scores, cls_inds + + + @torch.no_grad() + def inference_single_image(self, x): + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # FPN + PAN + # head + c6 = self.head_conv_0(c5) + c7 = self.head_upsample_0(c6) # s32->s16 + c8 = torch.cat([c7, c4], dim=1) + c9 = self.head_csp_0(c8) + # P3/8 + c10 = self.head_conv_1(c9) + c11 = self.head_upsample_1(c10) # s16->s8 + c12 = torch.cat([c11, c3], dim=1) + c13 = self.head_csp_1(c12) # to det + # p4/16 + c14 = self.head_conv_2(c13) + c15 = torch.cat([c14, c10], dim=1) + c16 = self.head_csp_2(c15) # to det + # p5/32 + c17 = self.head_conv_3(c16) + c18 = torch.cat([c17, c6], dim=1) + c19 = self.head_csp_3(c18) # to det + + # det + pred_s = self.head_det_1(c13)[0] + pred_m = self.head_det_2(c16)[0] + pred_l = self.head_det_3(c19)[0] + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, 
pred in enumerate(preds): + # [KA*(1 + C + 4), H, W] -> [KA*1, H, W] -> [H, W, KA*1] -> [HW*KA, 1] + obj_pred_i = pred[:KA, :, :].permute(1, 2, 0).contiguous().view(-1, 1) + # [KA*(1 + C + 4), H, W] -> [KA*C, H, W] -> [H, W, KA*C] -> [HW*KA, C] + cls_pred_i = pred[KA:KA*(1+C), :, :].permute(1, 2, 0).contiguous().view(-1, C) + # [KA*(1 + C + 4), H, W] -> [KA*4, H, W] -> [H, W, KA*4] -> [HW, KA, 4] + reg_pred_i = pred[KA*(1+C):, :, :].permute(1, 2, 0).contiguous().view(-1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[None, ..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1)[0].view(-1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=0) + cls_pred = torch.cat(cls_pred_list, dim=0) + box_pred = torch.cat(box_pred_list, dim=0) + + # normalize bbox + bboxes = torch.clamp(box_pred / self.img_size, 0., 1.) + + # scores + scores = torch.sigmoid(obj_pred) * torch.softmax(cls_pred, dim=-1) + + # to cpu + scores = scores.to('cpu').numpy() + bboxes = bboxes.to('cpu').numpy() + + # post-process + bboxes, scores, cls_inds = self.postprocess(bboxes, scores) + + return bboxes, scores, cls_inds + + + def forward(self, x, targets=None): + if not self.trainable: + return self.inference_single_image(x) + else: + B = x.size(0) + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # FPN + PAN + # head + c6 = self.head_conv_0(c5) + c7 = self.head_upsample_0(c6) # s32->s16 + c8 = torch.cat([c7, c4], dim=1) + c9 = self.head_csp_0(c8) + # P3/8 + c10 = self.head_conv_1(c9) + c11 = self.head_upsample_1(c10) # s16->s8 + c12 = torch.cat([c11, c3], dim=1) + c13 = self.head_csp_1(c12) # to det + # p4/16 + c14 = self.head_conv_2(c13) + c15 = torch.cat([c14, c10], dim=1) + c16 = self.head_csp_2(c15) # to det + # p5/32 + c17 = self.head_conv_3(c16) + c18 = torch.cat([c17, c6], dim=1) + c19 = self.head_csp_3(c18) # to det + + # det + pred_s = self.head_det_1(c13) + pred_m = self.head_det_2(c16) + pred_l = self.head_det_3(c19) + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in enumerate(preds): + # [B, KA*(1 + C + 4), H, W] -> [B, KA, H, W] -> [B, H, W, KA] -> [B, HW*KA, 1] + obj_pred_i = pred[:, :KA, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, 1) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_pred_i = pred[:, KA:KA*(1+C), :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, C) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred_i = pred[:, KA*(1+C):, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = 
torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1).view(B, -1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=1) + cls_pred = torch.cat(cls_pred_list, dim=1) + box_pred = torch.cat(box_pred_list, dim=1) + + # normalize bbox + box_pred = box_pred / self.img_size + + # compute giou between prediction bbox and target bbox + x1y1x2y2_pred = box_pred.view(-1, 4) + x1y1x2y2_gt = targets[..., 2:6].view(-1, 4) + + # giou: [B, HW,] + giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + + # we set giou as the target of the objectness + targets = torch.cat([0.5 * (giou_pred[..., None].clone().detach() + 1.0), targets], dim=-1) + + return obj_pred, cls_pred, giou_pred, targets diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov1.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov1.py new file mode 100644 index 0000000000..bf3160505f --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov1.py @@ -0,0 +1,260 @@ +import numpy as np +import torch +import torch.nn as nn + +from utils import box_ops + +from ..basic.conv import Conv +from ..neck import build_neck +from ..backbone import build_backbone +import torch_npu + + +class YOLOv1(nn.Module): + def __init__(self, + cfg=None, + device=None, + img_size=None, + num_classes=20, + trainable=False, + conf_thresh=0.001, + nms_thresh=0.6, + center_sample=False): + super(YOLOv1, self).__init__() + self.cfg = cfg + self.device = device + self.img_size = img_size + self.num_classes = num_classes + self.trainable = trainable + self.conf_thresh = conf_thresh + self.nms_thresh = nms_thresh + self.center_sample = center_sample + + # backbone + self.backbone, feature_channels, strides = build_backbone(model_name=cfg['backbone'], + pretrained=trainable) + self.stride = [strides[-1]] + feature_dim = feature_channels[-1] + head_dim = 512 + + # build grid cell + self.grid_xy = self.create_grid(img_size) + + # neck + self.neck = build_neck(model=cfg['neck'], in_ch=feature_dim, out_ch=head_dim) + + # head + self.cls_feat = nn.Sequential( + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1) + ) + self.reg_feat = nn.Sequential( + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1) + ) + + # head + self.obj_pred = nn.Conv2d(head_dim, 1, kernel_size=1) + self.cls_pred = nn.Conv2d(head_dim, self.num_classes, kernel_size=1) + self.reg_pred = nn.Conv2d(head_dim, 4, kernel_size=1) + + if self.trainable: + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) + nn.init.constant_(self.obj_pred.bias, bias_value) + + + def create_grid(self, img_size): + """img_size: [H, W]""" + img_h = img_w = img_size + # generate grid cells + fmp_h, fmp_w = img_h // self.stride[0], img_w // self.stride[0] + grid_y, grid_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)]) + # [H, W, 2] -> [HW, 2] + grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) + # [HW, 2] -> [1, HW, 2] + grid_xy = grid_xy.unsqueeze(0).to(self.device) + + return grid_xy + + + def set_grid(self, img_size): + self.grid_xy = self.create_grid(img_size) + self.img_size = img_size + + + def decode_bbox(self, reg_pred): + """reg_pred: [B, N, 4]""" + # txty -> xy + if self.center_sample: + xy_pred = reg_pred[..., :2].sigmoid() * 2.0 - 1.0 + self.grid_xy + else: + xy_pred = reg_pred[..., :2].sigmoid() + self.grid_xy + # twth -> wh + wh_pred = reg_pred[..., 2:].exp() + xywh_pred = torch.cat([xy_pred, wh_pred], dim=-1) + # xywh -> x1y1x2y2 + x1y1_pred = xywh_pred[..., :2] - xywh_pred[..., 2:] / 2 + x2y2_pred = xywh_pred[..., :2] + xywh_pred[..., 2:] / 2 + box_pred = torch.cat([x1y1_pred, x2y2_pred], dim=-1) + # rescale bbox + box_pred = box_pred * self.stride[0] + + return box_pred + + + def nms(self, dets, scores): + """Pure Python NMS.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + # compute iou + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-14) + # keep only the boxes whose overlap (ovr) is below the NMS threshold + inds = np.where(ovr <= self.nms_thresh)[0] + order = order[inds + 1] + + return keep + + + def postprocess(self, bboxes, scores): + """ + bboxes: (N, 4), bsize = 1 + scores: (N, C), bsize = 1 + """ + + cls_inds = np.argmax(scores, axis=1) + scores = scores[(np.arange(scores.shape[0]), cls_inds)] + + # threshold + keep = np.where(scores >= self.conf_thresh) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + # NMS + keep = np.zeros(len(bboxes), dtype=int) + for i in range(self.num_classes): + inds = np.where(cls_inds == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + return bboxes, scores, cls_inds + + + @torch.no_grad() + def inference_single_image(self, x): + # backbone + x = self.backbone(x)[-1] + + # neck + x = self.neck(x) + + # head + cls_feat = self.cls_feat(x) + reg_feat = self.reg_feat(x) + + # pred + obj_pred = self.obj_pred(reg_feat)[0] + cls_pred = self.cls_pred(cls_feat)[0] + reg_pred = self.reg_pred(reg_feat)[0] + + # [1, H, W] -> [1, HW] -> [HW, 1] + obj_pred = obj_pred.flatten(1).permute(1, 0).contiguous() + # [C, H, W] -> [C, HW] -> [HW, C] + cls_pred = cls_pred.flatten(1).permute(1, 0).contiguous() + # [4, H, W] -> [4, HW] -> [HW, 4] + reg_pred = reg_pred.flatten(1).permute(1, 0).contiguous() + box_pred = self.decode_bbox(reg_pred[None])[0] # [B, HW, 4] -> [HW, 4] + # normalize bbox + bboxes =
torch.clamp(box_pred / self.img_size, 0., 1.) + + # scores + scores = torch.sigmoid(obj_pred) * torch.softmax(cls_pred, dim=-1) + + # to cpu + scores = scores.to('cpu').numpy() + bboxes = bboxes.to('cpu').numpy() + + # post-process + bboxes, scores, cls_inds = self.postprocess(bboxes, scores) + + return bboxes, scores, cls_inds + + + def forward(self, x, targets=None): + if not self.trainable: + return self.inference_single_image(x) + else: + B = x.size(0) + C = self.num_classes + # backbone + x = self.backbone(x)[-1] + + # neck + x = self.neck(x) + + # head + cls_feat = self.cls_feat(x) + reg_feat = self.reg_feat(x) + + # pred + obj_pred = self.obj_pred(reg_feat) + cls_pred = self.cls_pred(cls_feat) + reg_pred = self.reg_pred(reg_feat) + + # [B, 1, H, W] -> [B, 1, HW] -> [B, HW, 1] + obj_pred =obj_pred.flatten(2).permute(0, 2, 1).contiguous() + # [B, C, H, W] -> [B, C, HW] -> [B, HW, C] + cls_pred =cls_pred.flatten(2).permute(0, 2, 1).contiguous() + # [B, 4, H, W] -> [B, 4, HW] -> [B, HW, 4] + reg_pred = reg_pred.flatten(2).permute(0, 2, 1).contiguous() + box_pred = self.decode_bbox(reg_pred) + # normalize bbox + box_pred = box_pred / self.img_size + + # compute giou between prediction bbox and target bbox + x1y1x2y2_pred = box_pred.view(-1, 4) + x1y1x2y2_gt = targets[..., 2:6].view(-1, 4) + + # giou: [B, HW,] + giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + + # we set giou as the target of the objectness + targets = torch.cat([0.5 * (giou_pred[..., None].clone().detach() + 1.0), targets], dim=-1) + + return obj_pred, cls_pred, giou_pred, targets diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov2.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov2.py new file mode 100644 index 0000000000..47d386c09a --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov2.py @@ -0,0 +1,271 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from utils import box_ops +from utils import criterion + +from ..basic.conv import Conv +from ..neck import build_neck +from ..backbone import build_backbone +import torch_npu + + + +class YOLOv2(nn.Module): + def __init__(self, + cfg=None, + device=None, + img_size=None, + num_classes=20, + trainable=False, + conf_thresh=0.001, + nms_thresh=0.6, + center_sample=False): + super(YOLOv2, self).__init__() + self.cfg = cfg + self.device = device + self.img_size = img_size + self.num_classes = num_classes + self.trainable = trainable + self.conf_thresh = conf_thresh + self.nms_thresh = nms_thresh + self.center_sample = center_sample + self.anchor_size = torch.tensor(cfg["anchor_size"]) # [KA, 2] + self.num_anchors = len(cfg["anchor_size"]) + + # backbone + self.backbone, feature_channels, strides = build_backbone(model_name=cfg['backbone'], + pretrained=trainable) + self.stride = [strides[-1]] + feature_dim = feature_channels[-1] + head_dim = 512 + + # build grid cell + self.grid_xy, self.anchor_wh = self.create_grid(img_size) + + # neck + self.neck = build_neck(model=cfg['neck'], in_ch=feature_dim, out_ch=head_dim) + + # head + self.cls_feat = nn.Sequential( + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1) + ) + self.reg_feat = nn.Sequential( + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1), + Conv(head_dim, head_dim, k=3, p=1, s=1) + ) + + # head + self.obj_pred = nn.Conv2d(head_dim, self.num_anchors * 1, kernel_size=1) + self.cls_pred 
= nn.Conv2d(head_dim, self.num_anchors * self.num_classes, kernel_size=1) + self.reg_pred = nn.Conv2d(head_dim, self.num_anchors * 4, kernel_size=1) + + if self.trainable: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) + nn.init.constant_(self.obj_pred.bias, bias_value) + + + def create_grid(self, img_size): + """img_size: [H, W]""" + img_h = img_w = img_size + # generate grid cells + fmp_h, fmp_w = img_h // self.stride[0], img_w // self.stride[0] + grid_y, grid_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)]) + # [H, W, 2] -> [HW, 2] + grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) + # [HW, 2] -> [1, HW, 1, 2] + grid_xy = grid_xy[None, :, None, :].to(self.device) + # [1, HW, 1, 2] + anchor_wh = self.anchor_size.repeat(fmp_h*fmp_w, 1, 1).unsqueeze(0).to(self.device) + + return grid_xy, anchor_wh + + + def set_grid(self, img_size): + self.grid_xy, self.anchor_wh = self.create_grid(img_size) + self.img_size = img_size + + + def nms(self, dets, scores): + """Pure Python NMS.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + # compute iou + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-14) + # keep only the boxes whose overlap (ovr) is below the NMS threshold + inds = np.where(ovr <= self.nms_thresh)[0] + order = order[inds + 1] + + return keep + + + def postprocess(self, bboxes, scores): + """ + bboxes: (N, 4), bsize = 1 + scores: (N, C), bsize = 1 + """ + + cls_inds = np.argmax(scores, axis=1) + scores = scores[(np.arange(scores.shape[0]), cls_inds)] + + # threshold + keep = np.where(scores >= self.conf_thresh) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + # NMS + keep = np.zeros(len(bboxes), dtype=int) + for i in range(self.num_classes): + inds = np.where(cls_inds == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + return bboxes, scores, cls_inds + + + def decode_bbox(self, reg_pred): + """reg_pred: [B, N, KA, 4]""" + B = reg_pred.size(0) + # txty -> cxcy + if self.center_sample: + xy_pred = (reg_pred[..., :2].sigmoid() * 2.0 - 1.0 + self.grid_xy) * self.stride[0] + else: + xy_pred = (reg_pred[..., :2].sigmoid() + self.grid_xy) * self.stride[0] + # twth -> wh + wh_pred = reg_pred[..., 2:].exp() * self.anchor_wh + xywh_pred = torch.cat([xy_pred, wh_pred], dim=-1).view(B, -1, 4) + # xywh -> x1y1x2y2 + x1y1_pred = xywh_pred[..., :2] - xywh_pred[..., 2:] / 2 + x2y2_pred = xywh_pred[..., :2] + xywh_pred[..., 2:] / 2 + box_pred = torch.cat([x1y1_pred, x2y2_pred], dim=-1) + + return box_pred + + + @torch.no_grad() + def inference_single_image(self, x): + KA = self.num_anchors + C = self.num_classes + # backbone + x = self.backbone(x)[-1] + + # neck + x = self.neck(x) + + # head + cls_feat = self.cls_feat(x) + reg_feat = self.reg_feat(x) + + # pred + obj_pred =
self.obj_pred(reg_feat)[0] + cls_pred = self.cls_pred(cls_feat)[0] + reg_pred = self.reg_pred(reg_feat)[0] + + # [KA*1, H, W] -> [H, W, KA*1] -> [HW*KA, 1] + obj_pred = obj_pred.permute(1, 2, 0).contiguous().view(-1, 1) + # [KA*C, H, W] -> [H, W, KA*C] -> [HW*KA, C] + cls_pred = cls_pred.permute(1, 2, 0).contiguous().view(-1, C) + # [KA*4, H, W] -> [H, W, KA*4] -> [HW, KA, 4] + reg_pred = reg_pred.permute(1, 2, 0).contiguous().view(-1, KA, 4) + # [HW, KA, 4] -> [HW*KA, 4] + box_pred = self.decode_bbox(reg_pred[None])[0] + # normalize bbox + bboxes = torch.clamp(box_pred / self.img_size, 0., 1.) + + # scores + scores = torch.sigmoid(obj_pred) * torch.softmax(cls_pred, dim=-1) + + # to cpu + scores = scores.to('cpu').numpy() + bboxes = bboxes.to('cpu').numpy() + + # post-process + bboxes, scores, cls_inds = self.postprocess(bboxes, scores) + + return bboxes, scores, cls_inds + + + def forward(self, x, targets=None): + if not self.trainable: + return self.inference_single_image(x) + else: + B = x.size(0) + KA = self.num_anchors + C = self.num_classes + # backbone + x = self.backbone(x)[-1] + + # neck + x = self.neck(x) + + # head + cls_feat = self.cls_feat(x) + reg_feat = self.reg_feat(x) + + # pred + obj_pred = self.obj_pred(reg_feat) + cls_pred = self.cls_pred(cls_feat) + reg_pred = self.reg_pred(reg_feat) + + # [B, KA*1, H, W] -> [B, H, W, KA*1] -> [B, H*W*KA, 1] + obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1) + # [B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, C) + # [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, KA, 4) + # [B, HW, KA, 4] -> [B, HW*KA, 4] + box_pred = self.decode_bbox(reg_pred) + # normalize bbox + box_pred = box_pred / self.img_size + + # compute giou between prediction bbox and target bbox + x1y1x2y2_pred = box_pred.view(-1, 4) + x1y1x2y2_gt = targets[..., 2:6].view(-1, 4) + + # giou: [B, HW,] + giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + + # we set giou as the target of the objectness + targets = torch.cat([0.5 * (giou_pred[..., None].clone().detach() + 1.0), targets], dim=-1) + + return obj_pred, cls_pred, giou_pred, targets diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov3.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov3.py new file mode 100644 index 0000000000..0b89db12b1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov3.py @@ -0,0 +1,327 @@ +import numpy as np +import torch +import torch.nn as nn + +from utils import box_ops + +from ..backbone import build_backbone +from ..neck import build_neck +from ..basic.conv import Conv, ConvBlocks +from ..basic.upsample import UpSample +import torch_npu + + +class YOLOv3(nn.Module): + def __init__(self, + cfg=None, + device=None, + img_size=640, + num_classes=80, + trainable=False, + conf_thresh=0.001, + nms_thresh=0.60, + center_sample=False): + + super(YOLOv3, self).__init__() + self.cfg = cfg + self.device = device + self.img_size = img_size + self.num_classes = num_classes + self.trainable = trainable + self.conf_thresh = conf_thresh + self.nms_thresh = nms_thresh + self.center_sample = center_sample + + # backbone + self.backbone, feature_channels, strides = build_backbone(model_name=cfg["backbone"], + pretrained=trainable) + self.stride = strides + anchor_size = cfg["anchor_size"] + # [S, KA, 2], S is equal to number of stride + self.anchor_size = 
torch.tensor(anchor_size).reshape(len(self.stride), len(anchor_size) // 3, 2).float() + self.num_anchors = self.anchor_size.size(1) + c3, c4, c5 = feature_channels + + # build grid cell + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + # head + # P3/8-small + self.head_convblock_0 = build_neck(model=cfg["neck"], in_ch=c5, out_ch=c5//2) + self.head_conv_0 = Conv(c5//2, c4//2, k=1) + self.head_upsample_0 = UpSample(scale_factor=2) + self.head_conv_1 = Conv(c5//2, c5, k=3, p=1) + + # P4/16-medium + self.head_convblock_1 = ConvBlocks(c4 + c4//2, c4//2) + self.head_conv_2 = Conv(c4//2, c3//2, k=1) + self.head_upsample_1 = UpSample(scale_factor=2) + self.head_conv_3 = Conv(c4//2, c4, k=3, p=1) + + # P8/32-large + self.head_convblock_2 = ConvBlocks(c3 + c3//2, c3//2) + self.head_conv_4 = Conv(c3//2, c3, k=3, p=1) + + # det conv + self.head_det_1 = nn.Conv2d(c3, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_2 = nn.Conv2d(c4, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_3 = nn.Conv2d(c5, self.num_anchors * (1 + self.num_classes + 4), 1) + + + if self.trainable: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) + nn.init.constant_(self.head_det_1.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_2.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_3.bias[..., :self.num_anchors], bias_value) + + + def create_grid(self, img_size): + total_grid_xy = [] + total_anchor_wh = [] + w, h = img_size, img_size + for ind, s in enumerate(self.stride): + # generate grid cells + fmp_w, fmp_h = w // s, h // s + grid_y, grid_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)]) + # [H, W, 2] -> [HW, 2] + grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) + # [HW, 2] -> [1, HW, 1, 2] + grid_xy = grid_xy[None, :, None, :].to(self.device) + # [1, HW, 1, 2] + anchor_wh = self.anchor_size[ind].repeat(fmp_h*fmp_w, 1, 1).unsqueeze(0).to(self.device) + + total_grid_xy.append(grid_xy) + total_anchor_wh.append(anchor_wh) + + return total_grid_xy, total_anchor_wh + + + def set_grid(self, img_size): + self.img_size = img_size + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + + def nms(self, dets, scores): + """"Pure Python NMS YOLOv4.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + # compute iou + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-14) + #reserve all the boundingbox whose ovr less than thresh + inds = np.where(ovr <= self.nms_thresh)[0] + order = order[inds + 1] + + return keep + + + def postprocess(self, bboxes, scores): + """ + bboxes: (N, 4), bsize = 1 + scores: (N, C), bsize = 1 + """ + + cls_inds = np.argmax(scores, axis=1) + scores = scores[(np.arange(scores.shape[0]), cls_inds)] + + # threshold + keep = np.where(scores >= self.conf_thresh) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + # NMS + keep = np.zeros(len(bboxes), dtype=np.int) + for i in range(self.num_classes): + 
inds = np.where(cls_inds == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + return bboxes, scores, cls_inds + + + @torch.no_grad() + def inference_single_image(self, x): + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # head + # p5/32 + p5 = self.head_convblock_0(c5) + p5_up = self.head_upsample_0(self.head_conv_0(p5)) + p5 = self.head_conv_1(p5) + + # p4/16 + p4 = self.head_convblock_1(torch.cat([c4, p5_up], dim=1)) + p4_up = self.head_upsample_1(self.head_conv_2(p4)) + p4 = self.head_conv_3(p4) + + # P3/8 + p3 = self.head_convblock_2(torch.cat([c3, p4_up], dim=1)) + p3 = self.head_conv_4(p3) + + # det + pred_s = self.head_det_1(p3)[0] + pred_m = self.head_det_2(p4)[0] + pred_l = self.head_det_3(p5)[0] + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in enumerate(preds): + # [KA*(1 + C + 4), H, W] -> [KA*1, H, W] -> [H, W, KA*1] -> [HW*KA, 1] + obj_pred_i = pred[:KA, :, :].permute(1, 2, 0).contiguous().view(-1, 1) + # [KA*(1 + C + 4), H, W] -> [KA*C, H, W] -> [H, W, KA*C] -> [HW*KA, C] + cls_pred_i = pred[KA:KA*(1+C), :, :].permute(1, 2, 0).contiguous().view(-1, C) + # [KA*(1 + C + 4), H, W] -> [KA*4, H, W] -> [H, W, KA*4] -> [HW, KA, 4] + reg_pred_i = pred[KA*(1+C):, :, :].permute(1, 2, 0).contiguous().view(-1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[None, ..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1)[0].view(-1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=0) + cls_pred = torch.cat(cls_pred_list, dim=0) + box_pred = torch.cat(box_pred_list, dim=0) + + # normalize bbox + bboxes = torch.clamp(box_pred / self.img_size, 0., 1.) 
+ + # scores + scores = torch.sigmoid(obj_pred) * torch.softmax(cls_pred, dim=-1) + + # to cpu + scores = scores.to('cpu').numpy() + bboxes = bboxes.to('cpu').numpy() + + # post-process + bboxes, scores, cls_inds = self.postprocess(bboxes, scores) + + return bboxes, scores, cls_inds + + + def forward(self, x, targets=None): + if not self.trainable: + return self.inference_single_image(x) + else: + B = x.size(0) + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # head + # p5/32 + p5 = self.head_convblock_0(c5) + p5_up = self.head_upsample_0(self.head_conv_0(p5)) + p5 = self.head_conv_1(p5) + + # p4/16 + p4 = self.head_convblock_1(torch.cat([c4, p5_up], dim=1)) + p4_up = self.head_upsample_1(self.head_conv_2(p4)) + p4 = self.head_conv_3(p4) + + # P3/8 + p3 = self.head_convblock_2(torch.cat([c3, p4_up], dim=1)) + p3 = self.head_conv_4(p3) + + # det + pred_s = self.head_det_1(p3) + pred_m = self.head_det_2(p4) + pred_l = self.head_det_3(p5) + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in enumerate(preds): + # [B, KA*(1 + C + 4), H, W] -> [B, KA, H, W] -> [B, H, W, KA] -> [B, HW*KA, 1] + obj_pred_i = pred[:, :KA, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, 1) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_pred_i = pred[:, KA:KA*(1+C), :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, C) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred_i = pred[:, KA*(1+C):, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1).view(B, -1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=1) + cls_pred = torch.cat(cls_pred_list, dim=1) + box_pred = torch.cat(box_pred_list, dim=1) + + # normalize bbox + box_pred = box_pred / self.img_size + + # compute giou between prediction bbox and target bbox + x1y1x2y2_pred = box_pred.view(-1, 4) + x1y1x2y2_gt = targets[..., 2:6].view(-1, 4) + + # giou: [B, HW,] + giou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + + # we set giou as the target of the objectness + targets = torch.cat([0.5 * (giou_pred[..., None].clone().detach() + 1.0), targets], dim=-1) + + return obj_pred, cls_pred, giou_pred, targets diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov4.py b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov4.py new file mode 100644 index 0000000000..67d5aa1e6f --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/models/yolo/yolov4.py @@ -0,0 +1,345 @@ +import numpy as np +import torch +import torch.nn as nn + +from utils import box_ops + +from ..backbone import build_backbone +from ..neck import build_neck +from ..basic.conv import Conv +from ..basic.upsample import UpSample +from ..basic.bottleneck_csp import BottleneckCSP +import torch_npu + + +class YOLOv4(nn.Module): + def __init__(self, + cfg=None, + device=None, + img_size=640, 
+ num_classes=80, + trainable=False, + conf_thresh=0.001, + nms_thresh=0.60, + center_sample=False): + + super(YOLOv4, self).__init__() + self.cfg = cfg + self.device = device + self.img_size = img_size + self.num_classes = num_classes + self.trainable = trainable + self.conf_thresh = conf_thresh + self.nms_thresh = nms_thresh + self.center_sample = center_sample + + # backbone + self.backbone, feature_channels, strides = build_backbone(model_name=cfg["backbone"], + pretrained=trainable) + self.stride = strides + anchor_size = cfg["anchor_size"] + self.anchor_size = torch.tensor(anchor_size).reshape(len(self.stride), len(anchor_size) // 3, 2).float() + self.num_anchors = self.anchor_size.size(1) + c3, c4, c5 = feature_channels + + # build grid cell + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + # head + self.head_conv_0 = build_neck(model=cfg["neck"], in_ch=c5, out_ch=c5//2) # 10 + self.head_upsample_0 = UpSample(scale_factor=2) + self.head_csp_0 = BottleneckCSP(c4 + c5//2, c4, n=3, shortcut=False) + + # P3/8-small + self.head_conv_1 = Conv(c4, c4//2, k=1) # 14 + self.head_upsample_1 = UpSample(scale_factor=2) + self.head_csp_1 = BottleneckCSP(c3 + c4//2, c3, n=3, shortcut=False) + + # P4/16-medium + self.head_conv_2 = Conv(c3, c3, k=3, p=1, s=2) + self.head_csp_2 = BottleneckCSP(c3 + c4//2, c4, n=3, shortcut=False) + + # P8/32-large + self.head_conv_3 = Conv(c4, c4, k=3, p=1, s=2) + self.head_csp_3 = BottleneckCSP(c4 + c5//2, c5, n=3, shortcut=False) + + # det conv + self.head_det_1 = nn.Conv2d(c3, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_2 = nn.Conv2d(c4, self.num_anchors * (1 + self.num_classes + 4), 1) + self.head_det_3 = nn.Conv2d(c5, self.num_anchors * (1 + self.num_classes + 4), 1) + + if self.trainable: + # init bias + self.init_bias() + + + def init_bias(self): + # init bias + init_prob = 0.01 + bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) + nn.init.constant_(self.head_det_1.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_2.bias[..., :self.num_anchors], bias_value) + nn.init.constant_(self.head_det_3.bias[..., :self.num_anchors], bias_value) + + + def create_grid(self, img_size): + total_grid_xy = [] + total_anchor_wh = [] + w, h = img_size, img_size + for ind, s in enumerate(self.stride): + # generate grid cells + fmp_w, fmp_h = w // s, h // s + grid_y, grid_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)]) + # [H, W, 2] -> [HW, 2] + grid_xy = torch.stack([grid_x, grid_y], dim=-1).float().view(-1, 2) + # [HW, 2] -> [1, HW, 1, 2] + grid_xy = grid_xy[None, :, None, :].to(self.device) + # [1, HW, 1, 2] + anchor_wh = self.anchor_size[ind].repeat(fmp_h*fmp_w, 1, 1).unsqueeze(0).to(self.device) + + total_grid_xy.append(grid_xy) + total_anchor_wh.append(anchor_wh) + + return total_grid_xy, total_anchor_wh + + + def set_grid(self, img_size): + self.img_size = img_size + self.grid_cell, self.anchors_wh = self.create_grid(img_size) + + + def nms(self, dets, scores): + """"Pure Python NMS.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + # compute iou + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter + 1e-14) + #reserve all the boundingbox whose ovr less than thresh + inds = np.where(ovr <= self.nms_thresh)[0] + order = order[inds + 1] + + return keep + + + def postprocess(self, bboxes, scores): + """ + bboxes: (HxW, 4), bsize = 1 + scores: (HxW, num_classes), bsize = 1 + """ + + cls_inds = np.argmax(scores, axis=1) + scores = scores[(np.arange(scores.shape[0]), cls_inds)] + + # threshold + keep = np.where(scores >= self.conf_thresh) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + # NMS + keep = np.zeros(len(bboxes), dtype=np.int) + for i in range(self.num_classes): + inds = np.where(cls_inds == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + cls_inds = cls_inds[keep] + + return bboxes, scores, cls_inds + + + @torch.no_grad() + def inference_single_image(self, x): + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # FPN + PAN + # head + c6 = self.head_conv_0(c5) + c7 = self.head_upsample_0(c6) # s32->s16 + c8 = torch.cat([c7, c4], dim=1) + c9 = self.head_csp_0(c8) + # P3/8 + c10 = self.head_conv_1(c9) + c11 = self.head_upsample_1(c10) # s16->s8 + c12 = torch.cat([c11, c3], dim=1) + c13 = self.head_csp_1(c12) # to det + # p4/16 + c14 = self.head_conv_2(c13) + c15 = torch.cat([c14, c10], dim=1) + c16 = self.head_csp_2(c15) # to det + # p5/32 + c17 = self.head_conv_3(c16) + c18 = torch.cat([c17, c6], dim=1) + c19 = self.head_csp_3(c18) # to det + + # det + pred_s = self.head_det_1(c13)[0] + pred_m = self.head_det_2(c16)[0] + pred_l = self.head_det_3(c19)[0] + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in 
enumerate(preds): + # [KA*(1 + C + 4), H, W] -> [KA*1, H, W] -> [H, W, KA*1] -> [HW*KA, 1] + obj_pred_i = pred[:KA, :, :].permute(1, 2, 0).contiguous().view(-1, 1) + # [KA*(1 + C + 4), H, W] -> [KA*C, H, W] -> [H, W, KA*C] -> [HW*KA, C] + cls_pred_i = pred[KA:KA*(1+C), :, :].permute(1, 2, 0).contiguous().view(-1, C) + # [KA*(1 + C + 4), H, W] -> [KA*4, H, W] -> [H, W, KA*4] -> [HW, KA, 4] + reg_pred_i = pred[KA*(1+C):, :, :].permute(1, 2, 0).contiguous().view(-1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() * 2.0 - 1.0 + self.grid_cell[i]) * self.stride[i] + else: + xy_pred_i = (reg_pred_i[None, ..., :2].sigmoid() + self.grid_cell[i]) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[None, ..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1)[0].view(-1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=0) + cls_pred = torch.cat(cls_pred_list, dim=0) + box_pred = torch.cat(box_pred_list, dim=0) + + # normalize bbox + bboxes = torch.clamp(box_pred / self.img_size, 0., 1.) + + # scores + scores = torch.sigmoid(obj_pred) * torch.softmax(cls_pred, dim=-1) + + # to cpu + scores = scores.to('cpu').numpy() + bboxes = bboxes.to('cpu').numpy() + + # post-process + bboxes, scores, cls_inds = self.postprocess(bboxes, scores) + + return bboxes, scores, cls_inds + + + def forward(self, x, targets=None): + if not self.trainable: + return self.inference_single_image(x) + else: + B = x.size(0) + KA = self.num_anchors + C = self.num_classes + # backbone + c3, c4, c5 = self.backbone(x) + + # FPN + PAN + # head + c6 = self.head_conv_0(c5) + c7 = self.head_upsample_0(c6) # s32->s16 + c8 = torch.cat([c7, c4], dim=1) + c9 = self.head_csp_0(c8) + # P3/8 + c10 = self.head_conv_1(c9) + c11 = self.head_upsample_1(c10) # s16->s8 + c12 = torch.cat([c11, c3], dim=1) + c13 = self.head_csp_1(c12) # to det + # p4/16 + c14 = self.head_conv_2(c13) + c15 = torch.cat([c14, c10], dim=1) + c16 = self.head_csp_2(c15) # to det + # p5/32 + c17 = self.head_conv_3(c16) + c18 = torch.cat([c17, c6], dim=1) + c19 = self.head_csp_3(c18) # to det + + # det + pred_s = self.head_det_1(c13) + pred_m = self.head_det_2(c16) + pred_l = self.head_det_3(c19) + + preds = [pred_s, pred_m, pred_l] + obj_pred_list = [] + cls_pred_list = [] + box_pred_list = [] + + for i, pred in enumerate(preds): + # [B, KA*(1 + C + 4), H, W] -> [B, KA, H, W] -> [B, H, W, KA] -> [B, HW*KA, 1] + obj_pred_i = pred[:, :KA, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, 1) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*C, H, W] -> [B, H, W, KA*C] -> [B, H*W*KA, C] + cls_pred_i = pred[:, KA:KA*(1+C), :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, C) + # [B, KA*(1 + C + 4), H, W] -> [B, KA*4, H, W] -> [B, H, W, KA*4] -> [B, HW, KA, 4] + reg_pred_i = pred[:, KA*(1+C):, :, :].permute(0, 2, 3, 1).contiguous().view(B, -1, KA, 4) + # txty -> xy + if self.center_sample: + xy_pred_i = (self.grid_cell[i] + reg_pred_i[..., :2].sigmoid() * 2.0 - 1.0) * self.stride[i] + else: + xy_pred_i = (self.grid_cell[i] + reg_pred_i[..., :2].sigmoid()) * self.stride[i] + # twth -> wh + wh_pred_i = reg_pred_i[..., 2:].exp() * self.anchors_wh[i] + # xywh -> x1y1x2y2 + x1y1_pred_i = xy_pred_i - wh_pred_i * 0.5 + x2y2_pred_i = xy_pred_i + wh_pred_i * 0.5 + box_pred_i = 
torch.cat([x1y1_pred_i, x2y2_pred_i], dim=-1).view(B, -1, 4) + + obj_pred_list.append(obj_pred_i) + cls_pred_list.append(cls_pred_i) + box_pred_list.append(box_pred_i) + + obj_pred = torch.cat(obj_pred_list, dim=1) + cls_pred = torch.cat(cls_pred_list, dim=1) + box_pred = torch.cat(box_pred_list, dim=1) + + # normalize bbox + box_pred = box_pred / self.img_size + + # compute giou between prediction bbox and target bbox + x1y1x2y2_pred = box_pred.view(-1, 4) + x1y1x2y2_gt = targets[..., 2:6].view(-1, 4) + + # iou: [B, HW,] + if self.cfg['loss_box'] == 'iou': + iou_pred = box_ops.iou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + obj_tgt = iou_pred[..., None].clone().detach().clamp(0.) # [0, 1] + elif self.cfg['loss_box'] == 'giou': + iou_pred = box_ops.giou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + obj_tgt = 0.5 * (iou_pred[..., None].clone().detach() + 1.0) # [-1, 1] -> [0, 1] + elif self.cfg['loss_box'] == 'ciou': + iou_pred = box_ops.ciou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B) + obj_tgt = iou_pred[..., None].clone().detach().clamp(0.) # [0, 1] + + # we set iou as the target of the objectness + targets = torch.cat([obj_tgt, targets], dim=-1) + + return obj_pred, cls_pred, iou_pred, targets diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/requirements.txt b/PyTorch/contrib/cv/detection/YoloV2-640/requirements.txt new file mode 100644 index 0000000000..4c64801e7f --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/requirements.txt @@ -0,0 +1,19 @@ +torch==1.8.1 + +torch_npu==1.8.1 + +torchvision==0.9.1 + +opencv-python + +thop + +scipy + +matplotlib + +numpy + +pycocotools + +timm diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/test.py b/PyTorch/contrib/cv/detection/YoloV2-640/test.py new file mode 100644 index 0000000000..9c58c234f5 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/test.py @@ -0,0 +1,233 @@ +import argparse +import cv2 +import os +import time +import numpy as np +import torch + +from config.yolo_config import yolo_config +from data.voc import VOC_CLASSES, VOCDetection +from data.coco import coco_class_index, coco_class_labels, COCODataset +from data.transforms import ValTransforms +from utils.misc import TestTimeAugmentation + +from models.yolo import build_model +import torch_npu + + +parser = argparse.ArgumentParser(description='YOLO Detection') +# basic +parser.add_argument('-size', '--img_size', default=640, type=int, + help='img_size') +parser.add_argument('--show', action='store_true', default=False, + help='show the visulization results.') +parser.add_argument('-vs', '--visual_threshold', default=0.35, type=float, + help='Final confidence threshold') +parser.add_argument('--cuda', action='store_true', default=False, + help='use cuda.') +parser.add_argument('--save_folder', default='det_results/', type=str, + help='Dir to save results') +# model +parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') +parser.add_argument('--weight', default='weight/', + type=str, help='Trained state_dict file path to open') +parser.add_argument('--conf_thresh', default=0.1, type=float, + help='NMS threshold') +parser.add_argument('--nms_thresh', default=0.45, type=float, + help='NMS threshold') +parser.add_argument('--center_sample', action='store_true', default=False, + help='center sample trick.') +# dataset +parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') +parser.add_argument('-d', '--dataset', 
default='coco', + help='coco.') +# TTA +parser.add_argument('-tta', '--test_aug', action='store_true', default=False, + help='use test augmentation.') + +args = parser.parse_args() + + + +def plot_bbox_labels(img, bbox, label=None, cls_color=None, text_scale=0.4): + x1, y1, x2, y2 = bbox + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + t_size = cv2.getTextSize(label, 0, fontScale=1, thickness=2)[0] + # plot bbox + cv2.rectangle(img, (x1, y1), (x2, y2), cls_color, 2) + + if label is not None: + # plot title bbox + cv2.rectangle(img, (x1, y1-t_size[1]), (int(x1 + t_size[0] * text_scale), y1), cls_color, -1) + # put the test on the title bbox + cv2.putText(img, label, (int(x1), int(y1 - 5)), 0, text_scale, (0, 0, 0), 1, lineType=cv2.LINE_AA) + + return img + + +def visualize(img, + bboxes, + scores, + cls_inds, + vis_thresh, + class_colors, + class_names, + class_indexs=None, + dataset_name='voc'): + ts = 0.4 + for i, bbox in enumerate(bboxes): + if scores[i] > vis_thresh: + cls_id = int(cls_inds[i]) + if dataset_name == 'coco': + cls_color = class_colors[cls_id] + cls_id = class_indexs[cls_id] + else: + cls_color = class_colors[cls_id] + + if len(class_names) > 1: + mess = '%s: %.2f' % (class_names[cls_id], scores[i]) + else: + cls_color = [255, 0, 0] + mess = None + img = plot_bbox_labels(img, bbox, mess, cls_color, text_scale=ts) + + return img + + +def test(args, + net, + device, + dataset, + transforms=None, + vis_thresh=0.4, + class_colors=None, + class_names=None, + class_indexs=None, + show=False, + test_aug=None, + dataset_name='coco'): + num_images = len(dataset) + save_path = os.path.join('det_results/', args.dataset, args.model) + os.makedirs(save_path, exist_ok=True) + + for index in range(num_images): + print('Testing image {:d}/{:d}....'.format(index+1, num_images)) + image, _ = dataset.pull_image(index) + + h, w, _ = image.shape + size = np.array([[w, h, w, h]]) + + # prepare + x, _, _, scale, offset = transforms(image) + x = x.unsqueeze(0).to(device) + + t0 = time.time() + # forward + # test augmentation: + if test_aug is not None: + bboxes, scores, cls_inds = test_aug(x, net) + else: + # inference + bboxes, scores, cls_inds = net(x) + print("detection time used ", time.time() - t0, "s") + + # rescale + bboxes -= offset + bboxes /= scale + bboxes *= size + + # vis detection + img_processed = visualize( + img=image, + bboxes=bboxes, + scores=scores, + cls_inds=cls_inds, + vis_thresh=vis_thresh, + class_colors=class_colors, + class_names=class_names, + class_indexs=class_indexs, + dataset_name=dataset_name + ) + if show: + cv2.imshow('detection', img_processed) + cv2.waitKey(0) + # save result + cv2.imwrite(os.path.join(save_path, str(index).zfill(6) +'.jpg'), img_processed) + + +if __name__ == '__main__': + args = parser.parse_args() + # cuda + if args.cuda: + print('use cuda') + device = torch.device("npu") + else: + device = torch.device("cpu") + + model_name = args.model + print('Model: ', model_name) + + # dataset and evaluator + if args.dataset == 'voc': + data_dir = os.path.join(args.root, 'VOCdevkit') + class_names = VOC_CLASSES + class_indexs = None + num_classes = 20 + dataset = VOCDetection( + data_dir=data_dir, + img_size=args.img_size, + image_sets=[('2007', 'test')]) + + elif args.dataset == 'coco': + data_dir = os.path.join(args.root, 'COCO') + class_names = coco_class_labels + class_indexs = coco_class_index + num_classes = 80 + dataset = COCODataset( + data_dir=data_dir, + img_size=args.img_size, + image_set='val2017') + + else: + print('unknow dataset 
!! Only support voc and coco !!') + exit(0) + + np.random.seed(0) + class_colors = [(np.random.randint(255), + np.random.randint(255), + np.random.randint(255)) for _ in range(num_classes)] + + # YOLO Config + cfg = yolo_config[args.model] + # build model + model = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=False) + + # load weight + model.load_state_dict(torch.load(args.weight, map_location='cpu'), strict=False) + model = model.to(device).eval() + print('Finished loading model!') + + # TTA + test_aug = TestTimeAugmentation(num_classes=num_classes) if args.test_aug else None + + + # run + test(args=args, + net=model, + device=device, + dataset=dataset, + transforms=ValTransforms(args.img_size), + vis_thresh=args.visual_threshold, + class_colors=class_colors, + class_names=class_names, + class_indexs=class_indexs, + show=args.show, + test_aug=test_aug, + dataset_name=args.dataset) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh new file mode 100644 index 0000000000..b0d155ece7 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh @@ -0,0 +1,13 @@ +python3 train3.py \ + --npu \ + -d coco \ + -m yolov2 \ + --root /home/normal58/zhang/zzb_msft \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh new file mode 100644 index 0000000000..8c1bcdb666 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh @@ -0,0 +1,96 @@ +#!/bin/bash +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
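+ # after leaving the test directory, refresh cur_path so it points one level up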
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +#集合通信参数,不需要修改 +export RANK_SIZE=8 +RANK_ID_START=0 +export WORLD_SIZE=8 +#训练开始时间,不需要修改 +start_time=$(date +%s) +#训练batch_size,,需要模型审视修改 +batch_size=32 +#设置环境变量,不需要修改 +RANK_ID=0 +echo "Decive ID: $RANK_ID" +export RANK_ID=$RANK_ID +export ASCEND_DEVICE_ID=$RANK_ID +ASCEND_DEVICE_ID=$RANK_ID +#创建DeviceID输出目录,不需要修改 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +fi +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +export RANK_SIZE=8 + +KERNEL_NUM=$(($(nproc)/8)) +for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7 -m torch.distributed.launch --nproc_per_node=8 train8p.py \ + --npu \ + -d coco \ + -m yolov2 \ + --root /forDocker/dataset \ + --batch_size 32 \ + --lr 0.002 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ + -dist \ + --sybn \ + --num_gpu 8 \ + --local_rank 0 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + +#8p情况下仅0卡(主节点)有完整日志,因此后续日志提取仅涉及0卡 +ASCEND_DEVICE_ID=0 + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +time=`grep -a 'Epoch ' $test_path_dir/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "time: " '{print $2}'|awk -F "," '{print $1}'|awk 'END {print}'|sed 's/.$//'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${RANK_SIZE}'*'${batch_size}'/'${time}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py b/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py new file mode 100644 index 0000000000..4a50a26de8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py @@ -0,0 +1,545 @@ +from __future__ import division + +import os +import argparse +import time +import math +import random +from copy import deepcopy +import apex +from apex import amp +import torch +import torch_npu +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +import sys +from config.yolo_config import yolo_config +from data.voc import VOCDetection +from data.coco import COCODataset +from data.transforms import TrainTransforms, ColorTransforms, ValTransforms + +from utils import distributed_utils +from utils import create_labels +from utils.vis import vis_data, vis_targets +from utils.com_flops_params import FLOPs_and_Params +from utils.criterion import build_criterion +from utils.misc import detection_collate +from utils.misc import ModelEMA +from utils.criterion import build_criterion + +from models.yolo import build_model + +from evaluator.cocoapi_evaluator import COCOAPIEvaluator +from evaluator.vocapi_evaluator import VOCAPIEvaluator + +def parse_args(): + parser = argparse.ArgumentParser(description='YOLO Detection') + # basic + parser.add_argument('--npu', action='store_true', default=False, + help='use npu.') + parser.add_argument('--batch_size', default=16, type=int, + help='Batch size for training') + parser.add_argument('--lr', default=1e-3, type=float, + help='initial learning rate') + parser.add_argument('--img_size', type=int, default=640, + help='The upper 
bound of warm-up') + parser.add_argument('--multi_scale_range', nargs='+', default=[10, 20], type=int, + help='lr epoch to decay') + parser.add_argument('--max_epoch', type=int, default=200, + help='The upper bound of warm-up') + parser.add_argument('--lr_epoch', nargs='+', default=[100, 150], type=int, + help='lr epoch to decay') + parser.add_argument('--wp_epoch', type=int, default=2, + help='The upper bound of warm-up') + parser.add_argument('--start_epoch', type=int, default=0, + help='start epoch to train') + parser.add_argument('-r', '--resume', default=None, type=str, + help='keep training') + parser.add_argument('--num_workers', default=8, type=int, + help='Number of workers used in dataloading') + parser.add_argument('--num_gpu', default=1, type=int, + help='Number of GPUs to train') + parser.add_argument('--eval_epoch', type=int, + default=10, help='interval between evaluations') + parser.add_argument('--tfboard', action='store_true', default=False, + help='use tensorboard') + parser.add_argument('--save_folder', default='weights/', type=str, + help='path to save weight') + parser.add_argument('--vis_data', action='store_true', default=False, + help='visualize images and labels.') + parser.add_argument('--vis_targets', action='store_true', default=False, + help='visualize assignment.') + + # Optimizer & Schedule + parser.add_argument('--optimizer', default='NpuFusedSGD', type=str, + help='sgd, adamw') + parser.add_argument('--lr_schedule', default='step', type=str, + help='step, cos') + parser.add_argument('--grad_clip', default=None, type=float, + help='clip gradient') + + # model + parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') + parser.add_argument('--conf_thresh', default=0.001, type=float, + help='NMS threshold') + parser.add_argument('--nms_thresh', default=0.5, type=float, + help='NMS threshold') + + # dataset + parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') + parser.add_argument('-d', '--dataset', default='coco', + help='coco, widerface, crowdhuman') + + # Loss + parser.add_argument('--loss_obj_weight', default=1.0, type=float, + help='weight of obj loss') + parser.add_argument('--loss_cls_weight', default=1.0, type=float, + help='weight of cls loss') + parser.add_argument('--loss_reg_weight', default=1.0, type=float, + help='weight of reg loss') + parser.add_argument('--scale_loss', default='batch', type=str, + help='scale loss: batch or positive samples') + + # train trick + parser.add_argument('--no_warmup', action='store_true', default=False, + help='do not use warmup') + parser.add_argument('-ms', '--multi_scale', action='store_true', default=False, + help='use multi-scale trick') + parser.add_argument('--ema', action='store_true', default=False, + help='use ema training trick') + parser.add_argument('--mosaic', action='store_true', default=False, + help='use Mosaic Augmentation trick') + parser.add_argument('--mixup', action='store_true', default=False, + help='use MixUp Augmentation trick') + parser.add_argument('--multi_anchor', action='store_true', default=False, + help='use multiple anchor boxes as the positive samples') + parser.add_argument('--center_sample', action='store_true', default=False, + help='use center sample for labels') + parser.add_argument('--accumulate', type=int, default=1, + help='accumulate gradient') + # DDP train + parser.add_argument('-dist', '--distributed', action='store_true', default=False, + 
help='distributed training') + parser.add_argument('--local_rank', type=int, default=0, + help='local_rank') + parser.add_argument('--sybn', action='store_true', default=False, + help='use sybn.') + parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default O1') + + return parser.parse_args() + + +def train(): + args = parse_args() + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12345' + + # torch.npu.set_compile_mode(jit_compile=False) + option = {} + option["ACL_OP_COMPILER_CACHE_MODE"]="enable" + option["ACL_OP_COMPILER_CACHE_DIR"]="./kernel_meta" + option["NPU_FUZZY_COMPILE_BLACKLIST"] = "Maximum,Conv2D,BNInfer,BNTrainingReduceGrad,Cast" + print("option:",option) + # torch.npu.set_option(option) + print("Setting Arguments.. : ", args) + print("----------------------------------------------------------") + + # path to save model + path_to_save = os.path.join(args.save_folder, args.dataset, args.model) + os.makedirs(path_to_save, exist_ok=True) + + # set distributed + local_rank = 0 + if args.distributed: + dist.init_process_group(backend="hccl", #init_method="env://" + ) + local_rank = torch.distributed.get_rank() + print(local_rank) + torch_npu.npu.set_device(local_rank) + + # cuda + if args.npu: + print('use npu') + cudnn.benchmark = True + device = torch.device("npu") + else: + device = torch.device("cpu") + + # YOLO config + cfg = yolo_config[args.model] + train_size = val_size = args.img_size + + # dataset and evaluator + dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) + # dataloader + dataloader = build_dataloader(args, dataset, detection_collate) + # criterioin + criterion = build_criterion(args, cfg, num_classes) + + print('Training model on:', args.dataset) + print('The dataset size:', len(dataset)) + print("----------------------------------------------------------") + + # build model + net = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=True) + model = net + + # SyncBatchNorm + # if args.sybn and args.npu and args.num_gpu > 1: + # print('use SyncBatchNorm ...') + # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model = model.to(device).train() + # compute FLOPs and Params + # if local_rank == 0: + # model_copy = deepcopy(model) + # model_copy.trainable = False + # model_copy.eval() + # FLOPs_and_Params(model=model_copy, size=train_size) + # model_copy.trainable = True + # model_copy.train() + # keep training + if args.resume is not None: + print('keep training model: %s' % (args.resume)) + model.load_state_dict(torch.load(args.resume, map_location=device)) + + # EMA + ema = ModelEMA(model) if args.ema else None + # use tfboard + tblogger = None + if args.tfboard: + print('use tensorboard') + from torch.utils.tensorboard import SummaryWriter + c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + log_path = os.path.join('log/', args.dataset, c_time) + os.makedirs(log_path, exist_ok=True) + + tblogger = SummaryWriter(log_path) + # optimizer setup + base_lr = args.lr + tmp_lr = args.lr + if args.optimizer == 'NpuFusedSGD': + print('use SGD with momentum ...') + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.9) + # optimizer = optim.SGD(model.parameters(), + # lr=tmp_lr, + # momentum=0.9, + # weight_decay=5e-4) + elif args.optimizer == 'adamw': + print('use AdamW ...') + optimizer = optim.AdamW(model.parameters(), + lr=tmp_lr, + weight_decay=5e-4) + + model, 
optimizer = amp.initialize(model, optimizer, opt_level='O1', loss_scale=128.0,combine_grad=True) + + # DDP + if args.distributed and args.num_gpu > 1: + print('using DDP ...') + model = DDP(model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=False) + + + + + batch_size = args.batch_size + epoch_size = len(dataset) // (batch_size * args.num_gpu) + best_map = -100. + warmup = not args.no_warmup + + t0 = time.time() + # start training loop + for epoch in range(args.start_epoch, args.max_epoch): + if args.distributed: + dataloader.sampler.set_epoch(epoch) + + # use step lr decay + if args.lr_schedule == 'step': + if epoch in args.lr_epoch: + tmp_lr = tmp_lr * 0.1 + set_lr(optimizer, tmp_lr) + # use cos lr decay + elif args.lr_schedule == 'cos' and not warmup: + T_max = args.max_epoch - 15 + lr_min = base_lr * 0.1 * 0.1 + if epoch > T_max: + # Cos decay is done + print('Cosine annealing is over !!') + args.lr_schedule == None + tmp_lr = lr_min + set_lr(optimizer, tmp_lr) + else: + tmp_lr = lr_min + 0.5*(base_lr - lr_min)*(1 + math.cos(math.pi*epoch / T_max)) + set_lr(optimizer, tmp_lr) + fps_sum=0 + # train one epoch + # pre_flag = False + # start_time = time.time() + for iter_i, (images, targets) in enumerate(dataloader): + # if iter_i == 5: + # start_time = time.time() + # with torch.autograd.profiler.profile(use_npu=True) as prof: + ni = iter_i + epoch * epoch_size + # warmup + if epoch < args.wp_epoch and warmup: + nw = args.wp_epoch * epoch_size + tmp_lr = base_lr * pow(ni / nw, 4) + set_lr(optimizer, tmp_lr) + + elif epoch == args.wp_epoch and iter_i == 0 and warmup: + # warmup is over + print('Warmup is over !!') + warmup = False + tmp_lr = base_lr + set_lr(optimizer, tmp_lr) + + # multi-scale trick + if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: + # randomly choose a new size + r = args.multi_scale_range + train_size = random.randint(r[0], r[1]) * 32 + model.set_grid(train_size) + if args.multi_scale: + # interpolate + images = torch.nn.functional.interpolate( + input=images, + size=train_size, + mode='bilinear', + align_corners=False) + + targets = [label.tolist() for label in targets] + # visualize target + if args.vis_data: + vis_data(images, targets) + continue + # make labels + targets = create_labels.gt_creator( + img_size=train_size, + strides=net.stride, + label_lists=targets, + anchor_size=cfg["anchor_size"], + multi_anchor=args.multi_anchor, + center_sample=args.center_sample) + # visualize assignment + if args.vis_targets: + vis_targets(images, targets, cfg["anchor_size"], net.stride) + continue + + # to device + images = images.to(device) + targets = targets.to(device) + + # inference + pred_obj, pred_cls, pred_iou, targets = model(images, targets=targets) + + # compute loss + loss_obj, loss_cls, loss_reg, total_loss = criterion(pred_obj, pred_cls, pred_iou, targets) + + # check loss + if torch.isnan(total_loss): + continue + + loss_dict = dict( + loss_obj=loss_obj, + loss_cls=loss_cls, + loss_reg=loss_reg, + total_loss=total_loss + ) + loss_dict_reduced = distributed_utils.reduce_loss_dict(loss_dict) + + total_loss = total_loss / args.accumulate + # Backward and Optimize + with amp.scale_loss(total_loss , optimizer) as scaled_loss: + scaled_loss.backward() + if ni % args.accumulate == 0: + if args.grad_clip is not None: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) + optimizer.step() + optimizer.zero_grad() + + if args.ema: + ema.update(model) + + # display + # if iter_i % 10 == 0: + if args.tfboard: + # viz loss + 
tblogger.add_scalar('loss obj', loss_dict_reduced['loss_obj'].item(), ni) + tblogger.add_scalar('loss cls', loss_dict_reduced['loss_cls'].item(), ni) + tblogger.add_scalar('loss reg', loss_dict_reduced['loss_reg'].item(), ni) + + t1 = time.time() + print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: obj %.2f || cls %.2f || reg %.2f || size %d || time: %.2f]' + % (epoch+1, + args.max_epoch, + iter_i, + epoch_size, + tmp_lr, + loss_dict['loss_obj'].item(), + loss_dict['loss_cls'].item(), + loss_dict['loss_reg'].item(), + train_size, + t1-t0), + flush=True) + fps_sum = fps_sum + (batch_size*8 / (t1 - t0)) + t0 = time.time() + # if local_rank in [-1, 0]: + # epoch_time = time.time() - start_time + # if iter_i >= 5: + # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1 - 5) / (epoch_time))) + # else: + # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1) / (epoch_time))) + if iter_i > 0 and iter_i == 461: + fps_avg = fps_sum / 461 + print("fps:",fps_avg) + fps_sum = 0 + + # evaluation + if (epoch + 1) % args.eval_epoch == 0 or (epoch + 1) == args.max_epoch: + if evaluator is None: + print('No evaluator ...') + print('Saving state, epoch:', epoch + 1) + torch.save(model_eval.state_dict(), os.path.join(path_to_save, + args.model + '_' + repr(epoch + 1) + '.pth')) + print('Keep training ...') + else: + print('eval ...') + # check ema + if args.ema: + model_eval = ema.ema + else: + model_eval = model.module if args.distributed else model + + # set eval mode + model_eval.trainable = False + model_eval.set_grid(val_size) + model_eval.eval() + + if local_rank == 0: + # evaluate + evaluator.evaluate(model_eval) + + cur_map = evaluator.map + if cur_map > best_map: + # update best-map + best_map = cur_map + # save model + print('Saving state, epoch:', epoch + 1) + torch.save(model_eval.state_dict(), os.path.join(path_to_save, + args.model + '_' + repr(epoch + 1) + '_' + str(round(best_map*100, 2)) + '.pth')) + if args.tfboard: + if args.dataset == 'voc': + tblogger.add_scalar('07test/mAP', evaluator.map, epoch) + elif args.dataset == 'coco': + tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch) + tblogger.add_scalar('val/AP50', evaluator.ap50, epoch) + + if args.distributed: + # wait for all processes to synchronize + dist.barrier() + + # set train mode. 
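+ # restore training behaviour on the evaluated model copy before the next epoch starts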
+ model_eval.trainable = True + model_eval.set_grid(train_size) + model_eval.train() + + # close mosaic augmentation + if args.mosaic and args.max_epoch - epoch == 15: + print('close Mosaic Augmentation ...') + dataloader.dataset.mosaic = False + # close mixup augmentation + if args.mixup and args.max_epoch - epoch == 15: + print('close Mixup Augmentation ...') + dataloader.dataset.mixup = False + + if args.tfboard: + tblogger.close() + + +def build_dataset(args, train_size, val_size, device): + if args.dataset == 'voc': + data_dir = os.path.join(args.root, 'VOCdevkit') + num_classes = 20 + dataset = VOCDetection( + data_dir=data_dir, + img_size=train_size, + transform=TrainTransforms(train_size), + color_augment=ColorTransforms(train_size), + mosaic=args.mosaic, + mixup=args.mixup) + + evaluator = VOCAPIEvaluator( + data_dir=data_dir, + img_size=val_size, + device=device, + transform=ValTransforms(val_size)) + + elif args.dataset == 'coco': + data_dir = os.path.join(args.root, 'COCO') + num_classes = 80 + dataset = COCODataset( + data_dir=data_dir, + img_size=train_size, + image_set='train2017', + transform=TrainTransforms(train_size), + color_augment=ColorTransforms(train_size), + mosaic=args.mosaic, + mixup=args.mixup) + + evaluator = COCOAPIEvaluator( + data_dir=data_dir, + img_size=val_size, + device=device, + transform=ValTransforms(val_size) + ) + + else: + print('unknow dataset !! Only support voc and coco !!') + exit(0) + + return dataset, evaluator, num_classes + + +def build_dataloader(args, dataset, collate_fn=None): + # distributed + if args.distributed and args.num_gpu > 1: + # dataloader + dataloader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=args.batch_size, + collate_fn=collate_fn, + num_workers=args.num_workers, + pin_memory=True, + sampler=torch.utils.data.distributed.DistributedSampler(dataset) + ) + + else: + # dataloader + dataloader = torch.utils.data.DataLoader( + dataset=dataset, + shuffle=True, + batch_size=args.batch_size, + collate_fn=collate_fn, + num_workers=args.num_workers, + pin_memory=True + ) + return dataloader + + +def set_lr(optimizer, lr): + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +if __name__ == '__main__': + train() + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py b/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py new file mode 100644 index 0000000000..b34ec2f26f --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py @@ -0,0 +1,545 @@ +from __future__ import division + +import os +import argparse +import time +import math +import random +from copy import deepcopy +import apex +from apex import amp +import torch +import torch_npu +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +import sys +from config.yolo_config import yolo_config +from data.voc import VOCDetection +from data.coco import COCODataset +from data.transforms import TrainTransforms, ColorTransforms, ValTransforms + +from utils import distributed_utils +from utils import create_labels +from utils.vis import vis_data, vis_targets +from utils.com_flops_params import FLOPs_and_Params +from utils.criterion import build_criterion +from utils.misc import detection_collate +from utils.misc import ModelEMA +from utils.criterion import build_criterion + +from models.yolo import build_model + +from evaluator.cocoapi_evaluator import COCOAPIEvaluator +from evaluator.vocapi_evaluator import 
VOCAPIEvaluator + +def parse_args(): + parser = argparse.ArgumentParser(description='YOLO Detection') + # basic + parser.add_argument('--npu', action='store_true', default=False, + help='use npu.') + parser.add_argument('--batch_size', default=16, type=int, + help='Batch size for training') + parser.add_argument('--lr', default=1e-3, type=float, + help='initial learning rate') + parser.add_argument('--img_size', type=int, default=640, + help='The upper bound of warm-up') + parser.add_argument('--multi_scale_range', nargs='+', default=[10, 20], type=int, + help='lr epoch to decay') + parser.add_argument('--max_epoch', type=int, default=200, + help='The upper bound of warm-up') + parser.add_argument('--lr_epoch', nargs='+', default=[100, 150], type=int, + help='lr epoch to decay') + parser.add_argument('--wp_epoch', type=int, default=2, + help='The upper bound of warm-up') + parser.add_argument('--start_epoch', type=int, default=0, + help='start epoch to train') + parser.add_argument('-r', '--resume', default=None, type=str, + help='keep training') + parser.add_argument('--num_workers', default=8, type=int, + help='Number of workers used in dataloading') + parser.add_argument('--num_gpu', default=1, type=int, + help='Number of GPUs to train') + parser.add_argument('--eval_epoch', type=int, + default=10, help='interval between evaluations') + parser.add_argument('--tfboard', action='store_true', default=False, + help='use tensorboard') + parser.add_argument('--save_folder', default='weights/', type=str, + help='path to save weight') + parser.add_argument('--vis_data', action='store_true', default=False, + help='visualize images and labels.') + parser.add_argument('--vis_targets', action='store_true', default=False, + help='visualize assignment.') + + # Optimizer & Schedule + parser.add_argument('--optimizer', default='NpuFusedSGD', type=str, + help='sgd, adamw') + parser.add_argument('--lr_schedule', default='step', type=str, + help='step, cos') + parser.add_argument('--grad_clip', default=None, type=float, + help='clip gradient') + + # model + parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') + parser.add_argument('--conf_thresh', default=0.001, type=float, + help='NMS threshold') + parser.add_argument('--nms_thresh', default=0.5, type=float, + help='NMS threshold') + + # dataset + parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') + parser.add_argument('-d', '--dataset', default='coco', + help='coco, widerface, crowdhuman') + + # Loss + parser.add_argument('--loss_obj_weight', default=1.0, type=float, + help='weight of obj loss') + parser.add_argument('--loss_cls_weight', default=1.0, type=float, + help='weight of cls loss') + parser.add_argument('--loss_reg_weight', default=1.0, type=float, + help='weight of reg loss') + parser.add_argument('--scale_loss', default='batch', type=str, + help='scale loss: batch or positive samples') + + # train trick + parser.add_argument('--no_warmup', action='store_true', default=False, + help='do not use warmup') + parser.add_argument('-ms', '--multi_scale', action='store_true', default=False, + help='use multi-scale trick') + parser.add_argument('--ema', action='store_true', default=False, + help='use ema training trick') + parser.add_argument('--mosaic', action='store_true', default=False, + help='use Mosaic Augmentation trick') + parser.add_argument('--mixup', action='store_true', default=False, + help='use MixUp Augmentation 
trick') + parser.add_argument('--multi_anchor', action='store_true', default=False, + help='use multiple anchor boxes as the positive samples') + parser.add_argument('--center_sample', action='store_true', default=False, + help='use center sample for labels') + parser.add_argument('--accumulate', type=int, default=1, + help='accumulate gradient') + # DDP train + parser.add_argument('-dist', '--distributed', action='store_true', default=False, + help='distributed training') + parser.add_argument('--local_rank', type=int, default=0, + help='local_rank') + parser.add_argument('--sybn', action='store_true', default=False, + help='use sybn.') + parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default O1') + + return parser.parse_args() + + +def train(): + args = parse_args() + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12345' + + # torch.npu.set_compile_mode(jit_compile=False) + option = {} + option["ACL_OP_COMPILER_CACHE_MODE"]="enable" + option["ACL_OP_COMPILER_CACHE_DIR"]="./kernel_meta" + option["NPU_FUZZY_COMPILE_BLACKLIST"] = "Maximum,Conv2D,BNInfer,BNTrainingReduceGrad,Cast" + print("option:",option) + # torch.npu.set_option(option) + print("Setting Arguments.. : ", args) + print("----------------------------------------------------------") + + # path to save model + path_to_save = os.path.join(args.save_folder, args.dataset, args.model) + os.makedirs(path_to_save, exist_ok=True) + + # set distributed + local_rank = 0 + if args.distributed: + dist.init_process_group(backend="hccl", #init_method="env://" + ) + local_rank = torch.distributed.get_rank() + print(local_rank) + torch_npu.npu.set_device(local_rank) + + # cuda + if args.npu: + print('use npu') + cudnn.benchmark = True + device = torch.device("npu") + else: + device = torch.device("cpu") + + # YOLO config + cfg = yolo_config[args.model] + train_size = val_size = args.img_size + + # dataset and evaluator + dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) + # dataloader + dataloader = build_dataloader(args, dataset, detection_collate) + # criterioin + criterion = build_criterion(args, cfg, num_classes) + + print('Training model on:', args.dataset) + print('The dataset size:', len(dataset)) + print("----------------------------------------------------------") + + # build model + net = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=True) + model = net + + # SyncBatchNorm + # if args.sybn and args.npu and args.num_gpu > 1: + # print('use SyncBatchNorm ...') + # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model = model.to(device).train() + # compute FLOPs and Params + # if local_rank == 0: + # model_copy = deepcopy(model) + # model_copy.trainable = False + # model_copy.eval() + # FLOPs_and_Params(model=model_copy, size=train_size) + # model_copy.trainable = True + # model_copy.train() + # keep training + if args.resume is not None: + print('keep training model: %s' % (args.resume)) + model.load_state_dict(torch.load(args.resume, map_location=device)) + + # EMA + ema = ModelEMA(model) if args.ema else None + # use tfboard + tblogger = None + if args.tfboard: + print('use tensorboard') + from torch.utils.tensorboard import SummaryWriter + c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + log_path = os.path.join('log/', args.dataset, c_time) + os.makedirs(log_path, exist_ok=True) + + tblogger = SummaryWriter(log_path) + # optimizer setup + base_lr 
= args.lr + tmp_lr = args.lr + if args.optimizer == 'NpuFusedSGD': + print('use SGD with momentum ...') + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.9) + # optimizer = optim.SGD(model.parameters(), + # lr=tmp_lr, + # momentum=0.9, + # weight_decay=5e-4) + elif args.optimizer == 'adamw': + print('use AdamW ...') + optimizer = optim.AdamW(model.parameters(), + lr=tmp_lr, + weight_decay=5e-4) + + model, optimizer = amp.initialize(model, optimizer, opt_level='O1', loss_scale=128.0,combine_grad=True) + + # DDP + if args.distributed and args.num_gpu > 1: + print('using DDP ...') + model = DDP(model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=False) + + + + + batch_size = args.batch_size + epoch_size = len(dataset) // (batch_size * args.num_gpu) + best_map = -100. + warmup = not args.no_warmup + + t0 = time.time() + # start training loop + for epoch in range(args.start_epoch, args.max_epoch): + if args.distributed: + dataloader.sampler.set_epoch(epoch) + + # use step lr decay + if args.lr_schedule == 'step': + if epoch in args.lr_epoch: + tmp_lr = tmp_lr * 0.1 + set_lr(optimizer, tmp_lr) + # use cos lr decay + elif args.lr_schedule == 'cos' and not warmup: + T_max = args.max_epoch - 15 + lr_min = base_lr * 0.1 * 0.1 + if epoch > T_max: + # Cos decay is done + print('Cosine annealing is over !!') + args.lr_schedule == None + tmp_lr = lr_min + set_lr(optimizer, tmp_lr) + else: + tmp_lr = lr_min + 0.5*(base_lr - lr_min)*(1 + math.cos(math.pi*epoch / T_max)) + set_lr(optimizer, tmp_lr) + fps_sum=0 + # train one epoch + # pre_flag = False + # start_time = time.time() + for iter_i, (images, targets) in enumerate(dataloader): + # if iter_i == 5: + # start_time = time.time() + # with torch.autograd.profiler.profile(use_npu=True) as prof: + ni = iter_i + epoch * epoch_size + # warmup + if epoch < args.wp_epoch and warmup: + nw = args.wp_epoch * epoch_size + tmp_lr = base_lr * pow(ni / nw, 4) + set_lr(optimizer, tmp_lr) + + elif epoch == args.wp_epoch and iter_i == 0 and warmup: + # warmup is over + print('Warmup is over !!') + warmup = False + tmp_lr = base_lr + set_lr(optimizer, tmp_lr) + + # multi-scale trick + if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: + # randomly choose a new size + r = args.multi_scale_range + train_size = random.randint(r[0], r[1]) * 32 + model.module.set_grid(train_size) + if args.multi_scale: + # interpolate + images = torch.nn.functional.interpolate( + input=images, + size=train_size, + mode='bilinear', + align_corners=False) + + targets = [label.tolist() for label in targets] + # visualize target + if args.vis_data: + vis_data(images, targets) + continue + # make labels + targets = create_labels.gt_creator( + img_size=train_size, + strides=net.stride, + label_lists=targets, + anchor_size=cfg["anchor_size"], + multi_anchor=args.multi_anchor, + center_sample=args.center_sample) + # visualize assignment + if args.vis_targets: + vis_targets(images, targets, cfg["anchor_size"], net.stride) + continue + + # to device + images = images.to(device) + targets = targets.to(device) + + # inference + pred_obj, pred_cls, pred_iou, targets = model(images, targets=targets) + + # compute loss + loss_obj, loss_cls, loss_reg, total_loss = criterion(pred_obj, pred_cls, pred_iou, targets) + + # check loss + if torch.isnan(total_loss): + continue + + loss_dict = dict( + loss_obj=loss_obj, + loss_cls=loss_cls, + loss_reg=loss_reg, + total_loss=total_loss + ) + loss_dict_reduced = 
distributed_utils.reduce_loss_dict(loss_dict) + + total_loss = total_loss / args.accumulate + # Backward and Optimize + with amp.scale_loss(total_loss , optimizer) as scaled_loss: + scaled_loss.backward() + if ni % args.accumulate == 0: + if args.grad_clip is not None: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) + optimizer.step() + optimizer.zero_grad() + + if args.ema: + ema.update(model) + + # display + # if iter_i % 10 == 0: + if args.tfboard: + # viz loss + tblogger.add_scalar('loss obj', loss_dict_reduced['loss_obj'].item(), ni) + tblogger.add_scalar('loss cls', loss_dict_reduced['loss_cls'].item(), ni) + tblogger.add_scalar('loss reg', loss_dict_reduced['loss_reg'].item(), ni) + + t1 = time.time() + print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: obj %.2f || cls %.2f || reg %.2f || size %d || time: %.2f]' + % (epoch+1, + args.max_epoch, + iter_i, + epoch_size, + tmp_lr, + loss_dict['loss_obj'].item(), + loss_dict['loss_cls'].item(), + loss_dict['loss_reg'].item(), + train_size, + t1-t0), + flush=True) + fps_sum = fps_sum + (batch_size*8 / (t1 - t0)) + t0 = time.time() + # if local_rank in [-1, 0]: + # epoch_time = time.time() - start_time + # if iter_i >= 5: + # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1 - 5) / (epoch_time))) + # else: + # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1) / (epoch_time))) + if iter_i > 0 and iter_i == 461: + fps_avg = fps_sum / 461 + print("fps:",fps_avg) + fps_sum = 0 + + # evaluation + if (epoch + 1) % args.eval_epoch == 0 or (epoch + 1) == args.max_epoch: + if evaluator is None: + print('No evaluator ...') + print('Saving state, epoch:', epoch + 1) + torch.save(model_eval.state_dict(), os.path.join(path_to_save, + args.model + '_' + repr(epoch + 1) + '.pth')) + print('Keep training ...') + else: + print('eval ...') + # check ema + if args.ema: + model_eval = ema.ema + else: + model_eval = model.module if args.distributed else model + + # set eval mode + model_eval.trainable = False + model_eval.set_grid(val_size) + model_eval.eval() + + if local_rank == 0: + # evaluate + evaluator.evaluate(model_eval) + + cur_map = evaluator.map + if cur_map > best_map: + # update best-map + best_map = cur_map + # save model + print('Saving state, epoch:', epoch + 1) + torch.save(model_eval.state_dict(), os.path.join(path_to_save, + args.model + '_' + repr(epoch + 1) + '_' + str(round(best_map*100, 2)) + '.pth')) + if args.tfboard: + if args.dataset == 'voc': + tblogger.add_scalar('07test/mAP', evaluator.map, epoch) + elif args.dataset == 'coco': + tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch) + tblogger.add_scalar('val/AP50', evaluator.ap50, epoch) + + if args.distributed: + # wait for all processes to synchronize + dist.barrier() + + # set train mode. 
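+ # restore training behaviour on the evaluated model copy before the next epoch starts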
+ model_eval.trainable = True + model_eval.set_grid(train_size) + model_eval.train() + + # close mosaic augmentation + if args.mosaic and args.max_epoch - epoch == 15: + print('close Mosaic Augmentation ...') + dataloader.dataset.mosaic = False + # close mixup augmentation + if args.mixup and args.max_epoch - epoch == 15: + print('close Mixup Augmentation ...') + dataloader.dataset.mixup = False + + if args.tfboard: + tblogger.close() + + +def build_dataset(args, train_size, val_size, device): + if args.dataset == 'voc': + data_dir = os.path.join(args.root, 'VOCdevkit') + num_classes = 20 + dataset = VOCDetection( + data_dir=data_dir, + img_size=train_size, + transform=TrainTransforms(train_size), + color_augment=ColorTransforms(train_size), + mosaic=args.mosaic, + mixup=args.mixup) + + evaluator = VOCAPIEvaluator( + data_dir=data_dir, + img_size=val_size, + device=device, + transform=ValTransforms(val_size)) + + elif args.dataset == 'coco': + data_dir = os.path.join(args.root, 'COCO') + num_classes = 80 + dataset = COCODataset( + data_dir=data_dir, + img_size=train_size, + image_set='train2017', + transform=TrainTransforms(train_size), + color_augment=ColorTransforms(train_size), + mosaic=args.mosaic, + mixup=args.mixup) + + evaluator = COCOAPIEvaluator( + data_dir=data_dir, + img_size=val_size, + device=device, + transform=ValTransforms(val_size) + ) + + else: + print('unknow dataset !! Only support voc and coco !!') + exit(0) + + return dataset, evaluator, num_classes + + +def build_dataloader(args, dataset, collate_fn=None): + # distributed + if args.distributed and args.num_gpu > 1: + # dataloader + dataloader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=args.batch_size, + collate_fn=collate_fn, + num_workers=args.num_workers, + pin_memory=True, + sampler=torch.utils.data.distributed.DistributedSampler(dataset) + ) + + else: + # dataloader + dataloader = torch.utils.data.DataLoader( + dataset=dataset, + shuffle=True, + batch_size=args.batch_size, + collate_fn=collate_fn, + num_workers=args.num_workers, + pin_memory=True + ) + return dataloader + + +def set_lr(optimizer, lr): + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +if __name__ == '__main__': + train() + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train_yolonano.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolonano.sh new file mode 100644 index 0000000000..60e0967766 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolonano.sh @@ -0,0 +1,15 @@ +python train.py \ + --cuda \ + -d coco \ + -m yolo_nano \ + --root /home/zzb/PyTorch_YOLO-Family-master \ + --batch_size 64 \ + --lr 0.001 \ + --img_size 512 \ + --max_epoch 160 \ + --lr_epoch 100 130 \ + --multi_scale \ + --multi_scale_range 10 16 \ + --multi_anchor \ + --ema + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov1.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov1.sh new file mode 100644 index 0000000000..f02d280e1c --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov1.sh @@ -0,0 +1,16 @@ +python train.py \ + --cuda \ + -d coco \ + -m yolov1 \ + --root /home/zzb/PyTorch_YOLO-Family-master \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --ema +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS:" '{print $2}'|tail -n +2|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +#FPS=`awk 
'BEGIN{printf "%.2f\n",'${batch_size}'*'${perf}'}'` +echo "Final Performance images/sec : $FPS" \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3.sh new file mode 100644 index 0000000000..f6fc52db37 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3.sh @@ -0,0 +1,15 @@ +python train.py \ + --npu \ + -d coco \ + -m yolov3 \ + --root /home/zzb/PyTorch_YOLO-Family-master \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ + --ema + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_de.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_de.sh new file mode 100644 index 0000000000..0c4bc871b5 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_de.sh @@ -0,0 +1,15 @@ +python train.py \ + --cuda \ + -d coco \ + -m yolov3_de \ + --root /home/zzb/PyTorch_YOLO-Family-master \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ + --ema + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_spp.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_spp.sh new file mode 100644 index 0000000000..0494d09827 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov3_spp.sh @@ -0,0 +1,15 @@ +python train.py \ + --cuda \ + -d coco \ + -m yolov3_spp \ + --root /home/zzb/PyTorch_YOLO-Family-master \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ + --ema + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov4.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov4.sh new file mode 100644 index 0000000000..2258bb927b --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train_yolov4.sh @@ -0,0 +1,19 @@ +python train.py \ + --cuda \ + -d coco \ + -m yolov4 \ + --root /home/zzb/PyTorch_YOLO-Family-master \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 608 \ + --max_epoch 250 \ + --lr_epoch 130 180 \ + --multi_scale \ + --multi_scale_range 10 19 \ + --scale_loss batch \ + --accumulate 1 \ + --mosaic \ + --mixup \ + --multi_anchor \ + --ema + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/__init__.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/box_ops.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/box_ops.py new file mode 100644 index 0000000000..6176d23478 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/box_ops.py @@ -0,0 +1,101 @@ +import math +import torch_npu +import torch + + +def iou_score(bboxes_a, bboxes_b, batch_size): + """ + Input:\n + bboxes_a : [B*N, 4] = [x1, y1, x2, y2] \n + bboxes_b : [B*N, 4] = [x1, y1, x2, y2] \n + + Output:\n + iou : [B, N] = [iou, ...] 
\n + """ + tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2]) + br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:]) + area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) + area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) + + en = (tl < br).type(tl.type()).prod(dim=1) + area_i = torch.prod(br - tl, 1) * en # * ((tl < br).all()) + iou = area_i / (area_a + area_b - area_i + 1e-14) + + return iou.view(batch_size, -1) + + +def giou_score(bboxes_a, bboxes_b, batch_size): + """ + bbox_1 : [B*N, 4] = [x1, y1, x2, y2] + bbox_2 : [B*N, 4] = [x1, y1, x2, y2] + """ + # iou + tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2]) + br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:]) + area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) + area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) + + en = (tl < br).float().prod(dim=1) + #en = (tl < br).type(tl.type()).prod(dim=1) + area_i = torch.prod(br - tl, 1) * en # * ((tl < br).all()) + area_u = area_a + area_b - area_i + iou = (area_i / (area_u + 1e-14)).clamp(0) + + # giou + tl = torch.min(bboxes_a[:, :2], bboxes_b[:, :2]) + br = torch.max(bboxes_a[:, 2:], bboxes_b[:, 2:]) + en = (tl < br).float().prod(dim=1) + #en = (tl < br).type(tl.type()).prod(dim=1) + area_c = torch.prod(br - tl, 1) * en # * ((tl < br).all()) + + giou = (iou - (area_c - area_u) / (area_c + 1e-14)) + + return giou.view(batch_size, -1) + + +def ciou_score(bboxes_a, bboxes_b, batch_size): + """ + Input:\n + bboxes_a : [B*N, 4] = [x1, y1, x2, y2] \n + bboxes_b : [B*N, 4] = [x1, y1, x2, y2] \n + + Output:\n + iou : [B, N] = [ciou, ...] \n + """ + tl = torch.max(bboxes_a[:, :2], bboxes_b[:, :2]) + br = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:]) + area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) + area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) + + en = (tl < br).type(tl.type()).prod(dim=1) + area_i = torch.prod(br - tl, 1) * en # * ((tl < br).all()) + iou = area_i / (area_a + area_b - area_i + 1e-7) + + cw = torch.max(bboxes_a[..., 2], bboxes_b[..., 2]) - torch.min(bboxes_a[..., 0], bboxes_b[..., 0]) + ch = torch.max(bboxes_a[..., 3], bboxes_b[..., 3]) - torch.min(bboxes_a[..., 1], bboxes_b[..., 1]) + + c2 = cw ** 2 + ch ** 2 + 1e-7 + rho2 = ((bboxes_b[..., 0] + bboxes_b[..., 2] - bboxes_a[..., 0] - bboxes_a[..., 2]) ** 2 + + (bboxes_b[..., 1] + bboxes_b[..., 3] - bboxes_a[..., 1] - bboxes_a[..., 3]) ** 2) / 4 + w1 = bboxes_a[..., 2] - bboxes_a[..., 0] + h1 = bboxes_a[..., 3] - bboxes_a[..., 1] + w2 = bboxes_b[..., 2] - bboxes_b[..., 0] + h2 = bboxes_b[..., 3] - bboxes_b[..., 1] + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + with torch.no_grad(): + alpha = v / (v - iou + (1. 
+ 1e-7)) + + ciou = iou - (rho2 / c2 + v * alpha) + + return ciou.view(batch_size, -1) + + +if __name__ == '__main__': + bboxes_a = torch.tensor([[10, 10, 20, 20]]) + bboxes_b = torch.tensor([[13, 15, 27, 25]]) + iou = iou_score(bboxes_a, bboxes_b, 1) + print(iou) + giou = giou_score(bboxes_a, bboxes_b, 1) + print(giou) + ciou = ciou_score(bboxes_a, bboxes_b, 1) + print(ciou) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/com_flops_params.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/com_flops_params.py new file mode 100644 index 0000000000..be7efad152 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/com_flops_params.py @@ -0,0 +1,17 @@ +import torch +from thop import profile +import torch_npu +import torch.nn.functional as F + + +def FLOPs_and_Params(model, size): + device = model.device + x = torch.randn(1, 3, size, size).to(device) + + flops, params = profile(model, inputs=(x, )) + print('FLOPs : ', flops / 1e9, ' B') + print('Params : ', params / 1e6, ' M') + + +if __name__ == "__main__": + pass diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/create_labels.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/create_labels.py new file mode 100644 index 0000000000..3b32df3134 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/create_labels.py @@ -0,0 +1,240 @@ +import numpy as np +import torch +import torch_npu + + +def compute_iou(anchor_boxes, gt_box): + """ + Input: + anchor_boxes : ndarray -> [[xc_s, yc_s, anchor_w, anchor_h], ..., [xc_s, yc_s, anchor_w, anchor_h]]. + gt_box : ndarray -> [xc_s, yc_s, anchor_w, anchor_h]. + Output: + iou : ndarray -> [iou_1, iou_2, ..., iou_m], and m is equal to the number of anchor boxes. + """ + # compute the iou between anchor box and gt box + # First, change [xc_s, yc_s, anchor_w, anchor_h] -> [x1, y1, x2, y2] + # anchor box : + ab_x1y1_x2y2 = np.zeros([len(anchor_boxes), 4]) + ab_x1y1_x2y2[:, 0] = anchor_boxes[:, 0] - anchor_boxes[:, 2] / 2 # x1 + ab_x1y1_x2y2[:, 1] = anchor_boxes[:, 1] - anchor_boxes[:, 3] / 2 # y1 + ab_x1y1_x2y2[:, 2] = anchor_boxes[:, 0] + anchor_boxes[:, 2] / 2 # x2 + ab_x1y1_x2y2[:, 3] = anchor_boxes[:, 1] + anchor_boxes[:, 3] / 2 # y2 + w_ab, h_ab = anchor_boxes[:, 2], anchor_boxes[:, 3] + + # gt_box : + # We need to expand gt_box(ndarray) to the shape of anchor_boxes(ndarray), in order to compute IoU easily. + gt_box_expand = np.repeat(gt_box, len(anchor_boxes), axis=0) + + gb_x1y1_x2y2 = np.zeros([len(anchor_boxes), 4]) + gb_x1y1_x2y2[:, 0] = gt_box_expand[:, 0] - gt_box_expand[:, 2] / 2 # x1 + gb_x1y1_x2y2[:, 1] = gt_box_expand[:, 1] - gt_box_expand[:, 3] / 2 # y1 + gb_x1y1_x2y2[:, 2] = gt_box_expand[:, 0] + gt_box_expand[:, 2] / 2 # x2 + gb_x1y1_x2y2[:, 3] = gt_box_expand[:, 1] + gt_box_expand[:, 3] / 2 # y1 + w_gt, h_gt = gt_box_expand[:, 2], gt_box_expand[:, 3] + + # Then we compute IoU between anchor_box and gt_box + S_gt = w_gt * h_gt + S_ab = w_ab * h_ab + I_w = np.minimum(gb_x1y1_x2y2[:, 2], ab_x1y1_x2y2[:, 2]) - np.maximum(gb_x1y1_x2y2[:, 0], ab_x1y1_x2y2[:, 0]) + I_h = np.minimum(gb_x1y1_x2y2[:, 3], ab_x1y1_x2y2[:, 3]) - np.maximum(gb_x1y1_x2y2[:, 1], ab_x1y1_x2y2[:, 1]) + S_I = I_h * I_w + U = S_gt + S_ab - S_I + 1e-20 + IoU = S_I / U + + return IoU + + +def set_anchors(anchor_size): + """ + Input: + anchor_size : list -> [[h_1, w_1], [h_2, w_2], ..., [h_n, w_n]]. + Output: + anchor_boxes : ndarray -> [[0, 0, anchor_w, anchor_h], + [0, 0, anchor_w, anchor_h], + ... + [0, 0, anchor_w, anchor_h]]. 
+ """ + num_anchors = len(anchor_size) + anchor_boxes = np.zeros([num_anchors, 4]) + for index, size in enumerate(anchor_size): + anchor_w, anchor_h = size + anchor_boxes[index] = np.array([0, 0, anchor_w, anchor_h]) + + return anchor_boxes + + +def label_assignment_with_anchorbox(anchor_size, target_boxes, num_anchors, strides, multi_anchor=False): + # prepare + anchor_boxes = set_anchors(anchor_size) + gt_box = np.array([[0, 0, target_boxes[2], target_boxes[3]]]) + + # compute IoU + iou = compute_iou(anchor_boxes, gt_box) + + label_assignment_results = [] + if multi_anchor: + # We consider those anchor boxes whose IoU is more than 0.5, + iou_mask = (iou > 0.5) + if iou_mask.sum() == 0: + # We assign the anchor box with highest IoU score. + iou_ind = np.argmax(iou) + + # scale_ind, anchor_ind = index // num_scale, index % num_scale + scale_ind = iou_ind // num_anchors + anchor_ind = iou_ind - scale_ind * num_anchors + + # get the corresponding stride + stride = strides[scale_ind] + + # compute the grid cell + xc_s = target_boxes[0] / stride + yc_s = target_boxes[1] / stride + grid_x = int(xc_s) + grid_y = int(yc_s) + + label_assignment_results.append([grid_x, grid_y, scale_ind, anchor_ind]) + else: + for iou_ind, iou_m in enumerate(iou_mask): + if iou_m: + # scale_ind, anchor_ind = index // num_scale, index % num_scale + scale_ind = iou_ind // num_anchors + anchor_ind = iou_ind - scale_ind * num_anchors + + # get the corresponding stride + stride = strides[scale_ind] + + # compute the gride cell + xc_s = target_boxes[0] / stride + yc_s = target_boxes[1] / stride + grid_x = int(xc_s) + grid_y = int(yc_s) + + label_assignment_results.append([grid_x, grid_y, scale_ind, anchor_ind]) + + else: + # We assign the anchor box with highest IoU score. + iou_ind = np.argmax(iou) + + # scale_ind, anchor_ind = index // num_scale, index % num_scale + scale_ind = iou_ind // num_anchors + anchor_ind = iou_ind - scale_ind * num_anchors + + # get the corresponding stride + stride = strides[scale_ind] + + # compute the grid cell + xc_s = target_boxes[0] / stride + yc_s = target_boxes[1] / stride + grid_x = int(xc_s) + grid_y = int(yc_s) + + label_assignment_results.append([grid_x, grid_y, scale_ind, anchor_ind]) + + return label_assignment_results + + +def label_assignment_without_anchorbox(target_boxes, strides): + # no anchor box + scale_ind = 0 + anchor_ind = 0 + + label_assignment_results = [] + # get the corresponding stride + stride = strides[scale_ind] + + # compute the grid cell + xc_s = target_boxes[0] / stride + yc_s = target_boxes[1] / stride + grid_x = int(xc_s) + grid_y = int(yc_s) + + label_assignment_results.append([grid_x, grid_y, scale_ind, anchor_ind]) + + return label_assignment_results + + +def gt_creator(img_size, strides, label_lists, anchor_size=None, multi_anchor=False, center_sample=False): + """creator gt""" + # prepare + batch_size = len(label_lists) + img_h = img_w = img_size + num_scale = len(strides) + gt_tensor = [] + KA = len(anchor_size) // num_scale if anchor_size is not None else 1 + + for s in strides: + fmp_h, fmp_w = img_h // s, img_w // s + # [B, H, W, KA, obj+cls+box+scale] + gt_tensor.append(np.zeros([batch_size, fmp_h, fmp_w, KA, 1+1+4+1])) + + # generate gt datas + for bi in range(batch_size): + label = label_lists[bi] + for box_cls in label: + # get a bbox coords + cls_id = int(box_cls[-1]) + x1, y1, x2, y2 = box_cls[:-1] + # [x1, y1, x2, y2] -> [xc, yc, bw, bh] + xc = (x2 + x1) / 2 * img_w + yc = (y2 + y1) / 2 * img_h + bw = (x2 - x1) * img_w + bh = (y2 - y1) * 
img_h + target_boxes = [xc, yc, bw, bh] + box_scale = 2.0 - (bw / img_w) * (bh / img_h) + + # check label + if bw < 1. or bh < 1.: + # print('A dirty data !!!') + continue + + # label assignment + if anchor_size is not None: + # use anchor box + label_assignment_results = label_assignment_with_anchorbox( + anchor_size=anchor_size, + target_boxes=target_boxes, + num_anchors=KA, + strides=strides, + multi_anchor=multi_anchor) + else: + # no anchor box + label_assignment_results = label_assignment_without_anchorbox( + target_boxes=target_boxes, + strides=strides) + + # make labels + for result in label_assignment_results: + grid_x, grid_y, scale_ind, anchor_ind = result + + if center_sample: + # We consider four grid points near the center point + for j in range(grid_y, grid_y+2): + for i in range(grid_x, grid_x+2): + if (j >= 0 and j < gt_tensor[scale_ind].shape[1]) and (i >= 0 and i < gt_tensor[scale_ind].shape[2]): + gt_tensor[scale_ind][bi, j, i, anchor_ind, 0] = 1.0 + gt_tensor[scale_ind][bi, j, i, anchor_ind, 1] = cls_id + gt_tensor[scale_ind][bi, j, i, anchor_ind, 2:6] = np.array([x1, y1, x2, y2]) + gt_tensor[scale_ind][bi, j, i, anchor_ind, 6] = box_scale + else: + # We ongly consider top-left grid point near the center point + if (grid_y >= 0 and grid_y < gt_tensor[scale_ind].shape[1]) and (grid_x >= 0 and grid_x < gt_tensor[scale_ind].shape[2]): + gt_tensor[scale_ind][bi, grid_y, grid_x, anchor_ind, 0] = 1.0 + gt_tensor[scale_ind][bi, grid_y, grid_x, anchor_ind, 1] = cls_id + gt_tensor[scale_ind][bi, grid_y, grid_x, anchor_ind, 2:6] = np.array([x1, y1, x2, y2]) + gt_tensor[scale_ind][bi, grid_y, grid_x, anchor_ind, 6] = box_scale + + gt_tensor = [gt.reshape(batch_size, -1, 1+1+4+1) for gt in gt_tensor] + gt_tensor = np.concatenate(gt_tensor, axis=1) + + return torch.from_numpy(gt_tensor).float() + + +if __name__ == "__main__": + gt_box = np.array([[0.0, 0.0, 10, 10]]) + anchor_boxes = np.array([[0.0, 0.0, 10, 10], + [0.0, 0.0, 4, 4], + [0.0, 0.0, 8, 8], + [0.0, 0.0, 16, 16] + ]) + iou = compute_iou(anchor_boxes, gt_box) + print(iou) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/criterion.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/criterion.py new file mode 100644 index 0000000000..fd07debdb9 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/criterion.py @@ -0,0 +1,192 @@ +import torch.nn as nn +import torch.nn.functional as F +import torch_npu + + +class MSEWithLogitsLoss(nn.Module): + def __init__(self, reduction='mean'): + super().__init__() + self.reduction = reduction + + def forward(self, logits, targets, target_pos): + inputs = logits.sigmoid() + # mse loss + loss = F.mse_loss(input=inputs, + target=targets, + reduction="none") + pos_loss = loss * target_pos * 5.0 + neg_loss = loss * (1.0 - target_pos) * 1.0 + loss = pos_loss + neg_loss + + if self.reduction == 'mean': + loss = loss.mean() + + elif self.reduction == 'sum': + loss = loss.sum() + + return loss + + +class BCEWithLogitsLoss(nn.Module): + def __init__(self, pos_weight=1.0, neg_weight=0.25, reduction='mean'): + super().__init__() + self.pos_weight = pos_weight + self.neg_weight = neg_weight + self.reduction = reduction + + def forward(self, logits, targets, target_pos): + # bce loss + loss = F.binary_cross_entropy_with_logits(input=logits, target=targets, reduction="none") + pos_loss = loss * target_pos * self.pos_weight + neg_loss = loss * (1.0 - target_pos) * self.neg_weight + loss = pos_loss + neg_loss + + if self.reduction == 'mean': + loss = loss.mean() + + elif 
self.reduction == 'sum': + loss = loss.sum() + + return loss + + +class Criterion(nn.Module): + def __init__(self, + args, + cfg, + loss_obj_weight=1.0, + loss_cls_weight=1.0, + loss_reg_weight=1.0, + num_classes=80): + super().__init__() + self.args = args + self.num_classes = num_classes + self.loss_obj_weight = loss_obj_weight + self.loss_cls_weight = loss_cls_weight + self.loss_reg_weight = loss_reg_weight + + # objectness loss + try: + if cfg['loss_obj'] == 'mse': + self.obj_loss_f = MSEWithLogitsLoss(reduction='none') + elif cfg['loss_obj'] == 'bce': + self.obj_loss_f = BCEWithLogitsLoss(reduction='none') + except: + self.obj_loss_f = MSEWithLogitsLoss(reduction='none') + # class loss + self.cls_loss_f = nn.CrossEntropyLoss(reduction='none') + + + def loss_objectness(self, pred_obj, target_obj, target_pos): + """ + pred_obj: (FloatTensor) [B, HW, 1] + target_obj: (FloatTensor) [B, HW,] + target_pos: (FloatTensor) [B, HW,] + """ + # obj loss: [B, HW,] + loss_obj = self.obj_loss_f(pred_obj[..., 0], target_obj, target_pos) + + if self.args.scale_loss == 'batch': + # scale loss by batch size + batch_size = pred_obj.size(0) + loss_obj = loss_obj.sum() / batch_size + elif self.args.scale_loss == 'positive': + # scale loss by number of positive samples + num_pos = target_pos.sum().clamp(1.0) + loss_obj = loss_obj.sum() / num_pos + + return loss_obj + + + def loss_class(self, pred_cls, target_cls, target_pos): + """ + pred_cls: (FloatTensor) [B, HW, C] + target_cls: (LongTensor) [B, HW,] + target_pos: (FloatTensor) [B, HW,] + """ + # [B, HW, C] -> [B, C, HW] + pred_cls = pred_cls.permute(0, 2, 1) + # reg loss: [B, HW, ] + loss_cls = self.cls_loss_f(pred_cls, target_cls) + # valid loss. Here we only compute the loss of positive samples + loss_cls = loss_cls * target_pos + + if self.args.scale_loss == 'batch': + # scale loss by batch size + batch_size = pred_cls.size(0) + loss_cls = loss_cls.sum() / batch_size + elif self.args.scale_loss == 'positive': + # scale loss by number of positive samples + num_pos = target_pos.sum().clamp(1.0) + loss_cls = loss_cls.sum() / num_pos + + return loss_cls + + + def loss_bbox(self, pred_iou, target_pos, target_scale): + """ + pred_iou: (FloatTensor) [B, HW, ] + target_pos: (FloatTensor) [B, HW,] + target_scale: (FloatTensor) [B, HW,] + """ + + # bbox loss: [B, HW,] + loss_reg = 1. - pred_iou + loss_reg = loss_reg * target_scale + # valid loss. 
Here we only compute the loss of positive samples + loss_reg = loss_reg * target_pos + + if self.args.scale_loss == 'batch': + # scale loss by batch size + batch_size = pred_iou.size(0) + loss_reg = loss_reg.sum() / batch_size + elif self.args.scale_loss == 'positive': + # scale loss by number of positive samples + num_pos = target_pos.sum().clamp(1.0) + loss_reg = loss_reg.sum() / num_pos + + return loss_reg + + + def forward(self, pred_obj, pred_cls, pred_iou, targets): + """ + pred_obj: (Tensor) [B, HW, 1] + pred_cls: (Tensor) [B, HW, C] + pred_iou: (Tensor) [B, HW,] + targets: (Tensor) [B, HW, 1+1+1+4] + """ + # groundtruth + target_obj = targets[..., 0].float() # [B, HW,] + target_pos = targets[..., 1].float() # [B, HW,] + target_cls = targets[..., 2].long() # [B, HW,] + target_scale = targets[..., -1].float() # [B, HW,] + + # objectness loss + loss_obj = self.loss_objectness(pred_obj, target_obj, target_pos) + + # class loss + loss_cls = self.loss_class(pred_cls, target_cls, target_pos) + + # regression loss + loss_reg = self.loss_bbox(pred_iou, target_pos, target_scale) + + # total loss + losses = self.loss_obj_weight * loss_obj + \ + self.loss_cls_weight * loss_cls + \ + self.loss_reg_weight * loss_reg + + return loss_obj, loss_cls, loss_reg, losses + + +def build_criterion(args, cfg, num_classes=80): + criterion = Criterion(args=args, + cfg=cfg, + loss_obj_weight=args.loss_obj_weight, + loss_cls_weight=args.loss_cls_weight, + loss_reg_weight=args.loss_reg_weight, + num_classes=num_classes) + return criterion + + +if __name__ == "__main__": + pass diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/distributed_utils.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/distributed_utils.py new file mode 100644 index 0000000000..33333f4d2c --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/distributed_utils.py @@ -0,0 +1,77 @@ +# from github: https://github.com/ruinmessi/ASFF/blob/master/utils/distributed_util.py + +import torch +import time +import torch_npu + +def get_world_size(): + if not torch.distributed.is_initialized(): + return 1 + return torch.distributed.get_world_size() + + +def get_rank(): + if not torch.distributed.is_initialized(): + return 0 + return torch.distributed.get_rank() + + +def is_main_process(): + if not torch.distributed.is_initialized(): + return True + return torch.distributed.get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize between multiple processes when + using distributed training + """ + if not torch.distributed.is_initialized(): + return + world_size = torch.distributed.get_world_size() + rank = torch.distributed.get_rank() + if world_size == 1: + return + + def _send_and_wait(r): + if rank == r: + tensor = torch.tensor(0, device="npu") + else: + tensor = torch.tensor(1, device="npu") + torch.distributed.broadcast(tensor, r) + while tensor.item() == 1: + time.sleep(1) + + _send_and_wait(0) + # now sync on the main process + _send_and_wait(1) + + +def reduce_loss_dict(loss_dict): + """ + Reduce the loss dictionary from all processes so that process with rank + 0 has the averaged results. Returns a dict with the same fields as + loss_dict, after reduction. 
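Every term in the `Criterion` class above follows the same pattern: compute a per-location loss, zero it out everywhere except positive samples via `target_pos`, then normalize either by batch size or by the clamped positive count. Below is a minimal standalone sketch of that pattern for the classification term; the tensor shapes and mask values are made up purely for illustration and do not come from the repo.

```python
import torch
import torch.nn.functional as F

# Toy shapes: batch B=2, HW=4 locations, C=3 classes (illustrative only).
pred_cls = torch.randn(2, 4, 3)                 # raw logits [B, HW, C]
target_cls = torch.randint(0, 3, (2, 4))        # class index per location [B, HW]
target_pos = torch.tensor([[1., 0., 0., 1.],
                           [0., 1., 0., 0.]])   # positive-sample mask [B, HW]

# Same pattern as Criterion.loss_class: per-location cross-entropy, masked to
# positives, then normalized by the clamped number of positive samples.
loss = F.cross_entropy(pred_cls.permute(0, 2, 1), target_cls, reduction='none')
loss = loss * target_pos
loss_cls = loss.sum() / target_pos.sum().clamp(min=1.0)
print(loss_cls)
```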
+ """ + world_size = get_world_size() + if world_size < 2: + return loss_dict + with torch.no_grad(): + loss_names = [] + all_losses = [] + for k in sorted(loss_dict.keys()): + loss_names.append(k) + if len(loss_dict[k].size()) == 0: + all_losses.append(loss_dict[k].unsqueeze(0)) + else: + all_losses.append(loss_dict[k]) + + all_losses = torch.stack(all_losses, dim=0) + torch.distributed.reduce(all_losses, dst=0) + if torch.distributed.get_rank() == 0: + # only main process gets accumulated, so only divide by + # world_size in this case + all_losses /= world_size + reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} + return reduced_losses \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/fuse_conv_bn.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/fuse_conv_bn.py new file mode 100644 index 0000000000..97794e3b0e --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/fuse_conv_bn.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch_npu + + +def _fuse_conv_bn(conv, bn): + """Fuse conv and bn into one module. + Args: + conv (nn.Module): Conv to be fused. + bn (nn.Module): BN to be fused. + Returns: + nn.Module: Fused module. + """ + conv_w = conv.weight + conv_b = conv.bias if conv.bias is not None else torch.zeros_like( + bn.running_mean) + + factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) + conv.weight = nn.Parameter(conv_w * + factor.reshape([conv.out_channels, 1, 1, 1])) + conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) + return conv + + +def fuse_conv_bn(module): + """Recursively fuse conv and bn in a module. + During inference, the functionary of batch norm layers is turned off + but only the mean and var alone channels are used, which exposes the + chance to fuse it with the preceding conv layers to save computations and + simplify network structures. + Args: + module (nn.Module): Module to be fused. + Returns: + nn.Module: Fused module. + """ + last_conv = None + last_conv_name = None + + for name, child in module.named_children(): + if isinstance(child, + (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if last_conv is None: # only fuse BN that is after Conv + continue + fused_conv = _fuse_conv_bn(last_conv, child) + module._modules[last_conv_name] = fused_conv + # To reduce changes, set BN as Identity instead of deleting it. 
+ module._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_conv_bn(child) + return module \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/kmeans_anchor.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/kmeans_anchor.py new file mode 100644 index 0000000000..2c8a0c10eb --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/kmeans_anchor.py @@ -0,0 +1,230 @@ +import numpy as np +import random +import argparse +import os +import sys +sys.path.append('..') + +from data.voc import VOCDetection +from data.coco import COCODataset + + +def parse_args(): + parser = argparse.ArgumentParser(description='kmeans for anchor box') + parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') + parser.add_argument('-d', '--dataset', default='coco', + help='coco, widerface, crowdhuman') + parser.add_argument('-na', '--num_anchorbox', default=9, type=int, + help='number of anchor box.') + parser.add_argument('-size', '--img_size', default=512, type=int, + help='input size.') + return parser.parse_args() + +args = parse_args() + + +class Box(): + def __init__(self, x, y, w, h): + self.x = x + self.y = y + self.w = w + self.h = h + + +def iou(box1, box2): + x1, y1, w1, h1 = box1.x, box1.y, box1.w, box1.h + x2, y2, w2, h2 = box2.x, box2.y, box2.w, box2.h + + S_1 = w1 * h1 + S_2 = w2 * h2 + + xmin_1, ymin_1 = x1 - w1 / 2, y1 - h1 / 2 + xmax_1, ymax_1 = x1 + w1 / 2, y1 + h1 / 2 + xmin_2, ymin_2 = x2 - w2 / 2, y2 - h2 / 2 + xmax_2, ymax_2 = x2 + w2 / 2, y2 + h2 / 2 + + I_w = min(xmax_1, xmax_2) - max(xmin_1, xmin_2) + I_h = min(ymax_1, ymax_2) - max(ymin_1, ymin_2) + if I_w < 0 or I_h < 0: + return 0 + I = I_w * I_h + + IoU = I / (S_1 + S_2 - I) + + return IoU + + +def init_centroids(boxes, n_anchors): + """ + We use kmeans++ to initialize centroids. 
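The folding in `_fuse_conv_bn` above rewrites the conv weight as `W * gamma / sqrt(var + eps)` and absorbs the BN shift into the conv bias, so in eval mode the fused conv should reproduce the original conv-then-BN output. Below is a small standalone check of that identity with a hand-built fused layer; the layer sizes and randomized statistics are illustrative assumptions, not values from the repo.

```python
import torch
import torch.nn as nn

# Standalone check of the conv+BN folding used by _fuse_conv_bn above: in eval
# mode the fused conv should match conv -> BN up to float error.
conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
bn.weight.data.uniform_(0.5, 1.5)        # pretend these were learned / accumulated
bn.bias.data.uniform_(-0.5, 0.5)
bn.running_mean.uniform_(-1.0, 1.0)
bn.running_var.uniform_(0.5, 2.0)
conv.eval()
bn.eval()

factor = (bn.weight / torch.sqrt(bn.running_var + bn.eps)).detach()
fused = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=True)
fused.weight.data = conv.weight.data * factor.reshape(8, 1, 1, 1)
fused.bias.data = (torch.zeros(8) - bn.running_mean) * factor + bn.bias.data
fused.eval()

x = torch.randn(1, 3, 16, 16)
with torch.no_grad():
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))   # expected: True
```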
+ """ + centroids = [] + boxes_num = len(boxes) + + centroid_index = int(np.random.choice(boxes_num, 1)[0]) + centroids.append(boxes[centroid_index]) + print(centroids[0].w,centroids[0].h) + + for centroid_index in range(0, n_anchors-1): + sum_distance = 0 + distance_thresh = 0 + distance_list = [] + cur_sum = 0 + + for box in boxes: + min_distance = 1 + for centroid_i, centroid in enumerate(centroids): + distance = (1 - iou(box, centroid)) + if distance < min_distance: + min_distance = distance + sum_distance += min_distance + distance_list.append(min_distance) + + distance_thresh = sum_distance * np.random.random() + + for i in range(0, boxes_num): + cur_sum += distance_list[i] + if cur_sum > distance_thresh: + centroids.append(boxes[i]) + print(boxes[i].w, boxes[i].h) + break + return centroids + + +def do_kmeans(n_anchors, boxes, centroids): + loss = 0 + groups = [] + new_centroids = [] + # for box in centroids: + # print('box: ', box.x, box.y, box.w, box.h) + # exit() + for i in range(n_anchors): + groups.append([]) + new_centroids.append(Box(0, 0, 0, 0)) + + for box in boxes: + min_distance = 1 + group_index = 0 + for centroid_index, centroid in enumerate(centroids): + distance = (1 - iou(box, centroid)) + if distance < min_distance: + min_distance = distance + group_index = centroid_index + groups[group_index].append(box) + loss += min_distance + new_centroids[group_index].w += box.w + new_centroids[group_index].h += box.h + + for i in range(n_anchors): + new_centroids[i].w /= max(len(groups[i]), 1) + new_centroids[i].h /= max(len(groups[i]), 1) + + return new_centroids, groups, loss# / len(boxes) + + +def anchor_box_kmeans(total_gt_boxes, n_anchors, loss_convergence, iters, plus=True): + """ + This function will use k-means to get appropriate anchor boxes for train dataset. + Input: + total_gt_boxes: + n_anchor : int -> the number of anchor boxes. + loss_convergence : float -> threshold of iterating convergence. + iters: int -> the number of iterations for training kmeans. + Output: anchor_boxes : list -> [[w1, h1], [w2, h2], ..., [wn, hn]]. 
+ """ + boxes = total_gt_boxes + centroids = [] + if plus: + centroids = init_centroids(boxes, n_anchors) + else: + total_indexs = range(len(boxes)) + sample_indexs = random.sample(total_indexs, n_anchors) + for i in sample_indexs: + centroids.append(boxes[i]) + + # iterate k-means + centroids, groups, old_loss = do_kmeans(n_anchors, boxes, centroids) + iterations = 1 + while(True): + centroids, groups, loss = do_kmeans(n_anchors, boxes, centroids) + iterations += 1 + print("Loss = %f" % loss) + if abs(old_loss - loss) < loss_convergence or iterations > iters: + break + old_loss = loss + + for centroid in centroids: + print(centroid.w, centroid.h) + + print("k-means result : ") + for centroid in centroids: + print("w, h: ", round(centroid.w, 2), round(centroid.h, 2), + "area: ", round(centroid.w, 2) * round(centroid.h, 2)) + + return centroids + + +if __name__ == "__main__": + + n_anchors = args.num_anchorbox + img_size = args.img_size + dataset = args.dataset + + loss_convergence = 1e-6 + iters_n = 1000 + + dataset_voc = VOCDetection(data_dir=os.path.join(args.root, 'VOCdevkit'), + img_size=img_size) + + dataset_coco = COCODataset(data_dir=os.path.join(args.root, 'COCO'), + img_size=img_size) + + boxes = [] + print("The dataset size: ", len(dataset)) + print("Loading the dataset ...") + # VOC + for i in range(len(dataset_voc)): + if i % 5000 == 0: + print('Loading voc data [%d / %d]' % (i+1, len(dataset_voc))) + + # For VOC + img, _ = dataset_voc.pull_image(i) + w, h = img.shape[1], img.shape[0] + _, annotation = dataset_voc.pull_anno(i) + + # prepare bbox datas + for box_and_label in annotation: + box = box_and_label[:-1] + xmin, ymin, xmax, ymax = box + bw = (xmax - xmin) / max(w, h) * img_size + bh = (ymax - ymin) / max(w, h) * img_size + # check bbox + if bw < 1.0 or bh < 1.0: + continue + boxes.append(Box(0, 0, bw, bh)) + + # COCO + for i in range(len(dataset_coco)): + if i % 5000 == 0: + print('Loading coco datat [%d / %d]' % (i+1, len(dataset_coco))) + + # For COCO + img, _ = dataset_coco.pull_image(i) + w, h = img.shape[1], img.shape[0] + annotation = dataset_coco.pull_anno(i) + + # prepare bbox datas + for box_and_label in annotation: + box = box_and_label[:-1] + xmin, ymin, xmax, ymax = box + bw = (xmax - xmin) / max(w, h) * img_size + bh = (ymax - ymin) / max(w, h) * img_size + # check bbox + if bw < 1.0 or bh < 1.0: + continue + boxes.append(Box(0, 0, bw, bh)) + + print("Number of all bboxes: ", len(boxes)) + print("Start k-means !") + centroids = anchor_box_kmeans(boxes, n_anchors, loss_convergence, iters_n, plus=True) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/misc.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/misc.py new file mode 100644 index 0000000000..583cefd71f --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/misc.py @@ -0,0 +1,149 @@ +import torch +import torch.nn as nn +import numpy as np +import math +from copy import deepcopy +import torch_npu + + +def nms(dets, scores, nms_thresh=0.4): + """"Pure Python NMS baseline.""" + x1 = dets[:, 0] #xmin + y1 = dets[:, 1] #ymin + x2 = dets[:, 2] #xmax + y2 = dets[:, 3] #ymax + + areas = (x2 - x1) * (y2 - y1) # the size of bbox + order = scores.argsort()[::-1] # sort bounding boxes by decreasing order + + keep = [] # store the final bounding boxes + while order.size > 0: + i = order[0] #the index of the bbox with highest confidence + keep.append(i) #save it to keep + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + 
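The anchor script above clusters box shapes with a 1 - IoU distance, where every box is treated as centered at the origin so the intersection reduces to `min(w) * min(h)`. As a compact illustration of that idea (this is not the repo's kmeans++ implementation: it uses a plain mean update, a fixed iteration count, and synthetic (w, h) pairs), the sketch below clusters made-up box shapes into three anchors.

```python
import numpy as np

def wh_iou(wh, centroids):
    """IoU between (w, h) shapes and centroids when all boxes share one center."""
    inter = np.minimum(wh[:, None, 0], centroids[None, :, 0]) * \
            np.minimum(wh[:, None, 1], centroids[None, :, 1])
    union = wh[:, 0:1] * wh[:, 1:2] + centroids[:, 0] * centroids[:, 1] - inter
    return inter / (union + 1e-20)

def kmeans_anchors(wh, k=3, iters=50, seed=0):
    rng = np.random.default_rng(seed)
    centroids = wh[rng.choice(len(wh), k, replace=False)].astype(float)
    for _ in range(iters):
        assign = np.argmax(wh_iou(wh, centroids), axis=1)   # nearest = highest IoU
        for j in range(k):
            members = wh[assign == j]
            if len(members):
                centroids[j] = members.mean(axis=0)
    return centroids[np.argsort(centroids[:, 0] * centroids[:, 1])]

if __name__ == "__main__":
    rng = np.random.default_rng(1)
    wh = np.concatenate([rng.normal(32, 4, (200, 2)),
                         rng.normal(96, 8, (200, 2)),
                         rng.normal(256, 16, (200, 2))]).clip(min=1)
    print(kmeans_anchors(wh, k=3))   # roughly recovers the three underlying scales
```

The real `kmeans_anchor.py` additionally keeps the kmeans++ initialization and a loss-convergence threshold, and draws its (w, h) pairs from the VOC and COCO annotations instead of synthetic data.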
yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(1e-28, xx2 - xx1) + h = np.maximum(1e-28, yy2 - yy1) + inter = w * h + + # Cross Area / (bbox + particular area - Cross Area) + ovr = inter / (areas[i] + areas[order[1:]] - inter) + #reserve all the boundingbox whose ovr less than thresh + inds = np.where(ovr <= nms_thresh)[0] + order = order[inds + 1] + + return keep + + +def is_parallel(model): + # Returns True if model is of type DP or DDP + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + +def detection_collate(batch): + """Custom collate fn for dealing with batches of images that have a different + number of associated object annotations (bounding boxes). + + Arguments: + batch: (tuple) A tuple of tensor images and lists of annotations + + Return: + A tuple containing: + 1) (tensor) batch of images stacked on their 0 dim + 2) (list of tensors) annotations for a given image are stacked on + 0 dim + """ + targets = [] + imgs = [] + for sample in batch: + imgs.append(sample[0]) + targets.append(torch.FloatTensor(sample[1])) + return torch.stack(imgs, 0), targets + + +# Model EMA +class ModelEMA(object): + def __init__(self, model, decay=0.9999, updates=0): + # create EMA + self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA + self.updates = updates + self.decay = lambda x: decay * (1 - math.exp(-x / 2000.)) + for p in self.ema.parameters(): + p.requires_grad_(False) + + def update(self, model): + # Update EMA parameters + with torch.no_grad(): + self.updates += 1 + d = self.decay(self.updates) + + msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: + v *= d + v += (1. 
- d) * msd[k].detach() + + +# test time augmentation(TTA) +class TestTimeAugmentation(object): + def __init__(self, num_classes=80, nms_thresh=0.4, scale_range=[320, 640, 32]): + self.nms = nms + self.num_classes = num_classes + self.nms_thresh = nms_thresh + self.scales = np.arange(scale_range[0], scale_range[1]+1, scale_range[2]) + + def __call__(self, x, model): + # x: Tensor -> [B, C, H, W] + bboxes_list = [] + scores_list = [] + labels_list = [] + + # multi scale + for s in self.scales: + if x.size(-1) == s and x.size(-2) == s: + x_scale = x + else: + x_scale =torch.nn.functional.interpolate( + input=x, + size=(s, s), + mode='bilinear', + align_corners=False) + model.set_grid(s) + bboxes, scores, labels = model(x_scale) + bboxes_list.append(bboxes) + scores_list.append(scores) + labels_list.append(labels) + + # Flip + x_flip = torch.flip(x_scale, [-1]) + bboxes, scores, labels = model(x_flip) + bboxes = bboxes.copy() + bboxes[:, 0::2] = 1.0 - bboxes[:, 2::-2] + bboxes_list.append(bboxes) + scores_list.append(scores) + labels_list.append(labels) + + bboxes = np.concatenate(bboxes_list) + scores = np.concatenate(scores_list) + labels = np.concatenate(labels_list) + + # nms + keep = np.zeros(len(bboxes), dtype=np.int) + for i in range(self.num_classes): + inds = np.where(labels == i)[0] + if len(inds) == 0: + continue + c_bboxes = bboxes[inds] + c_scores = scores[inds] + c_keep = self.nms(c_bboxes, c_scores, self.nms_thresh) + keep[inds[c_keep]] = 1 + + keep = np.where(keep > 0) + bboxes = bboxes[keep] + scores = scores[keep] + labels = labels[keep] + + return bboxes, scores, labels diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/utils/vis.py b/PyTorch/contrib/cv/detection/YoloV2-640/utils/vis.py new file mode 100644 index 0000000000..19bc181d65 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/utils/vis.py @@ -0,0 +1,106 @@ +import numpy as np +import cv2 + + +def vis_data(images, targets): + """ + images: (tensor) [B, 3, H, W] + targets: (list) a list of targets + """ + batch_size = images.size(0) + # vis data + rgb_mean=np.array((0.406, 0.456, 0.485), dtype=np.float32) + rgb_std=np.array((0.225, 0.224, 0.229), dtype=np.float32) + + for bi in range(batch_size): + # to numpy + image = images[bi].permute(1, 2, 0).cpu().numpy() + # to BGR + image = image[..., (2, 1, 0)] + # denormalize + image = ((image * rgb_std + rgb_mean)*255).astype(np.uint8) + image = image.copy() + img_h, img_w = image.shape[:2] + + targets_i = targets[bi] + for target in targets_i: + x1, y1, x2, y2 = target[:-1] + x1 = int(x1 * img_w) + y1 = int(y1 * img_h) + x2 = int(x2 * img_w) + y2 = int(y2 * img_h) + cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2) + + cv2.imshow('groundtruth', image) + cv2.waitKey(0) + + +def vis_targets(images, targets, anchor_sizes=None, strides=[8, 16, 32]): + """ + images: (tensor) [B, 3, H, W] + targets: (tensor) [B, HW*KA, 1+1+4+1] + anchor_sizes: (List) + strides: (List[Int]) output stride of network + """ + batch_size = images.size(0) + KA = len(anchor_sizes) // len(strides) if anchor_sizes is not None else 1 + # vis data + rgb_mean=np.array((0.485, 0.456, 0.406), dtype=np.float32) + rgb_std=np.array((0.229, 0.224, 0.225), dtype=np.float32) + + for bi in range(batch_size): + # to numpy + image = images[bi].permute(1, 2, 0).cpu().numpy() + # denormalize + image = ((image * rgb_std + rgb_mean)*255).astype(np.uint8) + # to BGR + image = image[..., (2, 1, 0)] + image = image.copy() + img_h, img_w = image.shape[:2] + + target_i = targets[bi] # [HW*KA, 1+1+4+1] + N = 0 
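`ModelEMA` above does not apply a fixed decay: it ramps it in with `d(x) = decay * (1 - exp(-x / 2000))`, so the first few thousand updates track the live weights almost exactly and the average only becomes slow later in training. The tiny sketch below just evaluates that schedule and applies it to one scalar weight; the step values are arbitrary.

```python
import math

decay = lambda x: 0.9999 * (1. - math.exp(-x / 2000.))   # same ramp as ModelEMA

for step in (1, 100, 1000, 5000, 20000):
    print(f"update {step:>6d}: d = {decay(step):.4f}")

# One EMA update of a single scalar weight: ema = d * ema + (1 - d) * w
w_ema, w = 1.0, 0.0
for step in range(1, 4):
    d = decay(step)
    w_ema = d * w_ema + (1. - d) * w
print(w_ema)   # still very close to w, because d is tiny for the first updates
```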
+ for si, s in enumerate(strides): + fmp_h, fmp_w = img_h // s, img_w // s + HWKA = fmp_h * fmp_w * KA + targets_i_s = target_i[N:N+HWKA] + N += HWKA + # [HW*KA, 1+1+4+1] -> [H, W, KA, 1+1+4+1] + targets_i_s = targets_i_s.reshape(fmp_h, fmp_w, KA, -1) + for j in range(fmp_h): + for i in range(fmp_w): + for k in range(KA): + target = targets_i_s[j, i, k] # [1+1+4+1,] + if target[0] > 0.: + # gt box + box = target[2:6] + x1, y1, x2, y2 = box + # denormalize bbox + x1 = int(x1 * img_w) + y1 = int(y1 * img_h) + x2 = int(x2 * img_w) + y2 = int(y2 * img_h) + cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2) + + if anchor_sizes is not None: + # anchor box + anchor_size = anchor_sizes[si*KA + k] + x_anchor = (i) * s + y_anchor = (j) * s + w_anchor, h_anchor = anchor_size + anchor_box = [x_anchor, y_anchor, w_anchor, h_anchor] + print('stride: {} - anchor box: ({}, {}, {}, {})'.format(s, *anchor_box)) + x1_a = int(x_anchor - w_anchor * 0.5) + y1_a = int(y_anchor - h_anchor * 0.5) + x2_a = int(x_anchor + w_anchor * 0.5) + y2_a = int(y_anchor + h_anchor * 0.5) + cv2.rectangle(image, (x1_a, y1_a), (x2_a, y2_a), (255, 0, 0), 2) + else: + x_anchor = (i) * s + y_anchor = (j) * s + anchor_point = (x_anchor, y_anchor) + print('stride: {} - anchor point: ({}, {})'.format(s, *anchor_point)) + cv2.circle(image, anchor_point, 10, (255, 0, 0), -1) + + cv2.imshow('assignment', image) + cv2.waitKey(0) diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/weights/README.md b/PyTorch/contrib/cv/detection/YoloV2-640/weights/README.md new file mode 100644 index 0000000000..6550070efb --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/weights/README.md @@ -0,0 +1,15 @@ +# yolo-v2-v3 and tiny model +Hi, guys ! + +For researchers in China, you can download them from BaiduYunDisk. +There are 5 models including yolo-v2, yolo-v3, yolo_v3_spp, slim-yolo-v2 and tiny-yolo-v3. + +The link is as following: + +link: https://pan.baidu.com/s/1rnmM8HGFzE2NTv6AkljJdg + +password: 5c8h + + + +I will upload all models to googledrive. \ No newline at end of file -- Gitee From ef2fab5ff3866f5c26da2608e5ad83c20265e4c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:41:49 +0000 Subject: [PATCH 2/8] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20PyTo?= =?UTF-8?q?rch/contrib/cv/detection/YoloV2-640/README.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../contrib/cv/detection/YoloV2-640/README.md | 331 ------------------ 1 file changed, 331 deletions(-) delete mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/README.md diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/README.md b/PyTorch/contrib/cv/detection/YoloV2-640/README.md deleted file mode 100644 index 5cf9a9f4ad..0000000000 --- a/PyTorch/contrib/cv/detection/YoloV2-640/README.md +++ /dev/null @@ -1,331 +0,0 @@ -# Update: 2022-05-31 -Recently, I have released an anchor-free YOLO: - -https://github.com/yjh0410/FreeYOLO - -# A new and strong YOLO family -Recently, I rebuild my YOLO-Family project !! 
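To make the stride bookkeeping shared by `label_assignment_with_anchorbox`, `gt_creator` and `vis_targets` above concrete: a box center given in input-image pixels lands in grid cell `(int(xc / s), int(yc / s))` of the stride-`s` feature map. The numbers below are made up purely for illustration.

```python
strides = [8, 16, 32]
img_size = 640
xc, yc = 300.0, 180.0   # box center in input pixels (illustrative values)

for s in strides:
    fmp = img_size // s
    grid_x, grid_y = int(xc / s), int(yc / s)
    print(f"stride {s:>2d}: {fmp}x{fmp} feature map, center in cell ({grid_x}, {grid_y})")
```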
- -# Requirements -- We recommend you to use Anaconda to create a conda environment: -```Shell -conda create -n yolo python=3.6 -``` - -- Then, activate the environment: -```Shell -conda activate yolo -``` - -- Requirements: -```Shell -pip install -r requirements.txt -``` -PyTorch >= 1.1.0 and Torchvision >= 0.3.0 - -# Visualize positive samples -You can run following command to visualize positiva sample: -```Shell -python train.py \ - -d voc \ - --root path/to/your/dataset \ - -m yolov2 \ - --batch_size 2 \ - --vis_targets -``` - -# Come soon -My better YOLO family - - -# This project -In this project, you can enjoy: -- a new and stronger YOLOv1 -- a new and stronger YOLOv2 -- a stronger YOLOv3 -- a stronger YOLOv3 with SPP -- a stronger YOLOv3 with DilatedEncoder -- YOLOv4 (I'm trying to make it better) -- YOLO-Tiny -- YOLO-Nano - - -# Future work -- Try to make my YOLO-v4 better. -- Train my YOLOv1/YOLOv2 with ViT-Base (pretrained by MaskAutoencoder) - -# Weights -You can download all weights including my DarkNet-53, CSPDarkNet-53, MAE-ViT and YOLO weights from the following links. - -## Backbone -My Backbone: -- DarkNet53: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/darknet53.pth -- CSPDarkNet-53: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/cspdarknet53.pth -- CSPDarkNet-Tiny: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/cspdarknet_tiny.pth - -YOLOX-Backbone: -- CSPDarkNet-S: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_s.pth -- CSPDarkNet-M: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_m.pth -- CSPDarkNet-L: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_l.pth -- CSPDarkNet-X: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_x.pth -- CSPDarkNet-Tiny: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_tiny.pth -- CSPDarkNet-Nano: https://github.com/yjh0410/YOLOX-Backbone/releases/download/YOLOX-Backbone/yolox_cspdarknet_nano.pth - -## YOLO -- YOLOv1: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov1_35.22_54.7.pth -- YOLOv2: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov2_36.4_56.6.pth -- YOLOv3: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov3_36.9_59.0.pth -- YOLOv3-SPP: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov3_spp_38.2_60.1.pth -- YOLOv3-DE: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov3_de_38.7_60.2.pth -- YOLOv4: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolov4_exp_43.0_63.4.pth -- YOLO-Tiny: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolo_tiny_28.8_48.6.pth -- YOLO-Nano: https://github.com/yjh0410/PyTorch_YOLO-Family/releases/download/yolo-weight/yolo_nano_22.4_40.7.pth - - -# Experiments -## Tricks -Tricks in this project: -- [x] Augmentations: Flip + Color jitter + RandomCrop -- [x] Model EMA -- [x] Mosaic Augmentation -- [x] Multi Scale training -- [ ] Gradient accumulation -- [ ] MixUp Augmentation -- [ ] Cosine annealing learning schedule -- [ ] AdamW -- [ ] Scale loss by number of positive samples - - -# Experiments -All experiment results are evaluated on COCO val. 
All FPS results except YOLO-Nano's are measured on a 2080ti GPU. -We will measure the speed of YOLO-Nano on a CPU. - -## YOLOv1 - - - - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLOv1-320 151 25.4 41.5 26.0 4.2 25.0 49.8 10.49 44.54M
YOLOv1-416 128 30.1 47.8 30.9 7.8 31.9 53.3 17.73 44.54M
YOLOv1-512 114 33.1 52.2 34.0 10.8 35.9 54.9 26.85 44.54M
YOLOv1-640 75 35.2 54.7 37.1 14.3 39.5 53.4 41.96 44.54M
YOLOv1-800 65.56 44.54M
- -## YOLOv2 - - - - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLOv2-320 147 26.8 44.1 27.1 4.7 27.6 50.8 10.53 44.89M
YOLOv2-416 123 31.6 50.3 32.4 9.1 33.8 54.0 17.79 44.89M
YOLOv2-512 108 34.3 54.0 35.4 12.3 37.8 55.2 26.94 44.89M
YOLOv2-640 73 36.3 56.6 37.7 15.1 41.1 54.0 42.10 44.89M
YOLOv2-800 65.78 44.89M
- -## YOLOv3 - - - - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLOv3-320 111 30.8 50.3 31.8 10.0 33.1 50.0 19.57 61.97M
YOLOv3-416 89 34.8 55.8 36.1 14.6 37.5 52.9 33.08 61.97M
YOLOv3-512 77 36.9 58.1 39.3 18.0 40.3 52.2 50.11 61.97M
YOLOv3-608 51 37.0 58.9 39.3 20.5 41.2 49.0 70.66 61.97M
YOLOv3-640 49 36.9 59.0 39.7 21.6 41.6 47.7 78.30 61.97M
- -## YOLOv3 with SPP - - - - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLOv3-SPP-320 110 31.0 50.8 32.0 10.5 33.0 50.4 19.68 63.02M
YOLOv3-SPP-416 88 35.0 56.1 36.4 14.9 37.7 52.8 33.26 63.02M
YOLOv3-SPP-512 75 37.2 58.7 39.1 19.1 40.0 53.0 50.38 63.02M
YOLOv3-SPP-608 50 38.3 60.1 40.7 20.9 41.1 51.2 71.04 63.02M
YOLOv3-SPP-640 48 38.2 60.1 40.4 21.6 41.1 50.5 78.72 63.02M
- -## YOLOv3 with Dilated Encoder -The DilatedEncoder is proposed by YOLOF. - - - - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLOv3-DE-320 109 31.1 51.1 31.7 10.2 32.6 51.2 19.10 57.25M
YOLOv3-DE-416 88 35.0 56.1 36.3 14.6 37.4 53.7 32.28 57.25M
YOLOv3-DE-512 74 37.7 59.3 39.6 17.9 40.4 54.4 48.90 57.25M
YOLOv3-DE-608 50 38.7 60.5 40.8 20.6 41.7 53.1 68.96 57.25M
YOLOv3-DE-640 48 38.7 60.2 40.7 21.3 41.7 51.7 76.41 57.25M
- -## YOLOv4 -I'm still trying to make it better. - - - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLOv4-320 89 39.2 58.6 40.9 16.9 44.1 59.2 16.38 58.14M
YOLOv4-416 84 41.7 61.6 44.2 22.0 46.6 57.7 27.69 58.14M
YOLOv4-512 70 42.9 63.1 46.1 24.5 48.3 56.5 41.94 58.14M
YOLOv4-608 51 43.0 63.4 46.1 26.7 48.6 53.9 59.14 58.14M
- -## YOLO-Tiny - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLO-Tiny-320 143 26.4 44.5 26.8 8.8 28.2 42.4 2.17 7.66M
YOLO-Tiny-416 130 28.2 47.6 28.8 11.6 31.5 41.4 3.67 7.82M
YOLO-Tiny-512 118 28.8 48.6 29.4 13.3 33.4 38.3 5.57 7.82M
- -## YOLO-Nano -The FPS is measured on i5-1135G& CPU. Any accelerated deployments that would help speed up detection are not done. - - - - - - - - - - -
FPS AP AP50 AP75 APs APm APl GFLOPs Params
YOLO-Nano-320 25 18.4 33.7 17.8 3.9 17.5 33.1 0.64 1.86M
YOLO-Nano-416 15 21.4 38.5 20.9 6.5 21.4 34.8 0.99 1.86M
YOLO-Nano-512 10 22.4 40.7 22.1 8.0 24.0 33.2 1.65 1.86M
- - -# Dataset - -## VOC Dataset -### My BaiduYunDisk -- BaiduYunDisk: https://pan.baidu.com/s/1tYPGCYGyC0wjpC97H-zzMQ Password:4la9 - -### Download VOC2007 trainval & test - -```Shell -# specify a directory for dataset to be downloaded into, else default is ~/data/ -sh data/scripts/VOC2007.sh # -``` - -### Download VOC2012 trainval -```Shell -# specify a directory for dataset to be downloaded into, else default is ~/data/ -sh data/scripts/VOC2012.sh # -``` -### My BaiduYunDisk -- BaiduYunDisk: https://pan.baidu.com/s/1xAPk8fnaWMMov1VEjr8-zA Password:6vhp - -On Ubuntu system, you might use the command `jar xvf xxx.zip` to unzip the `train2017.zip` and `test2017.zip` files -since they are larger than 2G (As far as I know, `unzip` operation can't process the zip file which is larger than 2G.). - -## MSCOCO Dataset - -### Download MSCOCO 2017 dataset -Just run ```sh data/scripts/COCO2017.sh```. You will get COCO train2017, val2017, test2017. - - -# Train -For example: - -```Shell -python train.py --cuda \ - -d coco \ - -m yolov2 \ - -ms \ - --ema \ - --batch_size 16 \ - --root path/to/dataset/ -``` - -You can run ```python train.py -h``` to check all optional argument. Or you can just run the shell file, for example: -```Shell -sh train_yolov1.sh -``` - -If you have multi gpus like 8, and you put 4 images on each gpu: -```Shell -python -m torch.distributed.launch --nproc_per_node=8 train.py -d coco \ - --cuda \ - -m yolov1 \ - -ms \ - --ema \ - -dist \ - --sybn \ - --num_gpu 8 \ - --batch_size 4 \ - --root path/to/dataset/ -``` -Attention, `--batch_size` is the number of batchsize on per GPU, not all GPUs. - -I have upload all training log files. For example, `1-v1.txt` contains all the output information during the training YOLOv1. - -It is strongly recommended that you open the training shell file to check how I train each YOLO detector. - -# Test -For example: - -```Shell -python test.py -d coco \ - --cuda \ - -m yolov2 \ - --weight path/to/weight \ - --img_size 640 \ - --root path/to/dataset/ \ - --show -``` - -# Evaluation -For example - -```Shell -python eval.py -d coco-val \ - --cuda \ - -m yolov1 \ - --weight path/to/weight \ - --img_size 640 \ - --root path/to/dataset/ -``` - -# Evaluation on COCO-test-dev -To run on COCO_test-dev(You must be sure that you have downloaded test2017): -```Shell -python eval.py -d coco-test \ - --cuda \ - -m yolov1 \ - --weight path/to/weight \ - --img_size 640 \ - --root path/to/dataset/ -``` -You will get a `coco_test-dev.json` file. -Then you should follow the official requirements to compress it into zip format -and upload it the official evaluation server. 
-- Gitee From e3094e57375a2cddf82a11a4e8dfa00677120971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:42:23 +0000 Subject: [PATCH 3/8] my first commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张智斌123 --- .../contrib/cv/detection/YoloV2-640/README.md | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/README.md diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/README.md b/PyTorch/contrib/cv/detection/YoloV2-640/README.md new file mode 100644 index 0000000000..677c207e4d --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/README.md @@ -0,0 +1,194 @@ +# YoloV2 for PyTorch + +- [概述](概述.md) +- [准备训练环境](准备训练环境.md) +- [开始训练](开始训练.md) +- [训练结果展示](训练结果展示.md) +- [版本说明](版本说明.md) + + + +# 概述 + +## 简述 + +为提高物体定位精准性和召回率,Yolo作者提出了YoloV2。相比V1提高了训练图像的分辨率;引入了Faster RCNN中anchor box的思想,对网络结构的设计进行了改进,输出层使用卷积层替代Yolo的全连接层,使用coco物体检测标注数据训练物体检测模型。相比YoloV1,YoloV2在识别种类、精度、速度、和定位准确性等方面都有大大提升。 +- 参考实现: + + ``` + url=https://github.com/yjh0410/PyTorch_YOLO-Family + commit_id=234fa7c53b1f0d2a8bec3a8cdb656f63b916c6ef + ``` + +- 适配昇腾 AI 处理器的实现: + + ``` + url=https://gitee.com/ascend/ModelZoo-PyTorch.git + code_path=PyTorch/contrib/cv/detection + ``` + +- 通过Git获取代码方法如下: + + ``` + git clone {url} # 克隆仓库的代码 + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + ``` + +- 通过单击“立即下载”,下载源码包。 + +# 准备训练环境 + +## 准备环境 + +- 当前模型支持的 PyTorch 版本和已知三方库依赖如下表所示。 + + **表 1** 版本支持表 + + | Torch_Version| 三方库依赖版本 | + |----------------------------------| ----------------------------------| + | PyTorch 1.5 | torchvision==0.6.0;pillow==8.4.0 | + | PyTorch 1.8 | torchvision==0.9.1;pillow==9.1.0 | + +- 环境准备指导。 + + 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 + +- 安装依赖。 + + 在模型源码包根目录下执行命令,安装模型对应PyTorch版本需要的依赖 + ``` + pip install -r 1.5_requirements.txt # PyTorch1.5版本 + + pip install -r 1.8_requirements.txt # PyTorch1.8版本 + ``` + + +## 准备数据集 + +1. 获取数据集。 + + 用户只需运行sh data/scripts/COCO2017.sh,用户即可获得COCO train2017,val2017,test2017。 + + COCO2017数据集目录结构参考如下所示。 + + ``` + ├── COCO: 数据集根目录 + ├──train2017: 所有训练图像文件夹(118287张) + │──000000000009.jpg + │──000000000025.jpg + │──000000000030.jpg + │ ... + ├──test2017 + │──000000000001.jpg + │──000000000016.jpg + │──000000000019.jpg + │ ... + ├──val2017: 所有验证图像文件夹(5000张) + ├──000000000139.jpg + ├──000000000285.jpg + ├──000000000632.jpg + │ ... + ├──annotations: 对应标注文件夹 + ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件 + ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件 + ├── captions_train2017.json: 对应图像描述的训练集标注文件 + ├── captions_val2017.json: 对应图像描述的验证集标注文件 + ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件 + └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件夹 + + ``` + + > **说明:** + >该数据集的训练过程脚本只作为一种参考示例。 + +2. 数据预处理(按需处理所需要的数据集)。 + +# 开始训练 + +## 训练模型 + +1. 进入解压后的源码包根目录。 + + ``` + cd /${模型文件夹名称} + ``` + +2. 
运行训练脚本。 + + 该模型支持单机单卡训练和单机8卡训练。 + + - 单机单卡训练 + + 启动单卡训练。 + + ``` + bash train-1p.sh + ``` + + - 单机8卡训练 + + 启动8卡训练。 + + ``` + bash train-8p.sh + ``` + + 模型训练脚本参数说明如下。 + + ``` + 公共参数: + --npu //使用npu + -d //所用数据集,coco或者voc + -m //使用模型,yolov2 + --root //数据集路径 + --batch_size //训练批次大小 + --lr //初始学习率 + --img_size //指定图像尺寸 + --max_epoch //最大训练次数 + --lr_epoch //学习率调整 + --multi_scale //多尺度训练 + --multi_scale_range //多尺度训练分辨率范围 + --multi_anchor //使用multi anchor正样本策略 + 多卡训练参数: + --nproc_per_node //每个节点上有多少个进程 + --multiprocessing-distributed //使用多卡训练 + -dist //分布式训练 + --num_gpu //npu数量 + ``` + + 训练完成后,会在weights/yolov2目录下保存模型权重文件,并输出模型训练精度和性能信息。 + +# 训练结果展示 + +**表 2** 训练结果展示表 + +| NAME | AP50 | FPS | Epochs | AMP_Type | +|--------|-------|----:|--------|---------:| +| 1p-竞品V | - | 95 | 10 | O2 | +| 1p-NPU | - | 95 | 20 | O2 | +| 8p-竞品V | 53.40 | 259 | 200 | O2 | +| 8p-NPU | 52.50 | 216 | 200 | O2 | + + +# 版本说明 + +## 变更 + +2023.4.27:首次发布 + +## 已知问题 + +**_当前发行版本中存在的问题描述。_** + +无 + + + + + + + + + + + -- Gitee From 56bbb3b5749cb4dc9214e31d15890dea242a4d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:47:50 +0000 Subject: [PATCH 4/8] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20PyTo?= =?UTF-8?q?rch/contrib/cv/detection/YoloV2-640/train-8p.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/detection/YoloV2-640/train-8p.sh | 96 ------------------- 1 file changed, 96 deletions(-) delete mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh deleted file mode 100644 index 8c1bcdb666..0000000000 --- a/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi -#集合通信参数,不需要修改 -export RANK_SIZE=8 -RANK_ID_START=0 -export WORLD_SIZE=8 -#训练开始时间,不需要修改 -start_time=$(date +%s) -#训练batch_size,,需要模型审视修改 -batch_size=32 -#设置环境变量,不需要修改 -RANK_ID=0 -echo "Decive ID: $RANK_ID" -export RANK_ID=$RANK_ID -export ASCEND_DEVICE_ID=$RANK_ID -ASCEND_DEVICE_ID=$RANK_ID -#创建DeviceID输出目录,不需要修改 -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt -fi -#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -export RANK_SIZE=8 - -KERNEL_NUM=$(($(nproc)/8)) -for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - else - python3.7 -m torch.distributed.launch --nproc_per_node=8 train8p.py \ - --npu \ - -d coco \ - -m yolov2 \ - --root /forDocker/dataset \ - --batch_size 32 \ - --lr 0.002 \ - --img_size 640 \ - --max_epoch 200 \ - --lr_epoch 100 150 \ - --multi_scale \ - --multi_scale_range 10 20 \ - --multi_anchor \ - -dist \ - --sybn \ - --num_gpu 8 \ - --local_rank 0 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - fi -done - -#8p情况下仅0卡(主节点)有完整日志,因此后续日志提取仅涉及0卡 -ASCEND_DEVICE_ID=0 - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=`grep -a 'Epoch ' $test_path_dir/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "time: " '{print $2}'|awk -F "," '{print $1}'|awk 'END {print}'|sed 's/.$//'` -FPS=`awk 'BEGIN{printf "%.2f\n", '${RANK_SIZE}'*'${batch_size}'/'${time}'}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -- Gitee From 0f4c9df09374a126057566e0941d78765037fba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:48:03 +0000 Subject: [PATCH 5/8] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20PyTo?= =?UTF-8?q?rch/contrib/cv/detection/YoloV2-640/train1p.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/detection/YoloV2-640/train1p.py | 545 ------------------ 1 file changed, 545 deletions(-) delete mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train1p.py diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py b/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py deleted file mode 100644 index 4a50a26de8..0000000000 --- a/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py +++ /dev/null @@ -1,545 +0,0 @@ -from __future__ import division - -import os -import argparse -import time -import math -import random -from copy import deepcopy -import apex -from apex import amp -import torch -import torch_npu -import torch.optim as optim -import torch.backends.cudnn as cudnn -import torch.distributed as dist -from torch.nn.parallel import DistributedDataParallel as DDP -import sys -from config.yolo_config import yolo_config -from data.voc import VOCDetection -from data.coco import COCODataset -from data.transforms import TrainTransforms, ColorTransforms, ValTransforms - -from utils import distributed_utils -from utils import create_labels -from utils.vis import vis_data, vis_targets -from utils.com_flops_params import FLOPs_and_Params -from utils.criterion import build_criterion -from utils.misc import detection_collate -from utils.misc import ModelEMA -from utils.criterion import build_criterion - -from models.yolo import 
build_model - -from evaluator.cocoapi_evaluator import COCOAPIEvaluator -from evaluator.vocapi_evaluator import VOCAPIEvaluator - -def parse_args(): - parser = argparse.ArgumentParser(description='YOLO Detection') - # basic - parser.add_argument('--npu', action='store_true', default=False, - help='use npu.') - parser.add_argument('--batch_size', default=16, type=int, - help='Batch size for training') - parser.add_argument('--lr', default=1e-3, type=float, - help='initial learning rate') - parser.add_argument('--img_size', type=int, default=640, - help='The upper bound of warm-up') - parser.add_argument('--multi_scale_range', nargs='+', default=[10, 20], type=int, - help='lr epoch to decay') - parser.add_argument('--max_epoch', type=int, default=200, - help='The upper bound of warm-up') - parser.add_argument('--lr_epoch', nargs='+', default=[100, 150], type=int, - help='lr epoch to decay') - parser.add_argument('--wp_epoch', type=int, default=2, - help='The upper bound of warm-up') - parser.add_argument('--start_epoch', type=int, default=0, - help='start epoch to train') - parser.add_argument('-r', '--resume', default=None, type=str, - help='keep training') - parser.add_argument('--num_workers', default=8, type=int, - help='Number of workers used in dataloading') - parser.add_argument('--num_gpu', default=1, type=int, - help='Number of GPUs to train') - parser.add_argument('--eval_epoch', type=int, - default=10, help='interval between evaluations') - parser.add_argument('--tfboard', action='store_true', default=False, - help='use tensorboard') - parser.add_argument('--save_folder', default='weights/', type=str, - help='path to save weight') - parser.add_argument('--vis_data', action='store_true', default=False, - help='visualize images and labels.') - parser.add_argument('--vis_targets', action='store_true', default=False, - help='visualize assignment.') - - # Optimizer & Schedule - parser.add_argument('--optimizer', default='NpuFusedSGD', type=str, - help='sgd, adamw') - parser.add_argument('--lr_schedule', default='step', type=str, - help='step, cos') - parser.add_argument('--grad_clip', default=None, type=float, - help='clip gradient') - - # model - parser.add_argument('-m', '--model', default='yolov1', - help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' - 'yolov4, yolo_tiny, yolo_nano') - parser.add_argument('--conf_thresh', default=0.001, type=float, - help='NMS threshold') - parser.add_argument('--nms_thresh', default=0.5, type=float, - help='NMS threshold') - - # dataset - parser.add_argument('--root', default='/mnt/share/ssd2/dataset', - help='data root') - parser.add_argument('-d', '--dataset', default='coco', - help='coco, widerface, crowdhuman') - - # Loss - parser.add_argument('--loss_obj_weight', default=1.0, type=float, - help='weight of obj loss') - parser.add_argument('--loss_cls_weight', default=1.0, type=float, - help='weight of cls loss') - parser.add_argument('--loss_reg_weight', default=1.0, type=float, - help='weight of reg loss') - parser.add_argument('--scale_loss', default='batch', type=str, - help='scale loss: batch or positive samples') - - # train trick - parser.add_argument('--no_warmup', action='store_true', default=False, - help='do not use warmup') - parser.add_argument('-ms', '--multi_scale', action='store_true', default=False, - help='use multi-scale trick') - parser.add_argument('--ema', action='store_true', default=False, - help='use ema training trick') - parser.add_argument('--mosaic', action='store_true', default=False, - help='use Mosaic 
Augmentation trick') - parser.add_argument('--mixup', action='store_true', default=False, - help='use MixUp Augmentation trick') - parser.add_argument('--multi_anchor', action='store_true', default=False, - help='use multiple anchor boxes as the positive samples') - parser.add_argument('--center_sample', action='store_true', default=False, - help='use center sample for labels') - parser.add_argument('--accumulate', type=int, default=1, - help='accumulate gradient') - # DDP train - parser.add_argument('-dist', '--distributed', action='store_true', default=False, - help='distributed training') - parser.add_argument('--local_rank', type=int, default=0, - help='local_rank') - parser.add_argument('--sybn', action='store_true', default=False, - help='use sybn.') - parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default O1') - - return parser.parse_args() - - -def train(): - args = parse_args() - os.environ['MASTER_ADDR'] = 'localhost' - os.environ['MASTER_PORT'] = '12345' - - # torch.npu.set_compile_mode(jit_compile=False) - option = {} - option["ACL_OP_COMPILER_CACHE_MODE"]="enable" - option["ACL_OP_COMPILER_CACHE_DIR"]="./kernel_meta" - option["NPU_FUZZY_COMPILE_BLACKLIST"] = "Maximum,Conv2D,BNInfer,BNTrainingReduceGrad,Cast" - print("option:",option) - # torch.npu.set_option(option) - print("Setting Arguments.. : ", args) - print("----------------------------------------------------------") - - # path to save model - path_to_save = os.path.join(args.save_folder, args.dataset, args.model) - os.makedirs(path_to_save, exist_ok=True) - - # set distributed - local_rank = 0 - if args.distributed: - dist.init_process_group(backend="hccl", #init_method="env://" - ) - local_rank = torch.distributed.get_rank() - print(local_rank) - torch_npu.npu.set_device(local_rank) - - # cuda - if args.npu: - print('use npu') - cudnn.benchmark = True - device = torch.device("npu") - else: - device = torch.device("cpu") - - # YOLO config - cfg = yolo_config[args.model] - train_size = val_size = args.img_size - - # dataset and evaluator - dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) - # dataloader - dataloader = build_dataloader(args, dataset, detection_collate) - # criterioin - criterion = build_criterion(args, cfg, num_classes) - - print('Training model on:', args.dataset) - print('The dataset size:', len(dataset)) - print("----------------------------------------------------------") - - # build model - net = build_model(args=args, - cfg=cfg, - device=device, - num_classes=num_classes, - trainable=True) - model = net - - # SyncBatchNorm - # if args.sybn and args.npu and args.num_gpu > 1: - # print('use SyncBatchNorm ...') - # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - - model = model.to(device).train() - # compute FLOPs and Params - # if local_rank == 0: - # model_copy = deepcopy(model) - # model_copy.trainable = False - # model_copy.eval() - # FLOPs_and_Params(model=model_copy, size=train_size) - # model_copy.trainable = True - # model_copy.train() - # keep training - if args.resume is not None: - print('keep training model: %s' % (args.resume)) - model.load_state_dict(torch.load(args.resume, map_location=device)) - - # EMA - ema = ModelEMA(model) if args.ema else None - # use tfboard - tblogger = None - if args.tfboard: - print('use tensorboard') - from torch.utils.tensorboard import SummaryWriter - c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) - log_path = os.path.join('log/', 
args.dataset, c_time) - os.makedirs(log_path, exist_ok=True) - - tblogger = SummaryWriter(log_path) - # optimizer setup - base_lr = args.lr - tmp_lr = args.lr - if args.optimizer == 'NpuFusedSGD': - print('use SGD with momentum ...') - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.9) - # optimizer = optim.SGD(model.parameters(), - # lr=tmp_lr, - # momentum=0.9, - # weight_decay=5e-4) - elif args.optimizer == 'adamw': - print('use AdamW ...') - optimizer = optim.AdamW(model.parameters(), - lr=tmp_lr, - weight_decay=5e-4) - - model, optimizer = amp.initialize(model, optimizer, opt_level='O1', loss_scale=128.0,combine_grad=True) - - # DDP - if args.distributed and args.num_gpu > 1: - print('using DDP ...') - model = DDP(model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=False) - - - - - batch_size = args.batch_size - epoch_size = len(dataset) // (batch_size * args.num_gpu) - best_map = -100. - warmup = not args.no_warmup - - t0 = time.time() - # start training loop - for epoch in range(args.start_epoch, args.max_epoch): - if args.distributed: - dataloader.sampler.set_epoch(epoch) - - # use step lr decay - if args.lr_schedule == 'step': - if epoch in args.lr_epoch: - tmp_lr = tmp_lr * 0.1 - set_lr(optimizer, tmp_lr) - # use cos lr decay - elif args.lr_schedule == 'cos' and not warmup: - T_max = args.max_epoch - 15 - lr_min = base_lr * 0.1 * 0.1 - if epoch > T_max: - # Cos decay is done - print('Cosine annealing is over !!') - args.lr_schedule == None - tmp_lr = lr_min - set_lr(optimizer, tmp_lr) - else: - tmp_lr = lr_min + 0.5*(base_lr - lr_min)*(1 + math.cos(math.pi*epoch / T_max)) - set_lr(optimizer, tmp_lr) - fps_sum=0 - # train one epoch - # pre_flag = False - # start_time = time.time() - for iter_i, (images, targets) in enumerate(dataloader): - # if iter_i == 5: - # start_time = time.time() - # with torch.autograd.profiler.profile(use_npu=True) as prof: - ni = iter_i + epoch * epoch_size - # warmup - if epoch < args.wp_epoch and warmup: - nw = args.wp_epoch * epoch_size - tmp_lr = base_lr * pow(ni / nw, 4) - set_lr(optimizer, tmp_lr) - - elif epoch == args.wp_epoch and iter_i == 0 and warmup: - # warmup is over - print('Warmup is over !!') - warmup = False - tmp_lr = base_lr - set_lr(optimizer, tmp_lr) - - # multi-scale trick - if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: - # randomly choose a new size - r = args.multi_scale_range - train_size = random.randint(r[0], r[1]) * 32 - model.set_grid(train_size) - if args.multi_scale: - # interpolate - images = torch.nn.functional.interpolate( - input=images, - size=train_size, - mode='bilinear', - align_corners=False) - - targets = [label.tolist() for label in targets] - # visualize target - if args.vis_data: - vis_data(images, targets) - continue - # make labels - targets = create_labels.gt_creator( - img_size=train_size, - strides=net.stride, - label_lists=targets, - anchor_size=cfg["anchor_size"], - multi_anchor=args.multi_anchor, - center_sample=args.center_sample) - # visualize assignment - if args.vis_targets: - vis_targets(images, targets, cfg["anchor_size"], net.stride) - continue - - # to device - images = images.to(device) - targets = targets.to(device) - - # inference - pred_obj, pred_cls, pred_iou, targets = model(images, targets=targets) - - # compute loss - loss_obj, loss_cls, loss_reg, total_loss = criterion(pred_obj, pred_cls, pred_iou, targets) - - # check loss - if torch.isnan(total_loss): - continue - - loss_dict = dict( - loss_obj=loss_obj, - 
loss_cls=loss_cls, - loss_reg=loss_reg, - total_loss=total_loss - ) - loss_dict_reduced = distributed_utils.reduce_loss_dict(loss_dict) - - total_loss = total_loss / args.accumulate - # Backward and Optimize - with amp.scale_loss(total_loss , optimizer) as scaled_loss: - scaled_loss.backward() - if ni % args.accumulate == 0: - if args.grad_clip is not None: - torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) - optimizer.step() - optimizer.zero_grad() - - if args.ema: - ema.update(model) - - # display - # if iter_i % 10 == 0: - if args.tfboard: - # viz loss - tblogger.add_scalar('loss obj', loss_dict_reduced['loss_obj'].item(), ni) - tblogger.add_scalar('loss cls', loss_dict_reduced['loss_cls'].item(), ni) - tblogger.add_scalar('loss reg', loss_dict_reduced['loss_reg'].item(), ni) - - t1 = time.time() - print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: obj %.2f || cls %.2f || reg %.2f || size %d || time: %.2f]' - % (epoch+1, - args.max_epoch, - iter_i, - epoch_size, - tmp_lr, - loss_dict['loss_obj'].item(), - loss_dict['loss_cls'].item(), - loss_dict['loss_reg'].item(), - train_size, - t1-t0), - flush=True) - fps_sum = fps_sum + (batch_size*8 / (t1 - t0)) - t0 = time.time() - # if local_rank in [-1, 0]: - # epoch_time = time.time() - start_time - # if iter_i >= 5: - # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1 - 5) / (epoch_time))) - # else: - # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1) / (epoch_time))) - if iter_i > 0 and iter_i == 461: - fps_avg = fps_sum / 461 - print("fps:",fps_avg) - fps_sum = 0 - - # evaluation - if (epoch + 1) % args.eval_epoch == 0 or (epoch + 1) == args.max_epoch: - if evaluator is None: - print('No evaluator ...') - print('Saving state, epoch:', epoch + 1) - torch.save(model_eval.state_dict(), os.path.join(path_to_save, - args.model + '_' + repr(epoch + 1) + '.pth')) - print('Keep training ...') - else: - print('eval ...') - # check ema - if args.ema: - model_eval = ema.ema - else: - model_eval = model.module if args.distributed else model - - # set eval mode - model_eval.trainable = False - model_eval.set_grid(val_size) - model_eval.eval() - - if local_rank == 0: - # evaluate - evaluator.evaluate(model_eval) - - cur_map = evaluator.map - if cur_map > best_map: - # update best-map - best_map = cur_map - # save model - print('Saving state, epoch:', epoch + 1) - torch.save(model_eval.state_dict(), os.path.join(path_to_save, - args.model + '_' + repr(epoch + 1) + '_' + str(round(best_map*100, 2)) + '.pth')) - if args.tfboard: - if args.dataset == 'voc': - tblogger.add_scalar('07test/mAP', evaluator.map, epoch) - elif args.dataset == 'coco': - tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch) - tblogger.add_scalar('val/AP50', evaluator.ap50, epoch) - - if args.distributed: - # wait for all processes to synchronize - dist.barrier() - - # set train mode. 
- model_eval.trainable = True - model_eval.set_grid(train_size) - model_eval.train() - - # close mosaic augmentation - if args.mosaic and args.max_epoch - epoch == 15: - print('close Mosaic Augmentation ...') - dataloader.dataset.mosaic = False - # close mixup augmentation - if args.mixup and args.max_epoch - epoch == 15: - print('close Mixup Augmentation ...') - dataloader.dataset.mixup = False - - if args.tfboard: - tblogger.close() - - -def build_dataset(args, train_size, val_size, device): - if args.dataset == 'voc': - data_dir = os.path.join(args.root, 'VOCdevkit') - num_classes = 20 - dataset = VOCDetection( - data_dir=data_dir, - img_size=train_size, - transform=TrainTransforms(train_size), - color_augment=ColorTransforms(train_size), - mosaic=args.mosaic, - mixup=args.mixup) - - evaluator = VOCAPIEvaluator( - data_dir=data_dir, - img_size=val_size, - device=device, - transform=ValTransforms(val_size)) - - elif args.dataset == 'coco': - data_dir = os.path.join(args.root, 'COCO') - num_classes = 80 - dataset = COCODataset( - data_dir=data_dir, - img_size=train_size, - image_set='train2017', - transform=TrainTransforms(train_size), - color_augment=ColorTransforms(train_size), - mosaic=args.mosaic, - mixup=args.mixup) - - evaluator = COCOAPIEvaluator( - data_dir=data_dir, - img_size=val_size, - device=device, - transform=ValTransforms(val_size) - ) - - else: - print('unknow dataset !! Only support voc and coco !!') - exit(0) - - return dataset, evaluator, num_classes - - -def build_dataloader(args, dataset, collate_fn=None): - # distributed - if args.distributed and args.num_gpu > 1: - # dataloader - dataloader = torch.utils.data.DataLoader( - dataset=dataset, - batch_size=args.batch_size, - collate_fn=collate_fn, - num_workers=args.num_workers, - pin_memory=True, - sampler=torch.utils.data.distributed.DistributedSampler(dataset) - ) - - else: - # dataloader - dataloader = torch.utils.data.DataLoader( - dataset=dataset, - shuffle=True, - batch_size=args.batch_size, - collate_fn=collate_fn, - num_workers=args.num_workers, - pin_memory=True - ) - return dataloader - - -def set_lr(optimizer, lr): - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -if __name__ == '__main__': - train() - -- Gitee From c60141e8229575609cad7ccd124c83b0323ac428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:48:06 +0000 Subject: [PATCH 6/8] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20PyTo?= =?UTF-8?q?rch/contrib/cv/detection/YoloV2-640/train8p.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/detection/YoloV2-640/train8p.py | 545 ------------------ 1 file changed, 545 deletions(-) delete mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train8p.py diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py b/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py deleted file mode 100644 index b34ec2f26f..0000000000 --- a/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py +++ /dev/null @@ -1,545 +0,0 @@ -from __future__ import division - -import os -import argparse -import time -import math -import random -from copy import deepcopy -import apex -from apex import amp -import torch -import torch_npu -import torch.optim as optim -import torch.backends.cudnn as cudnn -import torch.distributed as dist -from torch.nn.parallel import DistributedDataParallel as DDP -import sys -from config.yolo_config import yolo_config -from data.voc import VOCDetection -from data.coco import 
COCODataset -from data.transforms import TrainTransforms, ColorTransforms, ValTransforms - -from utils import distributed_utils -from utils import create_labels -from utils.vis import vis_data, vis_targets -from utils.com_flops_params import FLOPs_and_Params -from utils.criterion import build_criterion -from utils.misc import detection_collate -from utils.misc import ModelEMA -from utils.criterion import build_criterion - -from models.yolo import build_model - -from evaluator.cocoapi_evaluator import COCOAPIEvaluator -from evaluator.vocapi_evaluator import VOCAPIEvaluator - -def parse_args(): - parser = argparse.ArgumentParser(description='YOLO Detection') - # basic - parser.add_argument('--npu', action='store_true', default=False, - help='use npu.') - parser.add_argument('--batch_size', default=16, type=int, - help='Batch size for training') - parser.add_argument('--lr', default=1e-3, type=float, - help='initial learning rate') - parser.add_argument('--img_size', type=int, default=640, - help='The upper bound of warm-up') - parser.add_argument('--multi_scale_range', nargs='+', default=[10, 20], type=int, - help='lr epoch to decay') - parser.add_argument('--max_epoch', type=int, default=200, - help='The upper bound of warm-up') - parser.add_argument('--lr_epoch', nargs='+', default=[100, 150], type=int, - help='lr epoch to decay') - parser.add_argument('--wp_epoch', type=int, default=2, - help='The upper bound of warm-up') - parser.add_argument('--start_epoch', type=int, default=0, - help='start epoch to train') - parser.add_argument('-r', '--resume', default=None, type=str, - help='keep training') - parser.add_argument('--num_workers', default=8, type=int, - help='Number of workers used in dataloading') - parser.add_argument('--num_gpu', default=1, type=int, - help='Number of GPUs to train') - parser.add_argument('--eval_epoch', type=int, - default=10, help='interval between evaluations') - parser.add_argument('--tfboard', action='store_true', default=False, - help='use tensorboard') - parser.add_argument('--save_folder', default='weights/', type=str, - help='path to save weight') - parser.add_argument('--vis_data', action='store_true', default=False, - help='visualize images and labels.') - parser.add_argument('--vis_targets', action='store_true', default=False, - help='visualize assignment.') - - # Optimizer & Schedule - parser.add_argument('--optimizer', default='NpuFusedSGD', type=str, - help='sgd, adamw') - parser.add_argument('--lr_schedule', default='step', type=str, - help='step, cos') - parser.add_argument('--grad_clip', default=None, type=float, - help='clip gradient') - - # model - parser.add_argument('-m', '--model', default='yolov1', - help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' - 'yolov4, yolo_tiny, yolo_nano') - parser.add_argument('--conf_thresh', default=0.001, type=float, - help='NMS threshold') - parser.add_argument('--nms_thresh', default=0.5, type=float, - help='NMS threshold') - - # dataset - parser.add_argument('--root', default='/mnt/share/ssd2/dataset', - help='data root') - parser.add_argument('-d', '--dataset', default='coco', - help='coco, widerface, crowdhuman') - - # Loss - parser.add_argument('--loss_obj_weight', default=1.0, type=float, - help='weight of obj loss') - parser.add_argument('--loss_cls_weight', default=1.0, type=float, - help='weight of cls loss') - parser.add_argument('--loss_reg_weight', default=1.0, type=float, - help='weight of reg loss') - parser.add_argument('--scale_loss', default='batch', type=str, - help='scale loss: batch 
or positive samples') - - # train trick - parser.add_argument('--no_warmup', action='store_true', default=False, - help='do not use warmup') - parser.add_argument('-ms', '--multi_scale', action='store_true', default=False, - help='use multi-scale trick') - parser.add_argument('--ema', action='store_true', default=False, - help='use ema training trick') - parser.add_argument('--mosaic', action='store_true', default=False, - help='use Mosaic Augmentation trick') - parser.add_argument('--mixup', action='store_true', default=False, - help='use MixUp Augmentation trick') - parser.add_argument('--multi_anchor', action='store_true', default=False, - help='use multiple anchor boxes as the positive samples') - parser.add_argument('--center_sample', action='store_true', default=False, - help='use center sample for labels') - parser.add_argument('--accumulate', type=int, default=1, - help='accumulate gradient') - # DDP train - parser.add_argument('-dist', '--distributed', action='store_true', default=False, - help='distributed training') - parser.add_argument('--local_rank', type=int, default=0, - help='local_rank') - parser.add_argument('--sybn', action='store_true', default=False, - help='use sybn.') - parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default O1') - - return parser.parse_args() - - -def train(): - args = parse_args() - os.environ['MASTER_ADDR'] = 'localhost' - os.environ['MASTER_PORT'] = '12345' - - # torch.npu.set_compile_mode(jit_compile=False) - option = {} - option["ACL_OP_COMPILER_CACHE_MODE"]="enable" - option["ACL_OP_COMPILER_CACHE_DIR"]="./kernel_meta" - option["NPU_FUZZY_COMPILE_BLACKLIST"] = "Maximum,Conv2D,BNInfer,BNTrainingReduceGrad,Cast" - print("option:",option) - # torch.npu.set_option(option) - print("Setting Arguments.. 
: ", args) - print("----------------------------------------------------------") - - # path to save model - path_to_save = os.path.join(args.save_folder, args.dataset, args.model) - os.makedirs(path_to_save, exist_ok=True) - - # set distributed - local_rank = 0 - if args.distributed: - dist.init_process_group(backend="hccl", #init_method="env://" - ) - local_rank = torch.distributed.get_rank() - print(local_rank) - torch_npu.npu.set_device(local_rank) - - # cuda - if args.npu: - print('use npu') - cudnn.benchmark = True - device = torch.device("npu") - else: - device = torch.device("cpu") - - # YOLO config - cfg = yolo_config[args.model] - train_size = val_size = args.img_size - - # dataset and evaluator - dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) - # dataloader - dataloader = build_dataloader(args, dataset, detection_collate) - # criterioin - criterion = build_criterion(args, cfg, num_classes) - - print('Training model on:', args.dataset) - print('The dataset size:', len(dataset)) - print("----------------------------------------------------------") - - # build model - net = build_model(args=args, - cfg=cfg, - device=device, - num_classes=num_classes, - trainable=True) - model = net - - # SyncBatchNorm - # if args.sybn and args.npu and args.num_gpu > 1: - # print('use SyncBatchNorm ...') - # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - - model = model.to(device).train() - # compute FLOPs and Params - # if local_rank == 0: - # model_copy = deepcopy(model) - # model_copy.trainable = False - # model_copy.eval() - # FLOPs_and_Params(model=model_copy, size=train_size) - # model_copy.trainable = True - # model_copy.train() - # keep training - if args.resume is not None: - print('keep training model: %s' % (args.resume)) - model.load_state_dict(torch.load(args.resume, map_location=device)) - - # EMA - ema = ModelEMA(model) if args.ema else None - # use tfboard - tblogger = None - if args.tfboard: - print('use tensorboard') - from torch.utils.tensorboard import SummaryWriter - c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) - log_path = os.path.join('log/', args.dataset, c_time) - os.makedirs(log_path, exist_ok=True) - - tblogger = SummaryWriter(log_path) - # optimizer setup - base_lr = args.lr - tmp_lr = args.lr - if args.optimizer == 'NpuFusedSGD': - print('use SGD with momentum ...') - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.9) - # optimizer = optim.SGD(model.parameters(), - # lr=tmp_lr, - # momentum=0.9, - # weight_decay=5e-4) - elif args.optimizer == 'adamw': - print('use AdamW ...') - optimizer = optim.AdamW(model.parameters(), - lr=tmp_lr, - weight_decay=5e-4) - - model, optimizer = amp.initialize(model, optimizer, opt_level='O1', loss_scale=128.0,combine_grad=True) - - # DDP - if args.distributed and args.num_gpu > 1: - print('using DDP ...') - model = DDP(model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=False) - - - - - batch_size = args.batch_size - epoch_size = len(dataset) // (batch_size * args.num_gpu) - best_map = -100. 
- warmup = not args.no_warmup - - t0 = time.time() - # start training loop - for epoch in range(args.start_epoch, args.max_epoch): - if args.distributed: - dataloader.sampler.set_epoch(epoch) - - # use step lr decay - if args.lr_schedule == 'step': - if epoch in args.lr_epoch: - tmp_lr = tmp_lr * 0.1 - set_lr(optimizer, tmp_lr) - # use cos lr decay - elif args.lr_schedule == 'cos' and not warmup: - T_max = args.max_epoch - 15 - lr_min = base_lr * 0.1 * 0.1 - if epoch > T_max: - # Cos decay is done - print('Cosine annealing is over !!') - args.lr_schedule == None - tmp_lr = lr_min - set_lr(optimizer, tmp_lr) - else: - tmp_lr = lr_min + 0.5*(base_lr - lr_min)*(1 + math.cos(math.pi*epoch / T_max)) - set_lr(optimizer, tmp_lr) - fps_sum=0 - # train one epoch - # pre_flag = False - # start_time = time.time() - for iter_i, (images, targets) in enumerate(dataloader): - # if iter_i == 5: - # start_time = time.time() - # with torch.autograd.profiler.profile(use_npu=True) as prof: - ni = iter_i + epoch * epoch_size - # warmup - if epoch < args.wp_epoch and warmup: - nw = args.wp_epoch * epoch_size - tmp_lr = base_lr * pow(ni / nw, 4) - set_lr(optimizer, tmp_lr) - - elif epoch == args.wp_epoch and iter_i == 0 and warmup: - # warmup is over - print('Warmup is over !!') - warmup = False - tmp_lr = base_lr - set_lr(optimizer, tmp_lr) - - # multi-scale trick - if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: - # randomly choose a new size - r = args.multi_scale_range - train_size = random.randint(r[0], r[1]) * 32 - model.module.set_grid(train_size) - if args.multi_scale: - # interpolate - images = torch.nn.functional.interpolate( - input=images, - size=train_size, - mode='bilinear', - align_corners=False) - - targets = [label.tolist() for label in targets] - # visualize target - if args.vis_data: - vis_data(images, targets) - continue - # make labels - targets = create_labels.gt_creator( - img_size=train_size, - strides=net.stride, - label_lists=targets, - anchor_size=cfg["anchor_size"], - multi_anchor=args.multi_anchor, - center_sample=args.center_sample) - # visualize assignment - if args.vis_targets: - vis_targets(images, targets, cfg["anchor_size"], net.stride) - continue - - # to device - images = images.to(device) - targets = targets.to(device) - - # inference - pred_obj, pred_cls, pred_iou, targets = model(images, targets=targets) - - # compute loss - loss_obj, loss_cls, loss_reg, total_loss = criterion(pred_obj, pred_cls, pred_iou, targets) - - # check loss - if torch.isnan(total_loss): - continue - - loss_dict = dict( - loss_obj=loss_obj, - loss_cls=loss_cls, - loss_reg=loss_reg, - total_loss=total_loss - ) - loss_dict_reduced = distributed_utils.reduce_loss_dict(loss_dict) - - total_loss = total_loss / args.accumulate - # Backward and Optimize - with amp.scale_loss(total_loss , optimizer) as scaled_loss: - scaled_loss.backward() - if ni % args.accumulate == 0: - if args.grad_clip is not None: - torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) - optimizer.step() - optimizer.zero_grad() - - if args.ema: - ema.update(model) - - # display - # if iter_i % 10 == 0: - if args.tfboard: - # viz loss - tblogger.add_scalar('loss obj', loss_dict_reduced['loss_obj'].item(), ni) - tblogger.add_scalar('loss cls', loss_dict_reduced['loss_cls'].item(), ni) - tblogger.add_scalar('loss reg', loss_dict_reduced['loss_reg'].item(), ni) - - t1 = time.time() - print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: obj %.2f || cls %.2f || reg %.2f || size %d || time: %.2f]' - % (epoch+1, - 
args.max_epoch, - iter_i, - epoch_size, - tmp_lr, - loss_dict['loss_obj'].item(), - loss_dict['loss_cls'].item(), - loss_dict['loss_reg'].item(), - train_size, - t1-t0), - flush=True) - fps_sum = fps_sum + (batch_size*8 / (t1 - t0)) - t0 = time.time() - # if local_rank in [-1, 0]: - # epoch_time = time.time() - start_time - # if iter_i >= 5: - # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1 - 5) / (epoch_time))) - # else: - # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1) / (epoch_time))) - if iter_i > 0 and iter_i == 461: - fps_avg = fps_sum / 461 - print("fps:",fps_avg) - fps_sum = 0 - - # evaluation - if (epoch + 1) % args.eval_epoch == 0 or (epoch + 1) == args.max_epoch: - if evaluator is None: - print('No evaluator ...') - print('Saving state, epoch:', epoch + 1) - torch.save(model_eval.state_dict(), os.path.join(path_to_save, - args.model + '_' + repr(epoch + 1) + '.pth')) - print('Keep training ...') - else: - print('eval ...') - # check ema - if args.ema: - model_eval = ema.ema - else: - model_eval = model.module if args.distributed else model - - # set eval mode - model_eval.trainable = False - model_eval.set_grid(val_size) - model_eval.eval() - - if local_rank == 0: - # evaluate - evaluator.evaluate(model_eval) - - cur_map = evaluator.map - if cur_map > best_map: - # update best-map - best_map = cur_map - # save model - print('Saving state, epoch:', epoch + 1) - torch.save(model_eval.state_dict(), os.path.join(path_to_save, - args.model + '_' + repr(epoch + 1) + '_' + str(round(best_map*100, 2)) + '.pth')) - if args.tfboard: - if args.dataset == 'voc': - tblogger.add_scalar('07test/mAP', evaluator.map, epoch) - elif args.dataset == 'coco': - tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch) - tblogger.add_scalar('val/AP50', evaluator.ap50, epoch) - - if args.distributed: - # wait for all processes to synchronize - dist.barrier() - - # set train mode. - model_eval.trainable = True - model_eval.set_grid(train_size) - model_eval.train() - - # close mosaic augmentation - if args.mosaic and args.max_epoch - epoch == 15: - print('close Mosaic Augmentation ...') - dataloader.dataset.mosaic = False - # close mixup augmentation - if args.mixup and args.max_epoch - epoch == 15: - print('close Mixup Augmentation ...') - dataloader.dataset.mixup = False - - if args.tfboard: - tblogger.close() - - -def build_dataset(args, train_size, val_size, device): - if args.dataset == 'voc': - data_dir = os.path.join(args.root, 'VOCdevkit') - num_classes = 20 - dataset = VOCDetection( - data_dir=data_dir, - img_size=train_size, - transform=TrainTransforms(train_size), - color_augment=ColorTransforms(train_size), - mosaic=args.mosaic, - mixup=args.mixup) - - evaluator = VOCAPIEvaluator( - data_dir=data_dir, - img_size=val_size, - device=device, - transform=ValTransforms(val_size)) - - elif args.dataset == 'coco': - data_dir = os.path.join(args.root, 'COCO') - num_classes = 80 - dataset = COCODataset( - data_dir=data_dir, - img_size=train_size, - image_set='train2017', - transform=TrainTransforms(train_size), - color_augment=ColorTransforms(train_size), - mosaic=args.mosaic, - mixup=args.mixup) - - evaluator = COCOAPIEvaluator( - data_dir=data_dir, - img_size=val_size, - device=device, - transform=ValTransforms(val_size) - ) - - else: - print('unknow dataset !! 
Only support voc and coco !!') - exit(0) - - return dataset, evaluator, num_classes - - -def build_dataloader(args, dataset, collate_fn=None): - # distributed - if args.distributed and args.num_gpu > 1: - # dataloader - dataloader = torch.utils.data.DataLoader( - dataset=dataset, - batch_size=args.batch_size, - collate_fn=collate_fn, - num_workers=args.num_workers, - pin_memory=True, - sampler=torch.utils.data.distributed.DistributedSampler(dataset) - ) - - else: - # dataloader - dataloader = torch.utils.data.DataLoader( - dataset=dataset, - shuffle=True, - batch_size=args.batch_size, - collate_fn=collate_fn, - num_workers=args.num_workers, - pin_memory=True - ) - return dataloader - - -def set_lr(optimizer, lr): - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -if __name__ == '__main__': - train() - -- Gitee From f9207a4be3168f6d309510e1a077d9f6c78dc98a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:48:32 +0000 Subject: [PATCH 7/8] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20PyTo?= =?UTF-8?q?rch/contrib/cv/detection/YoloV2-640/train-1p.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh deleted file mode 100644 index b0d155ece7..0000000000 --- a/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh +++ /dev/null @@ -1,13 +0,0 @@ -python3 train3.py \ - --npu \ - -d coco \ - -m yolov2 \ - --root /home/normal58/zhang/zzb_msft \ - --batch_size 16 \ - --lr 0.001 \ - --img_size 640 \ - --max_epoch 200 \ - --lr_epoch 100 150 \ - --multi_scale \ - --multi_scale_range 10 20 \ - --multi_anchor \ -- Gitee From a745b8e64c4190d108f3bce4377e9050616f70dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=99=BA=E6=96=8C123?= Date: Thu, 27 Apr 2023 06:49:01 +0000 Subject: [PATCH 8/8] my first commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张智斌123 --- .../cv/detection/YoloV2-640/train-1p.sh | 13 + .../cv/detection/YoloV2-640/train-8p.sh | 82 +++ .../cv/detection/YoloV2-640/train1p.py | 559 ++++++++++++++++++ .../cv/detection/YoloV2-640/train8p.py | 559 ++++++++++++++++++ 4 files changed, 1213 insertions(+) create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train1p.py create mode 100644 PyTorch/contrib/cv/detection/YoloV2-640/train8p.py diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh new file mode 100644 index 0000000000..d844daf25b --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train-1p.sh @@ -0,0 +1,13 @@ +python3 train1p.py \ + --npu \ + -d coco \ + -m yolov2 \ + --root /forDocker/dataset \ + --batch_size 16 \ + --lr 0.001 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh b/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh new file mode 100644 index 0000000000..707a4dc136 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train-8p.sh @@ -0,0 
+1,82 @@ +#!/bin/bash +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +#集合通信参数,不需要修改 +export RANK_SIZE=8 +RANK_ID_START=0 +export WORLD_SIZE=8 +#训练开始时间,不需要修改 +start_time=$(date +%s) +#训练batch_size,,需要模型审视修改 +batch_size=32 +#设置环境变量,不需要修改 +RANK_ID=0 +echo "Decive ID: $RANK_ID" +export RANK_ID=$RANK_ID +export ASCEND_DEVICE_ID=$RANK_ID +ASCEND_DEVICE_ID=$RANK_ID +#创建DeviceID输出目录,不需要修改 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +fi +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +export RANK_SIZE=8 + +KERNEL_NUM=$(($(nproc)/8)) +for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7 -m torch.distributed.launch --nproc_per_node=8 train8p.py \ + --npu \ + -d coco \ + -m yolov2 \ + --root /forDocker/dataset \ + --batch_size 32 \ + --lr 0.002 \ + --img_size 640 \ + --max_epoch 200 \ + --lr_epoch 100 150 \ + --multi_scale \ + --multi_scale_range 10 20 \ + --multi_anchor \ + -dist \ + --sybn \ + --num_gpu 8 \ + --local_rank 0 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py b/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py new file mode 100644 index 0000000000..54429fa1ec --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train1p.py @@ -0,0 +1,559 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from __future__ import division + +import os +import argparse +import time +import math +import random +from copy import deepcopy +import apex +from apex import amp +import torch +import torch_npu +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +import sys +from config.yolo_config import yolo_config +from data.voc import VOCDetection +from data.coco import COCODataset +from data.transforms import TrainTransforms, ColorTransforms, ValTransforms + +from utils import distributed_utils +from utils import create_labels +from utils.vis import vis_data, vis_targets +from utils.com_flops_params import FLOPs_and_Params +from utils.criterion import build_criterion +from utils.misc import detection_collate +from utils.misc import ModelEMA +from utils.criterion import build_criterion + +from models.yolo import build_model + +from evaluator.cocoapi_evaluator import COCOAPIEvaluator +from evaluator.vocapi_evaluator import VOCAPIEvaluator + +def parse_args(): + parser = argparse.ArgumentParser(description='YOLO Detection') + # basic + parser.add_argument('--npu', action='store_true', default=False, + help='use npu.') + parser.add_argument('--batch_size', default=16, type=int, + help='Batch size for training') + parser.add_argument('--lr', default=1e-3, type=float, + help='initial learning rate') + parser.add_argument('--img_size', type=int, default=640, + help='The upper bound of warm-up') + parser.add_argument('--multi_scale_range', nargs='+', default=[10, 20], type=int, + help='lr epoch to decay') + parser.add_argument('--max_epoch', type=int, default=200, + help='The upper bound of warm-up') + parser.add_argument('--lr_epoch', nargs='+', default=[100, 150], type=int, + help='lr epoch to decay') + parser.add_argument('--wp_epoch', type=int, default=2, + help='The upper bound of warm-up') + parser.add_argument('--start_epoch', type=int, default=0, + help='start epoch to train') + parser.add_argument('-r', '--resume', default=None, type=str, + help='keep training') + parser.add_argument('--num_workers', default=8, type=int, + help='Number of workers used in dataloading') + parser.add_argument('--num_gpu', default=1, type=int, + help='Number of GPUs to train') + parser.add_argument('--eval_epoch', type=int, + default=10, help='interval between evaluations') + parser.add_argument('--tfboard', action='store_true', default=False, + help='use tensorboard') + parser.add_argument('--save_folder', default='weights/', type=str, + help='path to save weight') + parser.add_argument('--vis_data', action='store_true', default=False, + help='visualize images and labels.') + parser.add_argument('--vis_targets', action='store_true', default=False, + help='visualize assignment.') + + # Optimizer & Schedule + parser.add_argument('--optimizer', default='NpuFusedSGD', type=str, + help='sgd, adamw') + parser.add_argument('--lr_schedule', default='step', type=str, + help='step, cos') + parser.add_argument('--grad_clip', default=None, type=float, + help='clip gradient') + + # model + parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') + parser.add_argument('--conf_thresh', default=0.001, type=float, + help='NMS threshold') + parser.add_argument('--nms_thresh', default=0.5, type=float, + help='NMS threshold') + + # dataset + 
parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') + parser.add_argument('-d', '--dataset', default='coco', + help='coco, widerface, crowdhuman') + + # Loss + parser.add_argument('--loss_obj_weight', default=1.0, type=float, + help='weight of obj loss') + parser.add_argument('--loss_cls_weight', default=1.0, type=float, + help='weight of cls loss') + parser.add_argument('--loss_reg_weight', default=1.0, type=float, + help='weight of reg loss') + parser.add_argument('--scale_loss', default='batch', type=str, + help='scale loss: batch or positive samples') + + # train trick + parser.add_argument('--no_warmup', action='store_true', default=False, + help='do not use warmup') + parser.add_argument('-ms', '--multi_scale', action='store_true', default=False, + help='use multi-scale trick') + parser.add_argument('--ema', action='store_true', default=False, + help='use ema training trick') + parser.add_argument('--mosaic', action='store_true', default=False, + help='use Mosaic Augmentation trick') + parser.add_argument('--mixup', action='store_true', default=False, + help='use MixUp Augmentation trick') + parser.add_argument('--multi_anchor', action='store_true', default=False, + help='use multiple anchor boxes as the positive samples') + parser.add_argument('--center_sample', action='store_true', default=False, + help='use center sample for labels') + parser.add_argument('--accumulate', type=int, default=1, + help='accumulate gradient') + # DDP train + parser.add_argument('-dist', '--distributed', action='store_true', default=False, + help='distributed training') + parser.add_argument('--local_rank', type=int, default=0, + help='local_rank') + parser.add_argument('--sybn', action='store_true', default=False, + help='use sybn.') + parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default O1') + + return parser.parse_args() + + +def train(): + args = parse_args() + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12345' + + # torch.npu.set_compile_mode(jit_compile=False) + option = {} + option["ACL_OP_COMPILER_CACHE_MODE"]="enable" + option["ACL_OP_COMPILER_CACHE_DIR"]="./kernel_meta" + option["NPU_FUZZY_COMPILE_BLACKLIST"] = "Maximum,Conv2D,BNInfer,BNTrainingReduceGrad,Cast" + print("option:",option) + # torch.npu.set_option(option) + print("Setting Arguments.. 
: ", args) + print("----------------------------------------------------------") + + # path to save model + path_to_save = os.path.join(args.save_folder, args.dataset, args.model) + os.makedirs(path_to_save, exist_ok=True) + + # set distributed + local_rank = 0 + if args.distributed: + dist.init_process_group(backend="hccl", #init_method="env://" + ) + local_rank = torch.distributed.get_rank() + print(local_rank) + torch_npu.npu.set_device(local_rank) + + # cuda + if args.npu: + print('use npu') + cudnn.benchmark = True + device = torch.device("npu") + else: + device = torch.device("cpu") + + # YOLO config + cfg = yolo_config[args.model] + train_size = val_size = args.img_size + + # dataset and evaluator + dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) + # dataloader + dataloader = build_dataloader(args, dataset, detection_collate) + # criterioin + criterion = build_criterion(args, cfg, num_classes) + + print('Training model on:', args.dataset) + print('The dataset size:', len(dataset)) + print("----------------------------------------------------------") + + # build model + net = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=True) + model = net + + # SyncBatchNorm + # if args.sybn and args.npu and args.num_gpu > 1: + # print('use SyncBatchNorm ...') + # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model = model.to(device).train() + # compute FLOPs and Params + # if local_rank == 0: + # model_copy = deepcopy(model) + # model_copy.trainable = False + # model_copy.eval() + # FLOPs_and_Params(model=model_copy, size=train_size) + # model_copy.trainable = True + # model_copy.train() + # keep training + if args.resume is not None: + print('keep training model: %s' % (args.resume)) + model.load_state_dict(torch.load(args.resume, map_location=device)) + + # EMA + ema = ModelEMA(model) if args.ema else None + # use tfboard + tblogger = None + if args.tfboard: + print('use tensorboard') + from torch.utils.tensorboard import SummaryWriter + c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + log_path = os.path.join('log/', args.dataset, c_time) + os.makedirs(log_path, exist_ok=True) + + tblogger = SummaryWriter(log_path) + # optimizer setup + base_lr = args.lr + tmp_lr = args.lr + if args.optimizer == 'NpuFusedSGD': + print('use SGD with momentum ...') + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.9) + # optimizer = optim.SGD(model.parameters(), + # lr=tmp_lr, + # momentum=0.9, + # weight_decay=5e-4) + elif args.optimizer == 'adamw': + print('use AdamW ...') + optimizer = optim.AdamW(model.parameters(), + lr=tmp_lr, + weight_decay=5e-4) + + model, optimizer = amp.initialize(model, optimizer, opt_level='O1', loss_scale=128.0,combine_grad=True) + + # DDP + if args.distributed and args.num_gpu > 1: + print('using DDP ...') + model = DDP(model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=False) + + + + + batch_size = args.batch_size + epoch_size = len(dataset) // (batch_size * args.num_gpu) + best_map = -100. 
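+    # Training loop below: the lr is warmed up as base_lr * (ni / nw)**4 for the
+    # first wp_epoch epochs and then follows the step or cosine schedule; when
+    # --multi_scale is set the input size is re-sampled every 10 iterations
+    # (multiples of 32); losses are scaled through apex AMP, the optimizer steps
+    # every `accumulate` iterations with optional grad clipping, and the EMA
+    # weights are refreshed after each step when --ema is enabled.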
+ warmup = not args.no_warmup + + t0 = time.time() + # start training loop + for epoch in range(args.start_epoch, args.max_epoch): + if args.distributed: + dataloader.sampler.set_epoch(epoch) + + # use step lr decay + if args.lr_schedule == 'step': + if epoch in args.lr_epoch: + tmp_lr = tmp_lr * 0.1 + set_lr(optimizer, tmp_lr) + # use cos lr decay + elif args.lr_schedule == 'cos' and not warmup: + T_max = args.max_epoch - 15 + lr_min = base_lr * 0.1 * 0.1 + if epoch > T_max: + # Cos decay is done + print('Cosine annealing is over !!') + args.lr_schedule == None + tmp_lr = lr_min + set_lr(optimizer, tmp_lr) + else: + tmp_lr = lr_min + 0.5*(base_lr - lr_min)*(1 + math.cos(math.pi*epoch / T_max)) + set_lr(optimizer, tmp_lr) + fps_sum=0 + # train one epoch + # pre_flag = False + # start_time = time.time() + for iter_i, (images, targets) in enumerate(dataloader): + # if iter_i == 5: + # start_time = time.time() + # with torch.autograd.profiler.profile(use_npu=True) as prof: + ni = iter_i + epoch * epoch_size + # warmup + if epoch < args.wp_epoch and warmup: + nw = args.wp_epoch * epoch_size + tmp_lr = base_lr * pow(ni / nw, 4) + set_lr(optimizer, tmp_lr) + + elif epoch == args.wp_epoch and iter_i == 0 and warmup: + # warmup is over + print('Warmup is over !!') + warmup = False + tmp_lr = base_lr + set_lr(optimizer, tmp_lr) + + # multi-scale trick + if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: + # randomly choose a new size + r = args.multi_scale_range + train_size = random.randint(r[0], r[1]) * 32 + model.set_grid(train_size) + if args.multi_scale: + # interpolate + images = torch.nn.functional.interpolate( + input=images, + size=train_size, + mode='bilinear', + align_corners=False) + + targets = [label.tolist() for label in targets] + # visualize target + if args.vis_data: + vis_data(images, targets) + continue + # make labels + targets = create_labels.gt_creator( + img_size=train_size, + strides=net.stride, + label_lists=targets, + anchor_size=cfg["anchor_size"], + multi_anchor=args.multi_anchor, + center_sample=args.center_sample) + # visualize assignment + if args.vis_targets: + vis_targets(images, targets, cfg["anchor_size"], net.stride) + continue + + # to device + images = images.to(device) + targets = targets.to(device) + + # inference + pred_obj, pred_cls, pred_iou, targets = model(images, targets=targets) + + # compute loss + loss_obj, loss_cls, loss_reg, total_loss = criterion(pred_obj, pred_cls, pred_iou, targets) + + # check loss + if torch.isnan(total_loss): + continue + + loss_dict = dict( + loss_obj=loss_obj, + loss_cls=loss_cls, + loss_reg=loss_reg, + total_loss=total_loss + ) + loss_dict_reduced = distributed_utils.reduce_loss_dict(loss_dict) + + total_loss = total_loss / args.accumulate + # Backward and Optimize + with amp.scale_loss(total_loss , optimizer) as scaled_loss: + scaled_loss.backward() + if ni % args.accumulate == 0: + if args.grad_clip is not None: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) + optimizer.step() + optimizer.zero_grad() + + if args.ema: + ema.update(model) + + # display + # if iter_i % 10 == 0: + if args.tfboard: + # viz loss + tblogger.add_scalar('loss obj', loss_dict_reduced['loss_obj'].item(), ni) + tblogger.add_scalar('loss cls', loss_dict_reduced['loss_cls'].item(), ni) + tblogger.add_scalar('loss reg', loss_dict_reduced['loss_reg'].item(), ni) + + t1 = time.time() + print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: obj %.2f || cls %.2f || reg %.2f || size %d || time: %.2f]' + % (epoch+1, + args.max_epoch, + 
iter_i, + epoch_size, + tmp_lr, + loss_dict['loss_obj'].item(), + loss_dict['loss_cls'].item(), + loss_dict['loss_reg'].item(), + train_size, + t1-t0), + flush=True) + fps_sum = fps_sum + (batch_size*8 / (t1 - t0)) + t0 = time.time() + # if local_rank in [-1, 0]: + # epoch_time = time.time() - start_time + # if iter_i >= 5: + # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1 - 5) / (epoch_time))) + # else: + # print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1) / (epoch_time))) + if iter_i > 0 and iter_i == 461: + fps_avg = fps_sum / 461 + print("fps:",fps_avg) + fps_sum = 0 + + # evaluation + if (epoch + 1) % args.eval_epoch == 0 or (epoch + 1) == args.max_epoch: + if evaluator is None: + print('No evaluator ...') + print('Saving state, epoch:', epoch + 1) + torch.save(model_eval.state_dict(), os.path.join(path_to_save, + args.model + '_' + repr(epoch + 1) + '.pth')) + print('Keep training ...') + else: + print('eval ...') + # check ema + if args.ema: + model_eval = ema.ema + else: + model_eval = model.module if args.distributed else model + + # set eval mode + model_eval.trainable = False + model_eval.set_grid(val_size) + model_eval.eval() + + if local_rank == 0: + # evaluate + evaluator.evaluate(model_eval) + + cur_map = evaluator.map + if cur_map > best_map: + # update best-map + best_map = cur_map + # save model + print('Saving state, epoch:', epoch + 1) + torch.save(model_eval.state_dict(), os.path.join(path_to_save, + args.model + '_' + repr(epoch + 1) + '_' + str(round(best_map*100, 2)) + '.pth')) + if args.tfboard: + if args.dataset == 'voc': + tblogger.add_scalar('07test/mAP', evaluator.map, epoch) + elif args.dataset == 'coco': + tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch) + tblogger.add_scalar('val/AP50', evaluator.ap50, epoch) + + if args.distributed: + # wait for all processes to synchronize + dist.barrier() + + # set train mode. + model_eval.trainable = True + model_eval.set_grid(train_size) + model_eval.train() + + # close mosaic augmentation + if args.mosaic and args.max_epoch - epoch == 15: + print('close Mosaic Augmentation ...') + dataloader.dataset.mosaic = False + # close mixup augmentation + if args.mixup and args.max_epoch - epoch == 15: + print('close Mixup Augmentation ...') + dataloader.dataset.mixup = False + + if args.tfboard: + tblogger.close() + + +def build_dataset(args, train_size, val_size, device): + if args.dataset == 'voc': + data_dir = os.path.join(args.root, 'VOCdevkit') + num_classes = 20 + dataset = VOCDetection( + data_dir=data_dir, + img_size=train_size, + transform=TrainTransforms(train_size), + color_augment=ColorTransforms(train_size), + mosaic=args.mosaic, + mixup=args.mixup) + + evaluator = VOCAPIEvaluator( + data_dir=data_dir, + img_size=val_size, + device=device, + transform=ValTransforms(val_size)) + + elif args.dataset == 'coco': + data_dir = os.path.join(args.root, 'COCO') + num_classes = 80 + dataset = COCODataset( + data_dir=data_dir, + img_size=train_size, + image_set='train2017', + transform=TrainTransforms(train_size), + color_augment=ColorTransforms(train_size), + mosaic=args.mosaic, + mixup=args.mixup) + + evaluator = COCOAPIEvaluator( + data_dir=data_dir, + img_size=val_size, + device=device, + transform=ValTransforms(val_size) + ) + + else: + print('unknow dataset !! 
Only support voc and coco !!') + exit(0) + + return dataset, evaluator, num_classes + + +def build_dataloader(args, dataset, collate_fn=None): + # distributed + if args.distributed and args.num_gpu > 1: + # dataloader + dataloader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=args.batch_size, + collate_fn=collate_fn, + num_workers=args.num_workers, + pin_memory=True, + sampler=torch.utils.data.distributed.DistributedSampler(dataset) + ) + + else: + # dataloader + dataloader = torch.utils.data.DataLoader( + dataset=dataset, + shuffle=True, + batch_size=args.batch_size, + collate_fn=collate_fn, + num_workers=args.num_workers, + pin_memory=True + ) + return dataloader + + +def set_lr(optimizer, lr): + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +if __name__ == '__main__': + train() + diff --git a/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py b/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py new file mode 100644 index 0000000000..572b4aced1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/YoloV2-640/train8p.py @@ -0,0 +1,559 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from __future__ import division + +import os +import argparse +import time +import math +import random +from copy import deepcopy +import apex +from apex import amp +import torch +import torch_npu +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +import sys +from config.yolo_config import yolo_config +from data.voc import VOCDetection +from data.coco import COCODataset +from data.transforms import TrainTransforms, ColorTransforms, ValTransforms + +from utils import distributed_utils +from utils import create_labels +from utils.vis import vis_data, vis_targets +from utils.com_flops_params import FLOPs_and_Params +from utils.criterion import build_criterion +from utils.misc import detection_collate +from utils.misc import ModelEMA +from utils.criterion import build_criterion + +from models.yolo import build_model + +from evaluator.cocoapi_evaluator import COCOAPIEvaluator +from evaluator.vocapi_evaluator import VOCAPIEvaluator + +def parse_args(): + parser = argparse.ArgumentParser(description='YOLO Detection') + # basic + parser.add_argument('--npu', action='store_true', default=False, + help='use npu.') + parser.add_argument('--batch_size', default=16, type=int, + help='Batch size for training') + parser.add_argument('--lr', default=1e-3, type=float, + help='initial learning rate') + parser.add_argument('--img_size', type=int, default=640, + help='The upper bound of warm-up') + parser.add_argument('--multi_scale_range', nargs='+', default=[10, 20], type=int, + help='lr epoch to decay') + parser.add_argument('--max_epoch', type=int, default=200, + help='The upper bound of warm-up') + parser.add_argument('--lr_epoch', nargs='+', 
default=[100, 150], type=int, + help='lr epoch to decay') + parser.add_argument('--wp_epoch', type=int, default=2, + help='The upper bound of warm-up') + parser.add_argument('--start_epoch', type=int, default=0, + help='start epoch to train') + parser.add_argument('-r', '--resume', default=None, type=str, + help='keep training') + parser.add_argument('--num_workers', default=8, type=int, + help='Number of workers used in dataloading') + parser.add_argument('--num_gpu', default=1, type=int, + help='Number of GPUs to train') + parser.add_argument('--eval_epoch', type=int, + default=10, help='interval between evaluations') + parser.add_argument('--tfboard', action='store_true', default=False, + help='use tensorboard') + parser.add_argument('--save_folder', default='weights/', type=str, + help='path to save weight') + parser.add_argument('--vis_data', action='store_true', default=False, + help='visualize images and labels.') + parser.add_argument('--vis_targets', action='store_true', default=False, + help='visualize assignment.') + + # Optimizer & Schedule + parser.add_argument('--optimizer', default='NpuFusedSGD', type=str, + help='sgd, adamw') + parser.add_argument('--lr_schedule', default='step', type=str, + help='step, cos') + parser.add_argument('--grad_clip', default=None, type=float, + help='clip gradient') + + # model + parser.add_argument('-m', '--model', default='yolov1', + help='yolov1, yolov2, yolov3, yolov3_spp, yolov3_de, ' + 'yolov4, yolo_tiny, yolo_nano') + parser.add_argument('--conf_thresh', default=0.001, type=float, + help='NMS threshold') + parser.add_argument('--nms_thresh', default=0.5, type=float, + help='NMS threshold') + + # dataset + parser.add_argument('--root', default='/mnt/share/ssd2/dataset', + help='data root') + parser.add_argument('-d', '--dataset', default='coco', + help='coco, widerface, crowdhuman') + + # Loss + parser.add_argument('--loss_obj_weight', default=1.0, type=float, + help='weight of obj loss') + parser.add_argument('--loss_cls_weight', default=1.0, type=float, + help='weight of cls loss') + parser.add_argument('--loss_reg_weight', default=1.0, type=float, + help='weight of reg loss') + parser.add_argument('--scale_loss', default='batch', type=str, + help='scale loss: batch or positive samples') + + # train trick + parser.add_argument('--no_warmup', action='store_true', default=False, + help='do not use warmup') + parser.add_argument('-ms', '--multi_scale', action='store_true', default=False, + help='use multi-scale trick') + parser.add_argument('--ema', action='store_true', default=False, + help='use ema training trick') + parser.add_argument('--mosaic', action='store_true', default=False, + help='use Mosaic Augmentation trick') + parser.add_argument('--mixup', action='store_true', default=False, + help='use MixUp Augmentation trick') + parser.add_argument('--multi_anchor', action='store_true', default=False, + help='use multiple anchor boxes as the positive samples') + parser.add_argument('--center_sample', action='store_true', default=False, + help='use center sample for labels') + parser.add_argument('--accumulate', type=int, default=1, + help='accumulate gradient') + # DDP train + parser.add_argument('-dist', '--distributed', action='store_true', default=False, + help='distributed training') + parser.add_argument('--local_rank', type=int, default=0, + help='local_rank') + parser.add_argument('--sybn', action='store_true', default=False, + help='use sybn.') + parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using 
in amp, default O1') + + return parser.parse_args() + + +def train(): + args = parse_args() + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12345' + + # torch.npu.set_compile_mode(jit_compile=False) + option = {} + option["ACL_OP_COMPILER_CACHE_MODE"]="enable" + option["ACL_OP_COMPILER_CACHE_DIR"]="./kernel_meta" + option["NPU_FUZZY_COMPILE_BLACKLIST"] = "Maximum,Conv2D,BNInfer,BNTrainingReduceGrad,Cast" + print("option:",option) + # torch.npu.set_option(option) + print("Setting Arguments.. : ", args) + print("----------------------------------------------------------") + + # path to save model + path_to_save = os.path.join(args.save_folder, args.dataset, args.model) + os.makedirs(path_to_save, exist_ok=True) + + # set distributed + local_rank = 0 + if args.distributed: + dist.init_process_group(backend="hccl", #init_method="env://" + ) + local_rank = torch.distributed.get_rank() + print(local_rank) + torch_npu.npu.set_device(local_rank) + + # cuda + if args.npu: + print('use npu') + cudnn.benchmark = True + device = torch.device("npu") + else: + device = torch.device("cpu") + + # YOLO config + cfg = yolo_config[args.model] + train_size = val_size = args.img_size + + # dataset and evaluator + dataset, evaluator, num_classes = build_dataset(args, train_size, val_size, device) + # dataloader + dataloader = build_dataloader(args, dataset, detection_collate) + # criterioin + criterion = build_criterion(args, cfg, num_classes) + + print('Training model on:', args.dataset) + print('The dataset size:', len(dataset)) + print("----------------------------------------------------------") + + # build model + net = build_model(args=args, + cfg=cfg, + device=device, + num_classes=num_classes, + trainable=True) + model = net + + # SyncBatchNorm + # if args.sybn and args.npu and args.num_gpu > 1: + # print('use SyncBatchNorm ...') + # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model = model.to(device).train() + # compute FLOPs and Params + # if local_rank == 0: + # model_copy = deepcopy(model) + # model_copy.trainable = False + # model_copy.eval() + # FLOPs_and_Params(model=model_copy, size=train_size) + # model_copy.trainable = True + # model_copy.train() + # keep training + if args.resume is not None: + print('keep training model: %s' % (args.resume)) + model.load_state_dict(torch.load(args.resume, map_location=device)) + + # EMA + ema = ModelEMA(model) if args.ema else None + # use tfboard + tblogger = None + if args.tfboard: + print('use tensorboard') + from torch.utils.tensorboard import SummaryWriter + c_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + log_path = os.path.join('log/', args.dataset, c_time) + os.makedirs(log_path, exist_ok=True) + + tblogger = SummaryWriter(log_path) + # optimizer setup + base_lr = args.lr + tmp_lr = args.lr + if args.optimizer == 'NpuFusedSGD': + print('use SGD with momentum ...') + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.9) + # optimizer = optim.SGD(model.parameters(), + # lr=tmp_lr, + # momentum=0.9, + # weight_decay=5e-4) + elif args.optimizer == 'adamw': + print('use AdamW ...') + optimizer = optim.AdamW(model.parameters(), + lr=tmp_lr, + weight_decay=5e-4) + + model, optimizer = amp.initialize(model, optimizer, opt_level='O1', loss_scale=128.0,combine_grad=True) + + # DDP + if args.distributed and args.num_gpu > 1: + print('using DDP ...') + model = DDP(model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=False) + + + + + 
batch_size = args.batch_size + epoch_size = len(dataset) // (batch_size * args.num_gpu) + best_map = -100. + warmup = not args.no_warmup + + t0 = time.time() + # start training loop + for epoch in range(args.start_epoch, args.max_epoch): + if args.distributed: + dataloader.sampler.set_epoch(epoch) + + # use step lr decay + if args.lr_schedule == 'step': + if epoch in args.lr_epoch: + tmp_lr = tmp_lr * 0.1 + set_lr(optimizer, tmp_lr) + # use cos lr decay + elif args.lr_schedule == 'cos' and not warmup: + T_max = args.max_epoch - 15 + lr_min = base_lr * 0.1 * 0.1 + if epoch > T_max: + # Cos decay is done + print('Cosine annealing is over !!') + args.lr_schedule == None + tmp_lr = lr_min + set_lr(optimizer, tmp_lr) + else: + tmp_lr = lr_min + 0.5*(base_lr - lr_min)*(1 + math.cos(math.pi*epoch / T_max)) + set_lr(optimizer, tmp_lr) + fps_sum=0 + # train one epoch + # pre_flag = False + # start_time = time.time() + for iter_i, (images, targets) in enumerate(dataloader): + # if iter_i == 5: + # start_time = time.time() + # with torch.autograd.profiler.profile(use_npu=True) as prof: + ni = iter_i + epoch * epoch_size + # warmup + if epoch < args.wp_epoch and warmup: + nw = args.wp_epoch * epoch_size + tmp_lr = base_lr * pow(ni / nw, 4) + set_lr(optimizer, tmp_lr) + + elif epoch == args.wp_epoch and iter_i == 0 and warmup: + # warmup is over + print('Warmup is over !!') + warmup = False + tmp_lr = base_lr + set_lr(optimizer, tmp_lr) + + # multi-scale trick + if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale: + # randomly choose a new size + r = args.multi_scale_range + train_size = random.randint(r[0], r[1]) * 32 + model.module.set_grid(train_size) + if args.multi_scale: + # interpolate + images = torch.nn.functional.interpolate( + input=images, + size=train_size, + mode='bilinear', + align_corners=False) + + targets = [label.tolist() for label in targets] + # visualize target + if args.vis_data: + vis_data(images, targets) + continue + # make labels + targets = create_labels.gt_creator( + img_size=train_size, + strides=net.stride, + label_lists=targets, + anchor_size=cfg["anchor_size"], + multi_anchor=args.multi_anchor, + center_sample=args.center_sample) + # visualize assignment + if args.vis_targets: + vis_targets(images, targets, cfg["anchor_size"], net.stride) + continue + + # to device + images = images.to(device) + targets = targets.to(device) + + # inference + pred_obj, pred_cls, pred_iou, targets = model(images, targets=targets) + + # compute loss + loss_obj, loss_cls, loss_reg, total_loss = criterion(pred_obj, pred_cls, pred_iou, targets) + + # check loss + if torch.isnan(total_loss): + continue + + loss_dict = dict( + loss_obj=loss_obj, + loss_cls=loss_cls, + loss_reg=loss_reg, + total_loss=total_loss + ) + loss_dict_reduced = distributed_utils.reduce_loss_dict(loss_dict) + + total_loss = total_loss / args.accumulate + # Backward and Optimize + with amp.scale_loss(total_loss , optimizer) as scaled_loss: + scaled_loss.backward() + if ni % args.accumulate == 0: + if args.grad_clip is not None: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) + optimizer.step() + optimizer.zero_grad() + + if args.ema: + ema.update(model) + + # display + # if iter_i % 10 == 0: + if args.tfboard: + # viz loss + tblogger.add_scalar('loss obj', loss_dict_reduced['loss_obj'].item(), ni) + tblogger.add_scalar('loss cls', loss_dict_reduced['loss_cls'].item(), ni) + tblogger.add_scalar('loss reg', loss_dict_reduced['loss_reg'].item(), ni) + + t1 = time.time() + print('[Epoch %d/%d][Iter 
+            print('[Epoch %d/%d][Iter %d/%d][lr %.6f][Loss: obj %.2f || cls %.2f || reg %.2f || size %d || time: %.2f]'
+                    % (epoch+1,
+                       args.max_epoch,
+                       iter_i,
+                       epoch_size,
+                       tmp_lr,
+                       loss_dict['loss_obj'].item(),
+                       loss_dict['loss_cls'].item(),
+                       loss_dict['loss_reg'].item(),
+                       train_size,
+                       t1-t0),
+                    flush=True)
+            # throughput in images/s, assuming 8 devices (hence the hard-coded factor of 8)
+            fps_sum = fps_sum + (batch_size*8 / (t1 - t0))
+            t0 = time.time()
+            # if local_rank in [-1, 0]:
+            #     epoch_time = time.time() - start_time
+            #     if iter_i >= 5:
+            #         print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1 - 5) / (epoch_time)))
+            #     else:
+            #         print('Training speed is {} FPS'.format(batch_size * 8 * (iter_i + 1) / (epoch_time)))
+            if iter_i == 461:
+                # report the averaged FPS once per epoch
+                fps_avg = fps_sum / 461
+                print("fps:", fps_avg)
+                fps_sum = 0
+
+        # evaluation
+        if (epoch + 1) % args.eval_epoch == 0 or (epoch + 1) == args.max_epoch:
+            # check ema: select the model to evaluate/save before branching on the evaluator
+            if args.ema:
+                model_eval = ema.ema
+            else:
+                model_eval = model.module if args.distributed and args.num_gpu > 1 else model
+
+            if evaluator is None:
+                print('No evaluator ...')
+                print('Saving state, epoch:', epoch + 1)
+                torch.save(model_eval.state_dict(), os.path.join(path_to_save,
+                            args.model + '_' + repr(epoch + 1) + '.pth'))
+                print('Keep training ...')
+            else:
+                print('eval ...')
+                # set eval mode
+                model_eval.trainable = False
+                model_eval.set_grid(val_size)
+                model_eval.eval()
+
+                if local_rank == 0:
+                    # evaluate
+                    evaluator.evaluate(model_eval)
+
+                    cur_map = evaluator.map
+                    if cur_map > best_map:
+                        # update best-map
+                        best_map = cur_map
+                        # save model
+                        print('Saving state, epoch:', epoch + 1)
+                        torch.save(model_eval.state_dict(), os.path.join(path_to_save,
+                                    args.model + '_' + repr(epoch + 1) + '_' + str(round(best_map*100, 2)) + '.pth'))
+                    if args.tfboard:
+                        if args.dataset == 'voc':
+                            tblogger.add_scalar('07test/mAP', evaluator.map, epoch)
+                        elif args.dataset == 'coco':
+                            tblogger.add_scalar('val/AP50_95', evaluator.ap50_95, epoch)
+                            tblogger.add_scalar('val/AP50', evaluator.ap50, epoch)
+
+                if args.distributed:
+                    # wait for all processes to synchronize
+                    dist.barrier()
+
+                # set train mode.
+                model_eval.trainable = True
+                model_eval.set_grid(train_size)
+                model_eval.train()
+
+        # close mosaic augmentation
+        if args.mosaic and args.max_epoch - epoch == 15:
+            print('close Mosaic Augmentation ...')
+            dataloader.dataset.mosaic = False
+        # close mixup augmentation
+        if args.mixup and args.max_epoch - epoch == 15:
+            print('close Mixup Augmentation ...')
+            dataloader.dataset.mixup = False
+
+    if args.tfboard:
+        tblogger.close()
+
+
+def build_dataset(args, train_size, val_size, device):
+    if args.dataset == 'voc':
+        data_dir = os.path.join(args.root, 'VOCdevkit')
+        num_classes = 20
+        dataset = VOCDetection(
+                        data_dir=data_dir,
+                        img_size=train_size,
+                        transform=TrainTransforms(train_size),
+                        color_augment=ColorTransforms(train_size),
+                        mosaic=args.mosaic,
+                        mixup=args.mixup)
+
+        evaluator = VOCAPIEvaluator(
+                        data_dir=data_dir,
+                        img_size=val_size,
+                        device=device,
+                        transform=ValTransforms(val_size))
+
+    elif args.dataset == 'coco':
+        data_dir = os.path.join(args.root, 'COCO')
+        num_classes = 80
+        dataset = COCODataset(
+                        data_dir=data_dir,
+                        img_size=train_size,
+                        image_set='train2017',
+                        transform=TrainTransforms(train_size),
+                        color_augment=ColorTransforms(train_size),
+                        mosaic=args.mosaic,
+                        mixup=args.mixup)
+
+        evaluator = COCOAPIEvaluator(
+                        data_dir=data_dir,
+                        img_size=val_size,
+                        device=device,
+                        transform=ValTransforms(val_size)
+                        )
+
+    else:
+        print('unknown dataset !! Only support voc and coco !!')
+        exit(0)
+
+    return dataset, evaluator, num_classes
+
+
+def build_dataloader(args, dataset, collate_fn=None):
+    # distributed
+    if args.distributed and args.num_gpu > 1:
+        # dataloader
+        dataloader = torch.utils.data.DataLoader(
+                        dataset=dataset,
+                        batch_size=args.batch_size,
+                        collate_fn=collate_fn,
+                        num_workers=args.num_workers,
+                        pin_memory=True,
+                        sampler=torch.utils.data.distributed.DistributedSampler(dataset)
+                        )
+
+    else:
+        # dataloader
+        dataloader = torch.utils.data.DataLoader(
+                        dataset=dataset,
+                        shuffle=True,
+                        batch_size=args.batch_size,
+                        collate_fn=collate_fn,
+                        num_workers=args.num_workers,
+                        pin_memory=True
+                        )
+    return dataloader
+
+
+def set_lr(optimizer, lr):
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+
+if __name__ == '__main__':
+    train()
+
--
Gitee
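For reference, the learning-rate policy driven by set_lr() in the training loop above (a quartic warmup over the first wp_epoch epochs, then, with --lr_schedule 'cos', cosine annealing from base_lr down to 1% of base_lr over max_epoch - 15 epochs, held at that minimum afterwards) can be written as a small standalone helper. The sketch below is illustrative only; the helper name lr_at and the example values are not part of the patch.

    import math

    def lr_at(epoch, iter_i, epoch_size, base_lr, wp_epoch, max_epoch):
        """Approximate lr produced by the warmup + cosine branches of the loop above."""
        ni = iter_i + epoch * epoch_size
        if epoch < wp_epoch:
            # quartic warmup: ramp from 0 to base_lr over the warmup epochs
            nw = wp_epoch * epoch_size
            return base_lr * pow(ni / nw, 4)
        T_max = max_epoch - 15            # cosine period; the last 15 epochs stay at lr_min
        lr_min = base_lr * 0.1 * 0.1      # final lr is 1% of base_lr
        if epoch > T_max:
            return lr_min
        return lr_min + 0.5 * (base_lr - lr_min) * (1 + math.cos(math.pi * epoch / T_max))

    # example (arbitrary values): base_lr=1e-3, 2 warmup epochs, 160 epochs, 462 iters/epoch
    print(lr_at(epoch=10, iter_i=0, epoch_size=462, base_lr=1e-3, wp_epoch=2, max_epoch=160))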