From 77d89be8f84a7da7c5396ae7e211b8d26f0daf21 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Sat, 9 Apr 2022 16:03:48 +0800 Subject: [PATCH 01/20] first commit --- .../contrib/cv/detection/RetinaMask/LICENSE | 201 +++++++++ .../contrib/cv/detection/RetinaMask/README.md | 54 +++ .../cv/detection/RetinaMask/bind_pyt.py | 136 ++++++ .../e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml | 25 ++ .../e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml | 5 + .../e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml | 25 ++ ...faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml | 29 ++ .../e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml | 34 ++ .../e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml | 9 + .../e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml | 34 ++ ...e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml | 38 ++ .../configs/e2e_faster_rcnn_R_101_FPN_1x.yaml | 31 ++ .../configs/e2e_faster_rcnn_R_50_C4_1x.yaml | 15 + .../configs/e2e_faster_rcnn_R_50_FPN_1x.yaml | 31 ++ .../e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml | 36 ++ .../configs/e2e_mask_rcnn_R_101_FPN_1x.yaml | 40 ++ .../configs/e2e_mask_rcnn_R_50_C4_1x.yaml | 19 + .../configs/e2e_mask_rcnn_R_50_FPN_1x.yaml | 40 ++ .../e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml | 45 ++ .../e2e_faster_rcnn_R_50_C4_quick.yaml | 24 + .../e2e_faster_rcnn_R_50_FPN_quick.yaml | 40 ++ ...e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml | 44 ++ .../e2e_mask_rcnn_R_50_C4_quick.yaml | 28 ++ .../e2e_mask_rcnn_R_50_FPN_quick.yaml | 49 ++ .../e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml | 53 +++ .../quick_schedules/rpn_R_50_C4_quick.yaml | 23 + .../quick_schedules/rpn_R_50_FPN_quick.yaml | 31 ++ .../retina/retinanet_R-101-FPN_1x.yaml | 48 ++ .../configs/retina/retinanet_R-50-FPN_1x.yaml | 46 ++ .../retinanet_R-50-FPN_1x_adjust_std011.yaml | 47 ++ .../retinanet_R-50-FPN_1x_adjust_std100.yaml | 48 ++ .../retinanet_R-50-FPN_1x_adjustl1.yaml | 47 ++ .../retina/retinanet_R-50-FPN_1x_beta100.yaml | 48 ++ ...retinanet_R-50-FPN_1x_low_quality_0.2.yaml | 47 ++ ...retinanet_R-50-FPN_1x_low_quality_0.3.yaml | 47 ++ ...retinanet_R-50-FPN_1x_low_quality_0.4.yaml | 47 ++ .../retinanet_R-50-FPN_1x_no_low_quality.yaml | 47 ++ ...t_R-50-FPN_1x_no_low_quality_adjustl1.yaml | 48 ++ ...mask_R-101-FPN_1.5x_adjust_std011_400.yaml | 60 +++ ...mask_R-101-FPN_1.5x_adjust_std011_500.yaml | 61 +++ ...mask_R-101-FPN_1.5x_adjust_std011_600.yaml | 60 +++ ...mask_R-101-FPN_1.5x_adjust_std011_700.yaml | 60 +++ ...mask_R-101-FPN_1.5x_adjust_std011_800.yaml | 60 +++ ...et_mask_R-101-FPN_2x_adjust_std011_ms.yaml | 62 +++ ...mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml | 61 +++ .../retina/retinanet_mask_R-50-FPN_1.5x.yaml | 58 +++ ..._mask_R-50-FPN_1.5x_adjust_std011_400.yaml | 60 +++ ..._mask_R-50-FPN_1.5x_adjust_std011_500.yaml | 60 +++ ..._mask_R-50-FPN_1.5x_adjust_std011_600.yaml | 60 +++ ..._mask_R-50-FPN_1.5x_adjust_std011_800.yaml | 60 +++ .../retina/retinanet_mask_R-50-FPN_1x.yaml | 58 +++ ...net_mask_R-50-FPN_2x_adjust_std011_ms.yaml | 59 +++ ...tinanet_mask_R-50-FPN_canonical5_1.5x.yaml | 59 +++ ...retinanet_mask_R-50-FPN_canonical5_1x.yaml | 59 +++ ...-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml | 66 +++ .../retinanet_mask_p2p7_R-50-FPN_1x.yaml | 57 +++ .../RetinaMask/maskrcnn_benchmark/__init__.py | 1 + .../maskrcnn_benchmark/config/__init__.py | 2 + .../maskrcnn_benchmark/config/defaults.py | 363 +++++++++++++++ .../config/paths_catalog.py | 109 +++++ .../maskrcnn_benchmark/csrc/ROIAlign.h | 46 ++ .../maskrcnn_benchmark/csrc/ROIPool.h | 48 ++ .../csrc/SigmoidFocalLoss.h | 41 ++ .../csrc/cpu/ROIAlign_cpu.cpp | 257 +++++++++++ .../csrc/cpu/nms_cpu.cpp.bak | 75 ++++ 
.../maskrcnn_benchmark/csrc/cpu/vision.h | 16 + .../csrc/cuda/ROIAlign_cuda.cu | 346 ++++++++++++++ .../csrc/cuda/ROIPool_cuda.cu | 202 +++++++++ .../csrc/cuda/SigmoidFocalLoss_cuda.cu | 188 ++++++++ .../maskrcnn_benchmark/csrc/cuda/nms.cu | 128 ++++++ .../maskrcnn_benchmark/csrc/cuda/vision.h | 63 +++ .../maskrcnn_benchmark/csrc/nms.h.bak | 28 ++ .../maskrcnn_benchmark/csrc/vision.cpp | 15 + .../maskrcnn_benchmark/data/__init__.py | 2 + .../maskrcnn_benchmark/data/build.py | 165 +++++++ .../maskrcnn_benchmark/data/collate_batch.py | 20 + .../data/datasets/__init__.py | 5 + .../maskrcnn_benchmark/data/datasets/coco.py | 84 ++++ .../data/datasets/concat_dataset.py | 23 + .../data/datasets/list_dataset.py | 36 ++ .../data/samplers/__init__.py | 6 + .../data/samplers/distributed.py | 67 +++ .../data/samplers/grouped_batch_sampler.py | 115 +++++ .../samplers/iteration_based_batch_sampler.py | 31 ++ .../data/transforms/__init__.py | 9 + .../data/transforms/build.py | 40 ++ .../data/transforms/transforms.py | 176 ++++++++ .../maskrcnn_benchmark/engine/__init__.py | 1 + .../maskrcnn_benchmark/engine/inference.py | 423 ++++++++++++++++++ .../maskrcnn_benchmark/engine/trainer.py | 106 +++++ .../maskrcnn_benchmark/layers/__init__.py | 19 + .../maskrcnn_benchmark/layers/_utils.py | 38 ++ .../layers/adjust_smooth_l1_loss.py | 42 ++ .../maskrcnn_benchmark/layers/batch_norm.py | 24 + .../maskrcnn_benchmark/layers/misc.py | 101 +++++ .../maskrcnn_benchmark/layers/nms.py | 41 ++ .../layers/npu_roi_align.py | 123 +++++ .../maskrcnn_benchmark/layers/roi_align.py | 67 +++ .../maskrcnn_benchmark/layers/roi_pool.py | 63 +++ .../layers/sigmoid_focal_loss.py | 95 ++++ .../layers/smooth_l1_loss.py | 30 ++ .../maskrcnn_benchmark/modeling/__init__.py | 0 .../modeling/backbone/__init__.py | 2 + .../modeling/backbone/backbone.py | 101 +++++ .../modeling/backbone/fpn.py | 115 +++++ .../modeling/backbone/resnet.py | 317 +++++++++++++ .../balanced_positive_negative_sampler.py | 68 +++ .../maskrcnn_benchmark/modeling/box_coder.py | 95 ++++ .../modeling/detector/__init__.py | 2 + .../modeling/detector/detectors.py | 11 + .../modeling/detector/generalized_rcnn.py | 68 +++ .../modeling/detector/retinanet.py | 113 +++++ .../maskrcnn_benchmark/modeling/matcher.py | 108 +++++ .../maskrcnn_benchmark/modeling/poolers.py | 130 ++++++ .../modeling/roi_heads/__init__.py | 0 .../modeling/roi_heads/box_head/__init__.py | 0 .../modeling/roi_heads/box_head/box_head.py | 69 +++ .../modeling/roi_heads/box_head/inference.py | 152 +++++++ .../modeling/roi_heads/box_head/loss.py | 177 ++++++++ .../box_head/roi_box_feature_extractors.py | 88 ++++ .../roi_heads/box_head/roi_box_predictors.py | 62 +++ .../modeling/roi_heads/mask_head/__init__.py | 0 .../modeling/roi_heads/mask_head/inference.py | 188 ++++++++ .../modeling/roi_heads/mask_head/loss.py | 144 ++++++ .../modeling/roi_heads/mask_head/mask_head.py | 103 +++++ .../mask_head/roi_mask_feature_extractors.py | 69 +++ .../mask_head/roi_mask_predictors.py | 44 ++ .../modeling/roi_heads/roi_heads.py | 54 +++ .../modeling/rpn/__init__.py | 2 + .../modeling/rpn/anchor_generator.py | 238 ++++++++++ .../modeling/rpn/inference.py | 200 +++++++++ .../maskrcnn_benchmark/modeling/rpn/loss.py | 151 +++++++ .../modeling/rpn/retinanet.py | 213 +++++++++ .../modeling/rpn/retinanet_detail_infer.py | 212 +++++++++ .../modeling/rpn/retinanet_infer.py | 238 ++++++++++ .../modeling/rpn/retinanet_loss.py | 156 +++++++ .../maskrcnn_benchmark/modeling/rpn/rpn.py | 139 ++++++ 
.../maskrcnn_benchmark/modeling/utils.py | 16 + .../maskrcnn_benchmark/solver/__init__.py | 4 + .../maskrcnn_benchmark/solver/build.py | 31 ++ .../maskrcnn_benchmark/solver/lr_scheduler.py | 52 +++ .../maskrcnn_benchmark/structures/__init__.py | 0 .../structures/bounding_box.py | 323 +++++++++++++ .../structures/boxlist_ops.py | 125 ++++++ .../structures/image_list.py | 72 +++ .../structures/segmentation_mask.py | 250 +++++++++++ .../maskrcnn_benchmark/utils/README.md | 5 + .../maskrcnn_benchmark/utils/__init__.py | 0 .../utils/c2_model_loading.py | 146 ++++++ .../maskrcnn_benchmark/utils/checkpoint.py | 138 ++++++ .../maskrcnn_benchmark/utils/collect_env.py | 14 + .../maskrcnn_benchmark/utils/comm.py | 148 ++++++ .../maskrcnn_benchmark/utils/env.py | 37 ++ .../maskrcnn_benchmark/utils/imports.py | 24 + .../maskrcnn_benchmark/utils/logger.py | 25 ++ .../maskrcnn_benchmark/utils/metric_logger.py | 63 +++ .../maskrcnn_benchmark/utils/miscellaneous.py | 11 + .../utils/model_serialization.py | 78 ++++ .../maskrcnn_benchmark/utils/model_zoo.py | 56 +++ .../contrib/cv/detection/RetinaMask/setup.py | 69 +++ .../cv/detection/RetinaMask/test/env_npu.sh | 77 ++++ .../RetinaMask/test/train_eval_1p.sh | 130 ++++++ .../RetinaMask/test/train_full_1p.sh | 130 ++++++ .../RetinaMask/test/train_full_8p.sh | 131 ++++++ .../RetinaMask/test/train_performance_1p.sh | 132 ++++++ .../RetinaMask/test/train_performance_8p.sh | 129 ++++++ .../detection/RetinaMask/tools/parse_log.py | 72 +++ .../cv/detection/RetinaMask/tools/test_net.py | 108 +++++ .../detection/RetinaMask/tools/train_net.py | 188 ++++++++ 169 files changed, 13232 insertions(+) create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/LICENSE create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/README.md create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/bind_pyt.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_C4_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_C4_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml create mode 100644 
PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_C4_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_FPN_quick.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-101-FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std011.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std100.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjustl1.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_beta100.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.2.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.3.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.4.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml create mode 100644 
PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1.5x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_p2p7_R-50-FPN_1x.yaml create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/paths_catalog.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/build.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/collate_batch.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/concat_dataset.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/list_dataset.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py create mode 
100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/build.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/transforms.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/batch_norm.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/npu_roi_align.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/backbone.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/detectors.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/retinanet.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/matcher.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/poolers.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py create 
mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/anchor_generator.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/inference.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_detail_infer.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_loss.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/rpn.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/utils.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/build.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/lr_scheduler.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/__init__.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/bounding_box.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/boxlist_ops.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/image_list.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/segmentation_mask.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/README.md create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/__init__.py create mode 100644 
PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/c2_model_loading.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/checkpoint.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/collect_env.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/comm.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/env.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/imports.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/logger.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/miscellaneous.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_serialization.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_zoo.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/setup.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/test/env_npu.sh create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/tools/parse_log.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/tools/test_net.py create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/tools/train_net.py diff --git a/PyTorch/contrib/cv/detection/RetinaMask/LICENSE b/PyTorch/contrib/cv/detection/RetinaMask/LICENSE new file mode 100644 index 0000000000..56ee3c8c4c --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RetinaMask/README.md b/PyTorch/contrib/cv/detection/RetinaMask/README.md new file mode 100644 index 0000000000..70f512848c --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -0,0 +1,54 @@ +## Requirements + +- Key requirements: + +``` +apex==0.1+ascend.20220315 +torch==1.5.0+ascend.post5.20220315 +torchvision==0.2.1 +``` + +- Build maskrcnn-benchmark: + +``` +python3.7 setup.py build develop +``` + + + +## Training + +- To train: + +``` +# 1p train full +bash test/train_full_1p.sh --data_path=xxx + +# 1p train perf +bash test/train_performance_1p.sh --data_path=xxx + +# 8p train full +bash test/train_full_8p.sh --data_path=xxx + +# 8p train perf +bash test/train_performance_8p.sh --data_path=xxx +``` + +- To evaluate: + +``` +bash test/train_eval_1p.sh --data_path=xxx --weight_path=./model_0044999.pth # for example +``` + + + +## Results + +1p batch_size == 8, 8p batch_size == 64 + +| NAME | Steps | BBOX-MAP | SEGM-MAP | FPS | | :-----------------: | :---: | :------: | :------: | :--: | | GPU-1p(@NV-T4,bs=4) | 90000 | 26.9 | 23.3 | 2.6 | | GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | | NPU-1p | 400 | - | - | 31.9 | | NPU-8p | 20000 | 28.8 | 25.7 | 33.3 | diff --git a/PyTorch/contrib/cv/detection/RetinaMask/bind_pyt.py b/PyTorch/contrib/cv/detection/RetinaMask/bind_pyt.py new file mode 100644 index 0000000000..ba41a0d1fa --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/bind_pyt.py @@ -0,0 +1,136 @@ +# Copyright (c) 2019-2021 NVIDIA CORPORATION. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import sys +import subprocess +import os +from argparse import ArgumentParser, REMAINDER + + +def parse_args(): + """ + Helper function parsing the command line options + @retval ArgumentParser + """ + parser = ArgumentParser(description="PyTorch distributed training launch " + "helper utility that will spawn up " + "multiple distributed processes") + + # Optional arguments for the launch helper + parser.add_argument("--nnodes", type=int, default=1, + help="The number of nodes to use for distributed " + "training") + parser.add_argument("--node_rank", type=int, default=0, + help="The rank of the node for multi-node distributed " + "training") + parser.add_argument("--nproc_per_node", type=int, default=8, + help="The number of processes to launch on each node, " + "for GPU training, this is recommended to be set " + "to the number of GPUs in your system so that " + "each process can be bound to a single GPU.") + parser.add_argument("--master_addr", default="127.0.0.1", type=str, + help="Master node (rank 0)'s address, should be either " + "the IP address or the hostname of node 0, for " + "single node multi-proc training, the " + "--master_addr can simply be 127.0.0.1") + parser.add_argument("--master_port", default=29688, type=int, + help="Master node (rank 0)'s free port that needs to " + "be used for communication during distributed " + "training") + parser.add_argument('--no_hyperthreads', action='store_true', + help='Flag to disable binding to hyperthreads') + parser.add_argument('--no_membind', action='store_true', + help='Flag to disable memory binding') + + # non-optional arguments for binding + parser.add_argument("--nsockets_per_node", type=int, required=True, + help="Number of CPU sockets on a node") + parser.add_argument("--ncores_per_socket", type=int, required=True, + help="Number of CPU cores per socket") + + # positional + parser.add_argument("training_script", type=str, + help="The full path to the single GPU training " + "program/script to be launched in parallel, " + "followed by all the arguments for the " + "training script") + + # rest from the training program + parser.add_argument("--data_path", type=str, default='') + parser.add_argument('training_script_args', nargs=REMAINDER) + return parser.parse_args() + + +def main(): + args = parse_args() + + # variables for numactl binding + + NSOCKETS = args.nsockets_per_node + NGPUS_PER_SOCKET = (args.nproc_per_node // args.nsockets_per_node) + ( + 1 if (args.nproc_per_node % args.nsockets_per_node) else 0) + NCORES_PER_GPU = args.ncores_per_socket // NGPUS_PER_SOCKET + + # world size in terms of number of processes + dist_world_size = args.nproc_per_node * args.nnodes + + # set PyTorch distributed related environmental variables + current_env = os.environ.copy() + current_env["MASTER_ADDR"] = args.master_addr + current_env["MASTER_PORT"] = str(args.master_port) + current_env["WORLD_SIZE"] = str(dist_world_size) + current_env['NODE_RANK'] = str(args.node_rank) + + processes = [] + + for local_rank in range(0, args.nproc_per_node): + # each process's rank + dist_rank = args.nproc_per_node * args.node_rank + local_rank + current_env["RANK"] = str(dist_rank) + current_env['LOCAL_RANK'] = str(local_rank) + + # form numactl binding command + cpu_ranges = [local_rank * NCORES_PER_GPU, + (local_rank + 1) * NCORES_PER_GPU - 1, + local_rank * NCORES_PER_GPU + (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS), + (local_rank + 1) * NCORES_PER_GPU + (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS) - 1] + + numactlargs = [] + if
args.no_hyperthreads: + numactlargs += ["--physcpubind={}-{}".format(*cpu_ranges[0:2])] + else: + numactlargs += ["--physcpubind={}-{},{}-{}".format(*cpu_ranges)] + + if not args.no_membind: + memnode = local_rank // NGPUS_PER_SOCKET + numactlargs += ["--membind={}".format(memnode)] + + # spawn the processes + cmd = ["/usr/bin/numactl"] \ + + numactlargs \ + + [sys.executable, + "-u", + args.training_script, + "--local_rank={}".format(local_rank) + ] \ + + args.training_script_args + + process = subprocess.Popen(cmd, env=current_env) + processes.append(process) + + for process in processes: + process.wait() + + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml new file mode 100644 index 0000000000..e129ac8855 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml new file mode 100644 index 0000000000..393defe7ff --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml @@ -0,0 +1,5 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" +DATASETS: + TEST: ("coco_2014_minival",) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml new file mode 100644 index 0000000000..180d737a63 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml new file mode 100644 index 
0000000000..166a2ea0e4 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml @@ -0,0 +1,29 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +DATASETS: + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml new file mode 100644 index 0000000000..57da8e8cce --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml @@ -0,0 +1,34 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml new file mode 100644 index 0000000000..d1d0572f8a --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml @@ -0,0 +1,9 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" + ROI_MASK_HEAD: + PREDICTOR: "MaskRCNNC4Predictor" + SHARE_BOX_FEATURE_EXTRACTOR: True + MASK_ON: True +DATASETS: + TEST: ("coco_2014_minival",) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml new file mode 100644 index 0000000000..f0e675df57 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml @@ -0,0 +1,34 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 
1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml new file mode 100644 index 0000000000..c97b940738 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml @@ -0,0 +1,38 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + MASK_ON: True +DATASETS: + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000..45b07e06da --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.02 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_C4_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000..5cec224a0b --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,15 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: 
"catalog://ImageNetPretrained/MSRA/R-50" + RPN: + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TEST: 1000 +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +SOLVER: + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000..267a12c13c --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.02 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000..9338c87676 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,36 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000..c2da8f3775 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,40 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + 
PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.02 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_C4_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000..bfcd25866f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,19 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN: + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TEST: 1000 + ROI_MASK_HEAD: + PREDICTOR: "MaskRCNNC4Predictor" + SHARE_BOX_FEATURE_EXTRACTOR: True + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +SOLVER: + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000..176e66069e --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,40 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.02 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000..4204419be1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,45 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + 
POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml new file mode 100644 index 0000000000..d5eae44578 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml @@ -0,0 +1,24 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN: + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 2 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml new file mode 100644 index 0000000000..f69d029f34 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml @@ -0,0 +1,40 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 4 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml new file mode 100644 index 0000000000..d36ef53adc --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml @@ -0,0 +1,44 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) 
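Note on how the quick-schedule configs above are consumed: each YAML only overrides fields of the yacs-based default config (maskrcnn_benchmark/config/defaults.py), so keys that are not listed keep their default values. A minimal loading sketch, assuming the conventional "from maskrcnn_benchmark.config import cfg" entry point; the extra overrides are purely illustrative:

    # Minimal sketch: merge a quick-schedule YAML into the yacs defaults,
    # then apply ad-hoc overrides for an even shorter smoke test.
    from maskrcnn_benchmark.config import cfg  # assumed import path

    cfg.merge_from_file(
        "configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml")
    cfg.merge_from_list(["SOLVER.MAX_ITER", 500, "TEST.IMS_PER_BATCH", 1])
    cfg.freeze()
    print(cfg.MODEL.RPN.ANCHOR_STRIDE, cfg.SOLVER.BASE_LR)

This is why the files in this patch can stay short: they list only the deltas from defaults.py.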
+ PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 2 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml new file mode 100644 index 0000000000..621dd0f680 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml @@ -0,0 +1,28 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN: + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + ROI_MASK_HEAD: + PREDICTOR: "MaskRCNNC4Predictor" + SHARE_BOX_FEATURE_EXTRACTOR: True + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 4 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml new file mode 100644 index 0000000000..28760d8f9f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml @@ -0,0 +1,49 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 4 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml 
b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml new file mode 100644 index 0000000000..a6f1283a37 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml @@ -0,0 +1,53 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 2 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_C4_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_C4_quick.yaml new file mode 100644 index 0000000000..ecf1e87666 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_C4_quick.yaml @@ -0,0 +1,23 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + RPN: + PRE_NMS_TOP_N_TEST: 12000 + POST_NMS_TOP_N_TEST: 2000 +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 4 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_FPN_quick.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_FPN_quick.yaml new file mode 100644 index 0000000000..d762b4f9d2 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/quick_schedules/rpn_R_50_FPN_quick.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 2000 + FPN_POST_NMS_TOP_N_TEST: 2000 +DATASETS: + TRAIN: ("coco_2014_minival",) + TEST: ("coco_2014_minival",) +INPUT: + MIN_SIZE_TRAIN: 600 + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (1500,) + MAX_ITER: 2000 + IMS_PER_BATCH: 4 +TEST: + IMS_PER_BATCH: 2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-101-FPN_1x.yaml 
b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-101-FPN_1x.yaml new file mode 100644 index 0000000000..b2f78d22f1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-101-FPN_1x.yaml @@ -0,0 +1,48 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800, ) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + + diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x.yaml new file mode 100644 index 0000000000..b851a6c999 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x.yaml @@ -0,0 +1,46 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std011.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std011.yaml new file mode 100644 index 0000000000..edfb95ec85 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std011.yaml @@ -0,0 +1,47 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + 
POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std100.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std100.yaml new file mode 100644 index 0000000000..c591e9ceb7 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjust_std100.yaml @@ -0,0 +1,48 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + BBOX_REG_BETA: 1.0 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjustl1.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjustl1.yaml new file mode 100644 index 0000000000..edfb95ec85 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_adjustl1.yaml @@ -0,0 +1,47 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + SELFADJUST_SMOOTH_L1: True diff --git 
a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_beta100.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_beta100.yaml new file mode 100644 index 0000000000..e61c0ef074 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_beta100.yaml @@ -0,0 +1,48 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + BBOX_REG_BETA: 1.0 + SELFADJUST_SMOOTH_L1: False diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.2.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.2.yaml new file mode 100644 index 0000000000..7e8891c406 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.2.yaml @@ -0,0 +1,47 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + LOW_QUALITY_THRESHOLD: 0.2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.3.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.3.yaml new file mode 100644 index 0000000000..19d28fb6d5 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.3.yaml @@ -0,0 +1,47 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 
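The FG/BG thresholds above split anchors by their best overlap with a ground-truth box: at least 0.5 IoU is foreground, below 0.4 is background, and the band in between is ignored. The *_low_quality_* variants in this directory appear to additionally keep, for every ground-truth box, its single best-overlapping anchor as a positive once that overlap clears LOW_QUALITY_THRESHOLD. The sketch below is a simplified stand-in for the project's Matcher (maskrcnn_benchmark/modeling/matcher.py) with an illustrative helper name, not the actual implementation:

    import torch

    def assign_anchors(iou, fg=0.5, bg=0.4, low_quality=0.3):
        # iou: [num_gt, num_anchors]; returns per-anchor labels:
        # matched gt index (>= 0), -1 for background, -2 for ignored.
        best_iou, matched_gt = iou.max(dim=0)
        labels = matched_gt.clone()
        labels[best_iou < fg] = -2
        labels[best_iou < bg] = -1
        if low_quality is not None:
            # keep each gt's best anchor if it clears the low-quality threshold
            best_per_gt, best_anchor = iou.max(dim=1)
            keep = best_per_gt >= low_quality
            labels[best_anchor[keep]] = torch.arange(iou.shape[0])[keep]
        return labels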
+ ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + LOW_QUALITY_THRESHOLD: 0.3 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.4.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.4.yaml new file mode 100644 index 0000000000..e30ac293ea --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_low_quality_0.4.yaml @@ -0,0 +1,47 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + LOW_QUALITY_THRESHOLD: 0.4 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality.yaml new file mode 100644 index 0000000000..4ad4a2c8d3 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality.yaml @@ -0,0 +1,47 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 
4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + LOW_QUALITY_MATCHES: False diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml new file mode 100644 index 0000000000..d6315e31ee --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml @@ -0,0 +1,48 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.01 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 16 +RETINANET: + RETINANET_ON: True + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + LOW_QUALITY_MATCHES: False + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml new file mode 100644 index 0000000000..37fae57e8a --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (400,) + MAX_SIZE_TRAIN: 667 + MIN_SIZE_TEST: 400 + MAX_SIZE_TEST: 667 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + 
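SCALES_PER_OCTAVE: 3 gives every pyramid level three anchor sizes spaced a third of an octave apart, i.e. multipliers 2^(0/3), 2^(1/3) and 2^(2/3) on that level's base size, while STRADDLE_THRESH: -1 disables the Detectron-style filter that would otherwise drop anchors extending past the image border. A tiny sketch of the multipliers; the base size of 32 is only an assumed example, the real per-level sizes come from the repo defaults:

    scales_per_octave = 3
    octave_scales = [2 ** (k / scales_per_octave) for k in range(scales_per_octave)]
    print([round(s, 3) for s in octave_scales])               # [1.0, 1.26, 1.587]
    base_size = 32  # assumed example value
    print([round(base_size * s, 1) for s in octave_scales])   # [32.0, 40.3, 50.8]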
NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml new file mode 100644 index 0000000000..a34b4cd407 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml @@ -0,0 +1,61 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) + #TEST: ("coco_test-dev",) +INPUT: + MIN_SIZE_TRAIN: (500,) + MAX_SIZE_TRAIN: 833 + MIN_SIZE_TEST: 500 + MAX_SIZE_TEST: 833 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml new file mode 100644 index 0000000000..f439591331 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 
32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml new file mode 100644 index 0000000000..98dd797d1d --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (700,) + MAX_SIZE_TRAIN: 1167 + MIN_SIZE_TEST: 700 + MAX_SIZE_TEST: 1167 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.0025 + WEIGHT_DECAY: 0.0001 + STEPS: (360000, 480000) + MAX_ITER: 540000 + IMS_PER_BATCH: 4 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml new file mode 100644 index 0000000000..f582b97f47 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: 
True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.0025 + WEIGHT_DECAY: 0.0001 + STEPS: (360000, 480000) + MAX_ITER: 540000 + IMS_PER_BATCH: 4 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml new file mode 100644 index 0000000000..d31c926cd3 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml @@ -0,0 +1,62 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 800, 1000) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.0025 + WEIGHT_DECAY: 0.0001 + STEPS: (480000, 640000) + MAX_ITER: 720000 + IMS_PER_BATCH: 4 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True +INPUT: + FIX_SHAPE:(1344, 1344) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml new file mode 100644 index 0000000000..1d7374b055 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml @@ -0,0 +1,61 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + 
#POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True + USE_GN: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 800, 1000) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (240000, 320000) + MAX_ITER: 360000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x.yaml new file mode 100644 index 0000000000..e3ffb2543f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x.yaml @@ -0,0 +1,58 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml new file mode 100644 index 0000000000..1a6f710022 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: 
"FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (400,) + MAX_SIZE_TRAIN: 667 + MIN_SIZE_TEST: 400 + MAX_SIZE_TEST: 667 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml new file mode 100644 index 0000000000..1a6f710022 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (400,) + MAX_SIZE_TRAIN: 667 + MIN_SIZE_TEST: 400 + MAX_SIZE_TEST: 667 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml new file mode 100644 index 0000000000..e2eae35779 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 
+ ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml new file mode 100644 index 0000000000..635f2c904b --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml @@ -0,0 +1,60 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1x.yaml new file mode 100644 index 0000000000..bb60b73acf --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_1x.yaml @@ -0,0 +1,58 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + 
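MIN_SIZE_TRAIN / MAX_SIZE_TRAIN drive the usual Detectron-style resize (scale the short side to MIN_SIZE unless that would push the long side past MAX_SIZE; the multi-scale files list several MIN_SIZE values and one is drawn per image), and SIZE_DIVISIBILITY: 32 then pads each image so both dimensions are multiples of 32, matching the coarsest pooler scale of 1/32 above. A rough sketch of that bookkeeping, with an illustrative helper name and rounding that may differ by a pixel from the real transform:

    import math

    def resized_and_padded(h, w, min_size=800, max_size=1333, divisibility=32):
        scale = min(min_size / min(h, w), max_size / max(h, w))
        h, w = round(h * scale), round(w * scale)
        pad = lambda x: int(math.ceil(x / divisibility) * divisibility)
        return (h, w), (pad(h), pad(w))

    print(resized_and_padded(480, 640))   # ((800, 1067), (800, 1088))

The FIX_SHAPE value of (1344, 1344) seen in the multi-scale config earlier in this patch is consistent with this rule: 1344 is 1333 rounded up to the next multiple of 32.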
ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml new file mode 100644 index 0000000000..4b47382430 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml @@ -0,0 +1,59 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 732, 800, 896, 960) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (240000, 320000) + MAX_ITER: 360000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1.5x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1.5x.yaml new file mode 100644 index 0000000000..03715a62ec --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1.5x.yaml @@ -0,0 +1,59 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + 
CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + CANONICAL_LEVEL: 5 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (180000, 240000) + MAX_ITER: 270000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1x.yaml new file mode 100644 index 0000000000..80edcbfcbc --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_R-50-FPN_canonical5_1x.yaml @@ -0,0 +1,59 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + CANONICAL_LEVEL: 5 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml new file mode 100644 index 0000000000..996fe4ebc1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml 
@@ -0,0 +1,66 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-101-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.125, 0.0625, 0.03125) + #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + RESNETS: + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + MASK_ON: True + USE_GN: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) + #TEST: ("coco_test-dev",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 840, 880, 920, 960, 1000) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.0025 + WEIGHT_DECAY: 0.0001 + STEPS: (480000, 640000) + MAX_ITER: 720000 + IMS_PER_BATCH: 4 +RETINANET: + RETINANET_ON: True + BACKBONE: "p3p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 + BBOX_REG_BETA: 0.11 + SELFADJUST_SMOOTH_L1: True diff --git a/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_p2p7_R-50-FPN_1x.yaml b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_p2p7_R-50-FPN_1x.yaml new file mode 100644 index 0000000000..91742b7197 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/configs/retina/retinanet_mask_p2p7_R-50-FPN_1x.yaml @@ -0,0 +1,57 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" + RPN_ONLY: True + BACKBONE: + CONV_BODY: "R-50-FPN" + OUT_CHANNELS: 256 + RPN: + USE_FPN: True + FG_IOU_THRESHOLD: 0.5 + BG_IOU_THRESHOLD: 0.4 + ANCHOR_STRIDE: (4, 8, 16, 32, 64) + PRE_NMS_TOP_N_TRAIN: 2000 + PRE_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 1000 + FPN_POST_NMS_TOP_N_TEST: 1000 + ROI_HEADS: + USE_FPN: True + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 7 + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + POOLER_SAMPLING_RATIO: 2 + FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" + PREDICTOR: "FPNPredictor" + ROI_MASK_HEAD: + POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) + FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" + PREDICTOR: "MaskRCNNC4Predictor" + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + RESOLUTION: 28 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (800,) + MAX_SIZE_TRAIN: 1333 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1333 +DATALOADER: + SIZE_DIVISIBILITY: 32 +SOLVER: + # Assume 4 gpus + BASE_LR: 0.005 + WEIGHT_DECAY: 0.0001 + STEPS: (120000, 160000) + MAX_ITER: 180000 + IMS_PER_BATCH: 8 +RETINANET: + RETINANET_ON: True + BACKBONE: "p2p7" + SCALES_PER_OCTAVE: 3 + STRADDLE_THRESH: -1 + NUM_MASKS_TEST: 50 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/__init__.py new file 
mode 100644 index 0000000000..5c7f19c6c0 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/__init__.py new file mode 100644 index 0000000000..22a15023b1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .defaults import _C as cfg diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py new file mode 100644 index 0000000000..6765834b35 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py @@ -0,0 +1,363 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import os + +from yacs.config import CfgNode as CN + +# ----------------------------------------------------------------------------- +# Convention about Training / Test specific parameters +# ----------------------------------------------------------------------------- +# Whenever an argument can be either used for training or for testing, the +# corresponding name will be post-fixed by a _TRAIN for a training parameter, +# or _TEST for a test-specific parameter. +# For example, the number of images during training will be +# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be +# IMAGES_PER_BATCH_TEST + +# ----------------------------------------------------------------------------- +# Config definition +# ----------------------------------------------------------------------------- + +_C = CN() +_C.DEBUG = False +_C.MODEL = CN() +_C.MODEL.RPN_ONLY = False +_C.MODEL.MASK_ON = False +_C.MODEL.SPARSE_MASK_ON = False +_C.MODEL.DEVICE = "npu:0" +_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" +_C.MODEL.USE_GN = False +# If the WEIGHT starts with a catalog://, like :R-50, the code will look for +# the path in paths_catalog. Else, it will use it as the specified absolute +# path +_C.MODEL.WEIGHT = "" + +# ----------------------------------------------------------------------------- +# INPUT +# ----------------------------------------------------------------------------- +_C.INPUT = CN() +# Size of the fixed shape +_C.INPUT.FIX_SHAPE = (1344, 1344) +# Size of the smallest side of the image during training +_C.INPUT.MIN_SIZE_TRAIN = (800,) # 800 +# Maximum size of the side of the image during training +_C.INPUT.MAX_SIZE_TRAIN = 1333 +# Size of the smallest side of the image during testing +_C.INPUT.MIN_SIZE_TEST = 800 +# Maximum size of the side of the image during testing +_C.INPUT.MAX_SIZE_TEST = 1333 +# Values to be used for image normalization +_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717] +# Values to be used for image normalization +_C.INPUT.PIXEL_STD = [1., 1., 1.] 
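The defaults above form a yacs CfgNode, so experiment configs such as the YAML files earlier in this patch override them by key path and a frozen copy is handed to the rest of the code. A minimal, self-contained sketch of that pattern follows; the keys are a small illustrative subset, and the commented-out YAML path is only an example, not a required file.

from yacs.config import CfgNode as CN

def build_default_cfg():
    cfg = CN()
    cfg.MODEL = CN()
    cfg.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
    cfg.MODEL.MASK_ON = False
    cfg.INPUT = CN()
    cfg.INPUT.MIN_SIZE_TRAIN = (800,)   # smallest image side during training
    cfg.INPUT.MAX_SIZE_TRAIN = 1333     # cap on the longest image side
    return cfg

cfg = build_default_cfg()
# cfg.merge_from_file("configs/retina/retinanet_mask_R-50-FPN_1x.yaml")  # YAML overrides defaults
cfg.merge_from_list(["MODEL.MASK_ON", True, "INPUT.MAX_SIZE_TRAIN", 1000])  # command-line style overrides
cfg.freeze()   # make the config immutable for the run
print(cfg.MODEL.MASK_ON, cfg.INPUT.MAX_SIZE_TRAIN)

merge_from_file only accepts keys that are already declared in the defaults, which is why every option referenced by the YAML configs has to exist in defaults.py first.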
+# Convert image to BGR format (for Caffe2 models), in range 0-255 +_C.INPUT.TO_BGR255 = True + +# ----------------------------------------------------------------------------- +# Dataset +# ----------------------------------------------------------------------------- +_C.DATASETS = CN() +# List of the dataset names for training, as present in paths_catalog.py +_C.DATASETS.TRAIN = () +# List of the dataset names for testing, as present in paths_catalog.py +_C.DATASETS.TEST = () + +# ----------------------------------------------------------------------------- +# DataLoader +# ----------------------------------------------------------------------------- +_C.DATALOADER = CN() +# Number of data loading threads +_C.DATALOADER.NUM_WORKERS = 0 +# If > 0, this enforces that each collated batch should have a size divisible +# by SIZE_DIVISIBILITY +_C.DATALOADER.SIZE_DIVISIBILITY = 0 +# If True, each batch should contain only images for which the aspect ratio +# is compatible. This groups portrait images together, and landscape images +# are not batched with portrait images. +_C.DATALOADER.ASPECT_RATIO_GROUPING = True + +# ---------------------------------------------------------------------------- # +# Backbone options +# ---------------------------------------------------------------------------- # +_C.MODEL.BACKBONE = CN() + +# The backbone conv body to use +# The string must match a function that is imported in modeling.model_builder +# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN +# backbone) +_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4" + +# Add StopGrad at a specified stage so the bottom layers are frozen +_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2 +_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4 + +# ---------------------------------------------------------------------------- # +# RPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.RPN = CN() +_C.MODEL.RPN.USE_FPN = False +# Base RPN anchor sizes given in absolute pixels w.r.t. the scaled network input +_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512) +# Stride of the feature map that RPN is attached. +# For FPN, number of strides should match number of scales +_C.MODEL.RPN.ANCHOR_STRIDE = (16,) +# RPN anchor aspect ratios +_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0) +# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels +# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors +_C.MODEL.RPN.STRADDLE_THRESH = 0 +# Minimum overlap required between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD +# ==> positive RPN example) +_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7 +# Maximum overlap allowed between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD +# ==> negative RPN example) +_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3 +# Total number of RPN examples per image +_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 +# Target fraction of foreground (positive) examples per RPN minibatch +_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000 +_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000 +# Number of top scoring RPN proposals to keep after applying NMS +_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000 +_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000 +# NMS threshold used on RPN proposals +_C.MODEL.RPN.NMS_THRESH = 0.7 +# Proposal height and width both need to be greater than RPN_MIN_SIZE +# (a the scale used during training or inference) +_C.MODEL.RPN.MIN_SIZE = 0 +# Number of top scoring RPN proposals to keep after combining proposals from +# all FPN levels +_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000 +_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000 + +# ---------------------------------------------------------------------------- # +# ROI HEADS options +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_HEADS = CN() +_C.MODEL.ROI_HEADS.USE_FPN = False +# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) +_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5 +# Overlap threshold for an RoI to be considered background +# (class = 0 if overlap in [0, BG_IOU_THRESHOLD)) +_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5 +# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets +# These are empirically chosen to approximately lead to unit variance targets +_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.) +# RoI minibatch size *per image* (number of regions of interest [ROIs]) +# Total number of RoIs per training minibatch = +# TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH * NUM_GPUS +# E.g., a common configuration is: 512 * 2 * 8 = 8192 +_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 +# Target fraction of RoI minibatch that is labeled foreground (i.e. 
class > 0) +_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 + +# Only used on test mode + +# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to +# balance obtaining high recall with not having too many low precision +# detections that will slow down inference post processing steps (like NMS) +_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05 +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +_C.MODEL.ROI_HEADS.NMS = 0.5 +# Maximum number of detections to return per image (100 is based on the limit +# established for the COCO dataset) +_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100 + +_C.MODEL.ROI_BOX_HEAD = CN() +_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor" +_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor" +_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,) +_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 81 +# Hidden layer dimension when using an MLP for the RoI box head +_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024 + +_C.MODEL.ROI_MASK_HEAD = CN() +_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor" +_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor" +_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,) +_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024 +_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256) +_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14 +_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True +_C.MODEL.ROI_MASK_HEAD.CANONICAL_LEVEL = 4 +# ---------------------------------------------------------------------------- # +# ResNe[X]t options (ResNets = {ResNet, ResNeXt} +# Note that parts of a resnet may be used for both the backbone and the head +# These options apply to both +# ---------------------------------------------------------------------------- # +_C.MODEL.RESNETS = CN() + +# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt +_C.MODEL.RESNETS.NUM_GROUPS = 1 + +# Baseline width of each group +_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 + +# Place the stride 2 conv on the 1x1 filter +# Use True only for the original MSRA ResNet; use False for C2 and Torch models +_C.MODEL.RESNETS.STRIDE_IN_1X1 = True + +# Residual transformation function +_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm" +# ResNet's stem function (conv1 and pool1) +_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm" + +# Apply dilation in stage "res5" +_C.MODEL.RESNETS.RES5_DILATION = 1 + +_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 +_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 + +# ---------------------------------------------------------------------------- # +# RetinaNet Options (Follow the Detectron version) +# ---------------------------------------------------------------------------- # +_C.RETINANET = CN() + +# RetinaNet is used (instead of Fast/er/Mask R-CNN/R-FCN/RPN) if True +_C.RETINANET.RETINANET_ON = False + +# This is the number of foreground classes, background is not included. 
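POOLER_SCALES and CANONICAL_LEVEL together determine which FPN level each RoI is pooled from; the actual mapping lives in the pooler code added elsewhere in this patch. The heuristic conventionally used for this (from the FPN paper) is sketched below with illustrative constants, so the config values above have a concrete interpretation.

import math

def fpn_level_for_roi(w, h, canonical_scale=224, canonical_level=4, k_min=2, k_max=5):
    # Map an RoI of size w x h to an FPN level in [k_min, k_max];
    # a canonical_scale x canonical_scale box lands on canonical_level.
    target = canonical_level + math.log2(math.sqrt(w * h) / canonical_scale + 1e-6)
    return int(min(max(math.floor(target), k_min), k_max))

print(fpn_level_for_roi(224, 224))  # -> 4, the canonical level
print(fpn_level_for_roi(56, 56))    # -> 2, small boxes pool from finer levels

Raising CANONICAL_LEVEL to 5, as the canonical5 configs above do, shifts this assignment one level coarser for the mask head.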
+_C.RETINANET.NUM_CLASSES = 81 + +# Anchor aspect ratios to use +_C.RETINANET.ANCHOR_SIZES = (32, 64, 128, 256, 512) +_C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0) +_C.RETINANET.ANCHOR_STRIDES = (8, 16, 32, 64, 128) +_C.RETINANET.STRADDLE_THRESH = 0 + +# Anchor scales per octave +_C.RETINANET.OCTAVE = 2.0 +_C.RETINANET.SCALES_PER_OCTAVE = 3 + +# Convolutions to use in the cls and bbox tower +# NOTE: this doesn't include the last conv for logits +_C.RETINANET.NUM_CONVS = 4 + +# Weight for bbox_regression loss +_C.RETINANET.BBOX_REG_WEIGHT = 1.0 + +# Smooth L1 loss beta for bbox regression +_C.RETINANET.BBOX_REG_BETA = 0.11 + +# Use Self-Adjust Smooth L1 Loss +_C.RETINANET.SELFADJUST_SMOOTH_L1 = False + +# During inference, #locs to select based on cls score before NMS is performed +# per FPN level +_C.RETINANET.PRE_NMS_TOP_N = 1000 + +# IoU overlap ratio for labeling an anchor as positive +# Anchors with >= iou overlap are labeled positive +_C.RETINANET.POSITIVE_OVERLAP = 0.5 + +# IoU overlap ratio for labeling an anchor as negative +# Anchors with < iou overlap are labeled negative +_C.RETINANET.NEGATIVE_OVERLAP = 0.4 + +# Focal loss parameter: alpha +_C.RETINANET.LOSS_ALPHA = 0.25 + +# Focal loss parameter: gamma +_C.RETINANET.LOSS_GAMMA = 2.0 + +# Prior prob for the positives at the beginning of training. This is used to set +# the bias init for the logits layer +_C.RETINANET.PRIOR_PROB = 0.01 + +# Whether classification and bbox branch tower should be shared or not +_C.RETINANET.SHARE_CLS_BBOX_TOWER = False + +# Use class specific bounding box regression instead of the default class +# agnostic regression +_C.RETINANET.CLASS_SPECIFIC_BBOX = False + +# Whether softmax should be used in classification branch training +_C.RETINANET.SOFTMAX = False + +# Inference cls score threshold, anchors with score > INFERENCE_TH are +# considered for inference +_C.RETINANET.INFERENCE_TH = 0.05 + +# "p3p7": Use feature p3p7 for object detection and p3-p5 for mask prediction. +# "p2p7": Use feature p3p7 for object detection and p2-p5 for mask prediction. 
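OCTAVE and SCALES_PER_OCTAVE expand each entry of ANCHOR_SIZES into intermediate scales, which is how RetinaNet covers object sizes densely with one base size per pyramid level. A short illustration using the default values above; the numbers are computed here for clarity, not taken from the code.

ANCHOR_SIZES = (32, 64, 128, 256, 512)   # one base size per FPN level P3..P7
OCTAVE = 2.0
SCALES_PER_OCTAVE = 3

anchor_scales = [
    [size * OCTAVE ** (i / SCALES_PER_OCTAVE) for i in range(SCALES_PER_OCTAVE)]
    for size in ANCHOR_SIZES
]
# P3 gets anchors of roughly 32, 40.3 and 50.8 pixels; combined with the three
# ASPECT_RATIOS this yields 9 anchors per location, as in the RetinaNet paper.
print([[round(s, 1) for s in level] for level in anchor_scales])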
+_C.RETINANET.BACKBONE = "p3p7" + +_C.RETINANET.NUM_MASKS_TEST = 50 + +_C.RETINANET.LOW_QUALITY_MATCHES = True +_C.RETINANET.LOW_QUALITY_THRESHOLD = 0.0 + +# ---------------------------------------------------------------------------- # +# SparseMask Options (Follow the Detectron version) +# ---------------------------------------------------------------------------- # +_C.MODEL.SPARSE_MASK_HEAD = CN() +_C.MODEL.SPARSE_MASK_HEAD.PREDICTOR = "" +_C.MODEL.SPARSE_MASK_HEAD.FEATURE_EXTRACTOR = "SparseMaskFPNFeatureExtractor" +_C.MODEL.SPARSE_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256) +_C.MODEL.SPARSE_MASK_HEAD.RESOLUTION = 14 + +# ---------------------------------------------------------------------------- # +# Solver +# ---------------------------------------------------------------------------- # +_C.SOLVER = CN() +_C.SOLVER.MAX_ITER = 40000 + +_C.SOLVER.BASE_LR = 0.001 +_C.SOLVER.BIAS_LR_FACTOR = 2 + +_C.SOLVER.MOMENTUM = 0.9 + +_C.SOLVER.WEIGHT_DECAY = 0.0005 +_C.SOLVER.WEIGHT_DECAY_BIAS = 0 + +_C.SOLVER.GAMMA = 0.1 +_C.SOLVER.STEPS = (30000,) + +_C.SOLVER.WARMUP_FACTOR = 1.0 / 3 +_C.SOLVER.WARMUP_ITERS = 500 +_C.SOLVER.WARMUP_METHOD = "linear" + +_C.SOLVER.CHECKPOINT_PERIOD = 2500 + +# Number of images per batch +# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will +# see 2 images per batch +_C.SOLVER.IMS_PER_BATCH = 16 + +# ---------------------------------------------------------------------------- # +# Specific test options +# ---------------------------------------------------------------------------- # +_C.TEST = CN() +_C.TEST.EXPECTED_RESULTS = [] +_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4 +# Number of images per batch +# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will +# see 2 images per batch +_C.TEST.IMS_PER_BATCH = 8 + +_C.TEST.DETECTIONS_PER_IMG = 100 +# Misc options +# ---------------------------------------------------------------------------- # +_C.OUTPUT_DIR = "." + +_C.SEED = 42 +# Set to 0 to disable. +_C.AMP = True +# Optimize level, you can set O2 to enable fp16 training +_C.OPT_LEVEL = "O2" +# Adjust the loss during training +_C.LOSS_SCALE_VALUE = 64 +_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py") +_C.LOCAL_RANK = 0 +_C.DIST_BACKEND = 'hccl' +_C.DEVICE = 0 +_C.N_GPU = 8 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/paths_catalog.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/paths_catalog.py new file mode 100644 index 0000000000..766399ebaa --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/paths_catalog.py @@ -0,0 +1,109 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
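The SOLVER options encode a linear-warmup, multi-step decay schedule: the learning rate starts at WARMUP_FACTOR times BASE_LR, ramps to BASE_LR over WARMUP_ITERS iterations, and is multiplied by GAMMA at each milestone in STEPS. The scheduler implementation itself is not shown in this part of the patch; the function below only restates what these knobs mean, using the default values above as assumptions.

def lr_at_iter(it, base_lr=0.001, steps=(30000,), gamma=0.1,
               warmup_factor=1.0 / 3, warmup_iters=500):
    if it < warmup_iters:
        alpha = it / warmup_iters
        factor = warmup_factor * (1 - alpha) + alpha   # linear ramp up to 1.0
    else:
        factor = 1.0
    n_decays = sum(1 for s in steps if it >= s)        # milestones already passed
    return base_lr * factor * (gamma ** n_decays)

print(lr_at_iter(0))       # ~0.00033, warmup start
print(lr_at_iter(500))     # 0.001, warmup finished
print(lr_at_iter(35000))   # 0.0001, after the first decay step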
+"""Centralized catalog of paths.""" + +import os + + +class DatasetCatalog(object): + DATA_DIR = "datasets" + DATASETS = { + "coco_test-dev": ( + "coco/test2017", + "coco/annotations/image_info_test-dev2017.json", + ), + "coco_2017_test": ( + "coco/test2017", + "coco/annotations/image_info_test2017.json", + ), + "coco_2017_train": ( + "coco/train2017", + "coco/annotations/instances_train2017.json", + ), + "coco_2017_val": ( + "coco/val2017", + "coco/annotations/instances_val2017.json", + ), + "coco_2014_train": ( + "coco/train2014", + "coco/annotations/instances_train2014.json", + ), + "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), + "coco_2014_minival": ( + "coco/val2014", + "coco/annotations/instances_minival2014.json", + ), + "coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/instances_valminusminival2014.json", + ), + } + + @staticmethod + def get(name): + if "coco" in name: + data_dir = DatasetCatalog.DATA_DIR + attrs = DatasetCatalog.DATASETS[name] + args = dict( + root=os.path.join(data_dir, attrs[0]), + ann_file=os.path.join(data_dir, attrs[1]), + ) + return dict( + factory="COCODataset", + args=args, + ) + raise RuntimeError("Dataset not available: {}".format(name)) + + +class ModelCatalog(object): + S3_C2_DETECTRON_URL = "https://dl.fbaipublicfiles.com/detectron" + C2_IMAGENET_MODELS = { + "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", + "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", + "FAIR/20171220/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", + } + + C2_DETECTRON_SUFFIX = "output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl" + C2_DETECTRON_MODELS = { + "35857197/e2e_faster_rcnn_R-50-C4_1x": "01_33_49.iAX0mXvW", + "35857345/e2e_faster_rcnn_R-50-FPN_1x": "01_36_30.cUF7QR7I", + "35857890/e2e_faster_rcnn_R-101-FPN_1x": "01_38_50.sNxI7sX7", + "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "06_31_39.5MIHi1fZ", + "35858791/e2e_mask_rcnn_R-50-C4_1x": "01_45_57.ZgkA7hPB", + "35858933/e2e_mask_rcnn_R-50-FPN_1x": "01_48_14.DzEQe4wC", + "35861795/e2e_mask_rcnn_R-101-FPN_1x": "02_31_37.KqyEK4tT", + "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "06_35_59.RZotkLKI", + } + + @staticmethod + def get(name): + if name.startswith("Caffe2Detectron/COCO"): + return ModelCatalog.get_c2_detectron_12_2017_baselines(name) + if name.startswith("ImageNetPretrained"): + return ModelCatalog.get_c2_imagenet_pretrained(name) + raise RuntimeError("model not present in the catalog {}".format(name)) + + @staticmethod + def get_c2_imagenet_pretrained(name): + prefix = ModelCatalog.S3_C2_DETECTRON_URL + name = name[len("ImageNetPretrained/"):] + name = ModelCatalog.C2_IMAGENET_MODELS[name] + url = "/".join([prefix, name]) + return url + + @staticmethod + def get_c2_detectron_12_2017_baselines(name): + # Detectron C2 models are stored following the structure + # prefix//2012_2017_baselines/.yaml./suffix + # we use as identifiers in the catalog Caffe2Detectron/COCO// + prefix = ModelCatalog.S3_C2_DETECTRON_URL + suffix = ModelCatalog.C2_DETECTRON_SUFFIX + # remove identification prefix + name = name[len("Caffe2Detectron/COCO/"):] + # split in and + model_id, model_name = name.split("/") + # parsing to make it match the url address from the Caffe2 models + model_name = "{}.yaml".format(model_name) + signature = ModelCatalog.C2_DETECTRON_MODELS[name] + unique_name = ".".join([model_name, signature]) + url = "/".join([prefix, model_id, "12_2017_baselines", unique_name, suffix]) + return url diff --git 
a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h new file mode 100644 index 0000000000..3907deab2a --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h @@ -0,0 +1,46 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once + +#include "cpu/vision.h" + +#ifdef WITH_CUDA +#include "cuda/vision.h" +#endif + +// Interface for Python +at::Tensor ROIAlign_forward(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + if (input.type().is_cuda()) { +#ifdef WITH_CUDA + return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); +} + +at::Tensor ROIAlign_backward(const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + if (grad.type().is_cuda()) { +#ifdef WITH_CUDA + return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h new file mode 100644 index 0000000000..200fd7390b --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h @@ -0,0 +1,48 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
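ROIAlign_forward and ROIAlign_backward above only dispatch between the CPU and CUDA kernels defined later in this patch. For intuition about the interface (spatial_scale, pooled size, sampling_ratio), the same operator is exposed as torchvision.ops.roi_align, so it can be exercised without building this extension; the tensor shapes below are made up for illustration.

import torch
from torchvision.ops import roi_align

feat = torch.randn(1, 256, 50, 68)                  # one FPN level at stride 16
rois = torch.tensor([[0., 64., 64., 320., 320.]])   # (batch_idx, x1, y1, x2, y2) in image coords
pooled = roi_align(feat, rois, output_size=(7, 7),
                   spatial_scale=1.0 / 16,          # image coords -> feature-map coords
                   sampling_ratio=2)
print(pooled.shape)                                 # torch.Size([1, 256, 7, 7])

With its default aligned=False, torchvision's roi_align should match the Caffe2-style convention (no half-pixel shift) used by the kernels in this patch.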
+#pragma once + +#include "cpu/vision.h" + +#ifdef WITH_CUDA +#include "cuda/vision.h" +#endif + + +std::tuple ROIPool_forward(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width) { + if (input.type().is_cuda()) { +#ifdef WITH_CUDA + return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +at::Tensor ROIPool_backward(const at::Tensor& grad, + const at::Tensor& input, + const at::Tensor& rois, + const at::Tensor& argmax, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width) { + if (grad.type().is_cuda()) { +#ifdef WITH_CUDA + return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + + + diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h new file mode 100644 index 0000000000..308861e447 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h @@ -0,0 +1,41 @@ +#pragma once + +#include "cpu/vision.h" + +#ifdef WITH_CUDA +#include "cuda/vision.h" +#endif + +// Interface for Python +at::Tensor SigmoidFocalLoss_forward( + const at::Tensor& logits, + const at::Tensor& targets, + const int num_classes, + const float gamma, + const float alpha) { + if (logits.type().is_cuda()) { +#ifdef WITH_CUDA + return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +at::Tensor SigmoidFocalLoss_backward( + const at::Tensor& logits, + const at::Tensor& targets, + const at::Tensor& d_losses, + const int num_classes, + const float gamma, + const float alpha) { + if (logits.type().is_cuda()) { +#ifdef WITH_CUDA + return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp new file mode 100644 index 0000000000..d35aedf27e --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp @@ -0,0 +1,257 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
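All of these headers guard their CUDA code paths behind the WITH_CUDA macro, which is expected to be defined by the extension build. A hedged sketch of the usual torch.utils.cpp_extension setup is given below; the glob patterns, names and overall layout are assumptions for illustration and are not taken from this patch's actual build script.

# Hypothetical setup.py sketch; paths and names are assumptions, not this patch's real build script.
import glob
import torch
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension

sources = glob.glob("maskrcnn_benchmark/csrc/*.cpp") + glob.glob("maskrcnn_benchmark/csrc/cpu/*.cpp")
extension, macros = CppExtension, []
if torch.cuda.is_available():
    sources += glob.glob("maskrcnn_benchmark/csrc/cuda/*.cu")
    extension, macros = CUDAExtension, [("WITH_CUDA", None)]   # enables the #ifdef WITH_CUDA branches

setup(
    name="maskrcnn_benchmark",
    ext_modules=[extension("maskrcnn_benchmark._C", sources, define_macros=macros)],
    cmdclass={"build_ext": BuildExtension},
)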
+#include "cpu/vision.h" + +// implementation taken from Caffe2 +template +struct PreCalc { + int pos1; + int pos2; + int pos3; + int pos4; + T w1; + T w2; + T w3; + T w4; +}; + +template +void pre_calc_for_bilinear_interpolate( + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int iy_upper, + const int ix_upper, + T roi_start_h, + T roi_start_w, + T bin_size_h, + T bin_size_w, + int roi_bin_grid_h, + int roi_bin_grid_w, + std::vector>& pre_calc) { + int pre_calc_index = 0; + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + for (int iy = 0; iy < iy_upper; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < ix_upper; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T x = xx; + T y = yy; + // deal with: inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + PreCalc pc; + pc.pos1 = 0; + pc.pos2 = 0; + pc.pos3 = 0; + pc.pos4 = 0; + pc.w1 = 0; + pc.w2 = 0; + pc.w3 = 0; + pc.w4 = 0; + pre_calc[pre_calc_index] = pc; + pre_calc_index += 1; + continue; + } + + if (y <= 0) { + y = 0; + } + if (x <= 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + // save weights and indeces + PreCalc pc; + pc.pos1 = y_low * width + x_low; + pc.pos2 = y_low * width + x_high; + pc.pos3 = y_high * width + x_low; + pc.pos4 = y_high * width + x_high; + pc.w1 = w1; + pc.w2 = w2; + pc.w3 = w3; + pc.w4 = w4; + pre_calc[pre_calc_index] = pc; + + pre_calc_index += 1; + } + } + } + } +} + +template +void ROIAlignForward_cpu_kernel( + const int nthreads, + const T* bottom_data, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* bottom_rois, + //int roi_cols, + T* top_data) { + //AT_ASSERT(roi_cols == 4 || roi_cols == 5); + int roi_cols = 5; + + int n_rois = nthreads / channels / pooled_width / pooled_height; + // (n, c, ph, pw) is an element in the pooled output + // can be parallelized using omp + // #pragma omp parallel for num_threads(32) + for (int n = 0; n < n_rois; n++) { + int index_n = n * channels * pooled_width * pooled_height; + + // roi could have 4 or 5 columns + const T* offset_bottom_rois = bottom_rois + n * roi_cols; + int roi_batch_ind = 0; + if (roi_cols == 5) { + roi_batch_ind = offset_bottom_rois[0]; + offset_bottom_rois++; + } + + // Do not using rounding; this implementation detail is critical + T roi_start_w = offset_bottom_rois[0] * spatial_scale; + T roi_start_h = offset_bottom_rois[1] * spatial_scale; + T roi_end_w = offset_bottom_rois[2] * spatial_scale; + T roi_end_h = offset_bottom_rois[3] * spatial_scale; + // T roi_start_w = round(offset_bottom_rois[0] * spatial_scale); + // T roi_start_h = round(offset_bottom_rois[1] * spatial_scale); + // T roi_end_w = round(offset_bottom_rois[2] * spatial_scale); + // T roi_end_h = 
round(offset_bottom_rois[3] * spatial_scale); + + // Force malformed ROIs to be 1x1 + T roi_width = std::max(roi_end_w - roi_start_w, (T)1.); + T roi_height = std::max(roi_end_h - roi_start_h, (T)1.); + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + // we want to precalculate indeces and weights shared by all chanels, + // this is the key point of optimiation + std::vector> pre_calc( + roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); + pre_calc_for_bilinear_interpolate( + height, + width, + pooled_height, + pooled_width, + roi_bin_grid_h, + roi_bin_grid_w, + roi_start_h, + roi_start_w, + bin_size_h, + bin_size_w, + roi_bin_grid_h, + roi_bin_grid_w, + pre_calc); + + for (int c = 0; c < channels; c++) { + int index_n_c = index_n + c * pooled_width * pooled_height; + const T* offset_bottom_data = + bottom_data + (roi_batch_ind * channels + c) * height * width; + int pre_calc_index = 0; + + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + int index = index_n_c + ph * pooled_width + pw; + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + PreCalc pc = pre_calc[pre_calc_index]; + output_val += pc.w1 * offset_bottom_data[pc.pos1] + + pc.w2 * offset_bottom_data[pc.pos2] + + pc.w3 * offset_bottom_data[pc.pos3] + + pc.w4 * offset_bottom_data[pc.pos4]; + + pre_calc_index += 1; + } + } + output_val /= count; + + top_data[index] = output_val; + } // for pw + } // for ph + } // for c + } // for n +} + +at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(!input.type().is_cuda(), "input must be a CPU tensor"); + AT_ASSERTM(!rois.type().is_cuda(), "rois must be a CPU tensor"); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + + if (output.numel() == 0) { + return output; + } + + AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { + ROIAlignForward_cpu_kernel( + output_size, + input.data(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois.data(), + output.data()); + }); + return output; +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak new file mode 100644 index 0000000000..551b6eb0bf --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak @@ -0,0 +1,75 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
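pre_calc_for_bilinear_interpolate caches, for every sampling point, the four neighbouring cell indices and bilinear weights (pos1..pos4, w1..w4) so that the per-channel loop reduces to a cheap weighted sum. A small numpy restatement of those weights, with made-up coordinates; border handling is simplified relative to the kernel.

import numpy as np

def bilinear_weights(y, x, height, width):
    # clamp the sampling point into the feature map, as the kernel does for y <= 0 / x <= 0
    y = min(max(y, 0.0), height - 1)
    x = min(max(x, 0.0), width - 1)
    y_low, x_low = int(y), int(x)
    y_high, x_high = min(y_low + 1, height - 1), min(x_low + 1, width - 1)
    ly, lx = y - y_low, x - x_low
    hy, hx = 1.0 - ly, 1.0 - lx
    # (cell, weight) pairs; the four weights always sum to 1
    return [((y_low, x_low), hy * hx), ((y_low, x_high), hy * lx),
            ((y_high, x_low), ly * hx), ((y_high, x_high), ly * lx)]

feat = np.arange(16.0).reshape(4, 4)
val = sum(w * feat[cell] for cell, w in bilinear_weights(1.5, 2.25, 4, 4))
print(val)   # 8.25, interpolated between rows 1-2 and columns 2-3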
+#include "cpu/vision.h" + + +template +at::Tensor nms_cpu_kernel(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold) { + AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); + AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); + AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); + + if (dets.numel() == 0) { + return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); + } + + auto x1_t = dets.select(1, 0).contiguous(); + auto y1_t = dets.select(1, 1).contiguous(); + auto x2_t = dets.select(1, 2).contiguous(); + auto y2_t = dets.select(1, 3).contiguous(); + + at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + + auto ndets = dets.size(0); + at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); + + auto suppressed = suppressed_t.data(); + auto order = order_t.data(); + auto x1 = x1_t.data(); + auto y1 = y1_t.data(); + auto x2 = x2_t.data(); + auto y2 = y2_t.data(); + auto areas = areas_t.data(); + + for (int64_t _i = 0; _i < ndets; _i++) { + auto i = order[_i]; + if (suppressed[i] == 1) + continue; + auto ix1 = x1[i]; + auto iy1 = y1[i]; + auto ix2 = x2[i]; + auto iy2 = y2[i]; + auto iarea = areas[i]; + + for (int64_t _j = _i + 1; _j < ndets; _j++) { + auto j = order[_j]; + if (suppressed[j] == 1) + continue; + auto xx1 = std::max(ix1, x1[j]); + auto yy1 = std::max(iy1, y1[j]); + auto xx2 = std::min(ix2, x2[j]); + auto yy2 = std::min(iy2, y2[j]); + + auto w = std::max(static_cast(0), xx2 - xx1 + 1); + auto h = std::max(static_cast(0), yy2 - yy1 + 1); + auto inter = w * h; + auto ovr = inter / (iarea + areas[j] - inter); + if (ovr >= threshold) + suppressed[j] = 1; + } + } + return at::nonzero(suppressed_t == 0).squeeze(1); +} + +at::Tensor nms_cpu(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold) { + at::Tensor result; + AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { + result = nms_cpu_kernel(dets, scores, threshold); + }); + return result; +} \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h new file mode 100644 index 0000000000..6cc112f975 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h @@ -0,0 +1,16 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once +#include + + +at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + + +//at::Tensor nms_cpu(const at::Tensor& dets, +// const at::Tensor& scores, +// const float threshold); diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu new file mode 100644 index 0000000000..5fe97ca906 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu @@ -0,0 +1,346 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
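nms_cpu_kernel above is the classic greedy suppression over score-sorted boxes, using the +1 box-size convention of the original Detectron code. The same logic in plain Python, for reference only; the model itself runs the compiled or device-specific NMS paths.

def box_iou(a, b):
    # boxes are (x1, y1, x2, y2) with the inclusive +1 size convention
    area = lambda box: (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    xx1, yy1 = max(a[0], b[0]), max(a[1], b[1])
    xx2, yy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, xx2 - xx1 + 1) * max(0.0, yy2 - yy1 + 1)
    return inter / (area(a) + area(b) - inter)

def nms(boxes, scores, threshold=0.5):
    order = sorted(range(len(boxes)), key=lambda i: scores[i], reverse=True)
    keep = []
    for i in order:
        if all(box_iou(boxes[i], boxes[j]) < threshold for j in keep):
            keep.append(i)
    return keep

boxes = [(0, 0, 100, 100), (10, 10, 110, 110), (200, 200, 300, 300)]
print(nms(boxes, [0.9, 0.8, 0.7]))   # -> [0, 2]; the heavily overlapping second box is suppressed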
+#include +#include + +#include +#include +#include + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + + +template +__device__ T bilinear_interpolate(const T* bottom_data, + const int height, const int width, + T y, T x, + const int index /* index for debug only*/) { + + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + //empty + return 0; + } + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + int y_low = (int) y; + int x_low = (int) x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T) y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T) x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + // do bilinear interpolation + T v1 = bottom_data[y_low * width + x_low]; + T v2 = bottom_data[y_low * width + x_high]; + T v3 = bottom_data[y_high * width + x_low]; + T v4 = bottom_data[y_high * width + x_high]; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +template +__global__ void RoIAlignForward(const int nthreads, const T* bottom_data, + const T spatial_scale, const int channels, + const int height, const int width, + const int pooled_height, const int pooled_width, + const int sampling_ratio, + const T* bottom_rois, T* top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + + // Do not using rounding; this implementation detail is critical + T roi_start_w = offset_bottom_rois[1] * spatial_scale; + T roi_start_h = offset_bottom_rois[2] * spatial_scale; + T roi_end_w = offset_bottom_rois[3] * spatial_scale; + T roi_end_h = offset_bottom_rois[4] * spatial_scale; + // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale); + // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale); + // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale); + // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale); + + // Force malformed ROIs to be 1x1 + T roi_width = max(roi_end_w - roi_start_w, (T)1.); + T roi_height = max(roi_end_h - roi_start_h, (T)1.); + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1 + { + const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix ++) + { + const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); + + T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, index); + output_val += val; + } + } + output_val /= count; + + top_data[index] = output_val; + } +} + + +template +__device__ void bilinear_interpolate_gradient( + const int height, const int width, + T y, T x, + T & w1, T & w2, T & w3, T & w4, + int & x_low, int & x_high, int & y_low, int & y_high, + const int index /* index for debug only*/) { + + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + //empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + y_low = (int) y; + x_low = (int) x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T) y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T) x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = bottom_data[y_low * width + x_low]; + // T v2 = bottom_data[y_low * width + x_high]; + // T v3 = bottom_data[y_high * width + x_low]; + // T v4 = bottom_data[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +__global__ void RoIAlignBackwardFeature(const int nthreads, const T* top_diff, + const int num_rois, const T spatial_scale, + const int channels, const int height, const int width, + const int pooled_height, const int pooled_width, + const int sampling_ratio, + T* bottom_diff, + const T* bottom_rois) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + + // Do not using rounding; this implementation detail is critical + T roi_start_w = offset_bottom_rois[1] * spatial_scale; + T roi_start_h = offset_bottom_rois[2] * spatial_scale; + T roi_end_w = offset_bottom_rois[3] * spatial_scale; + T roi_end_h = offset_bottom_rois[4] * spatial_scale; + // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale); + // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale); + // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale); + // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale); + + // Force malformed ROIs to be 1x1 + T roi_width = max(roi_end_w - roi_start_w, (T)1.); + T roi_height = max(roi_end_h - roi_start_h, (T)1.); + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_bottom_diff = bottom_diff + (roi_batch_ind * channels + c) * height * width; + + int top_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_top_diff = top_diff + top_offset; + 
const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1 + { + const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix ++) + { + const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient(height, width, y, x, + w1, w2, w3, w4, + x_low, x_high, y_low, y_high, + index); + + T g1 = top_diff_this_bin * w1 / count; + T g2 = top_diff_this_bin * w2 / count; + T g3 = top_diff_this_bin * w3 / count; + T g4 = top_diff_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) + { + atomicAdd(offset_bottom_diff + y_low * width + x_low, static_cast(g1)); + atomicAdd(offset_bottom_diff + y_low * width + x_high, static_cast(g2)); + atomicAdd(offset_bottom_diff + y_high * width + x_low, static_cast(g3)); + atomicAdd(offset_bottom_diff + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // CUDA_1D_KERNEL_LOOP +} // RoIAlignBackward + + +at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L)); + dim3 block(512); + + if (output.numel() == 0) { + THCudaCheck(cudaGetLastError()); + return output; + } + + AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { + RoIAlignForward<<>>( + output_size, + input.contiguous().data(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois.contiguous().data(), + output.data()); + }); + THCudaCheck(cudaGetLastError()); + return output; +} + +// TODO remove the dependency on input and use instead its sizes -> save memory +at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); + AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); + + auto num_rois = rois.size(0); + auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 
4096L)); + dim3 block(512); + + // handle possibly empty gradients + if (grad.numel() == 0) { + THCudaCheck(cudaGetLastError()); + return grad_input; + } + + AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] { + RoIAlignBackwardFeature<<>>( + grad.numel(), + grad.contiguous().data(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data(), + rois.contiguous().data()); + }); + THCudaCheck(cudaGetLastError()); + return grad_input; +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu new file mode 100644 index 0000000000..b826dd9bc2 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu @@ -0,0 +1,202 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#include +#include + +#include +#include +#include + + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + + +template +__global__ void RoIPoolFForward(const int nthreads, const T* bottom_data, + const T spatial_scale, const int channels, const int height, + const int width, const int pooled_height, const int pooled_width, + const T* bottom_rois, T* top_data, int* argmax_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); + int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); + int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); + int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); + + // Force malformed ROIs to be 1x1 + int roi_width = max(roi_end_w - roi_start_w + 1, 1); + int roi_height = max(roi_end_h - roi_start_h + 1, 1); + T bin_size_h = static_cast(roi_height) + / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) + / static_cast(pooled_width); + + int hstart = static_cast(floor(static_cast(ph) + * bin_size_h)); + int wstart = static_cast(floor(static_cast(pw) + * bin_size_w)); + int hend = static_cast(ceil(static_cast(ph + 1) + * bin_size_h)); + int wend = static_cast(ceil(static_cast(pw + 1) + * bin_size_w)); + + // Add roi offsets and clip to input boundaries + hstart = min(max(hstart + roi_start_h, 0), height); + hend = min(max(hend + roi_start_h, 0), height); + wstart = min(max(wstart + roi_start_w, 0), width); + wend = min(max(wend + roi_start_w, 0), width); + bool is_empty = (hend <= hstart) || (wend <= wstart); + + // Define an empty pooling region to be zero + T maxval = is_empty ? 
0 : -FLT_MAX; + // If nothing is pooled, argmax = -1 causes nothing to be backprop'd + int maxidx = -1; + const T* offset_bottom_data = + bottom_data + (roi_batch_ind * channels + c) * height * width; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int bottom_index = h * width + w; + if (offset_bottom_data[bottom_index] > maxval) { + maxval = offset_bottom_data[bottom_index]; + maxidx = bottom_index; + } + } + } + top_data[index] = maxval; + argmax_data[index] = maxidx; + } +} + +template +__global__ void RoIPoolFBackward(const int nthreads, const T* top_diff, + const int* argmax_data, const int num_rois, const T spatial_scale, + const int channels, const int height, const int width, + const int pooled_height, const int pooled_width, T* bottom_diff, + const T* bottom_rois) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + int bottom_offset = (roi_batch_ind * channels + c) * height * width; + int top_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_top_diff = top_diff + top_offset; + T* offset_bottom_diff = bottom_diff + bottom_offset; + const int* offset_argmax_data = argmax_data + top_offset; + + int argmax = offset_argmax_data[ph * pooled_width + pw]; + if (argmax != -1) { + atomicAdd( + offset_bottom_diff + argmax, + static_cast(offset_top_diff[ph * pooled_width + pw])); + + } + } +} + +std::tuple ROIPool_forward_cuda(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width) { + AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + auto argmax = at::zeros({num_rois, channels, pooled_height, pooled_width}, input.options().dtype(at::kInt)); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L)); + dim3 block(512); + + if (output.numel() == 0) { + THCudaCheck(cudaGetLastError()); + return std::make_tuple(output, argmax); + } + + AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] { + RoIPoolFForward<<>>( + output_size, + input.contiguous().data(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + rois.contiguous().data(), + output.data(), + argmax.data()); + }); + THCudaCheck(cudaGetLastError()); + return std::make_tuple(output, argmax); +} + +// TODO remove the dependency on input and use instead its sizes -> save memory +at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, + const at::Tensor& input, + const at::Tensor& rois, + const at::Tensor& argmax, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width) { + AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); + AT_ASSERTM(rois.type().is_cuda(), 
"rois must be a CUDA tensor"); + // TODO add more checks + + auto num_rois = rois.size(0); + auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 4096L)); + dim3 block(512); + + // handle possibly empty gradients + if (grad.numel() == 0) { + THCudaCheck(cudaGetLastError()); + return grad_input; + } + + AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIPool_backward", [&] { + RoIPoolFBackward<<>>( + grad.numel(), + grad.contiguous().data(), + argmax.data(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + grad_input.data(), + rois.contiguous().data()); + }); + THCudaCheck(cudaGetLastError()); + return grad_input; +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu new file mode 100644 index 0000000000..7d40767bbb --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu @@ -0,0 +1,188 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This file is modified from https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu +// Cheng-Yang Fu +// cyfu@cs.unc.edu +#include +#include + +#include +#include +#include + +#include + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + + +template +__global__ void SigmoidFocalLossForward(const int nthreads, + const T* logits, + const int* targets, + const int num_classes, + const float gamma, + const float alpha, + const int num, + T* losses) { + CUDA_1D_KERNEL_LOOP(i, nthreads) { + + int n = i / num_classes; + int d = i % num_classes; // current class[0~79]; + int t = targets[n]; // target class [1~80]; + + // Decide it is positive or negative case. + T c1 = (t == (d+1)); + T c2 = (t>=0 & t != (d+1)); + + T zn = (1.0 - alpha); + T zp = (alpha); + + // p = 1. / 1. + expf(-x); p = sigmoid(x) + T p = 1. / (1. + expf(-logits[i])); + + // (1-p)**gamma * log(p) where + T term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); + + // p**gamma * log(1-p) + T term2 = powf(p, gamma) * + (-1. * logits[i] * (logits[i] >= 0) - + logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); + + losses[i] = 0.0; + losses[i] += -c1 * term1 * zp; + losses[i] += -c2 * term2 * zn; + + } // CUDA_1D_KERNEL_LOOP +} // SigmoidFocalLossForward + + +template +__global__ void SigmoidFocalLossBackward(const int nthreads, + const T* logits, + const int* targets, + const T* d_losses, + const int num_classes, + const float gamma, + const float alpha, + const int num, + T* d_logits) { + CUDA_1D_KERNEL_LOOP(i, nthreads) { + + int n = i / num_classes; + int d = i % num_classes; // current class[0~79]; + int t = targets[n]; // target class [1~80], 0 is background; + + // Decide it is positive or negative case. + T c1 = (t == (d+1)); + T c2 = (t>=0 & t != (d+1)); + + T zn = (1.0 - alpha); + T zp = (alpha); + // p = 1. / 1. + expf(-x); p = sigmoid(x) + T p = 1. / (1. + expf(-logits[i])); + + // (1-p)**g * (1 - p - g*p*log(p) + T term1 = powf((1. - p), gamma) * + (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); + + // (p**g) * (g*(1-p)*log(1-p) - p) + T term2 = powf(p, gamma) * + ((-1. * logits[i] * (logits[i] >= 0) - + logf(1. 
+ expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * + (1. - p) * gamma - p); + d_logits[i] = 0.0; + d_logits[i] += -c1 * term1 * zp; + d_logits[i] += -c2 * term2 * zn; + d_logits[i] = d_logits[i] * d_losses[i]; + + } // CUDA_1D_KERNEL_LOOP +} // SigmoidFocalLossBackward + + +at::Tensor SigmoidFocalLoss_forward_cuda( + const at::Tensor& logits, + const at::Tensor& targets, + const int num_classes, + const float gamma, + const float alpha) { + AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); + AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); + AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); + + const int num_samples = logits.size(0); + + auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); + auto losses_size = num_samples * logits.size(1); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L)); + dim3 block(512); + + if (losses.numel() == 0) { + THCudaCheck(cudaGetLastError()); + return losses; + } + + AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { + SigmoidFocalLossForward<<>>( + losses_size, + logits.contiguous().data(), + targets.contiguous().data(), + num_classes, + gamma, + alpha, + num_samples, + losses.data()); + }); + THCudaCheck(cudaGetLastError()); + return losses; +} + + +at::Tensor SigmoidFocalLoss_backward_cuda( + const at::Tensor& logits, + const at::Tensor& targets, + const at::Tensor& d_losses, + const int num_classes, + const float gamma, + const float alpha) { + AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); + AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); + AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); + + AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); + + const int num_samples = logits.size(0); + AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); + + auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); + auto d_logits_size = num_samples * logits.size(1); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min(THCCeilDiv(d_logits_size, 512L), 4096L)); + dim3 block(512); + + if (d_logits.numel() == 0) { + THCudaCheck(cudaGetLastError()); + return d_logits; + } + + AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { + SigmoidFocalLossBackward<<>>( + d_logits_size, + logits.contiguous().data(), + targets.contiguous().data(), + d_losses.contiguous().data(), + num_classes, + gamma, + alpha, + num_samples, + d_logits.data()); + }); + + THCudaCheck(cudaGetLastError()); + return d_logits; +} + diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu new file mode 100644 index 0000000000..d7ccf79b0d --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu @@ -0,0 +1,128 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
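The SigmoidFocalLoss kernels above compute the focal loss element by element: term1 = (1-p)^gamma * log(p) for the target class and term2 = p^gamma * log(1-p) for every other class, weighted by alpha and 1-alpha. For reference, the same forward computation can be written in a few lines of PyTorch; this is only an illustrative sketch (gamma, alpha and the toy logits/targets below are made up, and it uses the numerically naive log(1-p) instead of the kernel's stable form).

    import torch

    def sigmoid_focal_loss_reference(logits, targets, gamma=2.0, alpha=0.25):
        # logits: (N, C) raw scores; targets: (N,) with 0 = background, 1..C = foreground class
        n, c = logits.shape
        class_ids = torch.arange(1, c + 1, device=logits.device)
        t = targets.unsqueeze(1)
        pos = (t == class_ids).float()                       # c1 in the kernel
        neg = ((t >= 0) & (t != class_ids)).float()          # c2 in the kernel
        p = torch.sigmoid(logits)
        term1 = (1 - p) ** gamma * torch.log(p.clamp(min=1e-38))
        term2 = p ** gamma * torch.log((1 - p).clamp(min=1e-38))
        return -pos * alpha * term1 - neg * (1 - alpha) * term2   # per-element losses, shape (N, C)

    per_element = sigmoid_focal_loss_reference(torch.randn(3, 80), torch.tensor([0, 12, 80]))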
+#include +#include + +#include +#include + +#include +#include + +int const threadsPerBlock = sizeof(unsigned long long) * 8; + +__device__ inline float devIoU(float const * const a, float const * const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, + const float *dev_boxes, unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float *cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +// boxes is a N x 5 tensor +at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { + using scalar_t = float; + AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); + auto scores = boxes.select(1, 4); + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + auto boxes_sorted = boxes.index_select(0, order_t); + + int boxes_num = boxes.size(0); + + const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); + + scalar_t* boxes_dev = boxes_sorted.data(); + + THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState + + unsigned long long* mask_dev = NULL; + //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, + // boxes_num * col_blocks * sizeof(unsigned long long))); + + mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); + + dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), + THCCeilDiv(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<>>(boxes_num, + nms_overlap_thresh, + boxes_dev, + mask_dev); + + std::vector mask_host(boxes_num * col_blocks); + THCudaCheck(cudaMemcpy(&mask_host[0], + mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks, + cudaMemcpyDeviceToHost)); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + at::Tensor keep = 
at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); + int64_t* keep_out = keep.data(); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long *p = &mask_host[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + + THCudaFree(state, mask_dev); + // TODO improve this part + return std::get<0>(order_t.index({keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)}).sort(0, false)); +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h new file mode 100644 index 0000000000..6d9f8871f7 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h @@ -0,0 +1,63 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once +#include + + +at::Tensor SigmoidFocalLoss_forward_cuda( + const at::Tensor& logits, + const at::Tensor& targets, + const int num_classes, + const float gamma, + const float alpha); + +at::Tensor SigmoidFocalLoss_backward_cuda( + const at::Tensor& logits, + const at::Tensor& targets, + const at::Tensor& d_losses, + const int num_classes, + const float gamma, + const float alpha); + +at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); + + +std::tuple ROIPool_forward_cuda(const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width); + +at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, + const at::Tensor& input, + const at::Tensor& rois, + const at::Tensor& argmax, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width); + +at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); + + +at::Tensor compute_flow_cuda(const at::Tensor& boxes, + const int height, + const int width); diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak new file mode 100644 index 0000000000..312fed4a7c --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak @@ -0,0 +1,28 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
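nms_cuda above sorts the boxes by score, has each CUDA block fill a 64-bit mask marking which later boxes a given box would suppress, and then resolves the final keep list sequentially on the CPU. The greedy rule it implements is the standard one; the Python sketch below states it directly (same +1 box-area convention as devIoU) and is meant only as a readability aid, not as the code path used by the extension.

    import torch

    def nms_reference(boxes, scores, iou_thresh):
        # boxes: (N, 4) as x1, y1, x2, y2; returns the kept indices in descending-score order
        order = scores.argsort(descending=True)
        areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
        suppressed = torch.zeros(len(boxes), dtype=torch.bool)
        keep = []
        for idx in order.tolist():
            if suppressed[idx]:
                continue
            keep.append(idx)
            lt = torch.max(boxes[idx, :2], boxes[:, :2])      # pairwise intersection corners
            rb = torch.min(boxes[idx, 2:], boxes[:, 2:])
            wh = (rb - lt + 1).clamp(min=0)
            inter = wh[:, 0] * wh[:, 1]
            iou = inter / (areas[idx] + areas - inter)
            suppressed |= iou > iou_thresh                    # drop everything this box overlaps too much
            suppressed[idx] = False                           # never suppress the box just kept
        return torch.tensor(keep)

    keep = nms_reference(torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]]),
                         torch.tensor([0.9, 0.8, 0.7]), iou_thresh=0.5)   # -> tensor([0, 2])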
+#pragma once +#include "cpu/vision.h" + +#ifdef WITH_CUDA +#include "cuda/vision.h" +#endif + + +at::Tensor nms(const at::Tensor& dets, + const at::Tensor& scores, + const float threshold) { + + if (dets.type().is_cuda()) { +#ifdef WITH_CUDA + // TODO raise error if not compiled with CUDA + if (dets.numel() == 0) + return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); + auto b = at::cat({dets, scores.unsqueeze(1)}, 1); + return nms_cuda(b, threshold); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + + at::Tensor result = nms_cpu(dets, scores, threshold); + return result; +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp new file mode 100644 index 0000000000..fbd5613273 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp @@ -0,0 +1,15 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +//#include "nms.h" +#include "ROIAlign.h" +#include "ROIPool.h" +#include "SigmoidFocalLoss.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { +// m.def("nms", &nms, "non-maximum suppression"); + m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); + m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); + m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); + m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); + m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); + m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); +} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/__init__.py new file mode 100644 index 0000000000..2ba1e52473 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .build import make_data_loader diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/build.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/build.py new file mode 100644 index 0000000000..ec85ff43bc --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/build.py @@ -0,0 +1,165 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import bisect +import copy +import logging +import torch.utils.data + +from maskrcnn_benchmark.utils.imports import import_file +from . import datasets as D +from . import samplers +from .collate_batch import BatchCollator +from .transforms import build_transforms + + +def build_dataset(dataset_list, transforms, dataset_catalog, is_train=True): + """ + Arguments: + dataset_list (list[str]): Contains the names of the datasets, i.e., + coco_2014_trian, coco_2014_val, etc + transforms (callable): transforms to apply to each (image, target) sample + dataset_catalog (DatasetCatalog): contains the information on how to + construct a dataset. 
+ is_train (bool): whether to setup the dataset for training or testing + """ + if not isinstance(dataset_list, (list, tuple)): + raise RuntimeError( + "dataset_list should be a list of strings, got {}".format(dataset_list)) + datasets = [] + for dataset_name in dataset_list: + data = dataset_catalog.get(dataset_name) + factory = getattr(D, data["factory"]) + args = data["args"] + # for COCODataset, we want to remove images without annotations + # during training + if data["factory"] == "COCODataset": + args["remove_images_without_annotations"] = is_train + args["transforms"] = transforms + # make dataset from factory + dataset = factory(**args) + datasets.append(dataset) + + # for testing, return a list of datasets + if not is_train: + return datasets + + # for training, concatenate all datasets into a single one + dataset = datasets[0] + if len(datasets) > 1: + dataset = D.ConcatDataset(datasets) + + return [dataset] + + +def make_data_sampler(dataset, shuffle, distributed): + if distributed: + return samplers.DistributedSampler(dataset, shuffle=shuffle) + if shuffle: + sampler = torch.utils.data.sampler.RandomSampler(dataset) + else: + sampler = torch.utils.data.sampler.SequentialSampler(dataset) + return sampler + + +def _quantize(x, bins): + bins = copy.copy(bins) + bins = sorted(bins) + quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) + return quantized + + +def _compute_aspect_ratios(dataset): + aspect_ratios = [] + for i in range(len(dataset)): + img_info = dataset.get_img_info(i) + aspect_ratio = float(img_info["height"]) / float(img_info["width"]) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def make_batch_data_sampler( + dataset, sampler, aspect_grouping, images_per_batch, num_iters=None, start_iter=0 +): + if aspect_grouping: + if not isinstance(aspect_grouping, (list, tuple)): + aspect_grouping = [aspect_grouping] + aspect_ratios = _compute_aspect_ratios(dataset) + group_ids = _quantize(aspect_ratios, aspect_grouping) + batch_sampler = samplers.GroupedBatchSampler( + sampler, group_ids, images_per_batch, drop_uneven=True + ) + else: + batch_sampler = torch.utils.data.sampler.BatchSampler( + sampler, images_per_batch, drop_last=True + ) + if num_iters is not None: + batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iters, start_iter) + return batch_sampler + + +def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0): + num_gpus = cfg.N_GPU + if is_train: + images_per_batch = cfg.SOLVER.IMS_PER_BATCH + assert (images_per_batch % num_gpus == 0), \ + "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(images_per_batch, num_gpus) + images_per_gpu = images_per_batch // num_gpus + shuffle = True + num_iters = cfg.SOLVER.MAX_ITER + else: + images_per_batch = cfg.TEST.IMS_PER_BATCH + assert (images_per_batch % num_gpus == 0), \ + "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(images_per_batch, num_gpus) + images_per_gpu = images_per_batch // num_gpus + shuffle = False if not is_distributed else True + num_iters = None + start_iter = 0 + + if images_per_gpu > 1: + logger = logging.getLogger(__name__) + logger.warning( + "When using more than one image per GPU you may encounter " + "an out-of-memory (OOM) error if your GPU does not have " + "sufficient memory. If this happens, you can reduce " + "SOLVER.IMS_PER_BATCH (for training) or " + "TEST.IMS_PER_BATCH (for inference). 
For training, you must " + "also adjust the learning rate and schedule length according " + "to the linear scaling rule. See for example: " + "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14" + ) + + # group images which have similar aspect ratio. In this case, we only + # group in two cases: those with width / height > 1, and the other way around, + # but the code supports more general grouping strategy + aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] + + paths_catalog = import_file( + "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True + ) + DatasetCatalog = paths_catalog.DatasetCatalog + DatasetCatalog.DATA_DIR = cfg.DATASETS.DATA_DIR + dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST + + transforms = build_transforms(cfg, is_train) + datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train) + + data_loaders = [] + for dataset in datasets: + sampler = make_data_sampler(dataset, shuffle, is_distributed) + batch_sampler = make_batch_data_sampler( + dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter + ) + collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY) + num_workers = cfg.DATALOADER.NUM_WORKERS + data_loader = torch.utils.data.DataLoader( + dataset, + num_workers=2, + batch_sampler=batch_sampler, + collate_fn=collator, + pin_memory=True, + ) + data_loaders.append(data_loader) + if is_train: + # during training, a single (possibly concatenated) data_loader is returned + assert len(data_loaders) == 1 + return data_loaders[0] + return data_loaders diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/collate_batch.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/collate_batch.py new file mode 100644 index 0000000000..a7f0341674 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/collate_batch.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from maskrcnn_benchmark.structures.image_list import to_image_list + + +class BatchCollator(object): + """ + From a list of samples from the dataset, + returns the batched images and targets. + This should be passed to the DataLoader + """ + + def __init__(self, size_divisible=0): + self.size_divisible = size_divisible + + def __call__(self, batch): + transposed_batch = list(zip(*batch)) + images = to_image_list(transposed_batch[0], self.size_divisible) + targets = transposed_batch[1] + img_ids = transposed_batch[2] + return images, targets, img_ids diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/__init__.py new file mode 100644 index 0000000000..7f2692e4bc --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
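The grouping described above works by quantizing each image's height/width ratio against the bins in aspect_grouping, so with ASPECT_RATIO_GROUPING enabled every image falls into bucket 0 (wider than tall) or bucket 1 (taller than wide, or square). A toy run of that quantization, mirroring _quantize with invented aspect ratios:

    import bisect

    def quantize(values, bins):
        # same idea as _quantize above: bucket index of each value with respect to the sorted bins
        bins = sorted(bins)
        return [bisect.bisect_right(bins, v) for v in values]

    aspect_ratios = [0.75, 1.33, 1.0, 0.5]          # landscape, portrait, square, landscape (invented)
    group_ids = quantize(aspect_ratios, bins=[1])   # -> [0, 1, 1, 0]

These group ids are what GroupedBatchSampler later uses to keep every mini-batch inside a single bucket.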
+from .coco import COCODataset +from .concat_dataset import ConcatDataset + +__all__ = ["COCODataset", "ConcatDataset"] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py new file mode 100644 index 0000000000..fbfdf641bf --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +import torchvision +import numpy as np +import cv2 +import os +from maskrcnn_benchmark.structures.bounding_box import BoxList +from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask + + +class COCODataset(torchvision.datasets.coco.CocoDetection): + def __init__( + self, ann_file, root, remove_images_without_annotations, transforms=None + ): + super(COCODataset, self).__init__(root, ann_file) + + # sort indices for reproducible results + self.ids = sorted(self.ids) + + # filter images without detection annotations + if remove_images_without_annotations: + self.ids = [ + img_id + for img_id in self.ids + if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 + ] + + self.json_category_id_to_contiguous_id = { + v: i + 1 for i, v in enumerate(self.coco.getCatIds()) + } + self.contiguous_category_id_to_json_id = { + v: k for k, v in self.json_category_id_to_contiguous_id.items() + } + self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} + self.transforms = transforms + + def __getitem__(self, idx): + img, anno = super(COCODataset, self).__getitem__(idx) + + # filter crowd annotations + # TODO might be better to add an extra field + anno = [obj for obj in anno if obj["iscrowd"] == 0] + + boxes = [obj["bbox"] for obj in anno] + boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes + target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") + + classes = [obj["category_id"] for obj in anno] + classes = [self.json_category_id_to_contiguous_id[c] for c in classes] + classes = torch.tensor(classes) + target.add_field("labels", classes) + + masks = [obj["segmentation"] for obj in anno] + masks = SegmentationMask(masks, img.size) + + target.add_field("masks", masks) + + target = target.clip_to_image(remove_empty=True) + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target, idx + + def get_src_img(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target). target is a list of captions for the image. + """ + coco = self.coco + img_id = self.ids[index] + path = coco.loadImgs(img_id)[0]['file_name'] + + img = cv2.imread(os.path.join(self.root, path)) + + return img + + def get_img_info(self, index): + img_id = self.id_to_img_map[index] + img_data = self.coco.imgs[img_id] + return img_data diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/concat_dataset.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/concat_dataset.py new file mode 100644 index 0000000000..e5e087c420 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/concat_dataset.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
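COCODataset above also remaps COCO's sparse category ids onto a contiguous 1..N range (index 0 is left free for the background class) and keeps the inverse mapping so predictions can later be written back out with COCO's original ids. A toy version of both dictionaries, with a made-up handful of ids standing in for coco.getCatIds():

    json_cat_ids = [1, 2, 3, 4, 5, 6, 7, 9, 11]     # hypothetical ids: note the gaps at 8 and 10

    json_to_contiguous = {v: i + 1 for i, v in enumerate(json_cat_ids)}
    contiguous_to_json = {v: k for k, v in json_to_contiguous.items()}

    assert json_to_contiguous[9] == 8               # sparse id 9 becomes contiguous label 8
    assert contiguous_to_json[8] == 9               # and maps back when exporting COCO-format results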
+import bisect + +from torch.utils.data.dataset import ConcatDataset as _ConcatDataset + + +class ConcatDataset(_ConcatDataset): + """ + Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra + method for querying the sizes of the image + """ + + def get_idxs(self, idx): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + return dataset_idx, sample_idx + + def get_img_info(self, idx): + dataset_idx, sample_idx = self.get_idxs(idx) + return self.datasets[dataset_idx].get_img_info(sample_idx) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/list_dataset.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/list_dataset.py new file mode 100644 index 0000000000..9058d35b3d --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/list_dataset.py @@ -0,0 +1,36 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +Simple dataset class that wraps a list of path names +""" + +from PIL import Image + +from maskrcnn_benchmark.structures.bounding_box import BoxList + + +class ListDataset(object): + def __init__(self, image_lists, transforms=None): + self.image_lists = image_lists + self.transforms = transforms + + def __getitem__(self, item): + img = Image.open(self.image_lists[item]).convert("RGB") + + # dummy target + w, h = img.size + target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def __len__(self): + return len(self.image_lists) + + def get_img_info(self, item): + """ + Return the image dimensions for the image, without + loading and pre-processing it + """ + pass diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/__init__.py new file mode 100644 index 0000000000..27982cbe68 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .distributed import DistributedSampler +from .grouped_batch_sampler import GroupedBatchSampler +from .iteration_based_batch_sampler import IterationBasedBatchSampler + +__all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py new file mode 100644 index 0000000000..632443c62e --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py @@ -0,0 +1,67 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Code is copy-pasted exactly as in torch.utils.data.distributed, +# with a modification in the import to use the deprecated backend +# FIXME remove this once c10d fixes the bug it has +import math +import torch +import torch.distributed as dist +from torch.utils.data.sampler import Sampler + + +class DistributedSampler(Sampler): + """Sampler that restricts data loading to a subset of the dataset. + It is especially useful in conjunction with + :class:`torch.nn.parallel.DistributedDataParallel`. 
In such case, each + process can pass a DistributedSampler instance as a DataLoader sampler, + and load a subset of the original dataset that is exclusive to it. + .. note:: + Dataset is assumed to be of constant size. + Arguments: + dataset: Dataset used for sampling. + num_replicas (optional): Number of processes participating in + distributed training. + rank (optional): Rank of the current process within num_replicas. + """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + self.shuffle = True + + def __iter__(self): + if self.shuffle: + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = torch.arange(len(self.dataset)).tolist() + + # add extra samples to make it evenly divisible + indices += indices[: (self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + offset = self.num_samples * self.rank + indices = indices[offset : offset + self.num_samples] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py new file mode 100644 index 0000000000..d72e2f0265 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import itertools + +import torch +from torch.utils.data.sampler import BatchSampler +from torch.utils.data.sampler import Sampler + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that elements from the same group should appear in groups of batch_size. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. + + Arguments: + sampler (Sampler): Base sampler. + batch_size (int): Size of mini-batch. 
+ drop_uneven (bool): If ``True``, the sampler will drop the batches whose + size is less than ``batch_size`` + + """ + + def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): + if not isinstance(sampler, Sampler): + raise ValueError( + "sampler should be an instance of " + "torch.utils.data.Sampler, but got sampler={}".format(sampler) + ) + self.sampler = sampler + self.group_ids = torch.as_tensor(group_ids) + assert self.group_ids.dim() == 1 + self.batch_size = batch_size + self.drop_uneven = drop_uneven + + self.groups = torch.unique(self.group_ids).sort(0)[0] + + self._can_reuse_batches = False + + def _prepare_batches(self): + dataset_size = len(self.group_ids) + # get the sampled indices from the sampler + sampled_ids = torch.as_tensor(list(self.sampler)) + # potentially not all elements of the dataset were sampled + # by the sampler (e.g., DistributedSampler). + # construct a tensor which contains -1 if the element was + # not sampled, and a non-negative number indicating the + # order where the element was sampled. + # for example. if sampled_ids = [3, 1] and dataset_size = 5, + # the order is [-1, 1, -1, 0, -1] + order = torch.full((dataset_size,), -1, dtype=torch.int64) + order[sampled_ids] = torch.arange(len(sampled_ids)) + + # get a mask with the elements that were sampled + mask = order >= 0 + + # find the elements that belong to each individual cluster + clusters = [(self.group_ids == i) & mask for i in self.groups] + # get relative order of the elements inside each cluster + # that follows the order from the sampler + relative_order = [order[cluster] for cluster in clusters] + # with the relative order, find the absolute order in the + # sampled space + permutation_ids = [s[s.sort()[1]] for s in relative_order] + # permute each cluster so that they follow the order from + # the sampler + permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] + + # splits each cluster in batch_size, and merge as a list of tensors + splits = [c.split(self.batch_size) for c in permuted_clusters] + merged = tuple(itertools.chain.from_iterable(splits)) + + # now each batch internally has the right order, but + # they are grouped by clusters. Find the permutation between + # different batches that brings them as close as possible to + # the order that we have in the sampler. 
For that, we will consider the + # ordering as coming from the first element of each batch, and sort + # correspondingly + first_element_of_batch = [t[0].item() for t in merged] + # get and inverse mapping from sampled indices and the position where + # they occur (as returned by the sampler) + inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} + # from the first element in each batch, get a relative ordering + first_index_of_batch = torch.as_tensor( + [inv_sampled_ids_map[s] for s in first_element_of_batch] + ) + + # permute the batches so that they approximately follow the order + # from the sampler + permutation_order = first_index_of_batch.sort(0)[1].tolist() + # finally, permute the batches + batches = [merged[i].tolist() for i in permutation_order] + + if self.drop_uneven: + kept = [] + for batch in batches: + if len(batch) == self.batch_size: + kept.append(batch) + batches = kept + return batches + + def __iter__(self): + if self._can_reuse_batches: + batches = self._batches + self._can_reuse_batches = False + else: + batches = self._prepare_batches() + self._batches = batches + return iter(batches) + + def __len__(self): + if not hasattr(self, "_batches"): + self._batches = self._prepare_batches() + self._can_reuse_batches = True + return len(self._batches) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py new file mode 100644 index 0000000000..93452b6469 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from torch.utils.data.sampler import BatchSampler + + +class IterationBasedBatchSampler(BatchSampler): + """ + Wraps a BatchSampler, resampling from it until + a specified number of iterations have been sampled + """ + + def __init__(self, batch_sampler, num_iterations, start_iter=0): + self.batch_sampler = batch_sampler + self.num_iterations = num_iterations + self.start_iter = start_iter + + def __iter__(self): + iteration = self.start_iter + while iteration <= self.num_iterations: + # if the underlying sampler has a set_epoch method, like + # DistributedSampler, used for making each process see + # a different split of the dataset, then set it + if hasattr(self.batch_sampler.sampler, "set_epoch"): + self.batch_sampler.sampler.set_epoch(iteration) + for batch in self.batch_sampler: + iteration += 1 + if iteration > self.num_iterations: + break + yield batch + + def __len__(self): + return self.num_iterations diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/__init__.py new file mode 100644 index 0000000000..076f8e98f7 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
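The effect of GroupedBatchSampler above is easiest to see on a toy example: with group ids [0, 1, 0, 1, 0, 1] and a batch size of 2, every emitted batch stays inside a single group, and drop_uneven=True discards the short leftovers. A minimal usage sketch, assuming the maskrcnn_benchmark package added by this patch is importable:

    from torch.utils.data.sampler import SequentialSampler
    from maskrcnn_benchmark.data.samplers import GroupedBatchSampler

    dataset = list(range(6))                        # stand-in dataset of six items
    group_ids = [0, 1, 0, 1, 0, 1]                  # e.g. wide images vs tall images
    batch_sampler = GroupedBatchSampler(SequentialSampler(dataset), group_ids,
                                        batch_size=2, drop_uneven=True)
    print(list(batch_sampler))                      # [[0, 2], [1, 3]] -- the lone 4 and 5 are dropped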
+from .transforms import Compose +from .transforms import Resize +from .transforms import RandomHorizontalFlip +from .transforms import ToTensor +from .transforms import Normalize + +from .build import build_transforms + diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/build.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/build.py new file mode 100644 index 0000000000..94e8ea2d4e --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/build.py @@ -0,0 +1,40 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from . import transforms as T + + +def build_transforms(cfg, is_train=True): + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + flip_prob = 0.5 + resize = T.MultiScaleResize(min_size, max_size) + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + flip_prob = 0 + resize = T.Resize(min_size, max_size) + + to_bgr255 = cfg.INPUT.TO_BGR255 + normalize_transform = T.Normalize( + mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 + ) + if is_train: + transform = T.Compose( + [ + resize, + T.RandomHorizontalFlip(flip_prob), + T.ToTensor(), + normalize_transform, + T.BoxMaskPad(cfg) + ] + ) + else: + transform = T.Compose( + [ + resize, + T.RandomHorizontalFlip(flip_prob), + T.ToTensor(), + normalize_transform + ] + ) + return transform diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/transforms.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/transforms.py new file mode 100644 index 0000000000..5ba343aed4 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/transforms/transforms.py @@ -0,0 +1,176 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
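build_transforms above wires the test-time pipeline as a single-scale Resize, a no-op flip, ToTensor and the BGR*255 Normalize, while training additionally uses multi-scale resizing, random flipping and BoxMaskPad. The sketch below only exercises the test pipeline on a dummy image and box list; it assumes the default config added elsewhere in this patch defines the INPUT.* fields it reads, so treat it as illustrative rather than a tested snippet.

    from PIL import Image
    from maskrcnn_benchmark.config import cfg
    from maskrcnn_benchmark.data.transforms import build_transforms
    from maskrcnn_benchmark.structures.bounding_box import BoxList

    test_transform = build_transforms(cfg, is_train=False)     # Resize -> flip(p=0) -> ToTensor -> Normalize

    image = Image.new("RGB", (640, 480))                        # dummy image
    target = BoxList([[10, 10, 100, 100]], image.size, mode="xyxy")
    image_t, target_t = test_transform(image, target)           # image_t is a normalized BGR*255 tensor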
+import random +import math +import torch + +from torchvision.transforms import functional as F +from maskrcnn_benchmark.structures.segmentation_mask import Polygons + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target): + for t in self.transforms: + image, target = t(image, target) + return image, target + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + return format_string + + +class Resize(object): + def __init__(self, min_size, max_size): + self.min_size = min_size + self.max_size = max_size + + # modified from torchvision to add support for max size + def get_size(self, image_size): + w, h = image_size + size = self.min_size + max_size = self.max_size + if max_size is not None: + min_original_size = float(min((w, h))) + max_original_size = float(max((w, h))) + if max_original_size / min_original_size * size > max_size: + size = int(round(max_size * min_original_size / max_original_size)) + + if (w <= h and w == size) or (h <= w and h == size): + return (h, w) + + if w < h: + ow = size + oh = int(size * h / w) + else: + oh = size + ow = int(size * w / h) + + return (oh, ow) + + def __call__(self, image, target): + size = self.get_size(image.size) + image = F.resize(image, size) + target = target.resize(image.size) + target.size_before_pad = (size[1], size[0]) + return image, target + + +class MultiScaleResize(object): + def __init__(self, min_sizes, max_size): + self.resizers = [] + for min_size in min_sizes: + self.resizers.append(Resize(min_size, max_size)) + + def __call__(self, image, target): + resizer = random.choice(self.resizers) + image, target = resizer(image, target) + + return image, target + + +class RandomHorizontalFlip(object): + def __init__(self, prob=0.5): + self.prob = prob + + def __call__(self, image, target): + if random.random() < self.prob: + image = F.hflip(image) + target = target.transpose(0) + return image, target + + +class ToTensor(object): + def __call__(self, image, target): + return F.to_tensor(image), target + + +class Normalize(object): + def __init__(self, mean, std, to_bgr255=True): + self.mean = mean + self.std = std + self.to_bgr255 = to_bgr255 + + def __call__(self, image, target): + if self.to_bgr255: + image = image[[2, 1, 0]] * 255 + image = F.normalize(image, mean=self.mean, std=self.std) + return image, target + + +class ImgPad(object): + def __init__(self, cfg): + self.cfg = cfg + self.fix_shape = cfg.INPUT.FIX_SHAPE + self.amp = cfg.AMP + self.opt_level = cfg.OPT_LEVEL + + def _pad(self, image, target): + + pad_value = 0 + + dst_shape = (3, self.fix_shape[1], self.fix_shape[0]) + padding_size = [0, dst_shape[-1] - image.shape[-1], + 0, dst_shape[-2] - image.shape[-2]] + padded = torch.nn.functional.pad(image, padding_size, value=pad_value) + + if self.amp and (self.opt_level == "O1" or self.opt_level == "O2"): + padded = padded.to(torch.float16) + image_preprocess = padded.contiguous() + + target.size = (self.fix_shape[1], self.fix_shape[0]) + target.extra_fields['masks'].size = (self.fix_shape[1], self.fix_shape[0]) + for i in range(len(target.extra_fields['masks'].polygons)): + target.extra_fields['masks'].polygons[i].size = (self.fix_shape[1], self.fix_shape[0]) + + return image_preprocess, target + + def __call__(self, image, target): + image, target = self._pad(image, target) + + return image, target + + +class 
BoxMaskPad(object): + def __init__(self, cfg): + self.cfg = cfg + self.amp = cfg.AMP + self.opt_level = cfg.OPT_LEVEL + + def _pad(self, target): + + boxes_num = target.bbox.shape[0] + + max_len = int(math.ceil(boxes_num / 20)) * 20 + + if boxes_num < max_len: + diff_num = max_len - boxes_num + target.bbox = torch.cat([target.bbox, torch.zeros([diff_num, 4])], dim=0).contiguous() + + target.extra_fields['labels'] = torch.cat( + [target.extra_fields['labels'].long(), torch.full((diff_num,), -1, dtype=torch.long)], dim=0) + + target.extra_fields['masks'].polygons += [Polygons(p, target.extra_fields['masks'].size, mode=None) for p in + [torch.zeros([1], dtype=torch.float16)] * diff_num] + + else: + select_idx = torch.randperm(boxes_num)[:max_len] + # noinspection PyInterpreter + target.bbox = target.bbox[select_idx].contiguous() + target.extra_fields['labels'] = target.extra_fields['labels'][select_idx].long().contiguous() + + target.extra_fields['masks'].polygons = [target.extra_fields['masks'].polygons[idx] + for idx in select_idx.numpy().tolist()] + + return target + + def __call__(self, image, target): + + target = self._pad(target) + + return image, target diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/__init__.py new file mode 100644 index 0000000000..5c7f19c6c0 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py new file mode 100644 index 0000000000..29443353cb --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py @@ -0,0 +1,423 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import datetime +import logging +import tempfile +import time +import os +import cv2 +from collections import OrderedDict + +import torch + +from tqdm import tqdm + +from ..structures.bounding_box import BoxList +from ..utils.comm import is_main_process +from ..utils.comm import scatter_gather +from ..utils.comm import synchronize + +from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou + + +def compute_on_dataset(model, data_loader, device): + model.eval() + results_dict = {} + cpu_device = torch.device("cpu") + for i, batch in enumerate(tqdm(data_loader)): + images, targets, image_ids = batch + images = images.to(device) + with torch.no_grad(): + output = model(images) + output = [o.to(cpu_device) for o in output] + results_dict.update( + {img_id: result for img_id, result in zip(image_ids, output)} + ) + return results_dict + + +def prepare_for_coco_detection(predictions, dataset): + coco_results = [] + for image_id, prediction in enumerate(tqdm(predictions)): + original_id = dataset.id_to_img_map[image_id] + if len(prediction) == 0: + continue + + # TODO replace with get_img_info? 
+ image_width = dataset.coco.imgs[original_id]["width"] + image_height = dataset.coco.imgs[original_id]["height"] + prediction = prediction.resize((image_width, image_height)) + prediction = prediction.convert("xywh") + + boxes = prediction.bbox.tolist() + scores = prediction.get_field("scores").tolist() + labels = prediction.get_field("labels").tolist() + coco_result = [] + for k, box in enumerate(boxes): + if labels[k] >= 1: + coco_result.append( + { + "image_id": original_id, + "category_id": dataset.contiguous_category_id_to_json_id[labels[k]], + "bbox": box, + "score": scores[k], + } + ) + src_img = dataset.get_src_img(image_id) + for res in coco_result: + box = res['bbox'] + if res['score'] > 0.1: + cv2.rectangle(src_img, (int(box[0]), int(box[1])), + (int(box[0]) + int(box[2]) - 1, int(box[1]) + int(box[3]) - 1), + (0, 0, 255), 2) + cv2.imwrite(f'./demo/{str(image_id)}.jpg', src_img) + + coco_results.extend(coco_result) + return coco_results + + +def prepare_for_coco_segmentation(predictions, dataset): + import pycocotools.mask as mask_util + import numpy as np + + masker = Masker(threshold=0.5, padding=1) + coco_results = [] + for image_id, prediction in enumerate(tqdm(predictions)): + original_id = dataset.id_to_img_map[image_id] + if len(prediction) == 0: + continue + + # TODO replace with get_img_info? + image_width = dataset.coco.imgs[original_id]["width"] + image_height = dataset.coco.imgs[original_id]["height"] + prediction = prediction.resize((image_width, image_height)) + masks = prediction.get_field("mask") + masks = masker(masks, prediction) + + scores = prediction.get_field("scores").tolist() + labels = prediction.get_field("labels").tolist() + + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0] + for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + coco_result = [] + for k, rle in enumerate(rles): + if labels[k] >= 1: + coco_result.append( + { + "image_id": original_id, + "category_id": dataset.contiguous_category_id_to_json_id[labels[k]], + "segmentation": rle, + "score": scores[k], + } + ) + coco_results.extend(coco_result) + + return coco_results + + +def evaluate_box_proposals( + predictions, dataset, thresholds=None, area="all", limit=None +): + """Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. + """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for image_id, prediction in enumerate(tqdm(predictions)): + original_id = dataset.id_to_img_map[image_id] + + # TODO replace with get_img_info? 
+ image_width = dataset.coco.imgs[original_id]["width"] + image_height = dataset.coco.imgs[original_id]["height"] + prediction = prediction.resize((image_width, image_height)) + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = prediction.get_field("objectness").sort(descending=True)[1] + prediction = prediction[inds] + + ann_ids = dataset.coco.getAnnIds(imgIds=original_id) + anno = dataset.coco.loadAnns(ann_ids) + gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = BoxList(gt_boxes, (image_width, image_height), mode="xywh").convert( + "xyxy" + ) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if len(prediction) == 0: + continue + + if limit is not None and len(prediction) > limit: + prediction = prediction[:limit] + + overlaps = boxlist_iou(prediction, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(prediction), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = torch.cat(gt_overlaps, dim=0) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def evaluate_predictions_on_coco( + coco_gt, coco_results, json_result_file, iou_type="bbox" +): + import json + + with open(json_result_file, "w") as f: + json.dump(coco_results, f) + + from pycocotools.cocoeval import COCOeval + + coco_dt = coco_gt.loadRes(str(json_result_file)) + coco_eval = COCOeval(coco_gt, coco_dt, iou_type) + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + return coco_eval + + +def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu): + all_predictions = scatter_gather(predictions_per_gpu) + if not is_main_process(): + return + # merge the list of dicts + predictions = {} + for p in all_predictions: + predictions.update(p) + # convert a dict where the key is the index in a list + image_ids = list(sorted(predictions.keys())) + if len(image_ids) != image_ids[-1] + 1: + logger = logging.getLogger("maskrcnn_benchmark.inference") + logger.warning( + "Number of images that were gathered from multiple processes is not " + "a contiguous 
set. Some images might be missing from the evaluation" + ) + + # convert to a list + predictions = [predictions[i] for i in image_ids] + return predictions + + +class COCOResults(object): + METRICS = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "box_proposal": [ + "AR@100", + "ARs@100", + "ARm@100", + "ARl@100", + "AR@1000", + "ARs@1000", + "ARm@1000", + "ARl@1000", + ], + "keypoint": ["AP", "AP50", "AP75", "APm", "APl"], + } + + def __init__(self, *iou_types): + allowed_types = ("box_proposal", "bbox", "segm") + assert all(iou_type in allowed_types for iou_type in iou_types) + results = OrderedDict() + for iou_type in iou_types: + results[iou_type] = OrderedDict( + [(metric, -1) for metric in COCOResults.METRICS[iou_type]] + ) + self.results = results + + def update(self, coco_eval): + if coco_eval is None: + return + from pycocotools.cocoeval import COCOeval + + assert isinstance(coco_eval, COCOeval) + s = coco_eval.stats + iou_type = coco_eval.params.iouType + res = self.results[iou_type] + metrics = COCOResults.METRICS[iou_type] + for idx, metric in enumerate(metrics): + res[metric] = s[idx] + + def __repr__(self): + # TODO make it pretty + return repr(self.results) + + +def check_expected_results(results, expected_results, sigma_tol): + if not expected_results: + return + + logger = logging.getLogger("maskrcnn_benchmark.inference") + for task, metric, (mean, std) in expected_results: + actual_val = results.results[task][metric] + lo = mean - sigma_tol * std + hi = mean + sigma_tol * std + ok = (lo < actual_val) and (actual_val < hi) + msg = ( + "{} > {} sanity check (actual vs. expected): " + "{:.3f} vs. mean={:.4f}, std={:.4}, range=({:.4f}, {:.4f})" + ).format(task, metric, actual_val, mean, std, lo, hi) + if not ok: + msg = "FAIL: " + msg + logger.error(msg) + else: + msg = "PASS: " + msg + logger.info(msg) + + +def inference( + model, + data_loader, + iou_types=("bbox",), + box_only=False, + device="cuda", + expected_results=(), + expected_results_sigma_tol=4, + output_folder=None, +): + # convert to a torch.device for efficiency + device = torch.device(device) + num_devices = ( + torch.distributed.get_world_size() + if torch.distributed.is_initialized() + else 1 + ) + logger = logging.getLogger("maskrcnn_benchmark.inference") + dataset = data_loader.dataset + logger.info("Start evaluation on {} images".format(len(dataset))) + start_time = time.time() + predictions = compute_on_dataset(model, data_loader, device) + # wait for all processes to complete before measuring the time + synchronize() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=total_time)) + logger.info( + "Total inference time: {} ({} s / img per device, on {} devices)".format( + total_time_str, total_time * num_devices / len(dataset), num_devices + ) + ) + + predictions = _accumulate_predictions_from_multiple_gpus(predictions) + if not is_main_process(): + return + + if output_folder: + torch.save(predictions, os.path.join(output_folder, "predictions.pth")) + + if box_only: + logger.info("Evaluating bbox proposals") + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + res = COCOResults("box_proposal") + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = evaluate_box_proposals( + predictions, dataset, area=area, limit=limit + ) + key = "AR{}@{:d}".format(suffix, limit) + res.results["box_proposal"][key] = stats["ar"].item() + logger.info(res) + check_expected_results(res, 
expected_results, expected_results_sigma_tol) + if output_folder: + torch.save(res, os.path.join(output_folder, "box_proposals.pth")) + return + logger.info("Preparing results for COCO format") + coco_results = {} + if "bbox" in iou_types: + logger.info("Preparing bbox results") + coco_results["bbox"] = prepare_for_coco_detection(predictions, dataset) + if "segm" in iou_types: + logger.info("Preparing segm results") + coco_results["segm"] = prepare_for_coco_segmentation(predictions, dataset) + + results = COCOResults(*iou_types) + logger.info("Evaluating predictions") + for iou_type in iou_types: + with tempfile.NamedTemporaryFile() as f: + file_path = f.name + if output_folder: + file_path = os.path.join(output_folder, iou_type + ".json") + res = evaluate_predictions_on_coco( + dataset.coco, coco_results[iou_type], file_path, iou_type + ) + results.update(res) + logger.info(results) + check_expected_results(results, expected_results, expected_results_sigma_tol) + if output_folder: + torch.save(results, os.path.join(output_folder, "coco_results.pth")) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py new file mode 100644 index 0000000000..75a456f757 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -0,0 +1,106 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import datetime +import logging +import time + +from apex import amp +from maskrcnn_benchmark.utils.metric_logger import MetricLogger + + +class GetFPS(object): + def __init__(self, max_len): + self.max_len = max_len + self.all_fps = [] + self.mean_fps = 0 + + def update(self, batch_time): + self.all_fps.append(1 / batch_time) + if len(self.all_fps) > self.max_len: + del self.all_fps[0] + self.mean_fps = sum(self.all_fps) / len(self.all_fps) + + +def do_train( + model, + data_loader, + optimizer, + scheduler, + checkpointer, + device, + checkpoint_period, + arguments, + isAMP, + local_rank +): + logger = logging.getLogger("maskrcnn_benchmark.trainer") + logger.info("Start training") + meters = MetricLogger(delimiter=" ") + max_iter = len(data_loader) + start_iter = arguments["iteration"] + model.train() + start_training_time = time.time() + end = time.time() + get_fps = GetFPS(100) + for iteration, (images, targets, _) in enumerate(data_loader, start_iter): + + if local_rank == 0: + print('=====iter%d' % iteration) + + data_time = time.time() - end + arguments["iteration"] = iteration + + scheduler.step() + + images = images.to(device, non_blocking=True) + targets = [target.to(device, non_blocking=True) for target in targets] + + loss_dict = model(images, targets) + + losses = sum(loss for loss in loss_dict.values()) + + meters.update(loss=losses, **loss_dict) + + optimizer.zero_grad() + if isAMP: + with amp.scale_loss(losses, optimizer) as scaled_loss: + scaled_loss.backward() + else: + losses.backward() + optimizer.step() + + batch_time = time.time() - end + end = time.time() + meters.update(time=batch_time, data=data_time) + + get_fps.update(batch_time) + + eta_seconds = meters.time.global_avg * (max_iter - iteration) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + + if local_rank == 0: + logger.info( + meters.delimiter.join( + [ + "eta: {eta}", + "iter: {iter}", + "{meters}", + "lr: {lr:.6f}", + "max mem: {memory:.0f}", + ] + ).format( + eta=eta_string, + iter=iteration, + meters=str(meters), + 
lr=optimizer.param_groups[0]["lr"], + # memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, + memory=0 + ) + ) + if iteration % checkpoint_period == 0 and iteration > 0: + checkpointer.save("model_{:07d}".format(iteration + 1), **arguments) + checkpointer.save("model_{:07d}".format(iteration), **arguments) + total_training_time = time.time() - start_training_time + total_time_str = str(datetime.timedelta(seconds=total_training_time)) + if local_rank == 0: + logger.info("Total training time: {}".format(total_time_str)) + logger.info("step_fps: {:.4f}".format(get_fps.mean_fps)) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py new file mode 100644 index 0000000000..f2dbb0c236 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +from .batch_norm import FrozenBatchNorm2d +from .misc import Conv2d +from .misc import ConvTranspose2d +from .misc import interpolate +from .nms import nms +from .npu_roi_align import ROIAlign +from .npu_roi_align import roi_align +from .roi_pool import ROIPool +from .roi_pool import roi_pool +from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss +from .sigmoid_focal_loss import SigmoidFocalLoss +from .adjust_smooth_l1_loss import AdjustSmoothL1Loss + +__all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", + "smooth_l1_loss", "SmoothL1Loss", "Conv2d", "ConvTranspose2d", + "interpolate", "FrozenBatchNorm2d", "SigmoidFocalLoss", + "AdjustSmoothL1Loss"] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py new file mode 100644 index 0000000000..bda1b6fca7 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py @@ -0,0 +1,38 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import glob +import os.path +import torch + +try: + from torch.utils.cpp_extension import load as load_ext + from torch.utils.cpp_extension import CUDA_HOME +except ImportError: + raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher") + + +def _load_C_extensions(): + this_dir = os.path.dirname(os.path.abspath(__file__)) + this_dir = os.path.dirname(this_dir) + this_dir = os.path.join(this_dir, "csrc") + + main_file = glob.glob(os.path.join(this_dir, "*.cpp")) + source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) + source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) + + source = main_file + source_cpu + + extra_cflags = [] + if torch.cuda.is_available() and CUDA_HOME is not None: + source.extend(source_cuda) + extra_cflags = ["-DWITH_CUDA"] + source = [os.path.join(this_dir, s) for s in source] + extra_include_paths = [this_dir] + return load_ext( + "torchvision", + source, + extra_cflags=extra_cflags, + extra_include_paths=extra_include_paths, + ) + + +_C = _load_C_extensions() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py new file mode 100644 index 0000000000..7bef251a50 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. +import torch +from torch import nn +import logging + + +class AdjustSmoothL1Loss(nn.Module): + + def __init__(self, num_features, momentum=0.1, beta=1. / 9): + super(AdjustSmoothL1Loss, self).__init__() + self.num_features = num_features + self.momentum = momentum + self.beta = beta + self.register_buffer( + 'running_mean', torch.empty(num_features).fill_(beta) + ) + self.register_buffer('running_var', torch.zeros(num_features)) + self.logger = logging.getLogger("maskrcnn_benchmark.trainer") + + def forward(self, inputs, target, size_average=True): + + n = torch.abs(inputs - target) + n_mean = n.mean(dim=0) + n_var = n.var(dim=0) + with torch.no_grad(): + if torch.isnan(n_var).sum().item() == 0: + self.running_mean = self.running_mean.to(n.device) + self.running_mean *= (1 - self.momentum) + self.running_mean += (self.momentum * n_mean) + self.running_var = self.running_var.to(n.device) + self.running_var *= (1 - self.momentum) + self.running_var += (self.momentum * n_var) + + beta = (self.running_mean - self.running_var) + beta = beta.clamp(max=self.beta, min=1e-3) + + beta = beta.view(-1, self.num_features).to(n.device) + cond = n < beta.expand_as(n) + loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) + if size_average: + return loss.mean() + return loss.sum() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/batch_norm.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/batch_norm.py new file mode 100644 index 0000000000..903607ac38 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/batch_norm.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +from torch import nn + + +class FrozenBatchNorm2d(nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters + are fixed + """ + + def __init__(self, n): + super(FrozenBatchNorm2d, self).__init__() + self.register_buffer("weight", torch.ones(n)) + self.register_buffer("bias", torch.zeros(n)) + self.register_buffer("running_mean", torch.zeros(n)) + self.register_buffer("running_var", torch.ones(n)) + + def forward(self, x): + scale = self.weight * self.running_var.rsqrt() + bias = self.bias - self.running_mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + return x * scale + bias diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py new file mode 100644 index 0000000000..40fb7a1136 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py @@ -0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +helper class that supports empty tensors on some nn functions. + +Ideally, add support directly in PyTorch to empty tensors in +those functions. 
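# Illustrative sketch (standalone, made-up tensors): FrozenBatchNorm2d above folds the fixed
# statistics into a per-channel scale and bias, scale = weight * rsqrt(running_var) and
# bias = bias - running_mean * scale. The check below confirms that folding matches
# eval-mode batch norm with eps=0, which is what the module assumes.
import torch
import torch.nn.functional as F

n = 8
x = torch.randn(2, n, 5, 5)
weight, bias = torch.randn(n), torch.randn(n)
running_mean, running_var = torch.randn(n), torch.rand(n) + 0.5

scale = weight * running_var.rsqrt()
shift = bias - running_mean * scale
folded = x * scale.reshape(1, -1, 1, 1) + shift.reshape(1, -1, 1, 1)

reference = F.batch_norm(x, running_mean, running_var, weight, bias,
                         training=False, eps=0.0)
assert torch.allclose(folded, reference, atol=1e-5)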
+ +This can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +import math +import torch +from torch.nn.modules.utils import _ntuple + + +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class Conv2d(torch.nn.Conv2d): + def forward(self, x): + if x.numel() > 0: + return super(Conv2d, self).forward(x) + # get output shape + + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // d + 1 + for i, p, di, k, d in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + +class ConvTranspose2d(torch.nn.ConvTranspose2d): + def forward(self, x): + if x.numel() > 0: + return super(ConvTranspose2d, self).forward(x) + # get output shape + + output_shape = [ + (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op + for i, p, di, k, d, op in zip( + x.shape[-2:], + self.padding, + self.dilation, + self.kernel_size, + self.stride, + self.output_padding, + ) + ] + output_shape = [x.shape[0], self.bias.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + +def interpolate( + input, size=None, scale_factor=None, mode="nearest", align_corners=None +): + if input.numel() > 0: + return torch.nn.functional.interpolate( + input, size, scale_factor, mode, align_corners + ) + + def _check_size_scale_factor(dim): + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if ( + scale_factor is not None + and isinstance(scale_factor, tuple) + and len(scale_factor) != dim + ): + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + + def _output_size(dim): + _check_size_scale_factor(dim) + if size is not None: + return size + scale_factors = _ntuple(dim)(scale_factor) + # math.floor might return float in py2.7 + return [ + int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + ] + + output_shape = tuple(_output_size(2)) + output_shape = input.shape[:-2] + output_shape + return _NewEmptyTensorOp.apply(input, output_shape) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py new file mode 100644 index 0000000000..be7d96233c --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py @@ -0,0 +1,41 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
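# Illustrative sketch: the empty-tensor Conv2d wrapper above derives the output spatial size as
# floor((i + 2*p - (dilation*(k-1) + 1)) / stride) + 1 per dimension. This standalone check
# (made-up shapes) compares that arithmetic against an actual nn.Conv2d on a non-empty input.
import torch
from torch import nn

conv = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, dilation=1)
x = torch.randn(1, 3, 37, 53)

expected_hw = [
    (i + 2 * p - (d * (k - 1) + 1)) // s + 1
    for i, p, d, k, s in zip(x.shape[-2:], conv.padding, conv.dilation,
                             conv.kernel_size, conv.stride)
]
assert list(conv(x).shape[-2:]) == expected_hw  # [19, 27]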
+ +import torch + + +def py_cpu_nms(boxes, scores, thresh): + boxes = boxes.cpu() + scores = scores.cpu() + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + scores = scores + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + _, order = torch.sort(scores, descending=True) + + keep = [] + while len(order) > 0: + i = order[0] + keep.append(i) + xx1 = torch.max(x1[i], x1[order[1:]]) + yy1 = torch.max(y1[i], y1[order[1:]]) + xx2 = torch.min(x2[i], x2[order[1:]]) + yy2 = torch.min(y2[i], y2[order[1:]]) + + w = torch.max(torch.tensor(0.0), xx2 - xx1 + 1) + h = torch.max(torch.tensor(0.0), yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = torch.where(ovr <= thresh)[0] + order = order[inds + 1] + + res_keep = torch.tensor(keep) + + return res_keep + + +nms = py_cpu_nms diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/npu_roi_align.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/npu_roi_align.py new file mode 100644 index 0000000000..90d3168610 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/npu_roi_align.py @@ -0,0 +1,123 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from torch import nn + +from torch.nn.modules.utils import _pair +from torch.autograd import Function +from torch.autograd.function import once_differentiable + + +class _ROIAlign(Function): + @staticmethod + def forward(ctx, input_tensor, roi, output_size, spatial_scale, sampling_ratio, aligned): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input_tensor.size() + ctx.aligned = aligned + roi_end_mode = 0 + output = torch.npu_roi_align( + input_tensor, roi, spatial_scale, + output_size[0], output_size[1], sampling_ratio, roi_end_mode) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + (rois,) = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + + grad_input = torch.npu_roi_alignbk( + grad_output, rois, ctx.input_shape, + output_size[0], output_size[1], + spatial_scale, sampling_ratio) + + return grad_input, None, None, None, None, None + + +roi_align = _ROIAlign.apply + + +# NOTE: torchvision's RoIAlign has a different default aligned=False +class ROIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): + """ROIAlign using npu api. + + Origin implement from detectron2 is + https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/roi_align.py#L7 + + The input parameters of the interface are the same, but due to the different implementation of the operator, + the accuracy is different from that of CPU and GPU. 
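# Minimal usage sketch for py_cpu_nms above, assuming the maskrcnn_benchmark package from this
# tree is importable; the boxes and scores are made-up values. Boxes are (x1, y1, x2, y2) with
# the "+1" area convention: the second box overlaps the first heavily (IoU ~ 0.82) and is
# suppressed at thresh=0.5, while the disjoint third box is kept.
import torch
from maskrcnn_benchmark.layers.nms import py_cpu_nms

boxes = torch.tensor([
    [0.0, 0.0, 100.0, 100.0],
    [5.0, 5.0, 105.0, 105.0],      # heavy overlap with box 0
    [200.0, 200.0, 300.0, 300.0],  # far away from both
])
scores = torch.tensor([0.9, 0.8, 0.7])

keep = py_cpu_nms(boxes, scores, thresh=0.5)
print(keep)  # indices 0 and 2 survive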
+ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + aligned (bool): if False, use the legacy implementation in + Detectron. If True, align the results more perfectly. + + Note: + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). But the original + roi_align (aligned=False) does not subtract the 0.5 when computing neighboring + pixel indices and therefore it uses pixels with a slightly incorrect alignment + (relative to our pixel model) when performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; see + detectron2/tests/test_roi_align.py for verification. + + The difference does not make a difference to the model's performance if + ROIAlign is used together with conv layers. + """ + super(ROIAlign, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + def forward(self, input_tensor, rois): + """ + Args: + input_tensor: NCHW images + rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. + """ + assert rois.dim() == 2 and rois.size(1) == 5 + return roi_align( + input_tensor.float(), rois, self.output_size, + self.spatial_scale, self.sampling_ratio, self.aligned + ) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ", aligned=" + str(self.aligned) + tmpstr += ")" + return tmpstr diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py new file mode 100644 index 0000000000..c68a45f3db --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py @@ -0,0 +1,67 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
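# Illustrative sketch of the roi format the ROIAlign above consumes: rois has shape (B, 5),
# the first column is the image index within the batch and the rest are (x1, y1, x2, y2) in
# input-image coordinates. As a CPU reference (assuming torchvision is available; as noted in
# the docstring above, the NPU operator may differ numerically):
import torch
from torchvision.ops import roi_align as tv_roi_align

features = torch.randn(2, 256, 50, 50)            # NCHW feature map at stride 4
rois = torch.tensor([
    [0.0, 10.0, 10.0, 110.0, 110.0],              # box on image 0
    [1.0, 40.0, 60.0, 200.0, 180.0],              # box on image 1
])
pooled = tv_roi_align(features, rois, output_size=(7, 7),
                      spatial_scale=0.25, sampling_ratio=2, aligned=True)
print(pooled.shape)  # torch.Size([2, 256, 7, 7])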
+import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from maskrcnn_benchmark import _C + + +class _ROIAlign(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input.size() + output = _C.roi_align_forward( + input.cpu(), roi.cpu(), spatial_scale, output_size[0], output_size[1], sampling_ratio + ) + return output.npu() + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + grad_input = _C.roi_align_backward( + grad_output, + rois, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ) + return grad_input, None, None, None, None + + +roi_align = _ROIAlign.apply + + +class ROIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio): + super(ROIAlign, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input, rois): + return torch.npu_roi_align(input, rois, self.spatial_scale, self.output_size, self.output_size, + self.sample_num, 0) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ")" + return tmpstr diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py new file mode 100644 index 0000000000..c0e42756ee --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py @@ -0,0 +1,63 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from maskrcnn_benchmark import _C + + +class _ROIPool(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.input_shape = input.size() + output, argmax = _C.roi_pool_forward( + input, roi, spatial_scale, output_size[0], output_size[1] + ) + ctx.save_for_backward(input, roi, argmax) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, rois, argmax = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + bs, ch, h, w = ctx.input_shape + grad_input = _C.roi_pool_backward( + grad_output, + input, + rois, + argmax, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + ) + return grad_input, None, None, None + + +roi_pool = _ROIPool.apply + + +class ROIPool(nn.Module): + def __init__(self, output_size, spatial_scale): + super(ROIPool, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ")" + return tmpstr diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py new file mode 100644 index 0000000000..50345b79cd --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py @@ -0,0 +1,95 @@ +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +import torch.nn.functional as F +from maskrcnn_benchmark import _C + + +class _SigmoidFocalLoss(Function): + @staticmethod + def forward(ctx, logits, targets, num_classes, gamma, alpha): + ctx.save_for_backward(logits, targets); + ctx.num_classes = num_classes + ctx.gamma = gamma + ctx.alpha = alpha + + losses = _C.sigmoid_focalloss_forward( + logits, targets, num_classes, gamma, alpha + ) + return losses + + @staticmethod + @once_differentiable + def backward(ctx, d_loss): + logits, targets = ctx.saved_tensors + num_classes = ctx.num_classes + gamma = ctx.gamma + alpha = ctx.alpha + d_loss = d_loss.contiguous() + d_logits = _C.sigmoid_focalloss_backward( + logits, targets, d_loss, num_classes, gamma, alpha + ) + return d_logits, None, None, None, None + + +def py_sigmoid_focal_loss(pred, + target, + gamma=2.0, + alpha=0.25): + """PyTorch version of `Focal Loss `_. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the + number of classes + target (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 0.25. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. 
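# Numeric sketch of the focal-loss modulating factor that py_sigmoid_focal_loss applies
# (illustrative logits only): with p_t the probability assigned to the true class, the factor
# (alpha*t + (1-alpha)*(1-t)) * (1 - p_t)**gamma shrinks the loss of a confidently-correct
# prediction far more than that of a hard, misclassified one.
import torch

def focal_weight(pred_logit, target, gamma=2.0, alpha=0.25):
    p = torch.sigmoid(pred_logit)
    p_t = p * target + (1 - p) * (1 - target)      # probability of the true class
    return (alpha * target + (1 - alpha) * (1 - target)) * (1 - p_t) ** gamma

easy_pos = focal_weight(torch.tensor(4.0), torch.tensor(1.0))   # p ~ 0.98 -> weight ~ 8e-5
hard_pos = focal_weight(torch.tensor(-2.0), torch.tensor(1.0))  # p ~ 0.12 -> weight ~ 0.19
print(easy_pos.item(), hard_pos.item())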
+ """ + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + focal_weight = (alpha * target + (1 - alpha) * + (1 - target)) * pt.pow(gamma) + loss = F.binary_cross_entropy_with_logits( + pred, target, reduction='none') * focal_weight + + return loss + + +sigmoid_focalloss = _SigmoidFocalLoss.apply + + +class SigmoidFocalLoss(nn.Module): + def __init__(self, num_classes, gamma, alpha): + super(SigmoidFocalLoss, self).__init__() + self.num_classes = num_classes + self.gamma = gamma + self.alpha = alpha + + def forward(self, logits, targets): + num_classes = logits.size(1) + 1 + valid = (targets != -1)[:, None] + targets = F.one_hot(targets.abs(), num_classes=num_classes) + targets = targets[:, 1:num_classes] + loss = py_sigmoid_focal_loss( + logits, targets, self.gamma, self.alpha + ) + loss = loss * valid + + return loss.sum() + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "num_classes=" + str(self.num_classes) + tmpstr += ", gamma=" + str(self.gamma) + tmpstr += ", alpha=" + str(self.alpha) + tmpstr += ")" + return tmpstr diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py new file mode 100644 index 0000000000..859b538cea --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py @@ -0,0 +1,30 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +import numpy as np + + +class SmoothL1Loss(torch.nn.Module): + def __init__(self, beta=1. / 9): + super(SmoothL1Loss, self).__init__() + self.beta = beta + + def forward(self, input, target, size_average=True): + return smooth_l1_loss(input, target, size_average=size_average) + + +# TODO maybe push this to nn? +def smooth_l1_loss(input, target, alpha=0.5, gamma=1.5, beta=1.0, size_average=True): + """ + very similar to the smooth_l1_loss from pytorch, but with + the extra beta parameter + """ + + diff = torch.abs(input - target) + b = np.e ** (gamma / alpha) - 1 + cond = diff < beta + neg_cond = (~cond) + loss = (alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff) * cond.half() + loss = loss + (gamma * diff + gamma / b - alpha * beta) * neg_cond.half() + if size_average: + return loss.mean() + return loss.sum() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/__init__.py new file mode 100644 index 0000000000..4b3da17b81 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .backbone import build_backbone diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/backbone.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/backbone.py new file mode 100644 index 0000000000..aca58f4e5b --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/backbone.py @@ -0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. +from collections import OrderedDict + +from torch import nn + +from . import fpn as fpn_module +from . import resnet + + +def build_resnet_backbone(cfg): + body = resnet.ResNet(cfg) + model = nn.Sequential(OrderedDict([("body", body)])) + return model + + +def build_resnet_fpn_backbone(cfg): + body = resnet.ResNet(cfg) + in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS + fpn = fpn_module.FPN( + in_channels_list=[ + in_channels_stage2, + in_channels_stage2 * 2, + in_channels_stage2 * 4, + in_channels_stage2 * 8, + ], + out_channels=out_channels, + top_blocks=fpn_module.LastLevelMaxPool(), + use_gn=cfg.MODEL.USE_GN + ) + model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) + return model + + +def build_resnet_fpn_p3p7_backbone(cfg): + body = resnet.ResNet(cfg) + in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS + fpn = fpn_module.FPN( + in_channels_list=[ + 0, + in_channels_stage2 * 2, + in_channels_stage2 * 4, + in_channels_stage2 * 8, + ], + out_channels=out_channels, + top_blocks=fpn_module.LastLevelP6P7(out_channels), + use_gn=cfg.MODEL.USE_GN + ) + model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) + return model + + +_BACKBONES = {"resnet": build_resnet_backbone, + "resnet-fpn": build_resnet_fpn_backbone, + "resnet-fpn-retina": build_resnet_fpn_p3p7_backbone, + } + + +def build_resnet_fpn_p2p7_backbone(cfg): + body = resnet.ResNet(cfg) + in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS + fpn = fpn_module.FPN( + in_channels_list=[ + in_channels_stage2, + in_channels_stage2 * 2, + in_channels_stage2 * 4, + in_channels_stage2 * 8, + ], + out_channels=out_channels, + top_blocks=fpn_module.LastLevelP6P7(out_channels), + use_gn=cfg.MODEL.USE_GN + ) + model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) + return model + + +_BACKBONES = {"resnet": build_resnet_backbone, + "resnet-fpn": build_resnet_fpn_backbone, + "resnet-fpn-retina": build_resnet_fpn_p3p7_backbone, + } + + +def build_backbone(cfg): + assert cfg.MODEL.BACKBONE.CONV_BODY.startswith( + "R-" + ), "Only ResNet and ResNeXt models are currently implemented" + # Models using FPN end with "-FPN" + if cfg.MODEL.BACKBONE.CONV_BODY.endswith("-FPN"): + if cfg.RETINANET.RETINANET_ON: + if cfg.RETINANET.BACKBONE == "p3p7": + return build_resnet_fpn_p3p7_backbone(cfg) + elif cfg.RETINANET.BACKBONE == "p2p7": + return build_resnet_fpn_p2p7_backbone(cfg) + else: + raise Exception("Wrong Setting {}:{}".format( + 'cfg.RETINANET.BACKBONE', cfg.RETINANET.BACKBBACKBONE)) + else: + return build_resnet_fpn_backbone(cfg) + return build_resnet_backbone(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py new file mode 100644 index 0000000000..0916755c90 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch.nn.functional as F +from torch import nn + + +class FPN(nn.Module): + """ + Module that adds FPN on top of a list of feature maps. 
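# Small arithmetic sketch for the FPN backbone builders above, assuming the common defaults
# RES2_OUT_CHANNELS=256 and BACKBONE.OUT_CHANNELS=256: each ResNet stage doubles the channel
# count, so the lateral inputs to the FPN are [256, 512, 1024, 2048]; the p3p7 variant passes
# 0 for the stride-4 level so that it is skipped.
res2_out_channels = 256  # assumed cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
in_channels_list = [res2_out_channels * 2 ** i for i in range(4)]
print(in_channels_list)            # [256, 512, 1024, 2048]
print([0] + in_channels_list[1:])  # [0, 512, 1024, 2048] -> p3p7 RetinaNet backbone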
+ The feature maps are currently supposed to be in increasing depth + order, and must be consecutive + """ + + def __init__(self, in_channels_list, out_channels, top_blocks=None, + use_gn=False): + """ + Arguments: + in_channels_list (list[int]): number of channels for each feature map that + will be fed + out_channels (int): number of channels of the FPN representation + top_blocks (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list + """ + super(FPN, self).__init__() + self.inner_blocks = [] + self.layer_blocks = [] + # If in_channels is 0, it would be used. + self.valid_layers = [i > 0 for i in in_channels_list] + for idx, in_channels in enumerate(in_channels_list, 1): + inner_block = "fpn_inner{}".format(idx) + layer_block = "fpn_layer{}".format(idx) + + if in_channels == 0: + continue + + if use_gn: + inner_block_module = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1), + nn.GroupNorm(32, out_channels)) + layer_block_module = nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, 1, 1), + nn.GroupNorm(32, out_channels)) + else: + inner_block_module = nn.Conv2d(in_channels, out_channels, 1) + layer_block_module = nn.Conv2d(out_channels, out_channels, 3, 1, 1) + + for module in [inner_block_module, layer_block_module]: + for m in module.modules(): + if isinstance(m, nn.Conv2d): + # Caffe2 implementation uses XavierFill, which in fact + # corresponds to kaiming_uniform_ in PyTorch + nn.init.kaiming_uniform_(m.weight, a=1) + nn.init.constant_(m.bias, 0) + if isinstance(m, nn.GroupNorm): + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0) + + self.add_module(inner_block, inner_block_module) + self.add_module(layer_block, layer_block_module) + self.inner_blocks.append(inner_block) + self.layer_blocks.append(layer_block) + self.top_blocks = top_blocks + + def forward(self, x): + """ + Arguments: + x (list[Tensor]): feature maps for each feature level. + Returns: + results (tuple[Tensor]): feature maps after FPN layers. + They are ordered from highest resolution first. + """ + last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) + results = [] + results.append(getattr(self, self.layer_blocks[-1])(last_inner)) + for feature, inner_block, layer_block in zip( + x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] + ): + if len(inner_block): + inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") + inner_lateral = getattr(self, inner_block)(feature) + # TODO use size instead of scale to make it robust to different sizes + # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], + # mode='bilinear', align_corners=False) + last_inner = inner_lateral + inner_top_down + results.insert(0, getattr(self, layer_block)(last_inner)) + + if self.top_blocks is not None: + last_results = self.top_blocks(results[-1]) + results.extend(last_results) + + return tuple(results) + + +class LastLevelMaxPool(nn.Module): + def forward(self, x): + return [F.max_pool2d(x, 1, 2, 0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7. 
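# Standalone mini sketch (made-up feature maps, not using the FPN class above) of the top-down
# merge it performs: the coarser map is projected to out_channels, upsampled 2x with nearest
# interpolation, added to the 1x1-projected lateral map, and each sum goes through a 3x3 conv;
# the returned maps are ordered finest resolution first.
import torch
import torch.nn.functional as F
from torch import nn

out_channels = 256
c4 = torch.randn(1, 1024, 50, 50)   # stride-16 feature
c5 = torch.randn(1, 2048, 25, 25)   # stride-32 feature
inner4, inner5 = nn.Conv2d(1024, out_channels, 1), nn.Conv2d(2048, out_channels, 1)
layer4 = nn.Conv2d(out_channels, out_channels, 3, 1, 1)
layer5 = nn.Conv2d(out_channels, out_channels, 3, 1, 1)

last_inner = inner5(c5)
p5 = layer5(last_inner)
top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest")  # 25x25 -> 50x50
p4 = layer4(inner4(c4) + top_down)
print(p4.shape, p5.shape)  # [1, 256, 50, 50], [1, 256, 25, 25]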
+ """ + + def __init__(self, out_channels): + super(LastLevelP6P7, self).__init__() + self.p6 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + nn.init.kaiming_uniform_(module.weight, a=1) + nn.init.constant_(module.bias, 0) + + def forward(self, x): + p6 = self.p6(x) + p7 = self.p7(F.relu(p6)) + return [p6, p7] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py new file mode 100644 index 0000000000..5f129a2a23 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py @@ -0,0 +1,317 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +Variant of the resnet module that takes cfg as an argument. +Example usage. Strings may be specified in the config file. + model = ResNet( + "StemWithFixedBatchNorm", + "BottleneckWithFixedBatchNorm", + "ResNet50StagesTo4", + ) +Custom implementations may be written in user code and hooked in via the +`register_*` functions. +""" +from collections import namedtuple + +import torch.nn.functional as F +from torch import nn + +from maskrcnn_benchmark.layers import FrozenBatchNorm2d +from maskrcnn_benchmark.layers import Conv2d + +# ResNet stage specification +StageSpec = namedtuple( + "StageSpec", + [ + "index", # Index of the stage, eg 1, 2, ..,. 5 + "block_count", # Numer of residual blocks in the stage + "return_features", # True => return the last feature map from this stage + ], +) + +# ----------------------------------------------------------------------------- +# Standard ResNet models +# ----------------------------------------------------------------------------- +# ResNet-50 (including all stages) +ResNet50StagesTo5 = ( + StageSpec(index=i, block_count=c, return_features=r) + for (i, c, r) in ((1, 3, False), (2, 4, False), (3, 6, False), (4, 3, True)) +) +# ResNet-50 up to stage 4 (excludes stage 5) +ResNet50StagesTo4 = ( + StageSpec(index=i, block_count=c, return_features=r) + for (i, c, r) in ((1, 3, False), (2, 4, False), (3, 6, True)) +) +# ResNet-50-FPN (including all stages) +ResNet50FPNStagesTo5 = ( + StageSpec(index=i, block_count=c, return_features=r) + for (i, c, r) in ((1, 3, True), (2, 4, True), (3, 6, True), (4, 3, True)) +) +# ResNet-101-FPN (including all stages) +ResNet101FPNStagesTo5 = ( + StageSpec(index=i, block_count=c, return_features=r) + for (i, c, r) in ((1, 3, True), (2, 4, True), (3, 23, True), (4, 3, True)) +) + + +class ResNet(nn.Module): + def __init__(self, cfg): + super(ResNet, self).__init__() + + # If we want to use the cfg in forward(), then we should make a copy + # of it and store it for later use: + # self.cfg = cfg.clone() + + # Translate string names to implementations + stem_module = _STEM_MODULES[cfg.MODEL.RESNETS.STEM_FUNC] + stage_specs = _STAGE_SPECS[cfg.MODEL.BACKBONE.CONV_BODY] + transformation_module = _TRANSFORMATION_MODULES[cfg.MODEL.RESNETS.TRANS_FUNC] + + # Construct the stem module + self.stem = stem_module(cfg) + + # Constuct the specified ResNet stages + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + stage2_bottleneck_channels = num_groups * width_per_group + stage2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + self.stages = [] + self.return_features = {} + for stage_spec in 
stage_specs: + name = "layer" + str(stage_spec.index) + stage2_relative_factor = 2 ** (stage_spec.index - 1) + bottleneck_channels = stage2_bottleneck_channels * stage2_relative_factor + out_channels = stage2_out_channels * stage2_relative_factor + module = _make_stage( + transformation_module, + in_channels, + bottleneck_channels, + out_channels, + stage_spec.block_count, + num_groups, + cfg.MODEL.RESNETS.STRIDE_IN_1X1, + first_stride=int(stage_spec.index > 1) + 1, + ) + in_channels = out_channels + self.add_module(name, module) + self.stages.append(name) + self.return_features[name] = stage_spec.return_features + + # Optionally freeze (requires_grad=False) parts of the backbone + self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT) + + def _freeze_backbone(self, freeze_at): + for stage_index in range(freeze_at): + if stage_index == 0: + m = self.stem # stage 0 is the stem + else: + m = getattr(self, "layer" + str(stage_index)) + for p in m.parameters(): + p.requires_grad = False + + def forward(self, x): + outputs = [] + x = self.stem(x) + for stage_name in self.stages: + x = getattr(self, stage_name)(x) + if self.return_features[stage_name]: + outputs.append(x) + return outputs + + +class ResNetHead(nn.Module): + def __init__( + self, + block_module, + stages, + num_groups=1, + width_per_group=64, + stride_in_1x1=True, + stride_init=None, + res2_out_channels=256, + ): + super(ResNetHead, self).__init__() + + stage2_relative_factor = 2 ** (stages[0].index - 1) + stage2_bottleneck_channels = num_groups * width_per_group + out_channels = res2_out_channels * stage2_relative_factor + in_channels = out_channels // 2 + bottleneck_channels = stage2_bottleneck_channels * stage2_relative_factor + + block_module = _TRANSFORMATION_MODULES[block_module] + + self.stages = [] + stride = stride_init + for stage in stages: + name = "layer" + str(stage.index) + if not stride: + stride = int(stage.index > 1) + 1 + module = _make_stage( + block_module, + in_channels, + bottleneck_channels, + out_channels, + stage.block_count, + num_groups, + stride_in_1x1, + first_stride=stride, + ) + stride = None + self.add_module(name, module) + self.stages.append(name) + + def forward(self, x): + for stage in self.stages: + x = getattr(self, stage)(x) + return x + + +def _make_stage( + transformation_module, + in_channels, + bottleneck_channels, + out_channels, + block_count, + num_groups, + stride_in_1x1, + first_stride, +): + blocks = [] + stride = first_stride + for _ in range(block_count): + blocks.append( + transformation_module( + in_channels, + bottleneck_channels, + out_channels, + num_groups, + stride_in_1x1, + stride, + ) + ) + stride = 1 + in_channels = out_channels + return nn.Sequential(*blocks) + + +class BottleneckWithFixedBatchNorm(nn.Module): + def __init__( + self, + in_channels, + bottleneck_channels, + out_channels, + num_groups=1, + stride_in_1x1=True, + stride=1, + ): + super(BottleneckWithFixedBatchNorm, self).__init__() + + self.downsample = None + if in_channels != out_channels: + self.downsample = nn.Sequential( + Conv2d( + in_channels, out_channels, kernel_size=1, stride=stride, bias=False + ), + FrozenBatchNorm2d(out_channels), + ) + + # The original MSRA ResNet models have stride in the first 1x1 conv + # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have + # stride in the 3x3 conv + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + 
bias=False, + ) + self.bn1 = FrozenBatchNorm2d(bottleneck_channels) + # TODO: specify init for the above + + self.conv2 = Conv2d( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1, + bias=False, + groups=num_groups, + ) + self.bn2 = FrozenBatchNorm2d(bottleneck_channels) + + self.conv3 = Conv2d( + bottleneck_channels, out_channels, kernel_size=1, bias=False + ) + self.bn3 = FrozenBatchNorm2d(out_channels) + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = F.relu_(out) + + out = self.conv2(out) + out = self.bn2(out) + out = F.relu_(out) + + out0 = self.conv3(out) + out = self.bn3(out0) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = F.relu_(out) + + return out + + +class StemWithFixedBatchNorm(nn.Module): + def __init__(self, cfg): + super(StemWithFixedBatchNorm, self).__init__() + + out_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + + self.conv1 = Conv2d( + 3, out_channels, kernel_size=7, stride=2, padding=3, bias=False + ) + self.bn1 = FrozenBatchNorm2d(out_channels) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +_TRANSFORMATION_MODULES = {"BottleneckWithFixedBatchNorm": BottleneckWithFixedBatchNorm} + +_STEM_MODULES = {"StemWithFixedBatchNorm": StemWithFixedBatchNorm} + +_STAGE_SPECS = { + "R-50-C4": ResNet50StagesTo4, + "R-50-C5": ResNet50StagesTo5, + "R-50-FPN": ResNet50FPNStagesTo5, + "R-101-FPN": ResNet101FPNStagesTo5, +} + + +def register_transformation_module(module_name, module): + _register_generic(_TRANSFORMATION_MODULES, module_name, module) + + +def register_stem_module(module_name, module): + _register_generic(_STEM_MODULES, module_name, module) + + +def register_stage_spec(stage_spec_name, stage_spec): + _register_generic(_STAGE_SPECS, stage_spec_name, stage_spec) + + +def _register_generic(module_dict, module_name, module): + assert module_name not in module_dict + module_dict[module_name] = module diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py new file mode 100644 index 0000000000..c0bd00444d --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py @@ -0,0 +1,68 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + + +class BalancedPositiveNegativeSampler(object): + """ + This class samples batches, ensuring that they contain a fixed proportion of positives + """ + + def __init__(self, batch_size_per_image, positive_fraction): + """ + Arguments: + batch_size_per_image (int): number of elements to be selected per image + positive_fraction (float): percentace of positive elements per batch + """ + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + + def __call__(self, matched_idxs): + """ + Arguments: + matched idxs: list of tensors containing -1, 0 or positive values. + Each tensor corresponds to a specific image. + -1 values are ignored, 0 are considered as negatives and > 0 as + positives. + + Returns: + pos_idx (list[tensor]) + neg_idx (list[tensor]) + + Returns two lists of binary masks for each image. 
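# Hedged usage sketch for the register_* hooks above: user code can add, for example, a custom
# stage specification and then refer to it from the config by name. The spec name and block
# counts below are made up purely for illustration.
from maskrcnn_benchmark.modeling.backbone import resnet

ResNet18LikeStagesTo5 = tuple(
    resnet.StageSpec(index=i, block_count=c, return_features=r)
    for (i, c, r) in ((1, 2, True), (2, 2, True), (3, 2, True), (4, 2, True))
)
resnet.register_stage_spec("R-18-LIKE-FPN", ResNet18LikeStagesTo5)
# cfg.MODEL.BACKBONE.CONV_BODY = "R-18-LIKE-FPN" would now resolve through _STAGE_SPECS.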
+ The first list contains the positive elements that were selected, + and the second list the negative example. + """ + pos_idx = [] + neg_idx = [] + for matched_idxs_per_image in matched_idxs: + positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) + negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) + + num_pos = int(self.batch_size_per_image * self.positive_fraction) + # protect against not enough positive examples + num_pos = min(positive.numel(), num_pos) + num_neg = self.batch_size_per_image - num_pos + # protect against not enough negative examples + num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx_per_image = positive[perm1] + neg_idx_per_image = negative[perm2] + + # create binary mask from indices + pos_idx_per_image_mask = torch.zeros_like( + matched_idxs_per_image, dtype=torch.uint8 + ) + neg_idx_per_image_mask = torch.zeros_like( + matched_idxs_per_image, dtype=torch.uint8 + ) + pos_idx_per_image_mask[pos_idx_per_image] = 1 + neg_idx_per_image_mask[neg_idx_per_image] = 1 + + pos_idx.append(pos_idx_per_image_mask) + neg_idx.append(neg_idx_per_image_mask) + + return pos_idx, neg_idx diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py new file mode 100644 index 0000000000..46a4acb324 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py @@ -0,0 +1,95 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import math + +import torch + + +class BoxCoder(object): + """ + This class encodes and decodes a set of bounding boxes into + the representation used for training the regressors. + """ + + def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): + """ + Arguments: + weights (4-element tuple) + bbox_xform_clip (float) + """ + self.weights = weights + self.bbox_xform_clip = bbox_xform_clip + + def encode(self, reference_boxes, proposals): + """ + Encode a set of proposals with respect to some + reference boxes + + Arguments: + reference_boxes (Tensor): reference boxes + proposals (Tensor): boxes to be encoded + """ + + TO_REMOVE = 1 # TODO remove + ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE + ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE + ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths + ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights + + gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE + gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE + gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths + gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights + + wx, wy, ww, wh = self.weights + targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = ww * torch.log(gt_widths / ex_widths) + targets_dh = wh * torch.log(gt_heights / ex_heights) + + targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) + return targets + + def decode(self, rel_codes, boxes): + """ + From a set of original boxes and encoded relative box offsets, + get the decoded boxes. + + Arguments: + rel_codes (Tensor): encoded boxes + boxes (Tensor): reference boxes. 
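# Minimal usage sketch for BalancedPositiveNegativeSampler above (illustrative labels only):
# in matched_idxs, -1 means ignore, 0 means negative and >= 1 means positive. With a batch of 8
# and positive_fraction 0.25 the sampler keeps at most 2 positives and fills the rest with
# negatives, returning binary masks per image.
import torch
from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import (
    BalancedPositiveNegativeSampler,
)

sampler = BalancedPositiveNegativeSampler(batch_size_per_image=8, positive_fraction=0.25)
matched_idxs = [torch.tensor([-1, 0, 0, 0, 0, 2, 5, 0, 0, 1])]  # one image, 3 positives
pos_masks, neg_masks = sampler(matched_idxs)
print(int(pos_masks[0].sum()), int(neg_masks[0].sum()))         # 2 positives, 6 negatives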
+ """ + + boxes = boxes.to(rel_codes.dtype) + + TO_REMOVE = 1 # TODO remove + widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE + heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = rel_codes[:, 0::4] / wx + dy = rel_codes[:, 1::4] / wy + dw = rel_codes[:, 2::4] / ww + dh = rel_codes[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.bbox_xform_clip) + dh = torch.clamp(dh, max=self.bbox_xform_clip) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + pred_boxes = torch.zeros_like(rel_codes) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 + # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 + + return pred_boxes diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/__init__.py new file mode 100644 index 0000000000..ff421e281e --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .detectors import build_detection_model diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/detectors.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/detectors.py new file mode 100644 index 0000000000..33ca7353d9 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/detectors.py @@ -0,0 +1,11 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .generalized_rcnn import GeneralizedRCNN +from .retinanet import RetinaNet + +_DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, + "RetinaNet": RetinaNet} + + +def build_detection_model(cfg): + meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] + return meta_arch(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py new file mode 100644 index 0000000000..3c04761f4d --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py @@ -0,0 +1,68 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +Implements the Generalized R-CNN framework +""" + +from torch import nn + +from maskrcnn_benchmark.structures.image_list import to_image_list + +from ..backbone import build_backbone +from ..rpn.rpn import build_rpn +from ..rpn.retinanet import build_retinanet +from ..roi_heads.roi_heads import build_roi_heads + + +class GeneralizedRCNN(nn.Module): + """ + Main class for Generalized R-CNN. Currently supports boxes and masks. + It consists of three main parts: + - backbone + = rpn + - heads: takes the features + the proposals from the RPN and computes + detections / masks from it. 
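# Round-trip sketch for BoxCoder above (illustrative boxes): encoding a ground-truth box against
# a proposal and then decoding the resulting deltas recovers the ground-truth box, since the
# "+1" width/height convention in encode() and the "-1" on x2/y2 in decode() cancel out.
import torch
from maskrcnn_benchmark.modeling.box_coder import BoxCoder

coder = BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))
proposals = torch.tensor([[20.0, 30.0, 120.0, 180.0]])
gt = torch.tensor([[25.0, 28.0, 110.0, 200.0]])

deltas = coder.encode(gt, proposals)      # (dx, dy, dw, dh) scaled by the weights
decoded = coder.decode(deltas, proposals)
print(decoded)                            # [[25., 28., 110., 200.]] -- matches gt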
+ """ + + def __init__(self, cfg): + super(GeneralizedRCNN, self).__init__() + + self.backbone = build_backbone(cfg) + if not cfg.RETINANET.RETINANET_ON: + self.rpn = build_rpn(cfg) + else: + self.rpn = build_retinanet(cfg) + self.roi_heads = build_roi_heads(cfg) + + def forward(self, images, targets=None): + """ + Arguments: + images (list[Tensor] or ImageList): images to be processed + targets (list[BoxList]): ground-truth boxes present in the image (optional) + + Returns: + result (list[BoxList] or dict[Tensor]): the output from the model. + During training, it returns a dict[Tensor] which contains the losses. + During testing, it returns list[BoxList] contains additional fields + like `scores`, `labels` and `mask` (for Mask R-CNN models). + + """ + if self.training and targets is None: + raise ValueError("In training mode, targets should be passed") + images = to_image_list(images) + features = self.backbone(images.tensors) + proposals, proposal_losses = self.rpn(images, features, targets) + if self.roi_heads: + x, result, detector_losses = self.roi_heads(features, proposals, targets) + else: + # RPN-only models don't have roi_heads + x = features + result = proposals + detector_losses = {} + + if self.training: + losses = {} + losses.update(detector_losses) + losses.update(proposal_losses) + return losses + + return result diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/retinanet.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/retinanet.py new file mode 100644 index 0000000000..4f2fa7f368 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/detector/retinanet.py @@ -0,0 +1,113 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +Implements the Generalized R-CNN framework +""" + +import copy +import torch + +from torch import nn +from maskrcnn_benchmark.structures.image_list import to_image_list + +from ..backbone import build_backbone +from ..rpn.retinanet import build_retinanet +from maskrcnn_benchmark.modeling.roi_heads.mask_head.mask_head import build_roi_mask_head +from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist + + +class RetinaNet(nn.Module): + """ + Main class for RetinaNet + It consists of three main parts: + - backbone + - bbox_heads: BBox prediction. + - Mask_heads: + """ + + def __init__(self, cfg): + super(RetinaNet, self).__init__() + self.cfg = copy.deepcopy(cfg) + self.backbone = build_backbone(cfg) + self.rpn = build_retinanet(cfg) + self.mask = None + if cfg.MODEL.MASK_ON: + self.mask = build_roi_mask_head(cfg) + + def forward(self, images, targets=None): + """ + Arguments: + images (list[Tensor] or ImageList): images to be processed + targets (list[BoxList]): ground-truth boxes present in the image (optional) + + Returns: + result (list[BoxList] or dict[Tensor]): the output from the model. + During training, it returns a dict[Tensor] which contains the losses. + During testing, it returns list[BoxList] contains additional fields + like `scores`, `labels` and `mask` (for Mask R-CNN models). 
+ + """ + if self.training and targets is None: + raise ValueError("In training mode, targets should be passed") + images = to_image_list(images) + features = self.backbone(images.tensors) + + # Retina RPN Output + rpn_features = features + if self.cfg.RETINANET.BACKBONE == "p2p7": + rpn_features = features[1:] + + (anchors, detections), detector_losses = self.rpn(images, rpn_features, targets) + + if self.training: + losses = {} + losses.update(detector_losses) + if self.mask: + if self.cfg.MODEL.MASK_ON: + # Padding the GT + proposals = [] + for (image_detections, image_targets) in zip( + detections, targets): + + merge_list = [] + if not isinstance(image_detections, list): + merge_list.append(image_detections.copy_with_fields('labels')) + + if not isinstance(image_targets, list): + merge_list.append(image_targets.copy_with_fields('labels')) + + if len(merge_list) == 1: + proposals.append(merge_list[0]) + else: + proposals.append(cat_boxlist(merge_list)) + + x, result, mask_losses = self.mask(features, proposals, targets) + + elif self.cfg.MODEL.SPARSE_MASK_ON: + x, result, mask_losses = self.mask(features, anchors, targets) + + losses.update(mask_losses) + return losses + else: + if self.mask: + proposals = [] + for image_detections in detections: + num_of_detections = image_detections.bbox.shape[0] + if num_of_detections > self.cfg.RETINANET.NUM_MASKS_TEST > 0: + cls_scores = image_detections.get_field("scores") + cls_scores = cls_scores.type(torch.float32) + _, keep = torch.topk( + cls_scores, + self.cfg.RETINANET.NUM_MASKS_TEST, + largest=True + ) + image_detections = image_detections[keep] + + proposals.append(image_detections) + + if self.cfg.MODEL.SPARSE_MASK_ON: + x, detections, mask_losses = self.mask( + features, proposals, targets + ) + else: + x, detections, mask_losses = self.mask(features, proposals, targets) + return detections diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/matcher.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/matcher.py new file mode 100644 index 0000000000..4f8c54dd1e --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/matcher.py @@ -0,0 +1,108 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + + +class Matcher(object): + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be assigned to zero or more predicted elements. + + Matching is based on the MxN match_quality_matrix, that characterizes how well + each (ground-truth, predicted)-pair match. For example, if the elements are + boxes, the matrix may contain box IoU overlap values. + + The matcher returns a tensor of size N containing the index of the ground-truth + element m that matches to prediction n. If there is no match, a negative value + is returned. + """ + + BELOW_LOW_THRESHOLD = -1 + BETWEEN_THRESHOLDS = -2 + + def __init__(self, high_threshold, low_threshold, + allow_low_quality_matches=False, low_quality_threshold=0.0): + """ + Args: + high_threshold (float): quality values greater than or equal to + this value are candidate matches. 
+ low_threshold (float): a lower quality threshold used to stratify + matches into three levels: + 1) matches >= high_threshold + 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) + 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) + allow_low_quality_matches (bool): if True, produce additional matches + for predictions that have only low-quality match candidates. See + set_low_quality_matches_ for more details. + """ + assert low_threshold <= high_threshold + self.high_threshold = high_threshold + self.low_threshold = low_threshold + self.allow_low_quality_matches = allow_low_quality_matches + self.low_quality_threshold = low_quality_threshold + + def __call__(self, match_quality_matrix): + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted elements. + + Returns: + matches (Tensor[int64]): an N tensor where N[i] is a matched gt in + [0, M - 1] or a negative value indicating that prediction i could not + be matched. + """ + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + matched_vals, matches = match_quality_matrix.max(dim=0) + if self.allow_low_quality_matches: + all_matches = matches.clone() + + # Assign candidate matches with low quality to negative (unassigned) values + below_low_threshold = matched_vals < self.low_threshold + between_thresholds = (matched_vals >= self.low_threshold) & ( + matched_vals < self.high_threshold + ) + matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD + matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS + + if self.allow_low_quality_matches: + self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) + + return matches + + def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth with which it has the highest + quality value. 
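# Usage sketch for Matcher above (illustrative IoU matrix): rows are ground-truth boxes and
# columns are predictions. With thresholds 0.5/0.4, prediction 0 is a confident match for gt 0,
# prediction 1 falls between the thresholds (-2), prediction 2 is below the low threshold (-1),
# and with allow_low_quality_matches=True prediction 3 is kept because it is gt 1's best
# available match even though its IoU is low.
import torch
from maskrcnn_benchmark.modeling.matcher import Matcher

iou = torch.tensor([
    [0.70, 0.45, 0.10, 0.05],   # gt 0 vs predictions 0..3
    [0.20, 0.10, 0.05, 0.30],   # gt 1 vs predictions 0..3
])
matcher = Matcher(high_threshold=0.5, low_threshold=0.4, allow_low_quality_matches=True)
print(matcher(iou))             # tensor([ 0, -2, -1,  1])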
+ """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + + if self.low_quality_threshold > 0.0: + select = highest_quality_foreach_gt >= self.low_quality_threshold + highest_quality_foreach_gt = highest_quality_foreach_gt[select] + match_quality_matrix = match_quality_matrix[select] + # Find highest quality match available, even if it is low, including ties + gt_pred_pairs_of_highest_quality = torch.nonzero( + match_quality_matrix == highest_quality_foreach_gt[:, None] + ) + # Example gt_pred_pairs_of_highest_quality: + # tensor([[ 0, 39796], + # [ 1, 32055], + # [ 1, 32070], + # [ 2, 39190], + # [ 2, 40255], + # [ 3, 40390], + # [ 3, 41455], + # [ 4, 45470], + # [ 5, 45325], + # [ 5, 46390]]) + # Each row is a (gt index, prediction index) + # Note how gt items 1, 2, 3, and 5 each have two ties + + pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1] + matches[pred_inds_to_update] = all_matches[pred_inds_to_update] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/poolers.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/poolers.py new file mode 100644 index 0000000000..bed784d2a2 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/poolers.py @@ -0,0 +1,130 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + +from torch import nn +from maskrcnn_benchmark.layers import ROIAlign +from .utils import cat + + +class LevelMapper(object): + """Determine which FPN level each RoI in a set of RoIs should map to based + on the heuristic in the FPN paper. + """ + + def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): + """ + Arguments: + k_min (int) + k_max (int) + canonical_scale (int) + canonical_level (int) + eps (float) + """ + self.k_min = k_min + self.k_max = k_max + self.s0 = canonical_scale + self.lvl0 = canonical_level + self.eps = eps + + def __call__(self, boxlists): + """ + Arguments: + boxlists (list[BoxList]) + """ + # Compute level ids + s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists])) + + # Eqn.(1) in FPN paper + target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps)) + target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) + return target_lvls.to(torch.int64) - self.k_min + + +class Pooler(nn.Module): + """ + Pooler for Detection with or without FPN. + It currently hard-code ROIAlign in the implementation, + but that can be made more generic later on. + Also, the requirement of passing the scales is not strictly necessary, as they + can be inferred from the size of the feature map / size of original image, + which is available thanks to the BoxList. + """ + + def __init__(self, output_size, scales, sampling_ratio, canonical_level=4): + """ + Arguments: + output_size (list[tuple[int]] or list[int]): output size for the pooled region + scales (list[float]): scales for each Pooler + sampling_ratio (int): sampling ratio for ROIAlign + """ + super(Pooler, self).__init__() + poolers = [] + for scale in scales: + poolers.append( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio + ) + ) + self.poolers = nn.ModuleList(poolers) + self.output_size = output_size + # get the levels in the feature map by leveraging the fact that the network always + # downsamples by a factor of 2 at each level. 
+ lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() + lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() + self.map_levels = LevelMapper( + lvl_min, lvl_max, canonical_level=canonical_level + ) + + def convert_to_roi_format(self, boxes): + concat_boxes = cat([b.bbox for b in boxes], dim=0) + device, dtype = concat_boxes.device, concat_boxes.dtype + ids = cat( + [ + torch.full((len(b), 1), i, dtype=dtype, device=device) + for i, b in enumerate(boxes) + ], + dim=0, + ) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + def forward(self, x, boxes): + """ + Arguments: + x (list[Tensor]): feature maps for each level + boxes (list[BoxList]): boxes to be used to perform the pooling operation. + Returns: + result (Tensor) + """ + num_levels = len(self.poolers) + rois = self.convert_to_roi_format(boxes) + + if num_levels == 1: + return self.poolers[0](x[0], rois) + + levels = self.map_levels(boxes) + + num_rois = len(rois) + num_channels = x[0].shape[1] + output_size = self.output_size[0] + + dtype, device = x[0].dtype, x[0].device + result = torch.zeros( + (num_rois, num_channels, output_size, output_size), + dtype=dtype, + device=device, + ) + + for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)): + idx_in_level = levels == level + + rois_per_level = rois[idx_in_level] + + num_rois_per_level = len(rois_per_level) + max_len = len(rois) + fix_shape_rois = rois_per_level.new_zeros([max_len, 5]) + fix_shape_rois[:num_rois_per_level] = rois_per_level + fix_shape_res = pooler(per_level_feature, fix_shape_rois) + result[idx_in_level] = fix_shape_res[:num_rois_per_level] + + return result diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py new file mode 100644 index 0000000000..2a2d77df14 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py @@ -0,0 +1,69 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + +from .roi_box_feature_extractors import make_roi_box_feature_extractor +from .roi_box_predictors import make_roi_box_predictor +from .inference import make_roi_box_post_processor +from .loss import make_roi_box_loss_evaluator + + +class ROIBoxHead(torch.nn.Module): + """ + Generic Box Head class. + """ + + def __init__(self, cfg): + super(ROIBoxHead, self).__init__() + self.feature_extractor = make_roi_box_feature_extractor(cfg) + self.predictor = make_roi_box_predictor(cfg) + self.post_processor = make_roi_box_post_processor(cfg) + self.loss_evaluator = make_roi_box_loss_evaluator(cfg) + + def forward(self, features, proposals, targets=None): + """ + Arguments: + features (list[Tensor]): feature-maps from possibly several levels + proposals (list[BoxList]): proposal boxes + targets (list[BoxList], optional): the ground-truth targets. 
+ + Returns: + x (Tensor): the result of the feature extractor + proposals (list[BoxList]): during training, the subsampled proposals + are returned. During testing, the predicted boxlists are returned + losses (dict[Tensor]): During training, returns the losses for the + head. During testing, returns an empty dict. + """ + + if self.training: + # Faster R-CNN subsamples during training the proposals with a fixed + # positive / negative ratio + with torch.no_grad(): + proposals = self.loss_evaluator.subsample(proposals, targets) + + # extract features that will be fed to the final classifier. The + # feature_extractor generally corresponds to the pooler + heads + x = self.feature_extractor(features, proposals) + # final classifier that converts the features into predictions + class_logits, box_regression = self.predictor(x) + + if not self.training: + result = self.post_processor((class_logits, box_regression), proposals) + return x, result, {} + + loss_classifier, loss_box_reg = self.loss_evaluator( + [class_logits], [box_regression] + ) + return ( + x, + proposals, + dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), + ) + + +def build_roi_box_head(cfg): + """ + Constructs a new box head. + By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class + and make it a parameter in the config + """ + return ROIBoxHead(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py new file mode 100644 index 0000000000..1968925505 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +import torch.nn.functional as F +from torch import nn + +from maskrcnn_benchmark.structures.bounding_box import BoxList +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms +from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist +from maskrcnn_benchmark.modeling.box_coder import BoxCoder + + +class PostProcessor(nn.Module): + """ + From a set of classification scores, box regression and proposals, + computes the post-processed boxes, and applies NMS to obtain the + final results + """ + + def __init__( + self, score_thresh=0.05, nms=0.5, detections_per_img=100, box_coder=None + ): + """ + Arguments: + score_thresh (float) + nms (float) + detections_per_img (int) + box_coder (BoxCoder) + """ + super(PostProcessor, self).__init__() + self.score_thresh = score_thresh + self.nms = nms + self.detections_per_img = detections_per_img + if box_coder is None: + box_coder = BoxCoder(weights=(10., 10., 5., 5.)) + self.box_coder = box_coder + + def forward(self, x, boxes): + """ + Arguments: + x (tuple[tensor, tensor]): x contains the class logits + and the box_regression from the model. 
+ boxes (list[BoxList]): bounding boxes that are used as + reference, one for ech image + + Returns: + results (list[BoxList]): one BoxList for each image, containing + the extra fields labels and scores + """ + class_logits, box_regression = x + class_prob = F.softmax(class_logits, -1) + + # TODO think about a representation of batch of boxes + image_shapes = [box.size for box in boxes] + boxes_per_image = [len(box) for box in boxes] + concat_boxes = torch.cat([a.bbox for a in boxes], dim=0) + + proposals = self.box_coder.decode( + box_regression.view(sum(boxes_per_image), -1), concat_boxes + ) + + num_classes = class_prob.shape[1] + + proposals = proposals.split(boxes_per_image, dim=0) + class_prob = class_prob.split(boxes_per_image, dim=0) + + results = [] + for prob, boxes_per_img, image_shape in zip( + class_prob, proposals, image_shapes + ): + boxlist = self.prepare_boxlist(boxes_per_img, prob, image_shape) + boxlist = boxlist.clip_to_image(remove_empty=False) + boxlist = self.filter_results(boxlist, num_classes) + results.append(boxlist) + return results + + def prepare_boxlist(self, boxes, scores, image_shape): + """ + Returns BoxList from `boxes` and adds probability scores information + as an extra field + `boxes` has shape (#detections, 4 * #classes), where each row represents + a list of predicted bounding boxes for each of the object classes in the + dataset (including the background class). The detections in each row + originate from the same object proposal. + `scores` has shape (#detection, #classes), where each row represents a list + of object detection confidence scores for each of the object classes in the + dataset (including the background class). `scores[i, j]`` corresponds to the + box at `boxes[i, j * 4:(j + 1) * 4]`. + """ + boxes = boxes.reshape(-1, 4) + scores = scores.reshape(-1) + boxlist = BoxList(boxes, image_shape, mode="xyxy") + boxlist.add_field("scores", scores) + return boxlist + + def filter_results(self, boxlist, num_classes): + """Returns bounding-box detection results by thresholding on scores and + applying non-maximum suppression (NMS). + """ + # unwrap the boxlist to avoid additional overhead. 
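# Illustrative shapes, assuming 3 candidate detections and 81 COCO classes:
# the flattened boxlist holds 3 * 81 = 243 rows, so the two reshape calls
# below recover boxes of shape (3, 324) and scores of shape (3, 81), with
# scores[i, j] paired with boxes[i, 4*j : 4*(j+1)].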
+ # if we had multi-class NMS, we could perform this directly on the boxlist + boxes = boxlist.bbox.reshape(-1, num_classes * 4) + scores = boxlist.get_field("scores").reshape(-1, num_classes) + + device = scores.device + result = [] + # Apply threshold on detection probabilities and apply NMS + # Skip j = 0, because it's the background class + inds_all = scores > self.score_thresh + for j in range(1, num_classes): + inds = inds_all[:, j].nonzero().squeeze(1) + scores_j = scores[inds, j] + boxes_j = boxes[inds, j * 4 : (j + 1) * 4] + boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") + boxlist_for_class.add_field("scores", scores_j) + boxlist_for_class = boxlist_nms( + boxlist_for_class, self.nms, score_field="scores" + ) + num_labels = len(boxlist_for_class) + boxlist_for_class.add_field( + "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device) + ) + result.append(boxlist_for_class) + + result = cat_boxlist(result) + number_of_detections = len(result) + + # Limit to max_per_image detections **over all classes** + if number_of_detections > self.detections_per_img > 0: + cls_scores = result.get_field("scores") + image_thresh, _ = torch.kthvalue( + cls_scores.cpu(), number_of_detections - self.detections_per_img + 1 + ) + keep = cls_scores >= image_thresh.item() + keep = torch.nonzero(keep).squeeze(1) + result = result[keep] + return result + + +def make_roi_box_post_processor(cfg): + use_fpn = cfg.MODEL.ROI_HEADS.USE_FPN + + bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS + box_coder = BoxCoder(weights=bbox_reg_weights) + + score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH + nms_thresh = cfg.MODEL.ROI_HEADS.NMS + detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG + + postprocessor = PostProcessor( + score_thresh, nms_thresh, detections_per_img, box_coder + ) + return postprocessor diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py new file mode 100644 index 0000000000..8a25fad36b --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py @@ -0,0 +1,177 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +from torch.nn import functional as F + +from maskrcnn_benchmark.layers import smooth_l1_loss +from maskrcnn_benchmark.modeling.box_coder import BoxCoder +from maskrcnn_benchmark.modeling.matcher import Matcher +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou +from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import ( + BalancedPositiveNegativeSampler +) +from maskrcnn_benchmark.modeling.utils import cat + + +class FastRCNNLossComputation(object): + """ + Computes the loss for Faster R-CNN. 
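A small numeric check of the per-image detection cap applied in filter_results above; the scores are made up and detections_per_img is assumed to be 3:

    import torch

    cls_scores = torch.tensor([0.9, 0.2, 0.75, 0.4, 0.8, 0.1])
    number_of_detections, detections_per_img = 6, 3
    image_thresh, _ = torch.kthvalue(
        cls_scores, number_of_detections - detections_per_img + 1
    )
    keep = torch.nonzero(cls_scores >= image_thresh.item()).squeeze(1)
    print(image_thresh.item(), keep.tolist())   # 0.75 [0, 2, 4] -> exactly 3 kept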
+ Also supports FPN + """ + + def __init__(self, proposal_matcher, fg_bg_sampler, box_coder): + """ + Arguments: + proposal_matcher (Matcher) + fg_bg_sampler (BalancedPositiveNegativeSampler) + box_coder (BoxCoder) + """ + self.proposal_matcher = proposal_matcher + self.fg_bg_sampler = fg_bg_sampler + self.box_coder = box_coder + + def match_targets_to_proposals(self, proposal, target): + match_quality_matrix = boxlist_iou(target, proposal) + matched_idxs = self.proposal_matcher(match_quality_matrix) + # Fast RCNN only need "labels" field for selecting the targets + target = target.copy_with_fields("labels") + # get the targets corresponding GT for each proposal + # NB: need to clamp the indices because we can have a single + # GT in the image, and matched_idxs can be -2, which goes + # out of bounds + matched_targets = target[matched_idxs.clamp(min=0)] + matched_targets.add_field("matched_idxs", matched_idxs) + return matched_targets + + def prepare_targets(self, proposals, targets): + labels = [] + regression_targets = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + matched_targets = self.match_targets_to_proposals( + proposals_per_image, targets_per_image + ) + matched_idxs = matched_targets.get_field("matched_idxs") + + labels_per_image = matched_targets.get_field("labels") + labels_per_image = labels_per_image.to(dtype=torch.int64) + + # Label background (below the low threshold) + bg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD + labels_per_image[bg_inds] = 0 + + # Label ignore proposals (between low and high thresholds) + ignore_inds = matched_idxs == Matcher.BETWEEN_THRESHOLDS + labels_per_image[ignore_inds] = -1 # -1 is ignored by sampler + + # compute regression targets + regression_targets_per_image = self.box_coder.encode( + matched_targets.bbox, proposals_per_image.bbox + ) + + labels.append(labels_per_image) + regression_targets.append(regression_targets_per_image) + + return labels, regression_targets + + def subsample(self, proposals, targets): + """ + This method performs the positive/negative sampling, and return + the sampled proposals. + Note: this function keeps a state. + + Arguments: + proposals (list[BoxList]) + targets (list[BoxList]) + """ + + labels, regression_targets = self.prepare_targets(proposals, targets) + sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) + + proposals = list(proposals) + # add corresponding label and regression_targets information to the bounding boxes + for labels_per_image, regression_targets_per_image, proposals_per_image in zip( + labels, regression_targets, proposals + ): + proposals_per_image.add_field("labels", labels_per_image) + proposals_per_image.add_field( + "regression_targets", regression_targets_per_image + ) + + # distributed sampled proposals, that were obtained on all feature maps + # concatenated via the fg_bg_sampler, into individual feature map levels + for img_idx, (pos_inds_img, neg_inds_img) in enumerate( + zip(sampled_pos_inds, sampled_neg_inds) + ): + img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) + proposals_per_image = proposals[img_idx][img_sampled_inds] + proposals[img_idx] = proposals_per_image + + self._proposals = proposals + return proposals + + def __call__(self, class_logits, box_regression): + """ + Computes the loss for Faster R-CNN. + This requires that the subsample method has been called beforehand. 
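A toy illustration of how prepare_targets above turns matcher output into classification labels; the ground-truth class ids and matched indices are invented for the example:

    import torch

    gt_labels = torch.tensor([7, 3, 12])            # per-ground-truth class ids
    matched_idxs = torch.tensor([2, -1, 0, -2, 1])  # Matcher output for 5 proposals
    labels = gt_labels[matched_idxs.clamp(min=0)]
    labels[matched_idxs == -1] = 0     # BELOW_LOW_THRESHOLD -> background
    labels[matched_idxs == -2] = -1    # BETWEEN_THRESHOLDS  -> ignored by the sampler
    print(labels)                      # tensor([12,  0,  7, -1,  3])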
+ + Arguments: + class_logits (list[Tensor]) + box_regression (list[Tensor]) + + Returns: + classification_loss (Tensor) + box_loss (Tensor) + """ + + class_logits = cat(class_logits, dim=0) + box_regression = cat(box_regression, dim=0) + device = class_logits.device + + if not hasattr(self, "_proposals"): + raise RuntimeError("subsample needs to be called before") + + proposals = self._proposals + + labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0) + regression_targets = cat( + [proposal.get_field("regression_targets") for proposal in proposals], dim=0 + ) + + scores = cat([proposal.get_field("objectness") for proposal in proposals], dim=0) + weight = scores.gt(0).float() + classification_loss = (F.cross_entropy(class_logits, labels, reduction='none') * weight) / weight.sum() + + # get indices that correspond to the regression targets for + # the corresponding ground truth labels, to be used with + # advanced indexing + sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) + labels_pos = labels[sampled_pos_inds_subset] + map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device) + + box_loss = smooth_l1_loss( + box_regression[sampled_pos_inds_subset[:, None], map_inds], + regression_targets[sampled_pos_inds_subset], + size_average=False, + beta=1, + ) + box_loss = box_loss / labels.numel() + + return classification_loss, box_loss + + +def make_roi_box_loss_evaluator(cfg): + matcher = Matcher( + cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD, + cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD, + allow_low_quality_matches=False, + ) + + bbox_reg_weights = cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS + box_coder = BoxCoder(weights=bbox_reg_weights) + + fg_bg_sampler = BalancedPositiveNegativeSampler( + cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + ) + + loss_evaluator = FastRCNNLossComputation(matcher, fg_bg_sampler, box_coder) + + return loss_evaluator diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py new file mode 100644 index 0000000000..9194eafb3b --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py @@ -0,0 +1,88 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
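# Illustrative check (made-up labels) of the class-specific regression indexing
# used in FastRCNNLossComputation.__call__ above: map_inds picks out the four
# regression columns that belong to each positive sample's own class.
import torch
labels_pos = torch.tensor([2, 5, 1])
map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3])
print(map_inds)
# tensor([[ 8,  9, 10, 11],
#         [20, 21, 22, 23],
#         [ 4,  5,  6,  7]])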
+from torch import nn +from torch.nn import functional as F + +from maskrcnn_benchmark.modeling.backbone import resnet +from maskrcnn_benchmark.modeling.poolers import Pooler + + +class ResNet50Conv5ROIFeatureExtractor(nn.Module): + def __init__(self, config): + super(ResNet50Conv5ROIFeatureExtractor, self).__init__() + + resolution = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + scales = config.MODEL.ROI_BOX_HEAD.POOLER_SCALES + sampling_ratio = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler = Pooler( + output_size=(resolution, resolution), + scales=scales, + sampling_ratio=sampling_ratio, + ) + + stage = resnet.StageSpec(index=4, block_count=3, return_features=False) + head = resnet.ResNetHead( + block_module=config.MODEL.RESNETS.TRANS_FUNC, + stages=(stage,), + num_groups=config.MODEL.RESNETS.NUM_GROUPS, + width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP, + stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1, + stride_init=None, + res2_out_channels=config.MODEL.RESNETS.RES2_OUT_CHANNELS, + ) + + self.pooler = pooler + self.head = head + + def forward(self, x, proposals): + x = self.pooler(x, proposals) + x = self.head(x) + return x + + +class FPN2MLPFeatureExtractor(nn.Module): + """ + Heads for FPN for classification + """ + + def __init__(self, cfg): + super(FPN2MLPFeatureExtractor, self).__init__() + + resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler = Pooler( + output_size=(resolution, resolution), + scales=scales, + sampling_ratio=sampling_ratio, + ) + input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution ** 2 + representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM + self.pooler = pooler + self.fc6 = nn.Linear(input_size, representation_size) + self.fc7 = nn.Linear(representation_size, representation_size) + + for l in [self.fc6, self.fc7]: + # Caffe2 implementation uses XavierFill, which in fact + # corresponds to kaiming_uniform_ in PyTorch + nn.init.kaiming_uniform_(l.weight, a=1) + nn.init.constant_(l.bias, 0) + + def forward(self, x, proposals): + x = self.pooler(x, proposals) + x = x.view(x.size(0), -1) + + x = F.relu(self.fc6(x)) + x = F.relu(self.fc7(x)) + + return x + + +_ROI_BOX_FEATURE_EXTRACTORS = { + "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, + "FPN2MLPFeatureExtractor": FPN2MLPFeatureExtractor, +} + + +def make_roi_box_feature_extractor(cfg): + func = _ROI_BOX_FEATURE_EXTRACTORS[cfg.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR] + return func(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py new file mode 100644 index 0000000000..e05fcbb1d7 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py @@ -0,0 +1,62 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
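# Shape sketch of the FPN2MLPFeatureExtractor defined above, assuming the
# common defaults OUT_CHANNELS=256, POOLER_RESOLUTION=7 and MLP_HEAD_DIM=1024
# (all three values are assumptions, not read from a config):
import torch
import torch.nn.functional as F
from torch import nn
pooled = torch.randn(512, 256, 7, 7)        # 512 RoIs out of the pooler
fc6 = nn.Linear(256 * 7 * 7, 1024)          # input_size = 256 * 7**2 = 12544
fc7 = nn.Linear(1024, 1024)
x = F.relu(fc7(F.relu(fc6(pooled.view(pooled.size(0), -1)))))
print(x.shape)                              # torch.Size([512, 1024])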
+from torch import nn + + +class FastRCNNPredictor(nn.Module): + def __init__(self, config, pretrained=None): + super(FastRCNNPredictor, self).__init__() + + stage_index = 4 + stage2_relative_factor = 2 ** (stage_index - 1) + res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS + num_inputs = res2_out_channels * stage2_relative_factor + + num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES + self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) + self.cls_score = nn.Linear(num_inputs, num_classes) + self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) + + nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) + nn.init.constant_(self.cls_score.bias, 0) + + nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) + nn.init.constant_(self.bbox_pred.bias, 0) + + def forward(self, x): + x = self.avgpool(x) + x = x.view(x.size(0), -1) + cls_logit = self.cls_score(x) + bbox_pred = self.bbox_pred(x) + return cls_logit, bbox_pred + + +class FPNPredictor(nn.Module): + def __init__(self, cfg): + super(FPNPredictor, self).__init__() + num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES + representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM + + self.cls_score = nn.Linear(representation_size, num_classes) + self.bbox_pred = nn.Linear(representation_size, num_classes * 4) + + nn.init.normal_(self.cls_score.weight, std=0.01) + nn.init.normal_(self.bbox_pred.weight, std=0.001) + for l in [self.cls_score, self.bbox_pred]: + nn.init.constant_(l.bias, 0) + + def forward(self, x): + scores = self.cls_score(x) + bbox_deltas = self.bbox_pred(x) + + return scores, bbox_deltas + + +_ROI_BOX_PREDICTOR = { + "FastRCNNPredictor": FastRCNNPredictor, + "FPNPredictor": FPNPredictor, +} + + +def make_roi_box_predictor(cfg): + func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] + return func(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py new file mode 100644 index 0000000000..f97f9921ae --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py @@ -0,0 +1,188 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import numpy as np +import torch +from PIL import Image +from torch import nn + +from maskrcnn_benchmark.structures.bounding_box import BoxList + + +# TODO check if want to return a single BoxList or a composite +class MaskPostProcessor(nn.Module): + """ + From the results of the CNN, post process the masks + by taking the mask corresponding to the class with max + probability (which are of fixed size and directly output + by the CNN) and return the masks in the mask field of the BoxList. 
+ + If a masker object is passed, it will additionally + project the masks in the image according to the locations in boxes, + """ + + def __init__(self, masker=None): + super(MaskPostProcessor, self).__init__() + self.masker = masker + + def forward(self, x, boxes): + """ + Arguments: + x (Tensor): the mask logits + boxes (list[BoxList]): bounding boxes that are used as + reference, one for ech image + + Returns: + results (list[BoxList]): one BoxList for each image, containing + the extra field mask + """ + mask_prob = x.sigmoid() + + # select masks coresponding to the predicted classes + num_masks = x.shape[0] + labels = [bbox.get_field("labels") for bbox in boxes] + labels = torch.cat(labels) + index = torch.arange(num_masks, device=labels.device) + mask_prob = mask_prob[index.long(), labels.long()][:, None] + + if self.masker: + mask_prob = self.masker(mask_prob, boxes) + + boxes_per_image = [len(box) for box in boxes] + mask_prob = mask_prob.split(boxes_per_image, dim=0) + + results = [] + for prob, box in zip(mask_prob, boxes): + bbox = BoxList(box.bbox, box.size, mode="xyxy") + for field in box.fields(): + bbox.add_field(field, box.get_field(field)) + bbox.add_field("mask", prob) + results.append(bbox) + + return results + + +class MaskPostProcessorCOCOFormat(MaskPostProcessor): + """ + From the results of the CNN, post process the results + so that the masks are pasted in the image, and + additionally convert the results to COCO format. + """ + + def forward(self, x, boxes): + import pycocotools.mask as mask_util + import numpy as np + + results = super(MaskPostProcessorCOCOFormat, self).forward(x, boxes) + for result in results: + masks = result.get_field("mask").cpu() + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0] + for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + result.add_field("mask", rles) + return results + + +# the next two functions should be merged inside Masker +# but are kept here for the moment while we need them +# temporarily gor paste_mask_in_image +def expand_boxes(boxes, scale): + w_half = (boxes[:, 2] - boxes[:, 0]) * .5 + h_half = (boxes[:, 3] - boxes[:, 1]) * .5 + x_c = (boxes[:, 2] + boxes[:, 0]) * .5 + y_c = (boxes[:, 3] + boxes[:, 1]) * .5 + + w_half *= scale + h_half *= scale + + boxes_exp = torch.zeros_like(boxes) + boxes_exp[:, 0] = x_c - w_half + boxes_exp[:, 2] = x_c + w_half + boxes_exp[:, 1] = y_c - h_half + boxes_exp[:, 3] = y_c + h_half + return boxes_exp + + +def expand_masks(mask, padding): + N = mask.shape[0] + M = mask.shape[-1] + pad2 = 2 * padding + scale = float(M + pad2) / M + padded_mask = mask.new_zeros((N, 1, M + pad2, M + pad2)) + padded_mask[:, :, padding:-padding, padding:-padding] = mask + return padded_mask, scale + + +def paste_mask_in_image(mask, box, im_h, im_w, thresh=0.5, padding=1): + padded_mask, scale = expand_masks(mask[None], padding=padding) + mask = padded_mask[0, 0] + box = expand_boxes(box[None], scale)[0] + box = box.numpy().astype(np.int32) + + TO_REMOVE = 1 + w = box[2] - box[0] + TO_REMOVE + h = box[3] - box[1] + TO_REMOVE + w = max(w, 1) + h = max(h, 1) + + mask = Image.fromarray(mask.cpu().numpy()) + mask = mask.resize((w, h), resample=Image.BILINEAR) + mask = np.array(mask, copy=False) + + if thresh >= 0: + mask = np.array(mask > thresh, dtype=np.uint8) + mask = torch.from_numpy(mask) + else: + # for visualization and debugging, we also + # allow it to return an unmodified mask + mask = torch.from_numpy(mask * 255).to(torch.uint8) + + 
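# Worked example with invented numbers: a 28x28 mask and padding=1 give a
# 30x30 padded mask and scale = 30/28; a box (10, 20, 50, 80) expanded by that
# scale becomes roughly (8, 17, 51, 82), so the mask is resized to 44x66 and
# pasted into the image window computed below, clipped to the image bounds.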
im_mask = torch.zeros((im_h, im_w), dtype=torch.uint8) + x_0 = max(box[0], 0) + x_1 = min(box[2] + 1, im_w) + y_0 = max(box[1], 0) + y_1 = min(box[3] + 1, im_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0]) + ] + return im_mask + + +class Masker(object): + """ + Projects a set of masks in an image on the locations + specified by the bounding boxes + """ + + def __init__(self, threshold=0.5, padding=1): + self.threshold = threshold + self.padding = padding + + def forward_single_image(self, masks, boxes): + boxes = boxes.convert("xyxy") + im_w, im_h = boxes.size + res = [ + paste_mask_in_image(mask[0], box, im_h, im_w, self.threshold, self.padding) + for mask, box in zip(masks, boxes.bbox) + ] + if len(res) > 0: + res = torch.stack(res, dim=0)[:, None] + else: + res = masks.new_empty((0, 1, masks.shape[-2], masks.shape[-1])) + return res + + def __call__(self, masks, boxes): + # TODO do this properly + if isinstance(boxes, BoxList): + boxes = [boxes] + assert len(boxes) == 1, "Only single image batch supported" + result = self.forward_single_image(masks, boxes[0]) + return result + + +def make_roi_mask_post_processor(cfg): + masker = None + mask_post_processor = MaskPostProcessor(masker) + return mask_post_processor diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py new file mode 100644 index 0000000000..f84d2edd9c --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py @@ -0,0 +1,144 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +from torch.nn import functional as F + +from maskrcnn_benchmark.layers import smooth_l1_loss +from maskrcnn_benchmark.modeling.matcher import Matcher +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou +from maskrcnn_benchmark.modeling.utils import cat + + +def project_masks_on_boxes(segmentation_masks, proposals, labels_per_img, discretization_size): + """ + Given segmentation masks and the bounding boxes corresponding + to the location of the masks in the image, this function + crops and resizes the masks in the position defined by the + boxes. This prepares the masks for them to be fed to the + loss computation as the targets. 
+ + Arguments: + segmentation_masks: an instance of SegmentationMask + proposals: an instance of BoxList + """ + M = discretization_size + device = proposals.bbox.device + proposals = proposals.convert("xyxy") + assert segmentation_masks.size == proposals.size, "{}, {}".format( + segmentation_masks, proposals + ) + # TODO put the proposals on the CPU, as the representation for the + # masks is not efficient GPU-wise (possibly several small tensors for + # representing a single instance mask) + num_targets = len(labels_per_img) + proposals = proposals.bbox.to(torch.device("cpu")).float() + masks = proposals.new_zeros([num_targets, M, M]) + proposals = proposals.numpy() + labels = labels_per_img.to(torch.device("cpu")) + + for i, (segmentation_mask, proposal, label) in enumerate(zip(segmentation_masks, proposals, labels)): + # crop the masks, resize them to the desired resolution and + # then convert them to the tensor representation, + # instead of the list representation that was used + if label <= 0: + continue + mask = segmentation_mask.crop_and_resize_and_decode(proposal, (M, M)) + masks[i] = mask + masks = masks.to(device) + return masks + + +class MaskRCNNLossComputation(object): + def __init__(self, proposal_matcher, discretization_size, fg_thr, bg_thr): + """ + Arguments: + proposal_matcher (Matcher) + discretization_size (int) + """ + self.proposal_matcher = proposal_matcher + self.discretization_size = discretization_size + self.fg_thr = fg_thr + self.bg_thr = bg_thr + + def match_targets_to_proposals(self, proposal, target): + match_quality_matrix = boxlist_iou(target, proposal) + matched_vals, matches = match_quality_matrix.max(dim=0) + # Mask RCNN needs "labels" and "masks "fields for creating the targets + target = target.copy_with_fields(["labels", "masks"]) + # get the targets corresponding GT for each proposal + # NB: need to clamp the indices because we can have a single + # GT in the image, and matched_idxs can be -2, which goes + # out of bounds + matched_targets = target[matches] + return matched_vals, matched_targets + + def prepare_targets(self, proposals, targets): + labels = [] + masks = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + matched_vals, matched_targets = self.match_targets_to_proposals( + proposals_per_image, targets_per_image + ) + labels_per_image = matched_targets.get_field("labels") + labels_per_image = labels_per_image.to(dtype=torch.int64) + + # this can probably be removed, but is left here for clarity + # and completeness + neg_inds = matched_vals < self.fg_thr + labels_per_image[neg_inds] = 0 + + # mask scores are only computed on positive samples + segmentation_masks = matched_targets.get_field("masks") + + masks_per_image = project_masks_on_boxes( + segmentation_masks, proposals_per_image, labels_per_image, self.discretization_size + ) + + labels.append(labels_per_image) + masks.append(masks_per_image) + + return labels, masks + + def __call__(self, proposals, mask_logits, targets): + """ + Arguments: + proposals (list[BoxList]) + mask_logits (Tensor) + targets (list[BoxList]) + + Return: + mask_loss (Tensor): scalar tensor containing the loss + """ + labels, mask_targets = self.prepare_targets(proposals, targets) + + labels = cat(labels, dim=0) + mask_targets = cat(mask_targets, dim=0) + + positive_inds = (labels > 0).half() + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + pos_cnt = positive_inds.sum() + if pos_cnt == 0: + return mask_logits.sum() 
* 0 + labels_inds = torch.arange(0, labels.size(0)).to(labels.device) + positive_inds = positive_inds.view(-1, 1, 1).expand(-1, 28, 28) + mask_loss = F.binary_cross_entropy_with_logits( + mask_logits[labels_inds, labels], mask_targets, weight=positive_inds, reduction='sum') / ( + pos_cnt * 28 * 28) + + return mask_loss + + +def make_roi_mask_loss_evaluator(cfg): + matcher = Matcher( + cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD, + cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD, + allow_low_quality_matches=False, + ) + + loss_evaluator = MaskRCNNLossComputation( + matcher, cfg.MODEL.ROI_MASK_HEAD.RESOLUTION, cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD, + cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD + ) + + return loss_evaluator diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py new file mode 100644 index 0000000000..60891958c4 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py @@ -0,0 +1,103 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import math +import torch +from maskrcnn_benchmark.structures.bounding_box import BoxList + +from .roi_mask_feature_extractors import make_roi_mask_feature_extractor +from .roi_mask_predictors import make_roi_mask_predictor +from .inference import make_roi_mask_post_processor +from .loss import make_roi_mask_loss_evaluator + + +def keep_only_positive_boxes(boxes): + """ + Given a set of BoxList containing the `labels` field, + return a set of BoxList for which `labels > 0`. + + Arguments: + boxes (list of BoxList) + """ + + assert isinstance(boxes, (list, tuple)) + assert isinstance(boxes[0], BoxList) + assert boxes[0].has_field("labels") + positive_boxes = [] + positive_inds = [] + for boxes_per_image in boxes: + labels = boxes_per_image.get_field("labels") + inds_mask = labels > 0 + + positive_boxes.append(boxes_per_image) + positive_inds.append(inds_mask) + + return positive_boxes, positive_inds + + +def extra_proposals(proposals): + for proposal in proposals: + cur_count = len(proposal) + boxes = proposal.bbox + labels = proposal.get_field('labels') + + box_count = 180 + if cur_count > box_count: + box_count = int(math.ceil(cur_count / 45)) * 45 + new_boxes = boxes.new_zeros((box_count, 4), dtype=torch.float) + new_labels = boxes.new_full((box_count,), fill_value=-1, dtype=torch.int) + new_boxes[:cur_count] = boxes + new_labels[:cur_count] = labels + + proposal.bbox = new_boxes + proposal.add_field('labels', new_labels) + return proposals + + +class ROIMaskHead(torch.nn.Module): + def __init__(self, cfg): + super(ROIMaskHead, self).__init__() + self.cfg = cfg.clone() + self.feature_extractor = make_roi_mask_feature_extractor(cfg) + self.predictor = make_roi_mask_predictor(cfg) + self.post_processor = make_roi_mask_post_processor(cfg) + self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) + + def forward(self, features, proposals, targets=None): + """ + Arguments: + features (list[Tensor]): feature-maps from possibly several levels + proposals (list[BoxList]): proposal boxes + targets (list[BoxList], optional): the ground-truth targets. + + Returns: + x (Tensor): the result of the feature extractor + proposals (list[BoxList]): during training, the original proposals + are returned. 
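A minimal check (proposal counts are made up) of the fixed-shape padding done by extra_proposals above, which pads each image's proposals to at least 180 boxes in steps of 45; the padded rows are zero boxes with label -1:

    import math

    for cur_count in (60, 180, 200, 226):
        box_count = 180
        if cur_count > box_count:
            box_count = int(math.ceil(cur_count / 45)) * 45
        print(cur_count, '->', box_count)
    # 60 -> 180, 180 -> 180, 200 -> 225, 226 -> 270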
During testing, the predicted boxlists are returned + with the `mask` field set + losses (dict[Tensor]): During training, returns the losses for the + head. During testing, returns an empty dict. + """ + + if self.training: + # during training, only focus on positive boxes + all_proposals = proposals + proposals = extra_proposals(proposals) + + if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: + x = features + x = x[torch.cat(positive_inds, dim=0)] + else: + x = self.feature_extractor(features, proposals) + + mask_logits = self.predictor(x) + + if not self.training: + result = self.post_processor(mask_logits, proposals) + return x, result, {} + + loss_mask = self.loss_evaluator(proposals, mask_logits, targets) + + return x, all_proposals, dict(loss_mask=loss_mask) + + +def build_roi_mask_head(cfg): + return ROIMaskHead(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py new file mode 100644 index 0000000000..b1f66a8ca1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py @@ -0,0 +1,69 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from torch import nn +from torch.nn import functional as F + +from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor +from maskrcnn_benchmark.modeling.poolers import Pooler +from maskrcnn_benchmark.layers import Conv2d + + +class MaskRCNNFPNFeatureExtractor(nn.Module): + """ + Heads for FPN for classification + """ + + def __init__(self, cfg): + """ + Arguments: + num_classes (int): number of output classes + input_size (int): number of channels of the input once it's flattened + representation_size (int): size of the intermediate representation + """ + super(MaskRCNNFPNFeatureExtractor, self).__init__() + + resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES + sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + pooler = Pooler( + output_size=(resolution, resolution), + scales=scales, + sampling_ratio=sampling_ratio, + canonical_level=cfg.MODEL.ROI_MASK_HEAD.CANONICAL_LEVEL, + ) + input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS + self.pooler = pooler + + layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS + + next_feature = input_size + self.blocks = [] + for layer_idx, layer_features in enumerate(layers, 1): + layer_name = "mask_fcn{}".format(layer_idx) + module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") + nn.init.constant_(module.bias, 0) + self.add_module(layer_name, module) + next_feature = layer_features + self.blocks.append(layer_name) + + def forward(self, x, proposals): + + x = self.pooler(x, proposals) + + for layer_name in self.blocks: + x = F.relu(getattr(self, layer_name)(x)) + + return x + + +_ROI_MASK_FEATURE_EXTRACTORS = { + "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, + "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, +} + + +def make_roi_mask_feature_extractor(cfg): + func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] + return func(cfg) diff --git 
a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py new file mode 100644 index 0000000000..c24962f9f9 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py @@ -0,0 +1,44 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from torch import nn +from torch.nn import functional as F + +from maskrcnn_benchmark.layers import Conv2d +from maskrcnn_benchmark.layers import ConvTranspose2d + + +class MaskRCNNC4Predictor(nn.Module): + def __init__(self, cfg): + super(MaskRCNNC4Predictor, self).__init__() + num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES + dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] + + if cfg.MODEL.ROI_HEADS.USE_FPN: + num_inputs = dim_reduced + else: + stage_index = 4 + stage2_relative_factor = 2 ** (stage_index - 1) + res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + num_inputs = res2_out_channels * stage2_relative_factor + + self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) + self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + def forward(self, x): + x = F.relu(self.conv5_mask(x)) + return self.mask_fcn_logits(x) + + +_ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} + + +def make_roi_mask_predictor(cfg): + func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] + return func(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py new file mode 100644 index 0000000000..a62cedd598 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + +from .box_head.box_head import build_roi_box_head +from .mask_head.mask_head import build_roi_mask_head + + +class CombinedROIHeads(torch.nn.ModuleDict): + """ + Combines a set of individual heads (for box prediction or masks) into a single + head. 
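A shape sketch of the MaskRCNNC4Predictor defined above, under assumed values dim_reduced=256, num_classes=81 and 14x14 pooled mask features (all three are assumptions made for illustration):

    import torch
    from torch import nn

    conv5_mask = nn.ConvTranspose2d(256, 256, 2, 2, 0)
    mask_fcn_logits = nn.Conv2d(256, 81, 1, 1, 0)
    x = torch.randn(8, 256, 14, 14)
    logits = mask_fcn_logits(torch.relu(conv5_mask(x)))
    print(logits.shape)   # torch.Size([8, 81, 28, 28])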
+ """ + + def __init__(self, cfg, heads): + super(CombinedROIHeads, self).__init__(heads) + self.cfg = cfg.clone() + if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: + self.mask.feature_extractor = self.box.feature_extractor + + def forward(self, features, proposals, targets=None): + losses = {} + # TODO rename x to roi_box_features, if it doesn't increase memory consumption + x, detections, loss_box = self.box(features, proposals, targets) + losses.update(loss_box) + if self.cfg.MODEL.MASK_ON: + mask_features = features + # optimization: during training, if we share the feature extractor between + # the box and the mask heads, then we can reuse the features already computed + if ( + self.training + and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR + ): + mask_features = x + # During training, self.box() will return the unaltered proposals as "detections" + # this makes the API consistent during training and testing + x, detections, loss_mask = self.mask(mask_features, detections, targets) + losses.update(loss_mask) + return x, detections, losses + + +def build_roi_heads(cfg): + # individually create the heads, that will be combined together + # afterwards + roi_heads = [] + if not cfg.MODEL.RPN_ONLY: + roi_heads.append(("box", build_roi_box_head(cfg))) + if cfg.MODEL.MASK_ON: + roi_heads.append(("mask", build_roi_mask_head(cfg))) + + # combine individual heads in a single module + if roi_heads: + roi_heads = CombinedROIHeads(cfg, roi_heads) + + return roi_heads diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/__init__.py new file mode 100644 index 0000000000..b01f30cfdd --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# from .rpn import build_rpn diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/anchor_generator.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/anchor_generator.py new file mode 100644 index 0000000000..ef09c13a60 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/anchor_generator.py @@ -0,0 +1,238 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+ +import numpy as np +import torch +from torch import nn + +from maskrcnn_benchmark.structures.bounding_box import BoxList + + +class BufferList(nn.Module): + """ + Similar to nn.ParameterList, but for buffers + """ + + def __init__(self, buffers=None): + super(BufferList, self).__init__() + if buffers is not None: + self.extend(buffers) + + def extend(self, buffers): + offset = len(self) + for i, buffer in enumerate(buffers): + self.register_buffer(str(offset + i), buffer) + return self + + def __len__(self): + return len(self._buffers) + + def __iter__(self): + return iter(self._buffers.values()) + + +class AnchorGenerator(nn.Module): + """ + For a set of image sizes and feature maps, computes a set + of anchors + """ + + def __init__( + self, + sizes=(128, 256, 512), + aspect_ratios=(0.5, 1.0, 2.0), + anchor_strides=(8, 16, 32), + straddle_thresh=0, + ): + super(AnchorGenerator, self).__init__() + + if len(anchor_strides) == 1: + anchor_stride = anchor_strides[0] + cell_anchors = [ + generate_anchors(anchor_stride, sizes, aspect_ratios).float() + ] + else: + if len(anchor_strides) != len(sizes): + raise RuntimeError("FPN should have #anchor_strides == #sizes") + + cell_anchors = [ + generate_anchors( + anchor_stride, + size if type(size) is tuple else (size,), + aspect_ratios + ).float() + for anchor_stride, size in zip(anchor_strides, sizes) + ] + self.strides = anchor_strides + self.cell_anchors = BufferList(cell_anchors) + self.straddle_thresh = straddle_thresh + + def num_anchors_per_location(self): + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def grid_anchors(self, grid_sizes): + anchors = [] + for size, stride, base_anchors in zip( + grid_sizes, self.strides, self.cell_anchors + ): + grid_height, grid_width = size + device = base_anchors.device + shifts_x = torch.arange( + 0, grid_width * stride, step=stride, dtype=torch.float32, device=device + ) + shifts_y = torch.arange( + 0, grid_height * stride, step=stride, dtype=torch.float32, device=device + ) + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + anchors.append( + (shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4) + ) + + return anchors + + def add_visibility_to(self, boxlist): + image_width, image_height = boxlist.size + anchors = boxlist.bbox + if self.straddle_thresh >= 0: + inds_inside = ( + (anchors[..., 0] >= -self.straddle_thresh) + & (anchors[..., 1] >= -self.straddle_thresh) + & (anchors[..., 2] < image_width + self.straddle_thresh) + & (anchors[..., 3] < image_height + self.straddle_thresh) + ) + else: + device = anchors.device + inds_inside = torch.ones(anchors.shape[0], dtype=torch.uint8, device=device) + boxlist.add_field("visibility", inds_inside) + + def forward(self, image_list, feature_maps): + grid_height, grid_width = feature_maps[0].shape[-2:] + grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] + anchors_over_all_feature_maps = self.grid_anchors(grid_sizes) + anchors = [] + for i, (image_height, image_width) in enumerate(image_list.image_sizes): + anchors_in_image = [] + for anchors_per_feature_map in anchors_over_all_feature_maps: + boxlist = BoxList( + anchors_per_feature_map, (image_width, image_height), mode="xyxy" + ) + self.add_visibility_to(boxlist) + anchors_in_image.append(boxlist) + anchors.append(anchors_in_image) + return anchors + + +def make_anchor_generator(config): + anchor_sizes = 
config.MODEL.RPN.ANCHOR_SIZES + aspect_ratios = config.MODEL.RPN.ASPECT_RATIOS + anchor_stride = config.MODEL.RPN.ANCHOR_STRIDE + straddle_thresh = config.MODEL.RPN.STRADDLE_THRESH + + if config.MODEL.RPN.USE_FPN: + assert len(anchor_stride) == len( + anchor_sizes + ), "FPN should have len(ANCHOR_STRIDE) == len(ANCHOR_SIZES)" + else: + assert len(anchor_stride) == 1, "Non-FPN should have a single ANCHOR_STRIDE" + anchor_generator = AnchorGenerator( + anchor_sizes, aspect_ratios, anchor_stride, straddle_thresh + ) + return anchor_generator + + +def make_anchor_generator_retinanet(config): + anchor_sizes = config.RETINANET.ANCHOR_SIZES + aspect_ratios = config.RETINANET.ASPECT_RATIOS + anchor_strides = config.RETINANET.ANCHOR_STRIDES + straddle_thresh = config.RETINANET.STRADDLE_THRESH + octave = config.RETINANET.OCTAVE + scales_per_octave = config.RETINANET.SCALES_PER_OCTAVE + + assert len(anchor_strides) == len(anchor_sizes), "Only support FPN now" + new_anchor_sizes = [] + for size in anchor_sizes: + per_layer_anchor_sizes = [] + for scale_per_octave in range(scales_per_octave): + octave_scale = octave ** (scale_per_octave / float(scales_per_octave)) + per_layer_anchor_sizes.append(octave_scale * size) + new_anchor_sizes.append(tuple(per_layer_anchor_sizes)) + + anchor_generator = AnchorGenerator( + tuple(new_anchor_sizes), aspect_ratios, anchor_strides, straddle_thresh + ) + return anchor_generator + + +def generate_anchors( + stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2) +): + """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors + are centered on stride / 2, have (approximate) sqrt areas of the specified + sizes, and aspect ratios as given. + """ + return _generate_anchors( + stride, + np.array(sizes, dtype=np.float) / stride, + np.array(aspect_ratios, dtype=np.float), + ) + + +def _generate_anchors(base_size, scales, aspect_ratios): + """Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, base_size - 1, base_size - 1) window. + """ + anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1 + anchors = _ratio_enum(anchor, aspect_ratios) + anchors = np.vstack( + [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])] + ) + return torch.from_numpy(anchors) + + +def _whctrs(anchor): + """Return width, height, x center, and y center for an anchor (window).""" + w = anchor[2] - anchor[0] + 1 + h = anchor[3] - anchor[1] + 1 + x_ctr = anchor[0] + 0.5 * (w - 1) + y_ctr = anchor[1] + 0.5 * (h - 1) + return w, h, x_ctr, y_ctr + + +def _mkanchors(ws, hs, x_ctr, y_ctr): + """Given a vector of widths (ws) and heights (hs) around a center + (x_ctr, y_ctr), output a set of anchors (windows). 
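A quick sketch of the per-octave sizes that make_anchor_generator_retinanet above expands each base size into, assuming OCTAVE = 2.0 and SCALES_PER_OCTAVE = 3 (typical RetinaNet settings, used here only for illustration):

    octave, scales_per_octave = 2.0, 3
    for size in (32, 64):
        print([round(octave ** (i / float(scales_per_octave)) * size, 1)
               for i in range(scales_per_octave)])
    # [32.0, 40.3, 50.8]
    # [64.0, 80.6, 101.6]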
+ """ + ws = ws[:, np.newaxis] + hs = hs[:, np.newaxis] + anchors = np.hstack( + ( + x_ctr - 0.5 * (ws - 1), + y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), + y_ctr + 0.5 * (hs - 1), + ) + ) + return anchors + + +def _ratio_enum(anchor, ratios): + """Enumerate a set of anchors for each aspect ratio wrt an anchor.""" + w, h, x_ctr, y_ctr = _whctrs(anchor) + size = w * h + size_ratios = size / ratios + ws = np.round(np.sqrt(size_ratios)) + hs = np.round(ws * ratios) + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + + +def _scale_enum(anchor, scales): + """Enumerate a set of anchors for each scale wrt an anchor.""" + w, h, x_ctr, y_ctr = _whctrs(anchor) + ws = w * scales + hs = h * scales + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/inference.py new file mode 100644 index 0000000000..0c3e9f6432 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/inference.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + +from maskrcnn_benchmark.modeling.box_coder import BoxCoder +from maskrcnn_benchmark.structures.bounding_box import BoxList +from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms +from maskrcnn_benchmark.structures.boxlist_ops import remove_small_boxes + + +class RPNPostProcessor(torch.nn.Module): + """ + Performs post-processing on the outputs of the RPN boxes, before feeding the + proposals to the heads + """ + + def __init__( + self, + pre_nms_top_n, + post_nms_top_n, + nms_thresh, + min_size, + box_coder=None, + fpn_post_nms_top_n=None, + ): + """ + Arguments: + pre_nms_top_n (int) + post_nms_top_n (int) + nms_thresh (float) + min_size (int) + box_coder (BoxCoder) + fpn_post_nms_top_n (int) + """ + super(RPNPostProcessor, self).__init__() + self.pre_nms_top_n = pre_nms_top_n + self.post_nms_top_n = post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = min_size + + if box_coder is None: + box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) + self.box_coder = box_coder + + if fpn_post_nms_top_n is None: + fpn_post_nms_top_n = post_nms_top_n + self.fpn_post_nms_top_n = fpn_post_nms_top_n + + def add_gt_proposals(self, proposals, targets): + """ + Arguments: + proposals: list[BoxList] + targets: list[BoxList] + """ + # Get the device we're operating on + device = proposals[0].bbox.device + + gt_boxes = [target.copy_with_fields([]) for target in targets] + + # later cat of bbox requires all fields to be present for all bbox + # so we need to add a dummy for objectness that's missing + for gt_box in gt_boxes: + gt_box.add_field("objectness", torch.ones(len(gt_box), device=device)) + + proposals = [ + cat_boxlist((proposal, gt_box)) + for proposal, gt_box in zip(proposals, gt_boxes) + ] + + return proposals + + def forward_for_single_feature_map(self, anchors, objectness, box_regression): + """ + Arguments: + anchors: list[BoxList] + objectness: tensor of size N, A, H, W + box_regression: tensor of size N, A * 4, H, W + """ + device = objectness.device + N, A, H, W = objectness.shape + + # put in the same format as anchors + objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1) + objectness = objectness.sigmoid() + box_regression = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2) + 
box_regression = box_regression.reshape(N, -1, 4) + + num_anchors = A * H * W + + pre_nms_top_n = min(self.pre_nms_top_n, num_anchors) + objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True) + + batch_idx = torch.arange(N, device=device)[:, None] + box_regression = box_regression[batch_idx, topk_idx] + + image_shapes = [box.size for box in anchors] + concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) + concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx] + + proposals = self.box_coder.decode( + box_regression.view(-1, 4), concat_anchors.view(-1, 4) + ) + + proposals = proposals.view(N, -1, 4) + + result = [] + for proposal, score, im_shape in zip(proposals, objectness, image_shapes): + boxlist = BoxList(proposal, im_shape, mode="xyxy") + boxlist.add_field("objectness", score) + boxlist = boxlist.clip_to_image(remove_empty=False) + boxlist = remove_small_boxes(boxlist, self.min_size) + boxlist = boxlist_nms( + boxlist, + self.nms_thresh, + max_proposals=self.post_nms_top_n, + score_field="objectness", + ) + result.append(boxlist) + return result + + def forward(self, anchors, objectness, box_regression, targets=None): + """ + Arguments: + anchors: list[list[BoxList]] + objectness: list[tensor] + box_regression: list[tensor] + + Returns: + boxlists (list[BoxList]): the post-processed anchors, after + applying box decoding and NMS + """ + sampled_boxes = [] + num_levels = len(objectness) + anchors = list(zip(*anchors)) + for a, o, b in zip(anchors, objectness, box_regression): + sampled_boxes.append(self.forward_for_single_feature_map(a, o, b)) + + boxlists = list(zip(*sampled_boxes)) + boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] + + if num_levels > 1: + boxlists = self.select_over_all_levels(boxlists) + + # append ground-truth bboxes to proposals + if self.training and targets is not None: + boxlists = self.add_gt_proposals(boxlists, targets) + + return boxlists + + def select_over_all_levels(self, boxlists): + num_images = len(boxlists) + # different behavior during training and during testing: + # during training, post_nms_top_n is over *all* the proposals combined, while + # during testing, it is over the proposals for each image + # TODO resolve this difference and make it consistent. 
It should be per image, + # and not per batch + if self.training: + objectness = torch.cat( + [boxlist.get_field("objectness") for boxlist in boxlists], dim=0 + ) + box_sizes = [len(boxlist) for boxlist in boxlists] + post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness)) + _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0, sorted=True) + inds_mask = torch.zeros_like(objectness, dtype=torch.uint8) + inds_mask[inds_sorted] = 1 + inds_mask = inds_mask.split(box_sizes) + for i in range(num_images): + boxlists[i] = boxlists[i][inds_mask[i]] + else: + for i in range(num_images): + objectness = boxlists[i].get_field("objectness") + post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness)) + _, inds_sorted = torch.topk( + objectness, post_nms_top_n, dim=0, sorted=True + ) + boxlists[i] = boxlists[i][inds_sorted] + return boxlists + + +def make_rpn_postprocessor(config, rpn_box_coder, is_train): + fpn_post_nms_top_n = config.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN + if not is_train: + fpn_post_nms_top_n = config.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST + + pre_nms_top_n = config.MODEL.RPN.PRE_NMS_TOP_N_TRAIN + post_nms_top_n = config.MODEL.RPN.POST_NMS_TOP_N_TRAIN + if not is_train: + pre_nms_top_n = config.MODEL.RPN.PRE_NMS_TOP_N_TEST + post_nms_top_n = config.MODEL.RPN.POST_NMS_TOP_N_TEST + nms_thresh = config.MODEL.RPN.NMS_THRESH + min_size = config.MODEL.RPN.MIN_SIZE + box_selector = RPNPostProcessor( + pre_nms_top_n=pre_nms_top_n, + post_nms_top_n=post_nms_top_n, + nms_thresh=nms_thresh, + min_size=min_size, + box_coder=rpn_box_coder, + fpn_post_nms_top_n=fpn_post_nms_top_n, + ) + return box_selector diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/loss.py new file mode 100644 index 0000000000..fc16700ebd --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/loss.py @@ -0,0 +1,151 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +This file contains specific functions for computing losses on the RPN +file +""" + +import torch +from torch.nn import functional as F + +from ..balanced_positive_negative_sampler import BalancedPositiveNegativeSampler +from ..utils import cat + +from maskrcnn_benchmark.layers import smooth_l1_loss +from maskrcnn_benchmark.modeling.matcher import Matcher +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou +from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist + + +class RPNLossComputation(object): + """ + This class computes the RPN loss. 
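+    It combines a binary cross-entropy loss over the sampled objectness logits
+    with a smooth L1 loss over the box deltas of the anchors sampled as positives.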
+ """ + + def __init__(self, proposal_matcher, fg_bg_sampler, box_coder): + """ + Arguments: + proposal_matcher (Matcher) + fg_bg_sampler (BalancedPositiveNegativeSampler) + box_coder (BoxCoder) + """ + # self.target_preparator = target_preparator + self.proposal_matcher = proposal_matcher + self.fg_bg_sampler = fg_bg_sampler + self.box_coder = box_coder + + def match_targets_to_anchors(self, anchor, target): + match_quality_matrix = boxlist_iou(target, anchor) + matched_idxs = self.proposal_matcher(match_quality_matrix) + # RPN doesn't need any fields from target + # for creating the labels, so clear them all + target = target.copy_with_fields([]) + # get the targets corresponding GT for each anchor + # NB: need to clamp the indices because we can have a single + # GT in the image, and matched_idxs can be -2, which goes + # out of bounds + matched_targets = target[matched_idxs.clamp(min=0)] + matched_targets.add_field("matched_idxs", matched_idxs) + return matched_targets + + def prepare_targets(self, anchors, targets): + labels = [] + regression_targets = [] + for anchors_per_image, targets_per_image in zip(anchors, targets): + matched_targets = self.match_targets_to_anchors( + anchors_per_image, targets_per_image + ) + + matched_idxs = matched_targets.get_field("matched_idxs") + labels_per_image = matched_idxs >= 0 + labels_per_image = labels_per_image.to(dtype=torch.float16) + # discard anchors that go out of the boundaries of the image + labels_per_image[~anchors_per_image.get_field("visibility")] = -1 + + # discard indices that are between thresholds + inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS + labels_per_image[inds_to_discard] = -1 + + # compute regression targets + regression_targets_per_image = self.box_coder.encode( + matched_targets.bbox, anchors_per_image.bbox + ) + + labels.append(labels_per_image) + regression_targets.append(regression_targets_per_image) + + return labels, regression_targets + + def __call__(self, anchors, objectness, box_regression, targets): + """ + Arguments: + anchors (list[BoxList]) + objectness (list[Tensor]) + box_regression (list[Tensor]) + targets (list[BoxList]) + + Returns: + objectness_loss (Tensor) + box_loss (Tensor + """ + anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] + labels, regression_targets = self.prepare_targets(anchors, targets) + sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) + sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1) + sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1) + + sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0) + + objectness_flattened = [] + box_regression_flattened = [] + # for each feature level, permute the outputs to make them be in the + # same format as the labels. 
Note that the labels are computed for + # all feature levels concatenated, so we keep the same representation + # for the objectness and the box_regression + for objectness_per_level, box_regression_per_level in zip( + objectness, box_regression + ): + N, A, H, W = objectness_per_level.shape + objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape( + N, -1 + ) + box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W) + box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2) + box_regression_per_level = box_regression_per_level.reshape(N, -1, 4) + objectness_flattened.append(objectness_per_level) + box_regression_flattened.append(box_regression_per_level) + # concatenate on the first dimension (representing the feature levels), to + # take into account the way the labels were generated (with all feature maps + # being concatenated as well) + objectness = cat(objectness_flattened, dim=1).reshape(-1) + box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) + + labels = torch.cat(labels, dim=0) + regression_targets = torch.cat(regression_targets, dim=0) + + box_loss = smooth_l1_loss( + box_regression[sampled_pos_inds], + regression_targets[sampled_pos_inds], + beta=1.0 / 9, + size_average=False, + ) / (sampled_inds.numel()) + + objectness_loss = F.binary_cross_entropy_with_logits( + objectness[sampled_inds], labels[sampled_inds] + ) + + return objectness_loss, box_loss + + +def make_rpn_loss_evaluator(cfg, box_coder): + matcher = Matcher( + cfg.MODEL.RPN.FG_IOU_THRESHOLD, + cfg.MODEL.RPN.BG_IOU_THRESHOLD, + allow_low_quality_matches=True, + ) + + fg_bg_sampler = BalancedPositiveNegativeSampler( + cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, cfg.MODEL.RPN.POSITIVE_FRACTION + ) + + loss_evaluator = RPNLossComputation(matcher, fg_bg_sampler, box_coder) + return loss_evaluator diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet.py new file mode 100644 index 0000000000..b5c17df351 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet.py @@ -0,0 +1,213 @@ +import numpy as np +import torch +from torch import nn + +from maskrcnn_benchmark.modeling.box_coder import BoxCoder +from .retinanet_loss import make_retinanet_loss_evaluator +from .anchor_generator import make_anchor_generator_retinanet +from .retinanet_infer import make_retinanet_postprocessor +from .retinanet_detail_infer import make_retinanet_detail_postprocessor + + +class RetinaNetHead(torch.nn.Module): + """ + Adds a RetinNet head with classification and regression heads + """ + + def __init__(self, cfg): + """ + Arguments: + in_channels (int): number of channels of the input feature + num_anchors (int): number of anchors to be predicted + """ + super(RetinaNetHead, self).__init__() + # TODO: Implement the sigmoid version first. 
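+        # With the class-wise sigmoid (focal loss) formulation the background class
+        # needs no logit, so the head outputs NUM_CLASSES - 1 scores per anchor.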
+ num_classes = cfg.RETINANET.NUM_CLASSES - 1 + in_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS + num_anchors = len(cfg.RETINANET.ASPECT_RATIOS) \ + * cfg.RETINANET.SCALES_PER_OCTAVE + + cls_tower = [] + bbox_tower = [] + for i in range(cfg.RETINANET.NUM_CONVS): + cls_tower.append( + nn.Conv2d( + in_channels, + in_channels, + kernel_size=3, + stride=1, + padding=1 + ) + ) + if cfg.MODEL.USE_GN: + cls_tower.append(nn.GroupNorm(32, in_channels)) + + cls_tower.append(nn.ReLU()) + bbox_tower.append( + nn.Conv2d( + in_channels, + in_channels, + kernel_size=3, + stride=1, + padding=1 + ) + ) + if cfg.MODEL.USE_GN: + bbox_tower.append(nn.GroupNorm(32, in_channels)) + + bbox_tower.append(nn.ReLU()) + + self.add_module('cls_tower', nn.Sequential(*cls_tower)) + self.add_module('bbox_tower', nn.Sequential(*bbox_tower)) + self.cls_logits = nn.Conv2d( + in_channels, num_anchors * num_classes, kernel_size=3, stride=1, + padding=1 + ) + self.bbox_pred = nn.Conv2d( + in_channels, num_anchors * 4, kernel_size=3, stride=1, + padding=1 + ) + + # Initialization + for modules in [self.cls_tower, self.bbox_tower, self.cls_logits, + self.bbox_pred]: + for l in modules.modules(): + if isinstance(l, nn.Conv2d): + torch.nn.init.normal_(l.weight, std=0.01) + torch.nn.init.constant_(l.bias, 0) + if isinstance(l, nn.GroupNorm): + torch.nn.init.constant_(l.weight, 1.0) + torch.nn.init.constant_(l.bias, 0) + + # retinanet_bias_init + prior_prob = cfg.RETINANET.PRIOR_PROB + bias_value = -np.log((1 - prior_prob) / prior_prob) + torch.nn.init.constant_(self.cls_logits.bias, bias_value) + + def forward(self, x): + logits = [] + bbox_reg = [] + for feature in x: + logits.append(self.cls_logits(self.cls_tower(feature))) + bbox_reg.append(self.bbox_pred(self.bbox_tower(feature))) + return logits, bbox_reg + + +class RetinaNetModule(torch.nn.Module): + """ + Module for RetinaNet computation. Takes feature maps from the backbone and RPN + proposals and losses. + """ + + def __init__(self, cfg): + super(RetinaNetModule, self).__init__() + + self.cfg = cfg.clone() + + anchor_generator = make_anchor_generator_retinanet(cfg) + head = RetinaNetHead(cfg) + box_coder = BoxCoder(weights=(10., 10., 5., 5.)) + + if self.cfg.MODEL.SPARSE_MASK_ON: + box_selector_test = make_retinanet_detail_postprocessor( + cfg, 100, box_coder) + else: + box_selector_test = make_retinanet_postprocessor( + cfg, 100, box_coder) + box_selector_train = None + if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.SPARSE_MASK_ON: + box_selector_train = make_retinanet_postprocessor( + cfg, 100, box_coder) + + loss_evaluator = make_retinanet_loss_evaluator(cfg, box_coder) + + self.anchor_generator = anchor_generator + self.head = head + self.box_selector_test = box_selector_test + self.box_selector_train = box_selector_train + self.loss_evaluator = loss_evaluator + + def forward(self, images, features, targets=None): + """ + Arguments: + images (ImageList): images for which we want to compute the predictions + features (list[Tensor]): features computed from the images that are + used for computing the predictions. Each tensor in the list + correspond to different feature levels + targets (list[BoxList): ground-truth boxes present in the image (optional) + + Returns: + boxes (list[BoxList]): the predicted boxes from the RPN, one BoxList per + image. + losses (dict[Tensor]): the losses for the model during training. During + testing, it is an empty dict. 
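+        Note that, unlike the plain RPN module, the first return value is the
+        pair (anchors, detections) rather than the boxes alone (see
+        _forward_train / _forward_test below).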
+ """ + box_cls, box_regression = self.head(features) + + anchors = self.anchor_generator(images, features) + + if self.training: + return self._forward_train(anchors, box_cls, box_regression, targets) + else: + return self._forward_test(anchors, box_cls, box_regression) + + def permute_and_concat(self, list_tensor, channels): + b, c = list_tensor[0].shape[:2] + list_permute = [lt.permute(0, 2, 3, 1).reshape(b, -1, channels) for lt in list_tensor] + return torch.cat(list_permute, 1) + + def _forward_train(self, anchors, box_cls, box_regression, targets): + + N = int(box_cls[0].size(0)) + A = int(box_regression[0].size(1) / 4) + C = int(box_cls[0].size(1) / A) + anchors_size = [anchor_list[0].size for anchor_list in anchors] + anchors_bbox = [[anchor.bbox for anchor in anchor_list] for anchor_list in anchors] + anchors_per_img = [torch.cat(anchor_list, 0) for anchor_list in anchors_bbox] + + box_cls = self.permute_and_concat(box_cls, C) + box_regression = self.permute_and_concat(box_regression, 4) + + loss_box_cls, loss_box_reg = self.loss_evaluator( + anchors_per_img, box_cls, box_regression, targets, C + ) + losses = { + "loss_retina_cls": loss_box_cls, + "loss_retina_reg": loss_box_reg, + } + detections = None + if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.SPARSE_MASK_ON: + with torch.no_grad(): + detections = self.box_selector_train( + anchors_per_img, box_cls, box_regression, anchors_size, N, C + ) + + return (anchors, detections), losses + + def _forward_test(self, anchors, box_cls, box_regression): + N = int(box_cls[0].size(0)) + A = int(box_regression[0].size(1) / 4) + C = int(box_cls[0].size(1) / A) + anchors_size = [anchor_list[0].size for anchor_list in anchors] + anchors_bbox = [[anchor.bbox for anchor in anchor_list] for anchor_list in anchors] + anchors_per_img = [torch.cat(anchor_list, 0) for anchor_list in anchors_bbox] + + box_cls = self.permute_and_concat(box_cls, C) + box_regression = self.permute_and_concat(box_regression, 4) + boxes = self.box_selector_test(anchors_per_img, box_cls, box_regression, anchors_size, N, C) + ''' + if self.cfg.MODEL.RPN_ONLY: + # For end-to-end models, the RPN proposals are an intermediate state + # and don't bother to sort them in decreasing score order. For RPN-only + # models, the proposals are the final output and we return them in + # high-to-low confidence order. + inds = [ + box.get_field("objectness").sort(descending=True)[1] for box in boxes + ] + boxes = [box[ind] for box, ind in zip(boxes, inds)] + ''' + return (anchors, boxes), {} + + +def build_retinanet(cfg): + return RetinaNetModule(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_detail_infer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_detail_infer.py new file mode 100644 index 0000000000..63ce272f28 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_detail_infer.py @@ -0,0 +1,212 @@ +import torch + +from maskrcnn_benchmark.modeling.box_coder import BoxCoder +from maskrcnn_benchmark.structures.bounding_box import BoxList +from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist +from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms +from maskrcnn_benchmark.structures.boxlist_ops import remove_small_boxes + + +class RetinaNetDetailPostProcessor(torch.nn.Module): + """ + Performs post-processing on the outputs of the RetinaNet boxes. + This is only used in the testing. 
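+    It is selected when MODEL.SPARSE_MASK_ON is set; besides labels and scores it
+    records, for every detection, the feature level, anchor index and spatial
+    offset it came from (the sparse_* fields added below).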
+ """ + + def __init__( + self, + pre_nms_thresh, + pre_nms_top_n, + nms_thresh, + fpn_post_nms_top_n, + min_size, + box_coder=None, + ): + """ + Arguments: + pre_nms_thresh (float) + pre_nms_top_n (int) + nms_thresh (float) + fpn_post_nms_top_n (int) + min_size (int) + box_coder (BoxCoder) + """ + super(RetinaNetDetailPostProcessor, self).__init__() + self.pre_nms_thresh = pre_nms_thresh + self.pre_nms_top_n = pre_nms_top_n + self.nms_thresh = nms_thresh + self.fpn_post_nms_top_n = fpn_post_nms_top_n + self.min_size = min_size + + if box_coder is None: + box_coder = BoxCoder(weights=(10., 10., 5., 5.)) + self.box_coder = box_coder + + def forward_for_single_feature_map(self, anchors, box_cls, box_regression): + """ + Arguments: + anchors: list[BoxList] + box_cls: tensor of size N, A * C, H, W + box_regression: tensor of size N, A * 4, H, W + """ + device = box_cls.device + N, _, H, W = box_cls.shape + A = int(box_regression.size(1) / 4) + C = int(box_cls.size(1) / A) + + # put in the same format as anchors + box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2) + box_cls = box_cls.reshape(N, -1, C) + box_cls = box_cls.sigmoid() + + box_regression = box_regression.view(N, -1, 4, H, W) + box_regression = box_regression.permute(0, 3, 4, 1, 2) + box_regression = box_regression.reshape(N, -1, 4) + + num_anchors = A * H * W + + results = [[] for _ in range(N)] + pre_nms_thresh = self.pre_nms_thresh + candidate_inds = box_cls > self.pre_nms_thresh + if candidate_inds.sum().item() == 0: + return results + + pre_nms_top_n = candidate_inds.view(N, -1).sum(1) + pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n) + + for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, \ + per_candidate_inds, per_anchors) in enumerate(zip( + box_cls, + box_regression, + pre_nms_top_n, + candidate_inds, + anchors)): + + # Sort and select TopN + per_box_cls = per_box_cls[per_candidate_inds] + per_candidate_nonzeros = per_candidate_inds.nonzero() + per_box_loc = per_candidate_nonzeros[:, 0] + per_class = per_candidate_nonzeros[:, 1] + per_class += 1 + if per_candidate_inds.sum().item() > per_pre_nms_top_n.item(): + per_box_cls, top_k_indices = \ + per_box_cls.topk(per_pre_nms_top_n, sorted=False) + per_box_loc = per_box_loc[top_k_indices] + per_class = per_class[top_k_indices] + + detections = self.box_coder.decode( + per_box_regression[per_box_loc, :].view(-1, 4), + per_anchors.bbox[per_box_loc, :].view(-1, 4) + ) + + boxlist = BoxList(detections, per_anchors.size, mode="xyxy") + boxlist.add_field("labels", per_class) + boxlist.add_field("scores", per_box_cls) + boxlist.add_field("sparse_off", per_box_loc / 9) + boxlist.add_field("sparse_anchor_idx", per_box_loc % 9) + boxlist.add_field("sparse_anchors", + per_anchors.bbox[per_box_loc, :].view(-1, 4)) + boxlist.add_field("sparse_batch", + per_box_loc.clone().fill_(batch_idx)) + + boxlist = boxlist.clip_to_image(remove_empty=False) + boxlist = remove_small_boxes(boxlist, self.min_size) + results[batch_idx] = boxlist + + return results + + def forward(self, anchors, box_cls, box_regression, targets=None): + """ + Arguments: + anchors: list[list[BoxList]] + box_cls: list[tensor] + box_regression: list[tensor] + + Returns: + boxlists (list[BoxList]): the post-processed anchors, after + applying box decoding and NMS + """ + sampled_boxes = [] + num_levels = len(box_cls) + anchors = list(zip(*anchors)) + for a, o, b in zip(anchors, box_cls, box_regression): + sampled_boxes.append(self.forward_for_single_feature_map(a, o, b)) + + for layer in 
range(len(sampled_boxes)): + for sampled_boxes_per_image in sampled_boxes[layer]: + sampled_boxes_per_image.add_field( + 'sparse_layers', + sampled_boxes_per_image.get_field('labels').clone().fill_(layer) + ) + + boxlists = list(zip(*sampled_boxes)) + boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] + + boxlists = self.select_over_all_levels(boxlists) + + return boxlists + + def select_over_all_levels(self, boxlists): + num_images = len(boxlists) + results = [] + for i in range(num_images): + if len(boxlists[i]) == 0: + results.append([]) + continue + + scores = boxlists[i].get_field("scores") + labels = boxlists[i].get_field("labels") + boxes = boxlists[i].bbox + boxlist = boxlists[i] + result = [] + # skip the background + for j in range(1, 81): + inds = (labels == j).nonzero().view(-1) + if len(inds) == 0: + continue + + boxlist_for_class = boxlist[inds] + boxlist_for_class = boxlist_nms( + boxlist_for_class, self.nms_thresh, + score_field="scores" + ) + num_labels = len(boxlist_for_class) + result.append(boxlist_for_class) + + result = cat_boxlist(result) + number_of_detections = len(result) + + # Limit to max_per_image detections **over all classes** + if number_of_detections > self.fpn_post_nms_top_n > 0: + cls_scores = result.get_field("scores") + image_thresh, _ = torch.kthvalue( + cls_scores.cpu(), + number_of_detections - self.fpn_post_nms_top_n + 1 + ) + keep = cls_scores >= image_thresh.item() + keep = torch.nonzero(keep).squeeze(1) + result = result[keep] + results.append(result) + + return results + + +def make_retinanet_detail_postprocessor( + config, fpn_post_nms_top_n, rpn_box_coder): + pre_nms_thresh = 0.05 + pre_nms_top_n = 1000 + nms_thresh = 0.4 + fpn_post_nms_top_n = fpn_post_nms_top_n + min_size = 0 + + # nms_thresh = config.MODEL.RPN.NMS_THRESH + # min_size = config.MODEL.RPN.MIN_SIZE + box_selector = RetinaNetDetailPostProcessor( + pre_nms_thresh=pre_nms_thresh, + pre_nms_top_n=pre_nms_top_n, + nms_thresh=nms_thresh, + fpn_post_nms_top_n=fpn_post_nms_top_n, + box_coder=rpn_box_coder, + min_size=min_size + ) + return box_selector diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py new file mode 100644 index 0000000000..99932ecc38 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py @@ -0,0 +1,238 @@ +import torch + +from maskrcnn_benchmark.modeling.box_coder import BoxCoder +from maskrcnn_benchmark.structures.bounding_box import BoxList +from maskrcnn_benchmark.structures.boxlist_ops import remove_small_boxes + + +def batched_nms(boxes, scores, max_output_size, iou_threshold, scores_threshold): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Parameters + ---------- + boxes : Tensor[N, 4] + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + idxs : Tensor[N] + indices of the categories for each one of the boxes. 
+ iou_threshold : float + discards all overlapping boxes + with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices of + the elements that have been kept by NMS, sorted + in decreasing order of scores + """ + num_classes = scores.size(1) + num_boxes = scores.size(0) + multi_bboxes = boxes.reshape(1, num_boxes, -1, 4) + multi_scores = scores.reshape(1, num_boxes, num_classes) + nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch.npu_batch_nms(multi_bboxes.half(), multi_scores.half(), + scores_threshold, + iou_threshold, max_output_size, + max_output_size) + nmsed_boxes = nmsed_boxes.reshape(nmsed_boxes.shape[1:]) + nmsed_scores = nmsed_scores.reshape(nmsed_scores.shape[1]) + nmsed_classes = nmsed_classes.reshape(nmsed_classes.shape[1]) + nmsed_num = nmsed_num.item() + + return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num + + +class RetinaNetPostProcessor(torch.nn.Module): + """ + Performs post-processing on the outputs of the RetinaNet boxes. + This is only used in the testing. + """ + + def __init__( + self, + pre_nms_thresh, + pre_nms_top_n, + nms_thresh, + fpn_post_nms_top_n, + min_size, + box_coder=None, + ): + """ + Arguments: + pre_nms_thresh (float) + pre_nms_top_n (int) + nms_thresh (float) + fpn_post_nms_top_n (int) + min_size (int) + box_coder (BoxCoder) + """ + super(RetinaNetPostProcessor, self).__init__() + self.pre_nms_thresh = pre_nms_thresh + self.pre_nms_top_n = pre_nms_top_n + self.nms_thresh = nms_thresh + self.fpn_post_nms_top_n = fpn_post_nms_top_n + self.min_size = min_size + + if box_coder is None: + box_coder = BoxCoder(weights=(10., 10., 5., 5.)) + self.box_coder = box_coder + + def forward_for_single_feature_map(self, anchors, box_cls, box_regression, + pre_nms_thresh): + """ + Arguments: + anchors: list[BoxList] + box_cls: tensor of size N, A * C, H, W + box_regression: tensor of size N, A * 4, H, W + """ + device = box_cls.device + N, _, H, W = box_cls.shape + A = int(box_regression.size(1) / 4) + C = int(box_cls.size(1) / A) + + # put in the same format as anchors + box_cls = box_cls.permute(0, 2, 3, 1) + box_cls = box_cls.reshape(N, -1, C) + box_cls = box_cls.sigmoid().cpu().float() + + box_regression = box_regression.permute(0, 2, 3, 1) + box_regression = box_regression.reshape(N, -1, 4).cpu().float() + + num_anchors = A * H * W + + results = [[] for _ in range(N)] + candidate_inds = box_cls > pre_nms_thresh + if candidate_inds.sum().item() == 0: + empty_boxlists = [] + for a in anchors: + empty_boxlist = BoxList(torch.zeros(1, 4).cpu().float(), a.size) + empty_boxlist.add_field( + "labels", torch.LongTensor([-1]).cpu()) + empty_boxlist.add_field( + "scores", torch.Tensor([0]).cpu().float()) + empty_boxlists.append(empty_boxlist) + return empty_boxlists + + pre_nms_top_n = candidate_inds.reshape(N, -1).sum(1) + pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n) + + for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, + per_candidate_inds, per_anchors) in enumerate( + zip(box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors)): + # Sort and select TopN + per_box_cls = per_box_cls[per_candidate_inds] + per_box_cls, top_k_indices = \ + per_box_cls.topk(per_pre_nms_top_n, sorted=False) + + per_candidate_nonzeros = \ + per_candidate_inds.nonzero()[top_k_indices, :] + + per_box_loc = per_candidate_nonzeros[:, 0] + per_class = per_candidate_nonzeros[:, 1] + per_class += 1 + + detections = self.box_coder.decode_cpu( + per_box_regression[per_box_loc, :].view(-1, 4), + 
per_anchors.bbox[per_box_loc, :].view(-1, 4) + ) + + boxlist = BoxList(detections, per_anchors.size, mode="xyxy") + boxlist.add_field("labels", per_class) + boxlist.add_field("scores", per_box_cls) + boxlist = boxlist.clip_to_image(remove_empty=False) + boxlist = remove_small_boxes(boxlist, self.min_size) + results[batch_idx] = boxlist + + return results + + def forward(self, anchors_per_img, box_cls, box_regression, anchors_size, N, C, targets=None): + """ + Arguments: + anchors: list[list[BoxList]] + box_cls: list[tensor] + box_regression: list[tensor] + + Returns: + boxlists (list[BoxList]): the post-processed anchors, after + applying box decoding and NMS + """ + device = box_cls.device + box_cls = box_cls.sigmoid() + k = self.pre_nms_top_n * 2 + results = [] + for i in range(N): + cls_scores = box_cls[i] + bboxes = box_regression[i] + achrs = anchors_per_img[i] + anchor_size = anchors_size[i] + bboxes = self.box_coder.decode( + bboxes.view(-1, 4), + achrs.view(-1, 4) + ) + if not self.training: + k = k * 2 + scores, topk_inds = torch.topk(cls_scores.flatten(), k=k, largest=True) + labels = topk_inds % C + topk_inds = topk_inds // C + bboxes = bboxes[topk_inds] + else: + max_scores, labels = torch.max(cls_scores, 1) + topk_scores, topk_inds = torch.topk(max_scores, k=k, largest=True) + bboxes = bboxes[topk_inds] + scores = topk_scores + labels = labels[topk_inds] + if labels.numel() == 0: + result = BoxList(bboxes.new_ones([1, 4]), anchor_size, mode="xyxy") + result.add_field("scores", bboxes.new_zeros([1, ])) + result.add_field("labels", bboxes.new_full((1,), -1, dtype=torch.long)) + else: + multi_scores = scores.new_zeros([k, C]) + multi_bboxes = bboxes.new_zeros([k, 4]) + k = min(k, labels.numel()) + multi_bboxes[:k] = bboxes[:k] + indices = torch.arange(0, k).to(device) + multi_scores[indices, labels[:k]] = scores[:k] + + nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = batched_nms(multi_bboxes, multi_scores, + self.fpn_post_nms_top_n, + iou_threshold=self.nms_thresh, + scores_threshold=self.pre_nms_thresh) + nmsed_classes = nmsed_classes + 1 + result = BoxList(nmsed_boxes, anchor_size, mode="xyxy") + result.add_field("scores", nmsed_scores) + result.add_field("labels", nmsed_classes) + result = result.clip_to_image(remove_empty=False) + + result.bbox = result.bbox.to(device) + result.add_field('labels', result.get_field('labels').to(device)) + result.add_field('scores', result.get_field('scores').to(device)) + results.append(result) + + return results + + +def make_retinanet_postprocessor( + config, fpn_post_nms_top_n, rpn_box_coder): + pre_nms_thresh = 0.05 + pre_nms_top_n = config.RETINANET.PRE_NMS_TOP_N + nms_thresh = 0.4 + fpn_post_nms_top_n = fpn_post_nms_top_n + min_size = 0 + + # nms_thresh = config.MODEL.RPN.NMS_THRESH + # min_size = config.MODEL.RPN.MIN_SIZE + box_selector = RetinaNetPostProcessor( + pre_nms_thresh=pre_nms_thresh, + pre_nms_top_n=pre_nms_top_n, + nms_thresh=nms_thresh, + fpn_post_nms_top_n=fpn_post_nms_top_n, + box_coder=rpn_box_coder, + min_size=min_size + ) + return box_selector diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_loss.py new file mode 100644 index 0000000000..79bb6bcb73 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/retinanet_loss.py @@ -0,0 +1,156 @@ +""" +This file contains specific functions for computing losses on the RetinaNet +file +""" + +import 
torch + +from maskrcnn_benchmark.layers import SmoothL1Loss +from maskrcnn_benchmark.layers import AdjustSmoothL1Loss +from maskrcnn_benchmark.layers import SigmoidFocalLoss +from maskrcnn_benchmark.modeling.matcher import Matcher + + +class RetinaNetLossComputation(object): + """ + This class computes the RetinaNet loss. + """ + + def __init__(self, cfg, proposal_matcher, box_coder): + """ + Arguments: + proposal_matcher (Matcher) + box_coder (BoxCoder) + """ + self.proposal_matcher = proposal_matcher + self.box_coder = box_coder + self.num_classes = cfg.RETINANET.NUM_CLASSES - 1 + self.box_cls_loss_func = SigmoidFocalLoss( + self.num_classes, + cfg.RETINANET.LOSS_GAMMA, + cfg.RETINANET.LOSS_ALPHA + ) + if cfg.RETINANET.SELFADJUST_SMOOTH_L1: + self.regression_loss = AdjustSmoothL1Loss( + 4, + beta=cfg.RETINANET.BBOX_REG_BETA + ) + else: + self.regression_loss = SmoothL1Loss( + beta=cfg.RETINANET.BBOX_REG_BETA + ) + + def _iou(self, boxlist1, anchors): + + area1 = boxlist1.area() + area2 = (anchors[:, 2] - anchors[:, 0] + 1) * (anchors[:, 3] - anchors[:, 1] + 1) + + box1, box2 = boxlist1.bbox, anchors + + lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] + rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] + + TO_REMOVE = 1 + + wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + iou = inter / (area1[:, None] + area2 - inter) + return iou + + def match_targets_to_anchors(self, anchor, target): + match_quality_matrix = self._iou(target, anchor) + + matched_idxs = self.proposal_matcher(match_quality_matrix) + + # RPN doesn't need any fields from target + # for creating the labels, so clear them all + target = target.copy_with_fields(['labels']) + # get the targets corresponding GT for each anchor + # NB: need to clamp the indices because we can have a single + # GT in the image, and matched_idxs can be -2, which goes + # out of bounds + matched_targets = target[matched_idxs.clamp(min=0)] + matched_targets.add_field("matched_idxs", matched_idxs) + return matched_targets + + def prepare_targets(self, anchors_per_img, targets): + labels = [] + regression_targets = [] + for anchors_per_image, targets_per_image in zip(anchors_per_img, targets): + matched_targets = self.match_targets_to_anchors( + anchors_per_image, targets_per_image + ) + + matched_idxs = matched_targets.get_field("matched_idxs") + labels_per_image = matched_targets.get_field("labels").clone() + + # Background (negative examples) + bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD + labels_per_image[bg_indices] = 0 + + # discard indices that are between thresholds + # -1 will be ignored in SigmoidFocalLoss + inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS + labels_per_image[inds_to_discard] = -1 + + labels_per_image = labels_per_image.to(dtype=torch.float16) + # compute regression targets + regression_targets_per_image = self.box_coder.encode( + matched_targets.bbox, anchors_per_image + ) + labels.append(labels_per_image) + regression_targets.append(regression_targets_per_image) + + return labels, regression_targets + + def __call__(self, anchors_per_img, box_cls, box_regression, targets, C): + """ + Arguments: + anchors (list[BoxList]) + objectness (list[Tensor]) + box_regression (list[Tensor]) + targets (list[BoxList]) + + Returns: + objectness_loss (Tensor) + box_loss (Tensor + """ + labels, regression_targets = self.prepare_targets(anchors_per_img, targets) + labels = torch.cat(labels, dim=0) + regression_targets = torch.cat(regression_targets, 
dim=0) + pos_inds = labels > 0 + N = box_cls.size(0) + box_cls = box_cls.reshape(-1, C) + box_regression = box_regression.reshape(-1, 4) + pos_inds_int = pos_inds.to(dtype=torch.float16).reshape(-1, 1) + box_regression = box_regression * pos_inds_int + regression_targets = regression_targets * pos_inds_int + pos_cnt = pos_inds.sum() + + retinanet_regression_loss = self.regression_loss( + box_regression, + regression_targets, + size_average=False, + ) / (pos_cnt * 4) + labels = labels.int() + + retinanet_cls_loss = self.box_cls_loss_func( + box_cls, + labels + ) / (pos_cnt + N) + return retinanet_cls_loss, retinanet_regression_loss + + +def make_retinanet_loss_evaluator(cfg, box_coder): + matcher = Matcher( + cfg.MODEL.RPN.FG_IOU_THRESHOLD, + cfg.MODEL.RPN.BG_IOU_THRESHOLD, + allow_low_quality_matches=cfg.RETINANET.LOW_QUALITY_MATCHES, + low_quality_threshold=cfg.RETINANET.LOW_QUALITY_THRESHOLD + ) + + loss_evaluator = RetinaNetLossComputation( + cfg, matcher, box_coder + ) + return loss_evaluator diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/rpn.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/rpn.py new file mode 100644 index 0000000000..becb39fcf1 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/rpn/rpn.py @@ -0,0 +1,139 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +import torch.nn.functional as F +from torch import nn + +from maskrcnn_benchmark.modeling.box_coder import BoxCoder +from .loss import make_rpn_loss_evaluator +from .anchor_generator import make_anchor_generator +from .inference import make_rpn_postprocessor + + +class RPNHead(nn.Module): + """ + Adds a simple RPN Head with classification and regression heads + """ + + def __init__(self, in_channels, num_anchors): + """ + Arguments: + in_channels (int): number of channels of the input feature + num_anchors (int): number of anchors to be predicted + """ + super(RPNHead, self).__init__() + self.conv = nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=1, padding=1 + ) + self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1) + self.bbox_pred = nn.Conv2d( + in_channels, num_anchors * 4, kernel_size=1, stride=1 + ) + + for l in [self.conv, self.cls_logits, self.bbox_pred]: + torch.nn.init.normal_(l.weight, std=0.01) + torch.nn.init.constant_(l.bias, 0) + + def forward(self, x): + logits = [] + bbox_reg = [] + for feature in x: + t = F.relu(self.conv(feature)) + logits.append(self.cls_logits(t)) + bbox_reg.append(self.bbox_pred(t)) + return logits, bbox_reg + + +class RPNModule(torch.nn.Module): + """ + Module for RPN computation. Takes feature maps from the backbone and RPN + proposals and losses. Works for both FPN and non-FPN. 
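+    The anchor generator, RPN head, train/test box selectors and the loss
+    evaluator are all built from the config in __init__; forward simply
+    dispatches to _forward_train or _forward_test.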
+ """ + + def __init__(self, cfg): + super(RPNModule, self).__init__() + + self.cfg = cfg.clone() + + anchor_generator = make_anchor_generator(cfg) + + in_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS + head = RPNHead(in_channels, anchor_generator.num_anchors_per_location()[0]) + + rpn_box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) + + box_selector_train = make_rpn_postprocessor(cfg, rpn_box_coder, is_train=True) + box_selector_test = make_rpn_postprocessor(cfg, rpn_box_coder, is_train=False) + + loss_evaluator = make_rpn_loss_evaluator(cfg, rpn_box_coder) + + self.anchor_generator = anchor_generator + self.head = head + self.box_selector_train = box_selector_train + self.box_selector_test = box_selector_test + self.loss_evaluator = loss_evaluator + + def forward(self, images, features, targets=None): + """ + Arguments: + images (ImageList): images for which we want to compute the predictions + features (list[Tensor]): features computed from the images that are + used for computing the predictions. Each tensor in the list + correspond to different feature levels + targets (list[BoxList): ground-truth boxes present in the image (optional) + + Returns: + boxes (list[BoxList]): the predicted boxes from the RPN, one BoxList per + image. + losses (dict[Tensor]): the losses for the model during training. During + testing, it is an empty dict. + """ + objectness, rpn_box_regression = self.head(features) + anchors = self.anchor_generator(images, features) + + if self.training: + return self._forward_train(anchors, objectness, rpn_box_regression, targets) + else: + return self._forward_test(anchors, objectness, rpn_box_regression) + + def _forward_train(self, anchors, objectness, rpn_box_regression, targets): + if self.cfg.MODEL.RPN_ONLY: + # When training an RPN-only model, the loss is determined by the + # predicted objectness and rpn_box_regression values and there is + # no need to transform the anchors into predicted boxes; this is an + # optimization that avoids the unnecessary transformation. + boxes = anchors + else: + # For end-to-end models, anchors must be transformed into boxes and + # sampled into a training batch. + with torch.no_grad(): + boxes = self.box_selector_train( + anchors, objectness, rpn_box_regression, targets + ) + loss_objectness, loss_rpn_box_reg = self.loss_evaluator( + anchors, objectness, rpn_box_regression, targets + ) + losses = { + "loss_objectness": loss_objectness, + "loss_rpn_box_reg": loss_rpn_box_reg, + } + return boxes, losses + + def _forward_test(self, anchors, objectness, rpn_box_regression): + boxes = self.box_selector_test(anchors, objectness, rpn_box_regression) + if self.cfg.MODEL.RPN_ONLY: + # For end-to-end models, the RPN proposals are an intermediate state + # and don't bother to sort them in decreasing score order. For RPN-only + # models, the proposals are the final output and we return them in + # high-to-low confidence order. + inds = [ + box.get_field("objectness").sort(descending=True)[1] for box in boxes + ] + boxes = [box[ind] for box, ind in zip(boxes, inds)] + return boxes, {} + + +def build_rpn(cfg): + """ + This gives the gist of it. 
Not super important because it doesn't change as much + """ + return RPNModule(cfg) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/utils.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/utils.py new file mode 100644 index 0000000000..5b1d79a812 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/utils.py @@ -0,0 +1,16 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +Miscellaneous utility functions +""" + +import torch + + +def cat(tensors, dim=0): + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/__init__.py new file mode 100644 index 0000000000..75f40530cc --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from .build import make_optimizer +from .build import make_lr_scheduler +from .lr_scheduler import WarmupMultiStepLR diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/build.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/build.py new file mode 100644 index 0000000000..533bcdf881 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/build.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import apex + +from .lr_scheduler import WarmupMultiStepLR + + +def make_optimizer(cfg, model): + params = [] + for key, value in model.named_parameters(): + if not value.requires_grad: + continue + lr = cfg.SOLVER.BASE_LR + weight_decay = cfg.SOLVER.WEIGHT_DECAY + if "bias" in key: + lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR + weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS + params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] + + optimizer = apex.optimizers.NpuFusedSGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) + return optimizer + + +def make_lr_scheduler(cfg, optimizer): + return WarmupMultiStepLR( + optimizer, + cfg.SOLVER.STEPS, + cfg.SOLVER.GAMMA, + warmup_factor=cfg.SOLVER.WARMUP_FACTOR, + warmup_iters=cfg.SOLVER.WARMUP_ITERS, + warmup_method=cfg.SOLVER.WARMUP_METHOD, + ) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/lr_scheduler.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/lr_scheduler.py new file mode 100644 index 0000000000..fc7e9d7cd8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/solver/lr_scheduler.py @@ -0,0 +1,52 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from bisect import bisect_right + +import torch + + +# FIXME ideally this would be achieved with a CombinedLRScheduler, +# separating MultiStepLR with WarmupLR +# but the current LRScheduler design doesn't allow it +class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer, + milestones, + gamma=0.1, + warmup_factor=1.0 / 3, + warmup_iters=500, + warmup_method="linear", + last_epoch=-1, + ): + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. 
Got {}", + milestones, + ) + + if warmup_method not in ("constant", "linear"): + raise ValueError( + "Only 'constant' or 'linear' warmup_method accepted" + "got {}".format(warmup_method) + ) + self.milestones = milestones + self.gamma = gamma + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) + + def get_lr(self): + warmup_factor = 1 + if self.last_epoch < self.warmup_iters: + if self.warmup_method == "constant": + warmup_factor = self.warmup_factor + elif self.warmup_method == "linear": + alpha = self.last_epoch / self.warmup_iters + warmup_factor = self.warmup_factor * (1 - alpha) + alpha + return [ + base_lr + * warmup_factor + * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/bounding_box.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/bounding_box.py new file mode 100644 index 0000000000..9fda7ebaa0 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/bounding_box.py @@ -0,0 +1,323 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask + +# transpose +FLIP_LEFT_RIGHT = 0 +FLIP_TOP_BOTTOM = 1 + + +class BoxList(object): + """ + This class represents a set of bounding boxes. + The bounding boxes are represented as a Nx4 Tensor. + In order to uniquely determine the bounding boxes with respect + to an image, we also store the corresponding image dimensions. + They can contain extra information that is specific to each bounding box, such as + labels. 
+ """ + + def __init__(self, bbox, image_size, mode="xyxy"): + device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu") + bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device) + if bbox.ndimension() != 2: + raise ValueError( + "bbox should have 2 dimensions, got {}".format(bbox.ndimension()) + ) + if bbox.size(-1) != 4: + raise ValueError( + "last dimenion of bbox should have a " + "size of 4, got {}".format(bbox.size(-1)) + ) + if mode not in ("xyxy", "xywh"): + raise ValueError("mode should be 'xyxy' or 'xywh'") + + self.bbox = bbox + self.size = image_size # (image_width, image_height) + self.mode = mode + self.extra_fields = {} + self.size_before_pad = (0, 0) + + def add_field(self, field, field_data): + self.extra_fields[field] = field_data + + def get_field(self, field): + return self.extra_fields[field] + + def has_field(self, field): + return field in self.extra_fields + + def expand(self, pad): + target_size = list(self.bbox.size()) + target_size[0] = pad + pad_tensor = self.bbox.new_full(target_size, fill_value=0) + self.bbox = torch.cat([self.bbox, pad_tensor], dim=0) + for k, v in self.extra_fields.items(): + if isinstance(v, SegmentationMask): + v.expand(pad) + self.extra_fields[k] = v + else: + target_size = list(v.size()) + target_size[0] = pad + pad_tensor = v.new_full(target_size, fill_value=0) + v = torch.cat([v, pad_tensor], dim=0) + self.extra_fields[k] = v + + def fields(self): + return list(self.extra_fields.keys()) + + def _copy_extra_fields(self, bbox): + for k, v in bbox.extra_fields.items(): + self.extra_fields[k] = v + + def convert(self, mode): + if mode not in ("xyxy", "xywh"): + raise ValueError("mode should be 'xyxy' or 'xywh'") + if mode == self.mode: + return self + # we only have two modes, so don't need to check + # self.mode + xmin, ymin, xmax, ymax = self._split_into_xyxy() + if mode == "xyxy": + bbox = torch.cat((xmin, ymin, xmax, ymax), dim=-1) + bbox = BoxList(bbox, self.size, mode=mode) + else: + TO_REMOVE = 1 + bbox = torch.cat( + (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1 + ) + bbox = BoxList(bbox, self.size, mode=mode) + bbox._copy_extra_fields(self) + return bbox + + def _split_into_xyxy(self): + if self.mode == "xyxy": + xmin, ymin, xmax, ymax = self.bbox.split(1, dim=-1) + return xmin, ymin, xmax, ymax + elif self.mode == "xywh": + TO_REMOVE = 1 + xmin, ymin, w, h = self.bbox.split(1, dim=-1) + return ( + xmin, + ymin, + xmin + (w - TO_REMOVE).clamp(min=0), + ymin + (h - TO_REMOVE).clamp(min=0), + ) + else: + raise RuntimeError("Should not be here") + + def resize(self, size, *args, **kwargs): + """ + Returns a resized copy of this bounding box + + :param size: The requested size in pixels, as a 2-tuple: + (width, height). 
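+
+        A minimal sketch (values purely illustrative): both ratios are equal
+        here, so every coordinate is scaled by 0.5; unequal ratios scale x and
+        y independently::
+
+            boxes = BoxList(torch.tensor([[0., 0., 10., 10.]]), (800, 600))
+            boxes = boxes.resize((400, 300))  # bbox becomes [[0., 0., 5., 5.]]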
+ """ + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size)) + if ratios[0] == ratios[1]: + ratio = ratios[0] + scaled_box = self.bbox * ratio + bbox = BoxList(scaled_box, size, mode=self.mode) + for k, v in self.extra_fields.items(): + if not isinstance(v, torch.Tensor): + v = v.resize(size, *args, **kwargs) + bbox.add_field(k, v) + return bbox + + ratio_width, ratio_height = ratios + xmin, ymin, xmax, ymax = self._split_into_xyxy() + scaled_xmin = xmin * ratio_width + scaled_xmax = xmax * ratio_width + scaled_ymin = ymin * ratio_height + scaled_ymax = ymax * ratio_height + scaled_box = torch.cat( + (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1 + ) + bbox = BoxList(scaled_box, size, mode="xyxy") + for k, v in self.extra_fields.items(): + if not isinstance(v, torch.Tensor): + v = v.resize(size, *args, **kwargs) + bbox.add_field(k, v) + + return bbox.convert(self.mode) + + def resize2(self, size, *args, **kwargs): + """ + Returns a resized copy of this bounding box + + :param size: The requested size in pixels, as a 2-tuple: + (width, height). + """ + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size_before_pad)) + if ratios[0] == ratios[1]: + ratio = ratios[0] + scaled_box = self.bbox * ratio + bbox = BoxList(scaled_box, size, mode=self.mode) + for k, v in self.extra_fields.items(): + if not isinstance(v, torch.Tensor): + v = v.resize(size, *args, **kwargs) + bbox.add_field(k, v) + return bbox + + ratio_width, ratio_height = ratios + xmin, ymin, xmax, ymax = self._split_into_xyxy() + scaled_xmin = xmin * ratio_width + scaled_xmax = xmax * ratio_width + scaled_ymin = ymin * ratio_height + scaled_ymax = ymax * ratio_height + scaled_box = torch.cat( + (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1 + ) + bbox = BoxList(scaled_box, size, mode="xyxy") + for k, v in self.extra_fields.items(): + if not isinstance(v, torch.Tensor): + v = v.resize(size, *args, **kwargs) + bbox.add_field(k, v) + + return bbox.convert(self.mode) + + def transpose(self, method): + """ + Transpose bounding box (flip or rotate in 90 degree steps) + :param method: One of :py:attr:`PIL.Image.FLIP_LEFT_RIGHT`, + :py:attr:`PIL.Image.FLIP_TOP_BOTTOM`, :py:attr:`PIL.Image.ROTATE_90`, + :py:attr:`PIL.Image.ROTATE_180`, :py:attr:`PIL.Image.ROTATE_270`, + :py:attr:`PIL.Image.TRANSPOSE` or :py:attr:`PIL.Image.TRANSVERSE`. 
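+        Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM are actually supported; the
+        remaining methods raise NotImplementedError.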
+ """ + if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): + raise NotImplementedError( + "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" + ) + + image_width, image_height = self.size + xmin, ymin, xmax, ymax = self._split_into_xyxy() + if method == FLIP_LEFT_RIGHT: + TO_REMOVE = 1 + transposed_xmin = image_width - xmax - TO_REMOVE + transposed_xmax = image_width - xmin - TO_REMOVE + transposed_ymin = ymin + transposed_ymax = ymax + elif method == FLIP_TOP_BOTTOM: + transposed_xmin = xmin + transposed_xmax = xmax + transposed_ymin = image_height - ymax + transposed_ymax = image_height - ymin + + transposed_boxes = torch.cat( + (transposed_xmin, transposed_ymin, transposed_xmax, transposed_ymax), dim=-1 + ) + bbox = BoxList(transposed_boxes, self.size, mode="xyxy") + for k, v in self.extra_fields.items(): + if not isinstance(v, torch.Tensor): + v = v.transpose(method) + bbox.add_field(k, v) + return bbox.convert(self.mode) + + def mask_ind_op(self, fg_selection_mask): + for i in range(len(fg_selection_mask)): + if not fg_selection_mask[i]: + self.bbox[i] = torch.zeros([4], dtype=self.bbox[1].dtype) + + bbox = BoxList(self.bbox, self.size, self.mode) + return bbox + + def crop(self, box): + """ + Cropss a rectangular region from this bounding box. The box is a + 4-tuple defining the left, upper, right, and lower pixel + coordinate. + """ + xmin, ymin, xmax, ymax = self._split_into_xyxy() + w, h = box[2] - box[0], box[3] - box[1] + cropped_xmin = (xmin - box[0]).clamp(min=0, max=w) + cropped_ymin = (ymin - box[1]).clamp(min=0, max=h) + cropped_xmax = (xmax - box[0]).clamp(min=0, max=w) + cropped_ymax = (ymax - box[1]).clamp(min=0, max=h) + + # TODO should I filter empty boxes here? + if False: + is_empty = (cropped_xmin == cropped_xmax) | (cropped_ymin == cropped_ymax) + + cropped_box = torch.cat( + (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1 + ) + bbox = BoxList(cropped_box, (w, h), mode="xyxy") + for k, v in self.extra_fields.items(): + if not isinstance(v, torch.Tensor): + v = v.crop(box) + bbox.add_field(k, v) + return bbox.convert(self.mode) + + # Tensor-like methods + + def to(self, device, non_blocking=True): + bbox = BoxList(self.bbox.to(device, non_blocking=non_blocking), self.size, self.mode) + for k, v in self.extra_fields.items(): + if hasattr(v, "to"): + v = v.to(device, non_blocking=non_blocking) + bbox.add_field(k, v) + return bbox + + def __getitem__(self, item): + bbox = BoxList(self.bbox[item], self.size, self.mode) + for k, v in self.extra_fields.items(): + bbox.add_field(k, v[item]) + return bbox + + def __len__(self): + return self.bbox.shape[0] + + def clip_to_image(self, remove_empty=True): + TO_REMOVE = 1 + self.bbox[:, 0].clamp_(min=0, max=self.size[0] - TO_REMOVE) + self.bbox[:, 1].clamp_(min=0, max=self.size[1] - TO_REMOVE) + self.bbox[:, 2].clamp_(min=0, max=self.size[0] - TO_REMOVE) + self.bbox[:, 3].clamp_(min=0, max=self.size[1] - TO_REMOVE) + if remove_empty: + box = self.bbox + keep = (box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0]) + return self[keep] + return self + + def area(self): + if self.mode == 'xyxy': + TO_REMOVE = 1 + box = self.bbox + area = (box[:, 2] - box[:, 0] + TO_REMOVE) * (box[:, 3] - box[:, 1] + TO_REMOVE) + elif self.mode == 'xywh': + box = self.bbox + area = box[:, 2] * box[:, 3] + else: + raise RuntimeError("Should not be here") + + return area + + def copy_with_fields(self, fields): + bbox = BoxList(self.bbox, self.size, self.mode) + if not isinstance(fields, (list, tuple)): + fields = [fields] + for field 
in fields:
+            bbox.add_field(field, self.get_field(field))
+        return bbox
+
+    def __repr__(self):
+
+        s = self.__class__.__name__ + "("
+        s += "num_boxes={}, ".format(len(self))
+        s += "image_width={}, ".format(self.size[0])
+        s += "image_height={}, ".format(self.size[1])
+        s += "mode={})".format(self.mode)
+        return s
+
+
+if __name__ == "__main__":
+    bbox = BoxList([[0, 0, 10, 10], [0, 0, 5, 5]], (10, 10))
+    s_bbox = bbox.resize((5, 5))
+    print(s_bbox)
+    print(s_bbox.bbox)
+
+    t_bbox = bbox.transpose(0)
+    print(t_bbox)
+    print(t_bbox.bbox)
diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/boxlist_ops.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/boxlist_ops.py
new file mode 100644
index 0000000000..ca15693692
--- /dev/null
+++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/boxlist_ops.py
@@ -0,0 +1,125 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+import torch
+
+from .bounding_box import BoxList
+from maskrcnn_benchmark.layers import nms as _box_nms
+
+
+def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="score"):
+    """
+    Performs non-maximum suppression on a boxlist, with scores specified
+    in a boxlist field via score_field.
+
+    Arguments:
+        boxlist (BoxList)
+        nms_thresh (float)
+        max_proposals (int): if > 0, then only the top max_proposals are kept
+            after non-maximum suppression
+        score_field (str)
+    """
+    if nms_thresh <= 0:
+        return boxlist
+    mode = boxlist.mode
+    boxlist = boxlist.convert("xyxy")
+    boxes = boxlist.bbox
+    score = boxlist.get_field(score_field)
+    # cast the scores to half precision for the NPU NMS kernel
+    score_fp16 = score.type(torch.float16)
+    keep = _box_nms(boxes, score_fp16, nms_thresh)
+    if max_proposals > 0:
+        keep = keep[: max_proposals]
+    boxlist = boxlist[keep]
+    return boxlist.convert(mode)
+
+
+def remove_small_boxes(boxlist, min_size):
+    """
+    Only keep boxes with both sides >= min_size
+
+    Arguments:
+        boxlist (BoxList)
+        min_size (int)
+    """
+    # TODO maybe add an API for querying the ws / hs
+    xywh_boxes = boxlist.convert("xywh").bbox
+    # element-wise `&` is required here; Python's `and` cannot combine boolean tensors
+    keep = (xywh_boxes[:, 2] >= min_size) & (xywh_boxes[:, 3] >= min_size)
+    return boxlist[keep]
+
+
+# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+# with slight modifications
+def boxlist_iou(boxlist1, boxlist2):
+    """Compute the intersection over union of two sets of boxes.
+    The box order must be (xmin, ymin, xmax, ymax).
+
+    Arguments:
+      box1: (BoxList) bounding boxes, sized [N,4].
+      box2: (BoxList) bounding boxes, sized [M,4].
+
+    Returns:
+      (tensor) iou, sized [N,M].
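+      For example, with the +1 pixel convention used below, the 10x10 boxes
+      [0, 0, 9, 9] and [5, 0, 14, 9] intersect over a 5x10 region, so their
+      IoU is 50 / (100 + 100 - 50) = 1/3.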
+ + Reference: + https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py + """ + if boxlist1.size != boxlist2.size: + raise RuntimeError( + "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) + + N = len(boxlist1) + M = len(boxlist2) + + area1 = boxlist1.area() + area2 = boxlist2.area() + + box1, box2 = boxlist1.bbox, boxlist2.bbox + + lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] + rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] + + TO_REMOVE = 1 + + wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + iou = inter / (area1[:, None] + area2 - inter) + return iou + + +# TODO redundant, remove +def _cat(tensors, dim=0): + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def cat_boxlist(bboxes): + """ + Concatenates a list of BoxList (having the same image size) into a + single BoxList + + Arguments: + bboxes (list[BoxList]) + """ + assert isinstance(bboxes, (list, tuple)) + assert all(isinstance(bbox, BoxList) for bbox in bboxes) + + size = bboxes[0].size + assert all(bbox.size == size for bbox in bboxes) + + mode = bboxes[0].mode + assert all(bbox.mode == mode for bbox in bboxes) + + fields = set(bboxes[0].fields()) + assert all(set(bbox.fields()) == fields for bbox in bboxes) + + cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) + + for field in fields: + data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) + cat_boxes.add_field(field, data) + + return cat_boxes diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/image_list.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/image_list.py new file mode 100644 index 0000000000..c340fa1c94 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/image_list.py @@ -0,0 +1,72 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from __future__ import division + +import torch + + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size, + and storing in a field the original sizes of each image + """ + + def __init__(self, tensors, image_sizes): + """ + Arguments: + tensors (tensor) + image_sizes (list[tuple[int, int]]) + """ + self.tensors = tensors + self.image_sizes = image_sizes + + def to(self, *args, **kwargs): + cast_tensor = self.tensors.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + +def to_image_list(tensors, size_divisible=0): + """ + tensors can be an ImageList, a torch.Tensor or + an iterable of Tensors. It can't be a numpy array. 
+ When tensors is an iterable of Tensors, it pads + the Tensors with zeros so that they have the same + shape + """ + target_sizes = [[960, 1344], [1344, 960], [1344, 1344]] + if isinstance(tensors, torch.Tensor) and size_divisible > 0: + tensors = [tensors] + + if isinstance(tensors, ImageList): + return tensors + elif isinstance(tensors, torch.Tensor): + # single tensor shape can be inferred + assert tensors.dim() == 4 + image_sizes = [tensor.shape[-2:] for tensor in tensors] + return ImageList(tensors, image_sizes) + elif isinstance(tensors, (tuple, list)): + max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) + + # TODO Ideally, just remove this and let me model handle arbitrary + if size_divisible > 0: + + max_size = list(max_size) + + if max_size[1] > max_size[2]: + max_size[1], max_size[2] = 1344, 960 + else: + max_size[1], max_size[2] = 960, 1344 + + max_size = tuple(max_size) + + batch_shape = (len(tensors),) + max_size + batched_imgs = tensors[0].new(*batch_shape).zero_() + for img, pad_img in zip(tensors, batched_imgs): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + + image_sizes = [im.shape[-2:] for im in tensors] + + return ImageList(batched_imgs, image_sizes) + else: + raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/segmentation_mask.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/segmentation_mask.py new file mode 100644 index 0000000000..cb8c382e25 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/structures/segmentation_mask.py @@ -0,0 +1,250 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + +import pycocotools.mask as mask_utils + +# transpose +FLIP_LEFT_RIGHT = 0 +FLIP_TOP_BOTTOM = 1 + + +class Mask(object): + """ + This class is unfinished and not meant for use yet + It is supposed to contain the mask for an object as + a 2d tensor + """ + + def __init__(self, masks, size, mode): + self.masks = masks + self.size = size + self.mode = mode + + def transpose(self, method): + if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): + raise NotImplementedError( + "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" + ) + + width, height = self.size + if method == FLIP_LEFT_RIGHT: + dim = width + idx = 2 + elif method == FLIP_TOP_BOTTOM: + dim = height + idx = 1 + + flip_idx = list(range(dim)[::-1]) + flipped_masks = self.masks.index_select(dim, flip_idx) + return Mask(flipped_masks, self.size, self.mode) + + def crop(self, box): + w, h = box[2] - box[0], box[3] - box[1] + + cropped_masks = self.masks[:, box[1]: box[3], box[0]: box[2]] + + return Mask(cropped_masks, size=(w, h), mode=self.mode) + + def resize(self, size, *args, **kwargs): + pass + + +class Polygons(object): + """ + This class holds a set of polygons that represents a single instance + of an object mask. 
The object can be represented as a set of + polygons + """ + + def __init__(self, polygons, size, mode): + # assert isinstance(polygons, list), '{}'.format(polygons) + if isinstance(polygons, list): + polygons = [torch.as_tensor(p, dtype=torch.float32) for p in polygons] + elif isinstance(polygons, Polygons): + polygons = polygons.polygons + + self.polygons = polygons + self.size = size + self.mode = mode + + def transpose(self, method): + if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): + raise NotImplementedError( + "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" + ) + + flipped_polygons = [] + width, height = self.size + if method == FLIP_LEFT_RIGHT: + dim = width + idx = 0 + elif method == FLIP_TOP_BOTTOM: + dim = height + idx = 1 + + for poly in self.polygons: + p = poly.clone() + TO_REMOVE = 1 + p[idx::2] = dim - poly[idx::2] - TO_REMOVE + flipped_polygons.append(p) + + return Polygons(flipped_polygons, size=self.size, mode=self.mode) + + def crop_and_resize_and_decode(self, box, size): + w, h = box[2] - box[0], box[3] - box[1] + + # TODO chck if necessary + w = max(w, 1) + h = max(h, 1) + out_h, out_w = size + h_scale = out_h / max(h, 0.1) # avoid too large scale + w_scale = out_w / max(w, 0.1) + if isinstance(self.polygons, torch.Tensor): + self.polygons = [self.polygons] + cropped_polygons = [] + for poly in self.polygons: + p = poly.numpy().copy() + p[0::2] = (p[0::2] - box[0]) * w_scale # .clamp(min=0, max=w) + p[1::2] = (p[1::2] - box[1]) * h_scale # .clamp(min=0, max=h) + cropped_polygons.append(p) + rles = mask_utils.frPyObjects(cropped_polygons, out_h, out_w) + rle = mask_utils.merge(rles) + mask = mask_utils.decode(rle) + mask = torch.from_numpy(mask) + return mask + + def crop(self, box): + w, h = box[2] - box[0], box[3] - box[1] + + # TODO chck if necessary + w = max(w, 1) + h = max(h, 1) + if isinstance(self.polygons, torch.Tensor): + self.polygons = [self.polygons] + cropped_polygons = [] + for poly in self.polygons: + p = poly.clone() + p[0::2] = p[0::2] - box[0] # .clamp(min=0, max=w) + p[1::2] = p[1::2] - box[1] # .clamp(min=0, max=h) + cropped_polygons.append(p) + + return Polygons(cropped_polygons, size=(w, h), mode=self.mode) + + def resize(self, size, *args, **kwargs): + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size)) + if ratios[0] == ratios[1]: + ratio = ratios[0] + scaled_polys = [p * ratio for p in self.polygons] + return Polygons(scaled_polys, size, mode=self.mode) + + ratio_w, ratio_h = ratios + scaled_polygons = [] + for poly in self.polygons: + p = poly.clone() + p[0::2] *= ratio_w + p[1::2] *= ratio_h + scaled_polygons.append(p) + + return Polygons(scaled_polygons, size=size, mode=self.mode) + + def convert(self, mode): + width, height = self.size + if mode == "mask": + rles = mask_utils.frPyObjects( + [p.numpy() for p in self.polygons], height, width + ) + rle = mask_utils.merge(rles) + mask = mask_utils.decode(rle) + mask = torch.from_numpy(mask) + # TODO add squeeze? + return mask + + def __repr__(self): + s = self.__class__.__name__ + "(" + s += "num_polygons={}, ".format(len(self.polygons)) + s += "image_width={}, ".format(self.size[0]) + s += "image_height={}, ".format(self.size[1]) + s += "mode={})".format(self.mode) + return s + + +class SegmentationMask(object): + """ + This class stores the segmentations for all objects in the image + """ + + def __init__(self, polygons, size, mode=None): + """ + Arguments: + polygons: a list of list of lists of numbers. 
The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + object, and the third level to the polygon coordinates. + """ + assert isinstance(polygons, list) + + self.polygons = [Polygons(p, size, mode) for p in polygons] + self.size = size + self.mode = mode + + def transpose(self, method): + if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): + raise NotImplementedError( + "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" + ) + + flipped = [] + for polygon in self.polygons: + flipped.append(polygon.transpose(method)) + return SegmentationMask(flipped, size=self.size, mode=self.mode) + + def crop(self, box): + w, h = box[2] - box[0], box[3] - box[1] + cropped = [] + for polygon in self.polygons: + cropped.append(polygon.crop(box)) + + return SegmentationMask(cropped, size=(w, h), mode=self.mode) + + def mask_ind_op(self, fg_selection_mask): + for i in range(len(fg_selection_mask)): + if not fg_selection_mask[i]: + self.polygons[i].polygons = [torch.zeros([6], dtype=torch.float16)] + + return SegmentationMask(self.polygons, size=self.size, mode=self.mode) + + def resize(self, size, *args, **kwargs): + scaled = [] + for polygon in self.polygons: + scaled.append(polygon.resize(size, *args, **kwargs)) + return SegmentationMask(scaled, size=size, mode=self.mode) + + def expand(self, pad): + self.polygons += [Polygons(p, self.size, mode=None) for p in [torch.zeros([1], dtype=torch.float16)] * pad] + + def to(self, *args, **kwargs): + return self + + def __getitem__(self, item): + if isinstance(item, (int, slice)): + selected_polygons = [self.polygons[item]] + else: + # advanced indexing on a single dimension + selected_polygons = [] + if isinstance(item, torch.Tensor) and item.dtype == torch.bool: + item = item.nonzero() + item = item.squeeze(1) if item.numel() > 0 else item + item = item.tolist() + for i in item: + selected_polygons.append(self.polygons[i]) + return SegmentationMask(selected_polygons, size=self.size, mode=self.mode) + + def __iter__(self): + return iter(self.polygons) + + def __repr__(self): + s = self.__class__.__name__ + "(" + s += "num_instances={}, ".format(len(self.polygons)) + s += "image_width={}, ".format(self.size[0]) + s += "image_height={})".format(self.size[1]) + return s diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/README.md b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/README.md new file mode 100644 index 0000000000..9765b24a73 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/README.md @@ -0,0 +1,5 @@ +# Utility functions + +This folder contain utility functions that are not used in the +core library, but are useful for building models or training +code using the config system. diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/c2_model_loading.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/c2_model_loading.py new file mode 100644 index 0000000000..975f08d6b8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/c2_model_loading.py @@ -0,0 +1,146 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
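+# Illustrative note: the helpers below remap Detectron/Caffe2 blob names to
+# torchvision-style ResNet keys before they are loaded into the PyTorch model.
+# As a rough, hypothetical example of the effect of the replacement chain
+# (the authoritative mapping is the code itself):
+#     "res2_0_branch2a_w"    -> "layer1.0.conv1.weight"
+#     "res2_0_branch2a_bn_s" -> "layer1.0.bn1.weight"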
+import logging +import pickle +from collections import OrderedDict + +import torch + + +def _rename_basic_resnet_weights(layer_keys): + layer_keys = [k.replace("_", ".") for k in layer_keys] + layer_keys = [k.replace(".w", ".weight") for k in layer_keys] + layer_keys = [k.replace(".bn", "_bn") for k in layer_keys] + layer_keys = [k.replace(".b", ".bias") for k in layer_keys] + layer_keys = [k.replace("_bn.s", "_bn.scale") for k in layer_keys] + layer_keys = [k.replace(".biasranch", ".branch") for k in layer_keys] + layer_keys = [k.replace("bbox.pred", "bbox_pred") for k in layer_keys] + layer_keys = [k.replace("cls.score", "cls_score") for k in layer_keys] + layer_keys = [k.replace("res.conv1_", "conv1_") for k in layer_keys] + + # RPN / Faster RCNN + layer_keys = [k.replace(".biasbox", ".bbox") for k in layer_keys] + layer_keys = [k.replace("conv.rpn", "rpn.conv") for k in layer_keys] + layer_keys = [k.replace("rpn.bbox.pred", "rpn.bbox_pred") for k in layer_keys] + layer_keys = [k.replace("rpn.cls.logits", "rpn.cls_logits") for k in layer_keys] + + # Affine-Channel -> BatchNorm enaming + layer_keys = [k.replace("_bn.scale", "_bn.weight") for k in layer_keys] + + # Make torchvision-compatible + layer_keys = [k.replace("conv1_bn.", "bn1.") for k in layer_keys] + + layer_keys = [k.replace("res2.", "layer1.") for k in layer_keys] + layer_keys = [k.replace("res3.", "layer2.") for k in layer_keys] + layer_keys = [k.replace("res4.", "layer3.") for k in layer_keys] + layer_keys = [k.replace("res5.", "layer4.") for k in layer_keys] + + layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] + layer_keys = [k.replace(".branch2a_bn.", ".bn1.") for k in layer_keys] + layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] + layer_keys = [k.replace(".branch2b_bn.", ".bn2.") for k in layer_keys] + layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] + layer_keys = [k.replace(".branch2c_bn.", ".bn3.") for k in layer_keys] + + layer_keys = [k.replace(".branch1.", ".downsample.0.") for k in layer_keys] + layer_keys = [k.replace(".branch1_bn.", ".downsample.1.") for k in layer_keys] + + return layer_keys + + +def _rename_fpn_weights(layer_keys, stage_names): + for mapped_idx, stage_name in enumerate(stage_names, 1): + suffix = "" + if mapped_idx < 4: + suffix = ".lateral" + layer_keys = [ + k.replace("fpn.inner.layer{}.sum{}".format(stage_name, suffix), "fpn_inner{}".format(mapped_idx)) for k in + layer_keys + ] + layer_keys = [k.replace("fpn.layer{}.sum".format(stage_name), "fpn_layer{}".format(mapped_idx)) for k in + layer_keys] + + layer_keys = [k.replace("rpn.conv.fpn2", "rpn.conv") for k in layer_keys] + layer_keys = [k.replace("rpn.bbox_pred.fpn2", "rpn.bbox_pred") for k in layer_keys] + layer_keys = [ + k.replace("rpn.cls_logits.fpn2", "rpn.cls_logits") for k in layer_keys + ] + + return layer_keys + + +def _rename_weights_for_resnet(weights, stage_names): + original_keys = sorted(weights.keys()) + layer_keys = sorted(weights.keys()) + + # for X-101, rename output to fc1000 to avoid conflicts afterwards + layer_keys = [k if k != "pred_b" else "fc1000_b" for k in layer_keys] + layer_keys = [k if k != "pred_w" else "fc1000_w" for k in layer_keys] + + # performs basic renaming: _ -> . 
, etc + layer_keys = _rename_basic_resnet_weights(layer_keys) + + # FPN + layer_keys = _rename_fpn_weights(layer_keys, stage_names) + + # Mask R-CNN + layer_keys = [k.replace("mask.fcn.logits", "mask_fcn_logits") for k in layer_keys] + layer_keys = [k.replace(".[mask].fcn", "mask_fcn") for k in layer_keys] + layer_keys = [k.replace("conv5.mask", "conv5_mask") for k in layer_keys] + + # Keypoint R-CNN + layer_keys = [k.replace("kps.score.lowres", "kps_score_lowres") for k in layer_keys] + layer_keys = [k.replace("kps.score", "kps_score") for k in layer_keys] + layer_keys = [k.replace("conv.fcn", "conv_fcn") for k in layer_keys] + + # Rename for our RPN structure + layer_keys = [k.replace("rpn.", "rpn.head.") for k in layer_keys] + + key_map = {k: v for k, v in zip(original_keys, layer_keys)} + + logger = logging.getLogger(__name__) + logger.info("Remapping C2 weights") + max_c2_key_size = max([len(k) for k in original_keys if "_momentum" not in k]) + + new_weights = OrderedDict() + for k in original_keys: + v = weights[k] + if "_momentum" in k: + continue + # if 'fc1000' in k: + # continue + w = torch.from_numpy(v) + # if "bn" in k: + # w = w.view(1, -1, 1, 1) + logger.info("C2 name: {: <{}} mapped name: {}".format(k, max_c2_key_size, key_map[k])) + new_weights[key_map[k]] = w + + return new_weights + + +def _load_c2_pickled_weights(file_path): + with open(file_path, "rb") as f: + if torch._six.PY3: + data = pickle.load(f, encoding="latin1") + else: + data = pickle.load(f) + if "blobs" in data: + weights = data["blobs"] + else: + weights = data + return weights + + +_C2_STAGE_NAMES = { + "R-50": ["1.2", "2.3", "3.5", "4.2"], + "R-101": ["1.2", "2.3", "3.22", "4.2"], +} + + +def load_c2_format(cfg, f): + # TODO make it support other architectures + state_dict = _load_c2_pickled_weights(f) + conv_body = cfg.MODEL.BACKBONE.CONV_BODY + arch = conv_body.replace("-C4", "").replace("-FPN", "") + stages = _C2_STAGE_NAMES[arch] + state_dict = _rename_weights_for_resnet(state_dict, stages) + return dict(model=state_dict) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/checkpoint.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/checkpoint.py new file mode 100644 index 0000000000..048c758be5 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/checkpoint.py @@ -0,0 +1,138 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
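+# Usage sketch (illustrative only; argument values such as "output" are placeholders):
+#     checkpointer = DetectronCheckpointer(
+#         cfg, model, optimizer, scheduler, save_dir="output", save_to_disk=True
+#     )
+#     extra_data = checkpointer.load(cfg.MODEL.WEIGHT)  # resumes from last_checkpoint if present
+#     checkpointer.save("model_0010000", iteration=10000)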
+import logging +import os + +import torch + +from maskrcnn_benchmark.utils.model_serialization import load_state_dict +from maskrcnn_benchmark.utils.c2_model_loading import load_c2_format +from maskrcnn_benchmark.utils.imports import import_file +from maskrcnn_benchmark.utils.model_zoo import cache_url + + +class Checkpointer(object): + def __init__( + self, + model, + optimizer=None, + scheduler=None, + save_dir="", + save_to_disk=None, + logger=None, + ): + self.model = model + self.optimizer = optimizer + self.scheduler = scheduler + self.save_dir = save_dir + self.save_to_disk = save_to_disk + if logger is None: + logger = logging.getLogger(__name__) + self.logger = logger + + def save(self, name, **kwargs): + if not self.save_dir: + return + + if not self.save_to_disk: + return + + data = {} + data["model"] = self.model.state_dict() + if self.optimizer is not None: + data["optimizer"] = self.optimizer.state_dict() + if self.scheduler is not None: + data["scheduler"] = self.scheduler.state_dict() + data.update(kwargs) + + save_file = os.path.join(self.save_dir, "{}.pth".format(name)) + self.logger.info("Saving checkpoint to {}".format(save_file)) + torch.save(data, save_file) + self.tag_last_checkpoint(save_file) + + def load(self, f=None): + if self.has_checkpoint(): + # override argument with existing checkpoint + f = self.get_checkpoint_file() + if not f: + # no checkpoint could be found + self.logger.info("No checkpoint found. Initializing model from scratch") + return {} + self.logger.info("Loading checkpoint from {}".format(f)) + checkpoint = self._load_file(f) + self._load_model(checkpoint) + if "optimizer" in checkpoint and self.optimizer: + self.logger.info("Loading optimizer from {}".format(f)) + self.optimizer.load_state_dict(checkpoint.pop("optimizer")) + if "scheduler" in checkpoint and self.scheduler: + self.logger.info("Loading scheduler from {}".format(f)) + self.scheduler.load_state_dict(checkpoint.pop("scheduler")) + + # return any further checkpoint data + return checkpoint + + def has_checkpoint(self): + save_file = os.path.join(self.save_dir, "last_checkpoint") + return os.path.exists(save_file) + + def get_checkpoint_file(self): + save_file = os.path.join(self.save_dir, "last_checkpoint") + try: + with open(save_file, "r") as f: + last_saved = f.read() + except IOError: + # if file doesn't exist, maybe because it has just been + # deleted by a separate process + last_saved = "" + return last_saved + + def tag_last_checkpoint(self, last_filename): + save_file = os.path.join(self.save_dir, "last_checkpoint") + with open(save_file, "w") as f: + f.write(last_filename) + + def _load_file(self, f): + return torch.load(f, map_location=torch.device("cpu")) + + def _load_model(self, checkpoint): + load_state_dict(self.model, checkpoint.pop("model")) + + +class DetectronCheckpointer(Checkpointer): + def __init__( + self, + cfg, + model, + optimizer=None, + scheduler=None, + save_dir="", + save_to_disk=None, + logger=None, + ): + super(DetectronCheckpointer, self).__init__( + model, optimizer, scheduler, save_dir, save_to_disk, logger + ) + self.cfg = cfg.clone() + + def _load_file(self, f): + # catalog lookup + if f.startswith("catalog://"): + paths_catalog = import_file( + "maskrcnn_benchmark.config.paths_catalog", self.cfg.PATHS_CATALOG, True + ) + catalog_f = paths_catalog.ModelCatalog.get(f[len("catalog://"):]) + self.logger.info("{} points to {}".format(f, catalog_f)) + f = catalog_f + # download url files + if f.startswith("http"): + # if the file is a url path, 
download it and cache it + cached_f = cache_url(f) + self.logger.info("url {} cached in {}".format(f, cached_f)) + f = cached_f + # convert Caffe2 checkpoint from pkl + if f.endswith(".pkl"): + return load_c2_format(self.cfg, f) + # load native detectron.pytorch checkpoint + loaded = super(DetectronCheckpointer, self)._load_file(f) + if "model" not in loaded: + loaded = dict(model=loaded) + return loaded diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/collect_env.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/collect_env.py new file mode 100644 index 0000000000..2d0641dda6 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/collect_env.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import PIL + +from torch.utils.collect_env import get_pretty_env_info + + +def get_pil_version(): + return "\n Pillow ({})".format(PIL.__version__) + + +def collect_env_info(): + env_str = get_pretty_env_info() + env_str += get_pil_version() + return env_str diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/comm.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/comm.py new file mode 100644 index 0000000000..7ba757d11f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/comm.py @@ -0,0 +1,148 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +""" + +import os +import pickle +import tempfile +import time + +import torch +import torch.distributed as dist + + +def get_world_size(): + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + if not dist.is_initialized(): + return True + return dist.get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize between multiple processes when + using distributed training + """ + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + rank = dist.get_rank() + if world_size == 1: + return + + def _send_and_wait(r): + if rank == r: + tensor = torch.tensor(0, device="npu") + else: + tensor = torch.tensor(1, device="npu") + dist.broadcast(tensor, r) + while tensor.item() == 1: + time.sleep(1) + + _send_and_wait(0) + # now sync on the main process + _send_and_wait(1) + + +def _encode(encoded_data, data): + # gets a byte representation for the data + encoded_bytes = pickle.dumps(data) + # convert this byte string into a byte tensor + storage = torch.ByteStorage.from_buffer(encoded_bytes) + tensor = torch.ByteTensor(storage).to("cuda") + # encoding: first byte is the size and then rest is the data + s = tensor.numel() + assert s <= 255, "Can't encode data greater than 255 bytes" + # put the encoded data in encoded_data + encoded_data[0] = s + encoded_data[1: (s + 1)] = tensor + + +def _decode(encoded_data): + size = encoded_data[0] + encoded_tensor = encoded_data[1: (size + 1)].to("cpu") + return pickle.loads(bytearray(encoded_tensor.tolist())) + + +# TODO try to use tensor in shared-memory instead of serializing to disk +# this involves getting the all_gather to work +def scatter_gather(data): + """ + This function gathers data from multiple processes, and returns them + in a list, as they were obtained from each process. 
+ + This function is useful for retrieving data from multiple processes, + when launching the code with torch.distributed.launch + + Note: this function is slow and should not be used in tight loops, i.e., + do not use it in the training loop. + + Arguments: + data: the object to be gathered from multiple processes. + It must be serializable + + Returns: + result (list): a list with as many elements as there are processes, + where each element i in the list corresponds to the data that was + gathered from the process of rank i. + """ + # strategy: the main process creates a temporary directory, and communicates + # the location of the temporary directory to all other processes. + # each process will then serialize the data to the folder defined by + # the main process, and then the main process reads all of the serialized + # files and returns them in a list + if not dist.is_initialized(): + return [data] + synchronize() + # get rank of the current process + rank = dist.get_rank() + + # the data to communicate should be small + data_to_communicate = torch.empty(256, dtype=torch.uint8, device="cuda") + if rank == 0: + # manually creates a temporary directory, that needs to be cleaned + # afterwards + tmp_dir = tempfile.mkdtemp() + _encode(data_to_communicate, tmp_dir) + + synchronize() + # the main process (rank=0) communicates the data to all processes + dist.broadcast(data_to_communicate, 0) + + # get the data that was communicated + tmp_dir = _decode(data_to_communicate) + + # each process serializes to a different file + file_template = "file{}.pth" + tmp_file = os.path.join(tmp_dir, file_template.format(rank)) + torch.save(data, tmp_file) + + # synchronize before loading the data + synchronize() + + # only the master process returns the data + if rank == 0: + data_list = [] + world_size = dist.get_world_size() + for r in range(world_size): + file_path = os.path.join(tmp_dir, file_template.format(r)) + d = torch.load(file_path) + data_list.append(d) + # cleanup + os.remove(file_path) + # cleanup + os.rmdir(tmp_dir) + return data_list diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/env.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/env.py new file mode 100644 index 0000000000..1c7db32e41 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/env.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import os + +from maskrcnn_benchmark.utils.imports import import_file + + +def setup_environment(): + """Perform environment setup work. The default setup is a no-op, but this + function allows the user to specify a Python source file that performs + custom setup work that may be necessary to their computing environment. + """ + custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") + if custom_module_path: + setup_custom_environment(custom_module_path) + else: + # The default setup is a no-op + pass + + +def setup_custom_environment(custom_module_path): + """Load custom environment setup from a Python source file and run the setup + function. + """ + module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) + assert hasattr(module, "setup_environment") and callable( + module.setup_environment + ), ( + "Custom environment module defined in {} does not have the " + "required callable attribute 'setup_environment'." 
+ ).format( + custom_module_path + ) + module.setup_environment() + + +# Force environment setup when this module is imported +setup_environment() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/imports.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/imports.py new file mode 100644 index 0000000000..53c0ed1681 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/imports.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch + +if torch._six.PY3: + import importlib + import importlib.util + import sys + + + # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa + def import_file(module_name, file_path, make_importable=False): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if make_importable: + sys.modules[module_name] = module + return module +else: + import imp + + + def import_file(module_name, file_path, make_importable=None): + module = imp.load_source(module_name, file_path) + return module diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/logger.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/logger.py new file mode 100644 index 0000000000..c2b68f4b1f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/logger.py @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import logging +import os +import sys + + +def setup_logger(name, save_dir, distributed_rank): + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + # don't log results for the non-master process + if distributed_rank > 0: + return logger + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(logging.DEBUG) + formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") + ch.setFormatter(formatter) + logger.addHandler(ch) + + if save_dir: + fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) + fh.setLevel(logging.DEBUG) + fh.setFormatter(formatter) + logger.addHandler(fh) + + return logger diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py new file mode 100644 index 0000000000..c314e13117 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py @@ -0,0 +1,63 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from collections import defaultdict +from collections import deque + +import torch + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. 
+ """ + + def __init__(self, window_size=20): + self.deque = deque(maxlen=window_size) + self.series = [] + self.total = 0.0 + self.count = 0 + + def update(self, value): + self.deque.append(value) + self.series.append(value) + self.count += 1 + self.total += value + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque)) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + return object.__getattr__(self, attr) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) + ) + return self.delimiter.join(loss_str) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/miscellaneous.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/miscellaneous.py new file mode 100644 index 0000000000..db9a8b3679 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/miscellaneous.py @@ -0,0 +1,11 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import errno +import os + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_serialization.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_serialization.py new file mode 100644 index 0000000000..d1902e49d8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_serialization.py @@ -0,0 +1,78 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +from collections import OrderedDict +import logging + +import torch + + +def align_and_update_state_dicts(model_state_dict, loaded_state_dict): + """ + Strategy: suppose that the models that we will create will have prefixes appended + to each of its keys, for example due to an extra level of nesting that the original + pre-trained weights from ImageNet won't contain. For example, model.state_dict() + might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains + res2.conv1.weight. We thus want to match both parameters together. + For that, we look for each model weight, look among all loaded keys if there is one + that is a suffix of the current weight name, and use it if that's the case. + If multiple matches exist, take the one with longest size + of the corresponding name. For example, for the same model as before, the pretrained + weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, + we want to match backbone[0].body.conv1.weight to conv1.weight, and + backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
+ """ + current_keys = sorted(list(model_state_dict.keys())) + loaded_keys = sorted(list(loaded_state_dict.keys())) + # get a matrix of string matches, where each (i, j) entry correspond to the size of the + # loaded_key string, if it matches + match_matrix = [ + len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys + ] + match_matrix = torch.as_tensor(match_matrix).view( + len(current_keys), len(loaded_keys) + ) + max_match_size, idxs = match_matrix.max(1) + # remove indices that correspond to no-match + idxs[max_match_size == 0] = -1 + + # used for logging + max_size = max([len(key) for key in current_keys]) if current_keys else 1 + max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 + log_str_template = "{: <{}} loaded from {: <{}} of shape {}" + logger = logging.getLogger(__name__) + for idx_new, idx_old in enumerate(idxs.tolist()): + if idx_old == -1: + continue + key = current_keys[idx_new] + key_old = loaded_keys[idx_old] + model_state_dict[key] = loaded_state_dict[key_old] + logger.info( + log_str_template.format( + key, + max_size, + key_old, + max_size_loaded, + tuple(loaded_state_dict[key_old].shape), + ) + ) + + +def strip_prefix_if_present(state_dict, prefix): + keys = sorted(state_dict.keys()) + if not all(key.startswith(prefix) for key in keys): + return state_dict + stripped_state_dict = OrderedDict() + for key, value in state_dict.items(): + stripped_state_dict[key.replace(prefix, "")] = value + return stripped_state_dict + + +def load_state_dict(model, loaded_state_dict): + model_state_dict = model.state_dict() + # if the state_dict comes from a model that was wrapped in a + # DataParallel or DistributedDataParallel during serialization, + # remove the "module" prefix before performing the matching + loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") + align_and_update_state_dicts(model_state_dict, loaded_state_dict) + + # use strict loading + model.load_state_dict(model_state_dict) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_zoo.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_zoo.py new file mode 100644 index 0000000000..8f073957e8 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/model_zoo.py @@ -0,0 +1,56 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import os +import sys + +from torch.hub import _download_url_to_file +from torch.hub import urlparse +from torch.hub import HASH_REGEX + +from maskrcnn_benchmark.utils.comm import is_main_process +from maskrcnn_benchmark.utils.comm import synchronize + + +# very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py +# but with a few improvements and modifications +def cache_url(url, model_dir=None, progress=True): + r"""Loads the Torch serialized object at the given URL. + If the object is already present in `model_dir`, it's deserialized and + returned. The filename part of the URL should follow the naming convention + ``filename-.ext`` where ```` is the first eight or more + digits of the SHA256 hash of the contents of the file. The hash is used to + ensure unique names and to verify the contents of the file. + The default value of `model_dir` is ``$TORCH_HOME/models`` where + ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be + overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
+ Args: + url (string): URL of the object to download + model_dir (string, optional): directory in which to save the object + progress (bool, optional): whether or not to display a progress bar to stderr + Example: + >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') + """ + if model_dir is None: + torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) + model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) + if not os.path.exists(model_dir): + os.makedirs(model_dir) + parts = urlparse(url) + filename = os.path.basename(parts.path) + if filename == "model_final.pkl": + # workaround as pre-trained Caffe2 models from Detectron have all the same filename + # so make the full path the filename by replacing / with _ + filename = parts.path.replace("/", "_") + cached_file = os.path.join(model_dir, filename) + if not os.path.exists(cached_file) and is_main_process(): + sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) + hash_prefix = HASH_REGEX.search(filename) + if hash_prefix is not None: + hash_prefix = hash_prefix.group(1) + # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, + # which matches the hash PyTorch uses. So we skip the hash matching + # if the hash_prefix is less than 6 characters + if len(hash_prefix) < 6: + hash_prefix = None + _download_url_to_file(url, cached_file, hash_prefix, progress=progress) + synchronize() + return cached_file diff --git a/PyTorch/contrib/cv/detection/RetinaMask/setup.py b/PyTorch/contrib/cv/detection/RetinaMask/setup.py new file mode 100644 index 0000000000..acb817983f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/setup.py @@ -0,0 +1,69 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
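+# Build note (assumed, following the upstream maskrcnn-benchmark workflow): the
+# extension is typically compiled in place with `python setup.py build develop`.
+# The CUDA sources are commented out in get_extensions(), so only the C++/CPU
+# extension is built here.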
+# !/usr/bin/env python + +import glob +import os + +import torch +from setuptools import find_packages +from setuptools import setup +from torch.utils.cpp_extension import CUDA_HOME +from torch.utils.cpp_extension import CppExtension +from torch.utils.cpp_extension import CUDAExtension + +requirements = ["torch", "torchvision"] + + +def get_extensions(): + this_dir = os.path.dirname(os.path.abspath(__file__)) + extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") + + main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) + source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) + # source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) + + sources = main_file + source_cpu + extension = CppExtension + + extra_compile_args = {"cxx": []} + define_macros = [] + + if torch.cuda.is_available() and CUDA_HOME is not None: + extension = CUDAExtension + # sources += source_cuda + define_macros += [("WITH_CUDA", None)] + extra_compile_args["nvcc"] = [ + "-DCUDA_HAS_FP16=1", + "-D__CUDA_NO_HALF_OPERATORS__", + "-D__CUDA_NO_HALF_CONVERSIONS__", + "-D__CUDA_NO_HALF2_OPERATORS__", + ] + + sources = [os.path.join(extensions_dir, s) for s in sources] + + include_dirs = [extensions_dir] + + ext_modules = [ + extension( + "maskrcnn_benchmark._C", + sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + + return ext_modules + + +setup( + name="maskrcnn_benchmark", + version="0.1", + author="fmassa", + url="https://github.com/facebookresearch/maskrnn-benchmark", + description="object detection in pytorch", + packages=find_packages(exclude=("configs", "tests",)), + # install_requires=requirements, + ext_modules=get_extensions(), + cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, +) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/env_npu.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/env_npu.sh new file mode 100644 index 0000000000..1554185f16 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/env_npu.sh @@ -0,0 +1,77 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export 
OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export TRI_COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh new file mode 100644 index 0000000000..93cd0d6886 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ 
-0,0 +1,130 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="RetinaMask" +# 训练batch_size +batch_size=8 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +weight_path="" + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi + if [[ $para == --weight_path* ]];then + weight_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ weight_path == "" ]];then + echo "[Error] para \"weight_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export RETINAMASK_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +KERNEL_NUM=$(($(nproc)/8)) +for i in $(seq 0 0) +do + if [ $(uname -m) = "aarch64" ] + then + PID_START=$((KERNEL_NUM * i)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + taskset -c $PID_START-$PID_END \ + python3.7.5 -u tools/test_net.py \ + --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ + --weight ${weight_path} \ + SOLVER.IMS_PER_BATCH ${batch_size}\ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7.5 -u tools/test_net.py \ + --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ + --weight ${weight_path} \ + SOLVER.IMS_PER_BATCH ${batch_size} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" + +#输出训练精度,需要模型审视修改 +bbox_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $7}' | awk -F ')' '{print $1}'` +segm_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $20}' | awk -F ')' '{print $1}'` +#打印,不需要修改 +echo "Final bbox mAp : ${bbox_map}" +echo "Final segm mAp : ${segm_map}" +echo "E2E Eval Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BBOXMAP = ${bbox_map}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "SEGMMAP = ${segm_map}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2EEvalTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh new file mode 100644 index 0000000000..9a7f229b94 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="RetinaMask" +# 训练batch_size +batch_size=8 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +# 训练最大iter数 +max_iter=360000 +lr_steps=(240000, 320000) + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export RETINAMASK_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +KERNEL_NUM=$(($(nproc)/8)) +for i in $(seq 0 0) +do + if [ $(uname -m) = "aarch64" ] + then + PID_START=$((KERNEL_NUM * i)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + taskset -c $PID_START-$PID_END \ + python3.7.5 -u tools/train_net.py \ + --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ + --skip-test SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} SOLVER.STEPS ${lr_steps} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7.5 -u tools/train_net.py \ + --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ + --skip-test SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} SOLVER.STEPS ${lr_steps} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - 
$start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_fps=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'step_fps'| awk '{print $6}'` +FPS=`awk -v x=${batch_size} -v y=${step_fps} 'BEGIN{printf "%.2f\n",x*y}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#最后一个迭代loss值 +ActualLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'loss_retina_cls' | tail -n -1 | awk '{print $10}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh new file mode 100644 index 0000000000..a1d915fb85 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="RetinaMask" +# 训练batch_size +batch_size=64 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +# 训练最大iter数 +max_iter=45000 +lr_steps=(30000, 40000) + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export RETINAMASK_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +get_lscpu_value() { + awk -F: "(\$1 == \"${1}\"){gsub(/ /, \"\", \$2); print \$2; found=1} END{exit found!=1}" +} + +lscpu_out=$(lscpu) +n_sockets=$(get_lscpu_value 'Socket(s)' <<< "${lscpu_out}") +n_cores_per_socket=$(get_lscpu_value 'Core(s) per socket' <<< "${lscpu_out}") + +echo "num_sockets = ${n_sockets} cores_per_socket=${n_cores_per_socket}" + +python3.7 -u -m bind_pyt \ + --nsockets_per_node ${n_sockets} \ + --ncores_per_socket ${n_cores_per_socket} \ + --master_addr $(hostname -I |awk '{print $1}') \ + --no_hyperthreads \ + --no_membind "$@" ./tools/train_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml \ + --data_path ${data_path} --skip-test \ + SOLVER.IMS_PER_BATCH ${batch_size} \ + SOLVER.MAX_ITER ${max_iter} SOLVER.STEPS ${lr_steps} SOLVER.BASE_LR 0.02 \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_fps=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'step_fps'| awk '{print $6}'` +FPS=`awk -v x=${batch_size} -v y=${step_fps} 'BEGIN{printf "%.2f\n",x*y}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#最后一个迭代loss值 +ActualLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'loss_retina_cls' | tail -n -1 | awk '{print $10}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh new file mode 100644 index 
0000000000..f47c3f058f --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="RetinaMask" +# 训练batch_size +batch_size=8 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +# 训练最大iter数 +max_iter=401 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export RETINAMASK_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +KERNEL_NUM=$(($(nproc)/8)) +for i in $(seq 0 0) +do + if [ $(uname -m) = "aarch64" ] + then + PID_START=$((KERNEL_NUM * i)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + taskset -c $PID_START-$PID_END \ + python3.7.5 -u tools/train_net.py \ + --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ + --skip-test \ + SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7.5 -u tools/train_net.py \ + --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ + --skip-test \ + SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_fps=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'step_fps'| awk '{print $6}'` +FPS=`awk -v x=${batch_size} -v y=${step_fps} 'BEGIN{printf "%.2f\n",x*y}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#最后一个迭代loss值 +ActualLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'loss_retina_cls' | tail -n -1 | awk '{print $10}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh new file mode 100644 index 0000000000..3aefdfc912 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="RetinaMask" +# 训练batch_size +batch_size=64 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +# 训练最大iter数 +max_iter=201 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +echo ${pwd} + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +# 变量 +export RETINAMASK_DATASETS=${data_path} +export PYTHONPATH=./:$PYTHONPATH + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh + export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' +fi + +get_lscpu_value() { + awk -F: "(\$1 == \"${1}\"){gsub(/ /, \"\", \$2); print \$2; found=1} END{exit found!=1}" +} + +lscpu_out=$(lscpu) +n_sockets=$(get_lscpu_value 'Socket(s)' <<< "${lscpu_out}") +n_cores_per_socket=$(get_lscpu_value 'Core(s) per socket' <<< "${lscpu_out}") + +echo "num_sockets = ${n_sockets} cores_per_socket=${n_cores_per_socket}" + +python3.7 -u -m bind_pyt \ + --nsockets_per_node ${n_sockets} \ + --ncores_per_socket ${n_cores_per_socket} \ + --master_addr $(hostname -I |awk '{print $1}') \ + --no_hyperthreads \ + --no_membind "$@" ./tools/train_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml \ + --skip-test --data_path ${data_path} \ + SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} SOLVER.BASE_LR 0.02 \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_fps=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'step_fps'| awk '{print $6}'` +FPS=`awk -v x=${batch_size} -v y=${step_fps} 'BEGIN{printf "%.2f\n",x*y}'` +#打印,不需要修改 +echo "Final Performance FPS : ${FPS}" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#最后一个迭代loss值 +ActualLoss=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep 'loss_retina_cls' | tail -n -1 | awk '{print $10}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/detection/RetinaMask/tools/parse_log.py b/PyTorch/contrib/cv/detection/RetinaMask/tools/parse_log.py new file mode 100644 index 0000000000..9cd37d3ea8 --- /dev/null +++ 
b/PyTorch/contrib/cv/detection/RetinaMask/tools/parse_log.py @@ -0,0 +1,72 @@ +import re +import argparse +import numpy as np + + +def parse(log_path): + with open(log_path) as f: + text = f.read() + + float_pattern = r'\d+\.\d+' + mean_pattern = r'AdjustSmoothL1\(mean\): ({}), ({}), ({}), ({})'.format( + float_pattern, float_pattern, float_pattern, float_pattern) + var_pattern = r'AdjustSmoothL1\(var\): ({}), ({}), ({}), ({})'.format( + float_pattern, float_pattern, float_pattern, float_pattern) + pattern = mean_pattern + r'.*\n.*' + var_pattern + r'.*\n.*' + \ + r'iter: (\d+) ' + \ + r'loss: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ + r'loss_retina_cls: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ + r'loss_retina_reg: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ + r'loss_mask: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ + r'time: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ + r'data: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ + r'lr: ({}) '.format(float_pattern) + \ + r'max mem: (\d+)' + reg_exp = re.compile(pattern) + + headers = ['smooth_l1_mean', 'smooth_l1_var', 'iter', 'loss', + 'loss_retina_cls', 'loss_retina_reg', 'loss_mask', + 'time', 'data', 'lr', 'max_mem'] + + iterations = list() + means = list() + variations = list() + running_losses = list() + for args in reg_exp.findall(text): + mean = [float(v) for v in args[0:4]] + var = [float(v) for v in args[5:8]] + iteration = int(args[8]) + point_loss = float(args[9]) + running_loss = float(args[10]) + point_loss_retina_cls = float(args[11]) + running_loss_retina_cls = float(args[12]) + point_loss_retina_reg = float(args[13]) + running_loss_retina_reg = float(args[14]) + point_loss_mask = float(args[15]) + running_loss_mask = float(args[16]) + point_time = float(args[17]) + running_time = float(args[18]) + point_data = float(args[19]) + running_data = float(args[20]) + lr = float(args[21]) + max_mem = int(args[22]) + + iterations.append(iteration) + means.append(mean) + variations.append(var) + running_losses.append(running_loss) + + iterations = np.asarray(iterations) + means = np.asarray(means) + variations = np.asarray(variations) + running_losses = np.asarray(running_losses) + print(iterations) + print(means) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Parse log file') + parser.add_argument('log_path', metavar='P', help='path to the log file') + args = parser.parse_args() + + parse(args.log_path) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/tools/test_net.py b/PyTorch/contrib/cv/detection/RetinaMask/tools/test_net.py new file mode 100644 index 0000000000..1c6a4ba668 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/tools/test_net.py @@ -0,0 +1,108 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip + +import argparse +import os + +import torch +from maskrcnn_benchmark.config import cfg +from maskrcnn_benchmark.data import make_data_loader +from maskrcnn_benchmark.engine.inference import inference +from maskrcnn_benchmark.modeling.detector import build_detection_model +from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer +from maskrcnn_benchmark.utils.collect_env import collect_env_info +from maskrcnn_benchmark.utils.comm import synchronize, get_rank +from maskrcnn_benchmark.utils.logger import setup_logger +from maskrcnn_benchmark.utils.miscellaneous import mkdir + + +def main(): + parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") + parser.add_argument( + "--config-file", + default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--local_rank", type=int, default=0) + parser.add_argument("--data_path", type=str, default='') + parser.add_argument("--weight_path", type=str, default='') + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + + args = parser.parse_args() + + num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + distributed = num_gpus > 1 + + if distributed: + torch.cuda.set_device(args.local_rank) + torch.distributed.init_process_group( + backend="nccl", init_method="env://" + ) + + cfg.DATASETS.DATA_DIR = args.data_path + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + + save_dir = "" + logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) + logger.info("Using {} GPUs".format(num_gpus)) + logger.info(cfg) + + logger.info("Collecting env info (might take some time)") + logger.info("\n" + collect_env_info()) + + model = build_detection_model(cfg) + model.to(cfg.MODEL.DEVICE) + + checkpointer = DetectronCheckpointer(cfg, model) + _ = checkpointer.load(args.weight_path) + + iou_types = ("bbox",) + if cfg.MODEL.MASK_ON: + iou_types = iou_types + ("segm",) + output_folders = [None] * len(cfg.DATASETS.TEST) + if cfg.OUTPUT_DIR: + dataset_names = cfg.DATASETS.TEST + for idx, dataset_name in enumerate(dataset_names): + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) + mkdir(output_folder) + output_folders[idx] = output_folder + data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed) + for output_folder, data_loader_val in zip(output_folders, data_loaders_val): + inference( + model, + data_loader_val, + iou_types=iou_types, + # box_only=cfg.MODEL.RPN_ONLY, + box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY, + device=cfg.MODEL.DEVICE, + expected_results=cfg.TEST.EXPECTED_RESULTS, + expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, + output_folder=output_folder, + ) + synchronize() + + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/tools/train_net.py b/PyTorch/contrib/cv/detection/RetinaMask/tools/train_net.py new file mode 100644 index 0000000000..4dbd349fcc --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/tools/train_net.py @@ -0,0 +1,188 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip + +import argparse +import os +from apex import amp + +import torch.npu +import torch +from maskrcnn_benchmark.config import cfg +from maskrcnn_benchmark.data import make_data_loader +from maskrcnn_benchmark.solver import make_lr_scheduler +from maskrcnn_benchmark.solver import make_optimizer +from maskrcnn_benchmark.engine.inference import inference +from maskrcnn_benchmark.engine.trainer import do_train +from maskrcnn_benchmark.modeling.detector import build_detection_model +from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer +from maskrcnn_benchmark.utils.collect_env import collect_env_info +from maskrcnn_benchmark.utils.comm import synchronize, get_rank +from maskrcnn_benchmark.utils.logger import setup_logger +from maskrcnn_benchmark.utils.miscellaneous import mkdir + + +def train(cfg, local_rank, distributed, device): + model = build_detection_model(cfg) + + model = model.to(device) + + optimizer = make_optimizer(cfg, model) + if cfg.AMP: + model, optimizer = amp.initialize(model, optimizer, opt_level=cfg.OPT_LEVEL, loss_scale=cfg.LOSS_SCALE_VALUE, + combine_grad=True) + scheduler = make_lr_scheduler(cfg, optimizer) + + if distributed: + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[local_rank], broadcast_buffers=False + ) + + arguments = {} + arguments["iteration"] = 0 + + output_dir = cfg.OUTPUT_DIR + + save_to_disk = get_rank() == 0 + checkpointer = DetectronCheckpointer( + cfg, model, optimizer, scheduler, output_dir, save_to_disk + ) + extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT) + arguments.update(extra_checkpoint_data) + + data_loader = make_data_loader( + cfg, + is_train=True, + is_distributed=distributed, + start_iter=arguments["iteration"] + ) + + checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD + + do_train( + model, + data_loader, + optimizer, + scheduler, + checkpointer, + device, + checkpoint_period, + arguments, + cfg.AMP, + local_rank + ) + + return model + + +def test(cfg, model, distributed): + if distributed: + model = model.module + torch.cuda.empty_cache() # TODO check if it helps + iou_types = ("bbox",) + if cfg.MODEL.MASK_ON: + iou_types = iou_types + ("segm",) + output_folders = [None] * len(cfg.DATASETS.TEST) + if cfg.OUTPUT_DIR: + dataset_names = cfg.DATASETS.TEST + for idx, dataset_name in enumerate(dataset_names): + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) + mkdir(output_folder) + output_folders[idx] = output_folder + data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed) + for output_folder, data_loader_val in zip(output_folders, data_loaders_val): + inference( + model, + data_loader_val, + iou_types=iou_types, + # box_only=cfg.MODEL.RPN_ONLY, + box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY, + device=cfg.MODEL.DEVICE, + expected_results=cfg.TEST.EXPECTED_RESULTS, + 
expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, + output_folder=output_folder, + ) + synchronize() + + +def main(): + parser = argparse.ArgumentParser(description="PyTorch Object Detection Training") + parser.add_argument( + "--config-file", + default="", + metavar="FILE", + help="path to config file", + type=str, + ) + parser.add_argument("--local_rank", type=int, default=0) + parser.add_argument("--data_path", type=str, default='') + parser.add_argument( + "--skip-test", + dest="skip_test", + help="Do not test the final model", + action="store_true", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + + args = parser.parse_args() + + num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.distributed = num_gpus > 1 + device = 'npu:{}'.format(args.local_rank) + torch.npu.set_device(device) + if args.distributed: + host_addr_full = 'tcp://' + os.environ["MASTER_ADDR"] + ':' + os.environ["MASTER_PORT"] + rank = int(os.environ['RANK']) + world_size = int(os.environ["WORLD_SIZE"]) + print(host_addr_full, rank, world_size) + # init_method = host_addr_full, + torch.distributed.init_process_group(backend=cfg.DIST_BACKEND, rank=rank, world_size=world_size) + + cfg.DATASETS.DATA_DIR = args.data_path + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + + output_dir = cfg.OUTPUT_DIR + if output_dir: + mkdir(output_dir) + + logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank()) + logger.info("Using {} GPUs".format(num_gpus)) + logger.info(args) + + logger.info("Collecting env info (might take some time)") + logger.info("\n" + collect_env_info()) + + logger.info("Loaded configuration file {}".format(args.config_file)) + with open(args.config_file, "r") as cf: + config_str = "\n" + cf.read() + logger.info(config_str) + logger.info("Running with config:\n{}".format(cfg)) + + model = train(cfg, args.local_rank, args.distributed, device) + + if not args.skip_test: + test(cfg, model, args.distributed) + + +if __name__ == "__main__": + main() -- Gitee From b22feec421f6edfd64712a22ae6417f1c52ec73b Mon Sep 17 00:00:00 2001 From: Savion_G Date: Sat, 9 Apr 2022 18:31:42 +0800 Subject: [PATCH 02/20] fix something --- .../contrib/cv/detection/RetinaMask/README.md | 12 ++++++------ .../maskrcnn_benchmark/config/defaults.py | 2 +- .../maskrcnn_benchmark/engine/inference.py | 16 ++++++++-------- .../detection/RetinaMask/test/train_eval_1p.sh | 4 ++-- .../detection/RetinaMask/test/train_full_1p.sh | 6 ++++-- .../detection/RetinaMask/test/train_full_8p.sh | 2 +- .../RetinaMask/test/train_performance_1p.sh | 6 +++--- .../RetinaMask/test/train_performance_8p.sh | 4 ++-- 8 files changed, 27 insertions(+), 25 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/README.md b/PyTorch/contrib/cv/detection/RetinaMask/README.md index 70f512848c..5a53b61b27 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/README.md +++ b/PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -46,9 +46,9 @@ bash test/train_eval_1p.sh --data_path=xxx --weight_path=./model_0044999.pth # 1p batch_size == 8,8p batch_size == 64 -| NAME | Steps | BBOX-MAP | SEGM-MAP | FPS | -| :-----------------: | :---: | :------: | :------: | :--: | -| GPU-1p(@NV-T4,bs=4) | 90000 | 26.9 | 23.3 | 2.6 | -| GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | -| NPU-1p | 400 | - | - | 31.9 | -| NPU-8p | 20000 | 28.8 | 25.7 | 33.3 | +| NAME | Steps | BBOX-MAP | SEGM-MAP | FPS | +| :----: | 
:----: | :------: | :------: | :--: | +| GPU-1p | 360000 | - | - | 8.7 | +| GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | +| NPU-1p | 400 | - | - | 4.7 | +| NPU-8p | 20000 | 28.8 | 25.7 | 33.3 | diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py index 6765834b35..57db416d57 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/config/defaults.py @@ -360,4 +360,4 @@ _C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py") _C.LOCAL_RANK = 0 _C.DIST_BACKEND = 'hccl' _C.DEVICE = 0 -_C.N_GPU = 8 +_C.N_GPU = 1 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py index 29443353cb..49855938d1 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py @@ -63,14 +63,14 @@ def prepare_for_coco_detection(predictions, dataset): "score": scores[k], } ) - src_img = dataset.get_src_img(image_id) - for res in coco_result: - box = res['bbox'] - if res['score'] > 0.1: - cv2.rectangle(src_img, (int(box[0]), int(box[1])), - (int(box[0]) + int(box[2]) - 1, int(box[1]) + int(box[3]) - 1), - (0, 0, 255), 2) - cv2.imwrite(f'./demo/{str(image_id)}.jpg', src_img) + # src_img = dataset.get_src_img(image_id) + # for res in coco_result: + # box = res['bbox'] + # if res['score'] > 0.1: + # cv2.rectangle(src_img, (int(box[0]), int(box[1])), + # (int(box[0]) + int(box[2]) - 1, int(box[1]) + int(box[3]) - 1), + # (0, 0, 255), 2) + # cv2.imwrite(f'./demo/{str(image_id)}.jpg', src_img) coco_results.extend(coco_result) return coco_results diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index 93cd0d6886..ad0eca762f 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -83,13 +83,13 @@ do python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - SOLVER.IMS_PER_BATCH ${batch_size}\ + SOLVER.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & else python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - SOLVER.IMS_PER_BATCH ${batch_size} \ + SOLVER.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & fi done diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh index 9a7f229b94..04a256f710 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh @@ -78,12 +78,14 @@ do taskset -c $PID_START-$PID_END \ python3.7.5 -u tools/train_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ - --skip-test SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} SOLVER.STEPS ${lr_steps} \ 
+ --skip-test SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} \ + SOLVER.STEPS ${lr_steps} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & else python3.7.5 -u tools/train_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ - --skip-test SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} SOLVER.STEPS ${lr_steps} \ + --skip-test SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} \ + SOLVER.STEPS ${lr_steps} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & fi done diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh index a1d915fb85..46bde9081d 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh @@ -85,7 +85,7 @@ python3.7 -u -m bind_pyt \ --no_hyperthreads \ --no_membind "$@" ./tools/train_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml \ --data_path ${data_path} --skip-test \ - SOLVER.IMS_PER_BATCH ${batch_size} \ + SOLVER.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ SOLVER.MAX_ITER ${max_iter} SOLVER.STEPS ${lr_steps} SOLVER.BASE_LR 0.02 \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh index f47c3f058f..4de255e4a9 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=1 data_path="" # 训练最大iter数 -max_iter=401 +max_iter=801 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 @@ -78,13 +78,13 @@ do python3.7.5 -u tools/train_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --skip-test \ - SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} \ + SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & else python3.7.5 -u tools/train_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --skip-test \ - SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} \ + SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & fi done diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh index 3aefdfc912..f60c2efb11 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=8 data_path="" # 训练最大iter数 -max_iter=201 +max_iter=401 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 @@ -83,7 +83,7 @@ python3.7 -u -m bind_pyt \ --master_addr $(hostname -I |awk '{print $1}') \ --no_hyperthreads \ --no_membind "$@" ./tools/train_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml \ - --skip-test --data_path ${data_path} \ + --skip-test 
--data_path ${data_path} N_GPU ${RANK_SIZE} \ SOLVER.IMS_PER_BATCH ${batch_size} SOLVER.MAX_ITER ${max_iter} SOLVER.BASE_LR 0.02 \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -- Gitee From 693135ba8c7590dcf9ada2e71e64aec97cfeec88 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Tue, 12 Apr 2022 22:43:52 +0800 Subject: [PATCH 03/20] fix something about display --- .../contrib/cv/detection/RetinaMask/README.md | 49 +++++++++++---- .../maskrcnn_benchmark/engine/trainer.py | 19 +++++- .../maskrcnn_benchmark/utils/metric_logger.py | 12 ++-- .../RetinaMask/test/train_full_1p.sh | 2 +- .../RetinaMask/test/train_performance_1p.sh | 2 +- .../RetinaMask/test/train_performance_8p.sh | 2 +- .../RetinaMask/tools/recompiled_op.py | 59 +++++++++++++++++++ 7 files changed, 122 insertions(+), 23 deletions(-) create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py diff --git a/PyTorch/contrib/cv/detection/RetinaMask/README.md b/PyTorch/contrib/cv/detection/RetinaMask/README.md index 5a53b61b27..05145cc168 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/README.md +++ b/PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -1,43 +1,68 @@ -## Requirements +## Before running -- Part of the requirements: +- install numactl: ``` +apt-get install numactl # for Ubuntu +yum install numactl # for CentOS +``` + +- get R-50.pkl: + +``` +mkdir -p /root/.torch/models/ +wget https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl +mv R-50.pkl /root/.torch/models/ +``` + +- ln -s dataset: + +``` +mkdir ./dataset +ln -snf path_to_coco ./dataset/coco +``` + +- other requirements: + +``` +pip3 install torchvision==0.2.1 + +# other recommended requirements apex==0.1+ascend.20220315 torch==1.5.0+ascend.post5.20220315 -torchvision==0.2.1 ``` -- Build for maskrcnn-benchmark: +- source env and build: ``` +source test/env_npu.sh pyhton3.7 setup.py build develop ``` -## Training +## Running - To train: ``` # 1p train full -bash test/train_full_1p.sh --data_path=xxx +bash test/train_full_1p.sh --data_path=./dataset/ # 1p train perf -bash test/train_performance_1p.sh --data_path=xxx +bash test/train_performance_1p.sh --data_path=./dataset/ # 8p train full -bash test/train_full_8p.sh --data_path=xxx +bash test/train_full_8p.sh --data_path=./dataset/ # 8p train perf -bash test/train_performance_8p.sh --data_path=xxx +bash test/train_performance_8p.sh --data_path=./dataset/ ``` - To evaluate: ``` -bash test/train_eval_1p.sh --data_path=xxx --weight_path=./model_0044999.pth # for example +bash test/train_eval_1p.sh --data_path=./dataset/ --weight_path=./model_0044999.pth # for example ``` @@ -50,5 +75,5 @@ bash test/train_eval_1p.sh --data_path=xxx --weight_path=./model_0044999.pth # | :----: | :----: | :------: | :------: | :--: | | GPU-1p | 360000 | - | - | 8.7 | | GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | -| NPU-1p | 400 | - | - | 4.7 | -| NPU-8p | 20000 | 28.8 | 25.7 | 33.3 | +| NPU-1p | 400 | - | - | 4.6 | +| NPU-8p | 20000 | 28.8 | 25.7 | 34.8 | diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py index 75a456f757..382002cb4f 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -5,6 +5,7 @@ import time from apex import amp from maskrcnn_benchmark.utils.metric_logger import MetricLogger +from maskrcnn_benchmark.config 
import cfg class GetFPS(object): @@ -17,7 +18,18 @@ class GetFPS(object): self.all_fps.append(1 / batch_time) if len(self.all_fps) > self.max_len: del self.all_fps[0] - self.mean_fps = sum(self.all_fps) / len(self.all_fps) + self.mean_fps = self.get_mean(self.all_fps) + + def get_mean(self, all_fps): + all_fps = sorted(all_fps) + length = len(all_fps) + del_num = max(1, int(length / 10)) + + del all_fps[-del_num:] + del all_fps[:del_num] + avg = sum(all_fps) / len(all_fps) + + return avg def do_train( @@ -34,13 +46,14 @@ def do_train( ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") - meters = MetricLogger(delimiter=" ") + batch_size = cfg.SOLVER.IMS_PER_BATCH + meters = MetricLogger(bs=batch_size, delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() - get_fps = GetFPS(100) + get_fps = GetFPS(500) for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if local_rank == 0: diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py index c314e13117..f7437f4f2c 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py @@ -10,7 +10,7 @@ class SmoothedValue(object): window or the global series average. """ - def __init__(self, window_size=20): + def __init__(self, window_size=500): self.deque = deque(maxlen=window_size) self.series = [] self.total = 0.0 @@ -38,9 +38,10 @@ class SmoothedValue(object): class MetricLogger(object): - def __init__(self, delimiter="\t"): + def __init__(self, bs, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter + self.batch_size = bs def update(self, **kwargs): for k, v in kwargs.items(): @@ -57,7 +58,8 @@ class MetricLogger(object): def __str__(self): loss_str = [] for name, meter in self.meters.items(): - loss_str.append( - "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) - ) + if name == 'time': + loss_str.append("{}: {:.4f}".format('fps', self.batch_size / meter.median)) + else: + loss_str.append("{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg)) return self.delimiter.join(loss_str) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh index 04a256f710..9b229cc213 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh @@ -129,4 +129,4 @@ echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/ echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh index 4de255e4a9..99fc597305 100644 --- 
a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=1 data_path="" # 训练最大iter数 -max_iter=801 +max_iter=2000 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh index f60c2efb11..ff27fe7eaa 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=8 data_path="" # 训练最大iter数 -max_iter=401 +max_iter=2000 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py b/PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py new file mode 100644 index 0000000000..c9a659dc69 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py @@ -0,0 +1,59 @@ +# Copyright (c) Soumith Chintala 2016, +# All rights reserved +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +"""用于导出动态shape算子 +""" + +import os +import sys +import argparse + + +def func(log_path, split_flag): + """ + :param log_path: where log_path addr is. 
+ :return: + """ + recompile_flag = 'To compile op: ' + output_list = [[]] + + with open(log_path, 'r')as f: + log_list = f.read().split('\n') + for log in log_list: + log = log.strip() + if split_flag in log: + output_list.append([]) + elif recompile_flag in log: + op_name = log.split(recompile_flag)[1] + if op_name not in output_list[-1]: + output_list[-1].append(op_name) + + with open('recompile_op_list.txt', 'w')as f: + for idx, output in enumerate(output_list): + f.write('iter: %d' % idx + '\n') + f.write(','.join(output) + '\n') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='trans the log') + parser.add_argument('--log_path', default="./recompile_op.log", + help="input the dir name, trans the current dir with default") + parser.add_argument('--split_flag', default='=====iter', + help="flag for split epochs") + args = parser.parse_args() + func(args.log_path, args.split_flag) \ No newline at end of file -- Gitee From e8df8b7685d973c29eadc11dcbd648d76ea136a6 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Wed, 13 Apr 2022 07:54:53 +0800 Subject: [PATCH 04/20] fix something --- .../maskrcnn_benchmark/engine/trainer.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py index 382002cb4f..cd98b97054 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -23,13 +23,17 @@ class GetFPS(object): def get_mean(self, all_fps): all_fps = sorted(all_fps) length = len(all_fps) - del_num = max(1, int(length / 10)) + if length <= 3: + avg = sum(all_fps) / len(all_fps) - del all_fps[-del_num:] - del all_fps[:del_num] - avg = sum(all_fps) / len(all_fps) + return avg + else: + del_num = max(1, int(length / 10)) + del all_fps[-del_num:] + del all_fps[:del_num] + avg = sum(all_fps) / len(all_fps) - return avg + return avg def do_train( -- Gitee From cc0371de0cb243b459f679e9cbd114a004702f6c Mon Sep 17 00:00:00 2001 From: Savion_G Date: Wed, 13 Apr 2022 09:36:37 +0800 Subject: [PATCH 05/20] nothing changed --- .../contrib/cv/detection/RetinaMask/README.md | 49 +++++++++++---- .../maskrcnn_benchmark/engine/trainer.py | 23 +++++++- .../maskrcnn_benchmark/utils/metric_logger.py | 12 ++-- .../RetinaMask/test/train_full_1p.sh | 2 +- .../RetinaMask/test/train_performance_1p.sh | 2 +- .../RetinaMask/test/train_performance_8p.sh | 2 +- .../RetinaMask/tools/recompiled_op.py | 59 +++++++++++++++++++ 7 files changed, 126 insertions(+), 23 deletions(-) create mode 100644 PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py diff --git a/PyTorch/contrib/cv/detection/RetinaMask/README.md b/PyTorch/contrib/cv/detection/RetinaMask/README.md index 5a53b61b27..05145cc168 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/README.md +++ b/PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -1,43 +1,68 @@ -## Requirements +## Before running -- Part of the requirements: +- install numactl: ``` +apt-get install numactl # for Ubuntu +yum install numactl # for CentOS +``` + +- get R-50.pkl: + +``` +mkdir -p /root/.torch/models/ +wget https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl +mv R-50.pkl /root/.torch/models/ +``` + +- ln -s dataset: + +``` +mkdir ./dataset +ln -snf path_to_coco ./dataset/coco +``` + +- other requirements: + +``` +pip3 install 
torchvision==0.2.1 + +# other recommended requirements apex==0.1+ascend.20220315 torch==1.5.0+ascend.post5.20220315 -torchvision==0.2.1 ``` -- Build for maskrcnn-benchmark: +- source env and build: ``` +source test/env_npu.sh pyhton3.7 setup.py build develop ``` -## Training +## Running - To train: ``` # 1p train full -bash test/train_full_1p.sh --data_path=xxx +bash test/train_full_1p.sh --data_path=./dataset/ # 1p train perf -bash test/train_performance_1p.sh --data_path=xxx +bash test/train_performance_1p.sh --data_path=./dataset/ # 8p train full -bash test/train_full_8p.sh --data_path=xxx +bash test/train_full_8p.sh --data_path=./dataset/ # 8p train perf -bash test/train_performance_8p.sh --data_path=xxx +bash test/train_performance_8p.sh --data_path=./dataset/ ``` - To evaluate: ``` -bash test/train_eval_1p.sh --data_path=xxx --weight_path=./model_0044999.pth # for example +bash test/train_eval_1p.sh --data_path=./dataset/ --weight_path=./model_0044999.pth # for example ``` @@ -50,5 +75,5 @@ bash test/train_eval_1p.sh --data_path=xxx --weight_path=./model_0044999.pth # | :----: | :----: | :------: | :------: | :--: | | GPU-1p | 360000 | - | - | 8.7 | | GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | -| NPU-1p | 400 | - | - | 4.7 | -| NPU-8p | 20000 | 28.8 | 25.7 | 33.3 | +| NPU-1p | 400 | - | - | 4.6 | +| NPU-8p | 20000 | 28.8 | 25.7 | 34.8 | diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py index 75a456f757..cd98b97054 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -5,6 +5,7 @@ import time from apex import amp from maskrcnn_benchmark.utils.metric_logger import MetricLogger +from maskrcnn_benchmark.config import cfg class GetFPS(object): @@ -17,7 +18,22 @@ class GetFPS(object): self.all_fps.append(1 / batch_time) if len(self.all_fps) > self.max_len: del self.all_fps[0] - self.mean_fps = sum(self.all_fps) / len(self.all_fps) + self.mean_fps = self.get_mean(self.all_fps) + + def get_mean(self, all_fps): + all_fps = sorted(all_fps) + length = len(all_fps) + if length <= 3: + avg = sum(all_fps) / len(all_fps) + + return avg + else: + del_num = max(1, int(length / 10)) + del all_fps[-del_num:] + del all_fps[:del_num] + avg = sum(all_fps) / len(all_fps) + + return avg def do_train( @@ -34,13 +50,14 @@ def do_train( ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") - meters = MetricLogger(delimiter=" ") + batch_size = cfg.SOLVER.IMS_PER_BATCH + meters = MetricLogger(bs=batch_size, delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() - get_fps = GetFPS(100) + get_fps = GetFPS(500) for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if local_rank == 0: diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py index c314e13117..f7437f4f2c 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/utils/metric_logger.py @@ -10,7 +10,7 @@ class SmoothedValue(object): window or the global series average. 
""" - def __init__(self, window_size=20): + def __init__(self, window_size=500): self.deque = deque(maxlen=window_size) self.series = [] self.total = 0.0 @@ -38,9 +38,10 @@ class SmoothedValue(object): class MetricLogger(object): - def __init__(self, delimiter="\t"): + def __init__(self, bs, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter + self.batch_size = bs def update(self, **kwargs): for k, v in kwargs.items(): @@ -57,7 +58,8 @@ class MetricLogger(object): def __str__(self): loss_str = [] for name, meter in self.meters.items(): - loss_str.append( - "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) - ) + if name == 'time': + loss_str.append("{}: {:.4f}".format('fps', self.batch_size / meter.median)) + else: + loss_str.append("{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg)) return self.delimiter.join(loss_str) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh index 04a256f710..9b229cc213 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh @@ -129,4 +129,4 @@ echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/ echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh index 4de255e4a9..99fc597305 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=1 data_path="" # 训练最大iter数 -max_iter=801 +max_iter=2000 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh index f60c2efb11..ff27fe7eaa 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=8 data_path="" # 训练最大iter数 -max_iter=401 +max_iter=2000 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py b/PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py new file mode 100644 index 0000000000..c9a659dc69 --- /dev/null +++ b/PyTorch/contrib/cv/detection/RetinaMask/tools/recompiled_op.py @@ -0,0 +1,59 @@ +# Copyright (c) Soumith Chintala 2016, +# All rights reserved +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +"""用于导出动态shape算子 +""" + +import os +import sys +import argparse + + +def func(log_path, split_flag): + """ + :param log_path: where log_path addr is. + :return: + """ + recompile_flag = 'To compile op: ' + output_list = [[]] + + with open(log_path, 'r')as f: + log_list = f.read().split('\n') + for log in log_list: + log = log.strip() + if split_flag in log: + output_list.append([]) + elif recompile_flag in log: + op_name = log.split(recompile_flag)[1] + if op_name not in output_list[-1]: + output_list[-1].append(op_name) + + with open('recompile_op_list.txt', 'w')as f: + for idx, output in enumerate(output_list): + f.write('iter: %d' % idx + '\n') + f.write(','.join(output) + '\n') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='trans the log') + parser.add_argument('--log_path', default="./recompile_op.log", + help="input the dir name, trans the current dir with default") + parser.add_argument('--split_flag', default='=====iter', + help="flag for split epochs") + args = parser.parse_args() + func(args.log_path, args.split_flag) \ No newline at end of file -- Gitee From 4b8f000a1b27a92f6bb75892db08f258b9d8119c Mon Sep 17 00:00:00 2001 From: Savion_G Date: Wed, 13 Apr 2022 10:23:36 +0800 Subject: [PATCH 06/20] nothing changed --- .../RetinaMask/maskrcnn_benchmark/engine/trainer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py index cd98b97054..ac753d2876 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -29,9 +29,8 @@ class GetFPS(object): return avg else: del_num = max(1, int(length / 10)) - del all_fps[-del_num:] - del all_fps[:del_num] - avg = sum(all_fps) / len(all_fps) + cut = all_fps[del_num:length - del_num] + avg = sum(cut) / len(cut) return avg -- Gitee From 9898590d09194907891975d2cf416ad2c80b6592 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Fri, 15 Apr 2022 17:39:31 +0800 Subject: [PATCH 07/20] change eval batch 1 --- PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index ad0eca762f..d7907e96c8 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="RetinaMask" # 训练batch_size -batch_size=8 +batch_size=1 # 训练使用的npu卡数 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 -- Gitee From 58b204a2cedc719a7a4a0c7e0cce14379672c83b Mon Sep 17 00:00:00 2001 From: Savion_G Date: Fri, 15 Apr 2022 09:58:22 +0000 Subject: [PATCH 08/20] update PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh. 
--- PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index d7907e96c8..66052d984c 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -108,6 +108,8 @@ echo "------------------ Final result ------------------" #输出训练精度,需要模型审视修改 bbox_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $7}' | awk -F ')' '{print $1}'` segm_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $20}' | awk -F ')' '{print $1}'` +bbox_map=`awk -v x=100 -v y=${bbox_map} 'BEGIN{printf "%.2f\n",x*y}'` +segm_map=`awk -v x=100 -v y=${segm_map} 'BEGIN{printf "%.2f\n",x*y}'` #打印,不需要修改 echo "Final bbox mAp : ${bbox_map}" echo "Final segm mAp : ${segm_map}" -- Gitee From a8e6c96a91c4e4ee2288043abcc2cfa3ebeacf00 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Fri, 15 Apr 2022 13:17:37 +0000 Subject: [PATCH 09/20] update PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh. --- .../contrib/cv/detection/RetinaMask/test/train_eval_1p.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index 66052d984c..ca07a4e362 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="RetinaMask" # 训练batch_size -batch_size=1 +batch_size=8 # 训练使用的npu卡数 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 @@ -72,7 +72,7 @@ if [ x"${etp_flag}" != x"true" ];then export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' fi -KERNEL_NUM=$(($(nproc)/8)) +KERNEL_NUM=$(($(nproc)/1)) for i in $(seq 0 0) do if [ $(uname -m) = "aarch64" ] @@ -83,7 +83,7 @@ do python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - SOLVER.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ + SOLVER.IMS_PER_BATCH 1 N_GPU 8 \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & else python3.7.5 -u tools/test_net.py \ @@ -108,9 +108,9 @@ echo "------------------ Final result ------------------" #输出训练精度,需要模型审视修改 bbox_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $7}' | awk -F ')' '{print $1}'` segm_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $20}' | awk -F ')' '{print $1}'` +#打印,不需要修改 bbox_map=`awk -v x=100 -v y=${bbox_map} 'BEGIN{printf "%.2f\n",x*y}'` segm_map=`awk -v x=100 -v y=${segm_map} 'BEGIN{printf "%.2f\n",x*y}'` -#打印,不需要修改 echo "Final bbox mAp : ${bbox_map}" echo "Final segm mAp : ${segm_map}" echo "E2E Eval Duration sec : $e2e_time" -- Gitee From 2356f09777a0d0376322b677c75da08761f2388f Mon Sep 17 00:00:00 2001 From: Savion_G Date: Mon, 18 Apr 2022 02:20:07 +0000 Subject: [PATCH 10/20] update PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh. 
--- PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index ca07a4e362..c478416cf9 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -72,7 +72,7 @@ if [ x"${etp_flag}" != x"true" ];then export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' fi -KERNEL_NUM=$(($(nproc)/1)) +KERNEL_NUM=$(($(nproc)/8)) for i in $(seq 0 0) do if [ $(uname -m) = "aarch64" ] @@ -112,7 +112,7 @@ segm_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_I bbox_map=`awk -v x=100 -v y=${bbox_map} 'BEGIN{printf "%.2f\n",x*y}'` segm_map=`awk -v x=100 -v y=${segm_map} 'BEGIN{printf "%.2f\n",x*y}'` echo "Final bbox mAp : ${bbox_map}" -echo "Final segm mAp : ${segm_map}" +echo "Final mask mAp : ${segm_map}" echo "E2E Eval Duration sec : $e2e_time" #性能看护结果汇总 -- Gitee From 7a469fa2123ac2dbb29a138d432c87af24772980 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Mon, 18 Apr 2022 17:09:08 +0800 Subject: [PATCH 11/20] no resume --- .../cv/detection/RetinaMask/test/train_eval_1p.sh | 10 ++++------ .../cv/detection/RetinaMask/test/train_full_1p.sh | 3 +++ .../cv/detection/RetinaMask/test/train_full_8p.sh | 3 +++ .../detection/RetinaMask/test/train_performance_1p.sh | 3 +++ .../detection/RetinaMask/test/train_performance_8p.sh | 3 +++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index c478416cf9..b33e195ace 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="RetinaMask" # 训练batch_size -batch_size=8 +batch_size=1 # 训练使用的npu卡数 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 @@ -83,13 +83,13 @@ do python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - SOLVER.IMS_PER_BATCH 1 N_GPU 8 \ + TEST.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & else python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - SOLVER.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ + TEST.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & fi done @@ -109,10 +109,8 @@ echo "------------------ Final result ------------------" bbox_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $7}' | awk -F ')' '{print $1}'` segm_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $20}' | awk -F ')' '{print $1}'` #打印,不需要修改 -bbox_map=`awk -v x=100 -v y=${bbox_map} 'BEGIN{printf "%.2f\n",x*y}'` -segm_map=`awk -v x=100 -v y=${segm_map} 'BEGIN{printf "%.2f\n",x*y}'` echo "Final bbox mAp : ${bbox_map}" -echo "Final mask mAp : ${segm_map}" +echo "Final segm mAp : ${segm_map}" echo "E2E Eval Duration sec : $e2e_time" #性能看护结果汇总 diff --git 
a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh index 9b229cc213..1a34f23a65 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_1p.sh @@ -1,5 +1,8 @@ #!/bin/bash +# 删除last_checkpoint,默认不resume +rm -f ./last_checkpoint + ################基础配置参数,需要模型审视修改################## # 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE # 网络名称,同目录名称 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh index 46bde9081d..a7e5b9f4d0 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_full_8p.sh @@ -1,5 +1,8 @@ #!/bin/bash +# 删除last_checkpoint,默认不resume +rm -f ./last_checkpoint + ################基础配置参数,需要模型审视修改################## # 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE # 网络名称,同目录名称 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh index 99fc597305..7ba823c498 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_1p.sh @@ -1,5 +1,8 @@ #!/bin/bash +# 删除last_checkpoint,默认不resume +rm -f ./last_checkpoint + ################基础配置参数,需要模型审视修改################## # 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE # 网络名称,同目录名称 diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh index ff27fe7eaa..b6db929632 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_performance_8p.sh @@ -1,5 +1,8 @@ #!/bin/bash +# 删除last_checkpoint,默认不resume +rm -f ./last_checkpoint + ################基础配置参数,需要模型审视修改################## # 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE # 网络名称,同目录名称 -- Gitee From 54fe9e3f616d327ba597fa9095ea776f27431dfe Mon Sep 17 00:00:00 2001 From: Savion_G Date: Mon, 18 Apr 2022 17:10:41 +0800 Subject: [PATCH 12/20] eval --- .../cv/detection/RetinaMask/test/train_eval_1p.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index b33e195ace..c478416cf9 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="RetinaMask" # 训练batch_size -batch_size=1 +batch_size=8 # 训练使用的npu卡数 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 @@ -83,13 +83,13 @@ do python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - TEST.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ + SOLVER.IMS_PER_BATCH 1 N_GPU 8 \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & else python3.7.5 -u tools/test_net.py \ --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml --data_path ${data_path} \ --weight ${weight_path} \ - TEST.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ + SOLVER.IMS_PER_BATCH ${batch_size} N_GPU ${RANK_SIZE} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & fi 
done @@ -109,8 +109,10 @@ echo "------------------ Final result ------------------" bbox_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $7}' | awk -F ')' '{print $1}'` segm_map=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log | grep 'OrderedDict' | awk '{print $20}' | awk -F ')' '{print $1}'` #打印,不需要修改 +bbox_map=`awk -v x=100 -v y=${bbox_map} 'BEGIN{printf "%.2f\n",x*y}'` +segm_map=`awk -v x=100 -v y=${segm_map} 'BEGIN{printf "%.2f\n",x*y}'` echo "Final bbox mAp : ${bbox_map}" -echo "Final segm mAp : ${segm_map}" +echo "Final mask mAp : ${segm_map}" echo "E2E Eval Duration sec : $e2e_time" #性能看护结果汇总 -- Gitee From 050c8e4db7d46fbc83bfe8fcc463d8e7cac3b196 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Tue, 19 Apr 2022 02:11:43 +0000 Subject: [PATCH 13/20] update PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh. --- PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh index c478416cf9..6deeea56c5 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaMask/test/train_eval_1p.sh @@ -1,5 +1,8 @@ #!/bin/bash +# 删除last_checkpoint,默认不resume +rm -f ./last_checkpoint + ################基础配置参数,需要模型审视修改################## # 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE # 网络名称,同目录名称 -- Gitee From 06ad6a78822677ee5d5c5fda2e6c605fc7f7c251 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Thu, 21 Apr 2022 14:15:43 +0800 Subject: [PATCH 14/20] delete csrc --- .../contrib/cv/detection/RetinaMask/README.md | 1 - .../maskrcnn_benchmark/csrc/ROIAlign.h | 46 --- .../maskrcnn_benchmark/csrc/ROIPool.h | 48 --- .../csrc/SigmoidFocalLoss.h | 41 --- .../csrc/cpu/ROIAlign_cpu.cpp | 257 ------------- .../csrc/cpu/nms_cpu.cpp.bak | 75 ---- .../maskrcnn_benchmark/csrc/cpu/vision.h | 16 - .../csrc/cuda/ROIAlign_cuda.cu | 346 ------------------ .../csrc/cuda/ROIPool_cuda.cu | 202 ---------- .../csrc/cuda/SigmoidFocalLoss_cuda.cu | 188 ---------- .../maskrcnn_benchmark/csrc/cuda/nms.cu | 128 ------- .../maskrcnn_benchmark/csrc/cuda/vision.h | 63 ---- .../maskrcnn_benchmark/csrc/nms.h.bak | 28 -- .../maskrcnn_benchmark/csrc/vision.cpp | 15 - 14 files changed, 1454 deletions(-) delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h delete mode 
100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak delete mode 100644 PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp diff --git a/PyTorch/contrib/cv/detection/RetinaMask/README.md b/PyTorch/contrib/cv/detection/RetinaMask/README.md index 05145cc168..e6474ac722 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/README.md +++ b/PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -36,7 +36,6 @@ torch==1.5.0+ascend.post5.20220315 ``` source test/env_npu.sh -pyhton3.7 setup.py build develop ``` diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h deleted file mode 100644 index 3907deab2a..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIAlign.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#pragma once - -#include "cpu/vision.h" - -#ifdef WITH_CUDA -#include "cuda/vision.h" -#endif - -// Interface for Python -at::Tensor ROIAlign_forward(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -at::Tensor ROIAlign_backward(const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h deleted file mode 100644 index 200fd7390b..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/ROIPool.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-#pragma once - -#include "cpu/vision.h" - -#ifdef WITH_CUDA -#include "cuda/vision.h" -#endif - - -std::tuple ROIPool_forward(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -at::Tensor ROIPool_backward(const at::Tensor& grad, - const at::Tensor& input, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - - - diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h deleted file mode 100644 index 308861e447..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include "cpu/vision.h" - -#ifdef WITH_CUDA -#include "cuda/vision.h" -#endif - -// Interface for Python -at::Tensor SigmoidFocalLoss_forward( - const at::Tensor& logits, - const at::Tensor& targets, - const int num_classes, - const float gamma, - const float alpha) { - if (logits.type().is_cuda()) { -#ifdef WITH_CUDA - return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -at::Tensor SigmoidFocalLoss_backward( - const at::Tensor& logits, - const at::Tensor& targets, - const at::Tensor& d_losses, - const int num_classes, - const float gamma, - const float alpha) { - if (logits.type().is_cuda()) { -#ifdef WITH_CUDA - return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp deleted file mode 100644 index d35aedf27e..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp +++ /dev/null @@ -1,257 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-#include "cpu/vision.h" - -// implementation taken from Caffe2 -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int iy_upper, - const int ix_upper, - T roi_start_h, - T roi_start_w, - T bin_size_h, - T bin_size_w, - int roi_bin_grid_h, - int roi_bin_grid_w, - std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T x = xx; - T y = yy; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y <= 0) { - y = 0; - } - if (x <= 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indeces - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void ROIAlignForward_cpu_kernel( - const int nthreads, - const T* bottom_data, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* bottom_rois, - //int roi_cols, - T* top_data) { - //AT_ASSERT(roi_cols == 4 || roi_cols == 5); - int roi_cols = 5; - - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - // roi could have 4 or 5 columns - const T* offset_bottom_rois = bottom_rois + n * roi_cols; - int roi_batch_ind = 0; - if (roi_cols == 5) { - roi_batch_ind = offset_bottom_rois[0]; - offset_bottom_rois++; - } - - // Do not using rounding; this implementation detail is critical - T roi_start_w = offset_bottom_rois[0] * spatial_scale; - T roi_start_h = offset_bottom_rois[1] * spatial_scale; - T roi_end_w = offset_bottom_rois[2] * spatial_scale; - T roi_end_h = offset_bottom_rois[3] * spatial_scale; - // T roi_start_w = round(offset_bottom_rois[0] * spatial_scale); - // T roi_start_h = round(offset_bottom_rois[1] * spatial_scale); - // T roi_end_w = round(offset_bottom_rois[2] * spatial_scale); - // T roi_end_h = 
round(offset_bottom_rois[3] * spatial_scale); - - // Force malformed ROIs to be 1x1 - T roi_width = std::max(roi_end_w - roi_start_w, (T)1.); - T roi_height = std::max(roi_end_h - roi_start_h, (T)1.); - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - // we want to precalculate indeces and weights shared by all chanels, - // this is the key point of optimiation - std::vector> pre_calc( - roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - pre_calc_for_bilinear_interpolate( - height, - width, - pooled_height, - pooled_width, - roi_bin_grid_h, - roi_bin_grid_w, - roi_start_h, - roi_start_w, - bin_size_h, - bin_size_w, - roi_bin_grid_h, - roi_bin_grid_w, - pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_bottom_data[pc.pos1] + - pc.w2 * offset_bottom_data[pc.pos2] + - pc.w3 * offset_bottom_data[pc.pos3] + - pc.w4 * offset_bottom_data[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - top_data[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(!input.type().is_cuda(), "input must be a CPU tensor"); - AT_ASSERTM(!rois.type().is_cuda(), "rois must be a CPU tensor"); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) { - return output; - } - - AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { - ROIAlignForward_cpu_kernel( - output_size, - input.data(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois.data(), - output.data()); - }); - return output; -} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak deleted file mode 100644 index 551b6eb0bf..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp.bak +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-#include "cpu/vision.h" - - -template -at::Tensor nms_cpu_kernel(const at::Tensor& dets, - const at::Tensor& scores, - const float threshold) { - AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); - AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); - AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); - - if (dets.numel() == 0) { - return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); - } - - auto x1_t = dets.select(1, 0).contiguous(); - auto y1_t = dets.select(1, 1).contiguous(); - auto x2_t = dets.select(1, 2).contiguous(); - auto y2_t = dets.select(1, 3).contiguous(); - - at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - - auto ndets = dets.size(0); - at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); - - auto suppressed = suppressed_t.data(); - auto order = order_t.data(); - auto x1 = x1_t.data(); - auto y1 = y1_t.data(); - auto x2 = x2_t.data(); - auto y2 = y2_t.data(); - auto areas = areas_t.data(); - - for (int64_t _i = 0; _i < ndets; _i++) { - auto i = order[_i]; - if (suppressed[i] == 1) - continue; - auto ix1 = x1[i]; - auto iy1 = y1[i]; - auto ix2 = x2[i]; - auto iy2 = y2[i]; - auto iarea = areas[i]; - - for (int64_t _j = _i + 1; _j < ndets; _j++) { - auto j = order[_j]; - if (suppressed[j] == 1) - continue; - auto xx1 = std::max(ix1, x1[j]); - auto yy1 = std::max(iy1, y1[j]); - auto xx2 = std::min(ix2, x2[j]); - auto yy2 = std::min(iy2, y2[j]); - - auto w = std::max(static_cast(0), xx2 - xx1 + 1); - auto h = std::max(static_cast(0), yy2 - yy1 + 1); - auto inter = w * h; - auto ovr = inter / (iarea + areas[j] - inter); - if (ovr >= threshold) - suppressed[j] = 1; - } - } - return at::nonzero(suppressed_t == 0).squeeze(1); -} - -at::Tensor nms_cpu(const at::Tensor& dets, - const at::Tensor& scores, - const float threshold) { - at::Tensor result; - AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { - result = nms_cpu_kernel(dets, scores, threshold); - }); - return result; -} \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h deleted file mode 100644 index 6cc112f975..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cpu/vision.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#pragma once -#include - - -at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - - -//at::Tensor nms_cpu(const at::Tensor& dets, -// const at::Tensor& scores, -// const float threshold); diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu deleted file mode 100644 index 5fe97ca906..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu +++ /dev/null @@ -1,346 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-#include -#include - -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - - -template -__device__ T bilinear_interpolate(const T* bottom_data, - const int height, const int width, - T y, T x, - const int index /* index for debug only*/) { - - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - //empty - return 0; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - int y_low = (int) y; - int x_low = (int) x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T) y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T) x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - // do bilinear interpolation - T v1 = bottom_data[y_low * width + x_low]; - T v2 = bottom_data[y_low * width + x_high]; - T v3 = bottom_data[y_high * width + x_low]; - T v4 = bottom_data[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__global__ void RoIAlignForward(const int nthreads, const T* bottom_data, - const T spatial_scale, const int channels, - const int height, const int width, - const int pooled_height, const int pooled_width, - const int sampling_ratio, - const T* bottom_rois, T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not using rounding; this implementation detail is critical - T roi_start_w = offset_bottom_rois[1] * spatial_scale; - T roi_start_h = offset_bottom_rois[2] * spatial_scale; - T roi_end_w = offset_bottom_rois[3] * spatial_scale; - T roi_end_h = offset_bottom_rois[4] * spatial_scale; - // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale); - // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale); - // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale); - // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale); - - // Force malformed ROIs to be 1x1 - T roi_width = max(roi_end_w - roi_start_w, (T)1.); - T roi_height = max(roi_end_h - roi_start_h, (T)1.); - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix ++) - { - const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - - T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, index); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - - -template -__device__ void bilinear_interpolate_gradient( - const int height, const int width, - T y, T x, - T & w1, T & w2, T & w3, T & w4, - int & x_low, int & x_high, int & y_low, int & y_high, - const int index /* index for debug only*/) { - - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - //empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - y_low = (int) y; - x_low = (int) x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T) y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T) x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = bottom_data[y_low * width + x_low]; - // T v2 = bottom_data[y_low * width + x_high]; - // T v3 = bottom_data[y_high * width + x_low]; - // T v4 = bottom_data[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -__global__ void RoIAlignBackwardFeature(const int nthreads, const T* top_diff, - const int num_rois, const T spatial_scale, - const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* bottom_rois) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not using rounding; this implementation detail is critical - T roi_start_w = offset_bottom_rois[1] * spatial_scale; - T roi_start_h = offset_bottom_rois[2] * spatial_scale; - T roi_end_w = offset_bottom_rois[3] * spatial_scale; - T roi_end_h = offset_bottom_rois[4] * spatial_scale; - // T roi_start_w = round(offset_bottom_rois[1] * spatial_scale); - // T roi_start_h = round(offset_bottom_rois[2] * spatial_scale); - // T roi_end_w = round(offset_bottom_rois[3] * spatial_scale); - // T roi_end_h = round(offset_bottom_rois[4] * spatial_scale); - - // Force malformed ROIs to be 1x1 - T roi_width = max(roi_end_w - roi_start_w, (T)1.); - T roi_height = max(roi_end_h - roi_start_h, (T)1.); - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - 
const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix ++) - { - const T x = roi_start_w + pw * bin_size_w + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, y, x, - w1, w2, w3, w4, - x_low, x_high, y_low, y_high, - index); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) - { - atomicAdd(offset_bottom_diff + y_low * width + x_low, static_cast(g1)); - atomicAdd(offset_bottom_diff + y_low * width + x_high, static_cast(g2)); - atomicAdd(offset_bottom_diff + y_high * width + x_low, static_cast(g3)); - atomicAdd(offset_bottom_diff + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignBackward - - -at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L)); - dim3 block(512); - - if (output.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return output; - } - - AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { - RoIAlignForward<<>>( - output_size, - input.contiguous().data(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois.contiguous().data(), - output.data()); - }); - THCudaCheck(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); - - auto num_rois = rois.size(0); - auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 
4096L)); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return grad_input; - } - - AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] { - RoIAlignBackwardFeature<<>>( - grad.numel(), - grad.contiguous().data(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data(), - rois.contiguous().data()); - }); - THCudaCheck(cudaGetLastError()); - return grad_input; -} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu deleted file mode 100644 index b826dd9bc2..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#include -#include - -#include -#include -#include - - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - - -template -__global__ void RoIPoolFForward(const int nthreads, const T* bottom_data, - const T spatial_scale, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const T* bottom_rois, T* top_data, int* argmax_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); - int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); - int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); - int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); - - // Force malformed ROIs to be 1x1 - int roi_width = max(roi_end_w - roi_start_w + 1, 1); - int roi_height = max(roi_end_h - roi_start_h + 1, 1); - T bin_size_h = static_cast(roi_height) - / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) - / static_cast(pooled_width); - - int hstart = static_cast(floor(static_cast(ph) - * bin_size_h)); - int wstart = static_cast(floor(static_cast(pw) - * bin_size_w)); - int hend = static_cast(ceil(static_cast(ph + 1) - * bin_size_h)); - int wend = static_cast(ceil(static_cast(pw + 1) - * bin_size_w)); - - // Add roi offsets and clip to input boundaries - hstart = min(max(hstart + roi_start_h, 0), height); - hend = min(max(hend + roi_start_h, 0), height); - wstart = min(max(wstart + roi_start_w, 0), width); - wend = min(max(wend + roi_start_w, 0), width); - bool is_empty = (hend <= hstart) || (wend <= wstart); - - // Define an empty pooling region to be zero - T maxval = is_empty ? 
0 : -FLT_MAX; - // If nothing is pooled, argmax = -1 causes nothing to be backprop'd - int maxidx = -1; - const T* offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - int bottom_index = h * width + w; - if (offset_bottom_data[bottom_index] > maxval) { - maxval = offset_bottom_data[bottom_index]; - maxidx = bottom_index; - } - } - } - top_data[index] = maxval; - argmax_data[index] = maxidx; - } -} - -template -__global__ void RoIPoolFBackward(const int nthreads, const T* top_diff, - const int* argmax_data, const int num_rois, const T spatial_scale, - const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, T* bottom_diff, - const T* bottom_rois) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - int bottom_offset = (roi_batch_ind * channels + c) * height * width; - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - T* offset_bottom_diff = bottom_diff + bottom_offset; - const int* offset_argmax_data = argmax_data + top_offset; - - int argmax = offset_argmax_data[ph * pooled_width + pw]; - if (argmax != -1) { - atomicAdd( - offset_bottom_diff + argmax, - static_cast(offset_top_diff[ph * pooled_width + pw])); - - } - } -} - -std::tuple ROIPool_forward_cuda(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width) { - AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - auto argmax = at::zeros({num_rois, channels, pooled_height, pooled_width}, input.options().dtype(at::kInt)); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L)); - dim3 block(512); - - if (output.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return std::make_tuple(output, argmax); - } - - AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] { - RoIPoolFForward<<>>( - output_size, - input.contiguous().data(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - rois.contiguous().data(), - output.data(), - argmax.data()); - }); - THCudaCheck(cudaGetLastError()); - return std::make_tuple(output, argmax); -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, - const at::Tensor& input, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width) { - AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.type().is_cuda(), 
"rois must be a CUDA tensor"); - // TODO add more checks - - auto num_rois = rois.size(0); - auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 4096L)); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return grad_input; - } - - AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIPool_backward", [&] { - RoIPoolFBackward<<>>( - grad.numel(), - grad.contiguous().data(), - argmax.data(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - grad_input.data(), - rois.contiguous().data()); - }); - THCudaCheck(cudaGetLastError()); - return grad_input; -} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu deleted file mode 100644 index 7d40767bbb..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -// This file is modified from https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu -// Cheng-Yang Fu -// cyfu@cs.unc.edu -#include -#include - -#include -#include -#include - -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - - -template -__global__ void SigmoidFocalLossForward(const int nthreads, - const T* logits, - const int* targets, - const int num_classes, - const float gamma, - const float alpha, - const int num, - T* losses) { - CUDA_1D_KERNEL_LOOP(i, nthreads) { - - int n = i / num_classes; - int d = i % num_classes; // current class[0~79]; - int t = targets[n]; // target class [1~80]; - - // Decide it is positive or negative case. - T c1 = (t == (d+1)); - T c2 = (t>=0 & t != (d+1)); - - T zn = (1.0 - alpha); - T zp = (alpha); - - // p = 1. / 1. + expf(-x); p = sigmoid(x) - T p = 1. / (1. + expf(-logits[i])); - - // (1-p)**gamma * log(p) where - T term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); - - // p**gamma * log(1-p) - T term2 = powf(p, gamma) * - (-1. * logits[i] * (logits[i] >= 0) - - logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); - - losses[i] = 0.0; - losses[i] += -c1 * term1 * zp; - losses[i] += -c2 * term2 * zn; - - } // CUDA_1D_KERNEL_LOOP -} // SigmoidFocalLossForward - - -template -__global__ void SigmoidFocalLossBackward(const int nthreads, - const T* logits, - const int* targets, - const T* d_losses, - const int num_classes, - const float gamma, - const float alpha, - const int num, - T* d_logits) { - CUDA_1D_KERNEL_LOOP(i, nthreads) { - - int n = i / num_classes; - int d = i % num_classes; // current class[0~79]; - int t = targets[n]; // target class [1~80], 0 is background; - - // Decide it is positive or negative case. - T c1 = (t == (d+1)); - T c2 = (t>=0 & t != (d+1)); - - T zn = (1.0 - alpha); - T zp = (alpha); - // p = 1. / 1. + expf(-x); p = sigmoid(x) - T p = 1. / (1. + expf(-logits[i])); - - // (1-p)**g * (1 - p - g*p*log(p) - T term1 = powf((1. - p), gamma) * - (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); - - // (p**g) * (g*(1-p)*log(1-p) - p) - T term2 = powf(p, gamma) * - ((-1. 
* logits[i] * (logits[i] >= 0) - - logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * - (1. - p) * gamma - p); - d_logits[i] = 0.0; - d_logits[i] += -c1 * term1 * zp; - d_logits[i] += -c2 * term2 * zn; - d_logits[i] = d_logits[i] * d_losses[i]; - - } // CUDA_1D_KERNEL_LOOP -} // SigmoidFocalLossBackward - - -at::Tensor SigmoidFocalLoss_forward_cuda( - const at::Tensor& logits, - const at::Tensor& targets, - const int num_classes, - const float gamma, - const float alpha) { - AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); - AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); - AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); - - const int num_samples = logits.size(0); - - auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); - auto losses_size = num_samples * logits.size(1); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L)); - dim3 block(512); - - if (losses.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return losses; - } - - AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { - SigmoidFocalLossForward<<>>( - losses_size, - logits.contiguous().data(), - targets.contiguous().data(), - num_classes, - gamma, - alpha, - num_samples, - losses.data()); - }); - THCudaCheck(cudaGetLastError()); - return losses; -} - - -at::Tensor SigmoidFocalLoss_backward_cuda( - const at::Tensor& logits, - const at::Tensor& targets, - const at::Tensor& d_losses, - const int num_classes, - const float gamma, - const float alpha) { - AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); - AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); - AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); - - AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); - - const int num_samples = logits.size(0); - AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); - - auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); - auto d_logits_size = num_samples * logits.size(1); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(THCCeilDiv(d_logits_size, 512L), 4096L)); - dim3 block(512); - - if (d_logits.numel() == 0) { - THCudaCheck(cudaGetLastError()); - return d_logits; - } - - AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { - SigmoidFocalLossBackward<<>>( - d_logits_size, - logits.contiguous().data(), - targets.contiguous().data(), - d_losses.contiguous().data(), - num_classes, - gamma, - alpha, - num_samples, - d_logits.data()); - }); - - THCudaCheck(cudaGetLastError()); - return d_logits; -} - diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu deleted file mode 100644 index d7ccf79b0d..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/nms.cu +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-#include -#include - -#include -#include - -#include -#include - -int const threadsPerBlock = sizeof(unsigned long long) * 8; - -__device__ inline float devIoU(float const * const a, float const * const b) { - float left = max(a[0], b[0]), right = min(a[2], b[2]); - float top = max(a[1], b[1]), bottom = min(a[3], b[3]); - float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); - float interS = width * height; - float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); - float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); - return interS / (Sa + Sb - interS); -} - -__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, - const float *dev_boxes, unsigned long long *dev_mask) { - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - __shared__ float block_boxes[threadsPerBlock * 5]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; - block_boxes[threadIdx.x * 5 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; - block_boxes[threadIdx.x * 5 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; - block_boxes[threadIdx.x * 5 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; - block_boxes[threadIdx.x * 5 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const float *cur_box = dev_boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { - t |= 1ULL << i; - } - } - const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } -} - -// boxes is a N x 5 tensor -at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { - using scalar_t = float; - AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); - auto scores = boxes.select(1, 4); - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - auto boxes_sorted = boxes.index_select(0, order_t); - - int boxes_num = boxes.size(0); - - const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); - - scalar_t* boxes_dev = boxes_sorted.data(); - - THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState - - unsigned long long* mask_dev = NULL; - //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, - // boxes_num * col_blocks * sizeof(unsigned long long))); - - mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); - - dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), - THCCeilDiv(boxes_num, threadsPerBlock)); - dim3 threads(threadsPerBlock); - nms_kernel<<>>(boxes_num, - nms_overlap_thresh, - boxes_dev, - mask_dev); - - std::vector mask_host(boxes_num * col_blocks); - THCudaCheck(cudaMemcpy(&mask_host[0], - mask_dev, - sizeof(unsigned long long) * boxes_num * col_blocks, - cudaMemcpyDeviceToHost)); - - std::vector remv(col_blocks); - memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); - - at::Tensor keep = 
at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); - int64_t* keep_out = keep.data(); - - int num_to_keep = 0; - for (int i = 0; i < boxes_num; i++) { - int nblock = i / threadsPerBlock; - int inblock = i % threadsPerBlock; - - if (!(remv[nblock] & (1ULL << inblock))) { - keep_out[num_to_keep++] = i; - unsigned long long *p = &mask_host[0] + i * col_blocks; - for (int j = nblock; j < col_blocks; j++) { - remv[j] |= p[j]; - } - } - } - - THCudaFree(state, mask_dev); - // TODO improve this part - return std::get<0>(order_t.index({keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)}).sort(0, false)); -} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h deleted file mode 100644 index 6d9f8871f7..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/cuda/vision.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#pragma once -#include - - -at::Tensor SigmoidFocalLoss_forward_cuda( - const at::Tensor& logits, - const at::Tensor& targets, - const int num_classes, - const float gamma, - const float alpha); - -at::Tensor SigmoidFocalLoss_backward_cuda( - const at::Tensor& logits, - const at::Tensor& targets, - const at::Tensor& d_losses, - const int num_classes, - const float gamma, - const float alpha); - -at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - - -std::tuple ROIPool_forward_cuda(const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, - const at::Tensor& input, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); - - -at::Tensor compute_flow_cuda(const at::Tensor& boxes, - const int height, - const int width); diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak deleted file mode 100644 index 312fed4a7c..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/nms.h.bak +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-#pragma once -#include "cpu/vision.h" - -#ifdef WITH_CUDA -#include "cuda/vision.h" -#endif - - -at::Tensor nms(const at::Tensor& dets, - const at::Tensor& scores, - const float threshold) { - - if (dets.type().is_cuda()) { -#ifdef WITH_CUDA - // TODO raise error if not compiled with CUDA - if (dets.numel() == 0) - return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); - auto b = at::cat({dets, scores.unsqueeze(1)}, 1); - return nms_cuda(b, threshold); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - - at::Tensor result = nms_cpu(dets, scores, threshold); - return result; -} diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp deleted file mode 100644 index fbd5613273..0000000000 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/csrc/vision.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -//#include "nms.h" -#include "ROIAlign.h" -#include "ROIPool.h" -#include "SigmoidFocalLoss.h" - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { -// m.def("nms", &nms, "non-maximum suppression"); - m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); - m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); - m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); - m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); - m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); - m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); -} -- Gitee From 0c37cc32a2a0d8094b6e605013d2d38fd3b51b0b Mon Sep 17 00:00:00 2001 From: Savion_G Date: Thu, 21 Apr 2022 14:22:31 +0800 Subject: [PATCH 15/20] fix something --- .../maskrcnn_benchmark/layers/__init__.py | 3 +- .../maskrcnn_benchmark/layers/_utils.py | 1 + .../layers/adjust_smooth_l1_loss.py | 8 +- .../maskrcnn_benchmark/layers/misc.py | 9 +- .../maskrcnn_benchmark/layers/nms.py | 106 +++++++++++++++++- .../maskrcnn_benchmark/layers/roi_align.py | 11 +- .../layers/sigmoid_focal_loss.py | 65 +++++------ .../layers/smooth_l1_loss.py | 7 +- 8 files changed, 149 insertions(+), 61 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py index f2dbb0c236..f18f01dd06 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+import torch from .batch_norm import FrozenBatchNorm2d from .misc import Conv2d @@ -7,8 +8,6 @@ from .misc import interpolate from .nms import nms from .npu_roi_align import ROIAlign from .npu_roi_align import roi_align -from .roi_pool import ROIPool -from .roi_pool import roi_pool from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss from .sigmoid_focal_loss import SigmoidFocalLoss from .adjust_smooth_l1_loss import AdjustSmoothL1Loss diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py index bda1b6fca7..3dabc127b2 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/_utils.py @@ -1,6 +1,7 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. import glob import os.path + import torch try: diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py index 7bef251a50..3728c73df0 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py @@ -2,11 +2,11 @@ import torch from torch import nn import logging - +import torch.distributed as dist class AdjustSmoothL1Loss(nn.Module): - def __init__(self, num_features, momentum=0.1, beta=1. / 9): + def __init__(self, num_features, momentum=0.1, beta=1. /9): super(AdjustSmoothL1Loss, self).__init__() self.num_features = num_features self.momentum = momentum @@ -19,7 +19,7 @@ class AdjustSmoothL1Loss(nn.Module): def forward(self, inputs, target, size_average=True): - n = torch.abs(inputs - target) + n = torch.abs(inputs -target) n_mean = n.mean(dim=0) n_var = n.var(dim=0) with torch.no_grad(): @@ -31,6 +31,7 @@ class AdjustSmoothL1Loss(nn.Module): self.running_var *= (1 - self.momentum) self.running_var += (self.momentum * n_var) + beta = (self.running_mean - self.running_var) beta = beta.clamp(max=self.beta, min=1e-3) @@ -40,3 +41,4 @@ class AdjustSmoothL1Loss(nn.Module): if size_average: return loss.mean() return loss.sum() + diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py index 40fb7a1136..61f6610032 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py @@ -26,6 +26,7 @@ class _NewEmptyTensorOp(torch.autograd.Function): return _NewEmptyTensorOp.apply(grad, shape), None + class Conv2d(torch.nn.Conv2d): def forward(self, x): if x.numel() > 0: @@ -64,7 +65,7 @@ class ConvTranspose2d(torch.nn.ConvTranspose2d): def interpolate( - input, size=None, scale_factor=None, mode="nearest", align_corners=None + input, size=None, scale_factor=None, mode="nearest", align_corners=None ): if input.numel() > 0: return torch.nn.functional.interpolate( @@ -77,9 +78,9 @@ def interpolate( if size is not None and scale_factor is not None: raise ValueError("only one of size or scale_factor should be defined") if ( - scale_factor is not None - and isinstance(scale_factor, tuple) - and len(scale_factor) != dim + scale_factor is not None + and isinstance(scale_factor, tuple) + and len(scale_factor) != dim ): raise ValueError( "scale_factor 
shape must match input shape. " diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py index be7d96233c..521a911a04 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py @@ -1,12 +1,23 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - +# from ._utils import _C +# from maskrcnn_benchmark import _C +import time import torch +import numpy as np +from typing import List + +# nms = _C.nms + +# nms.__doc__ = """ +# This function performs Non-maximum suppresion""" def py_cpu_nms(boxes, scores, thresh): boxes = boxes.cpu() scores = scores.cpu() + start_time = time.time() + """Pure Python NMS baseline.""" x1 = boxes[:, 0] y1 = boxes[:, 1] x2 = boxes[:, 2] @@ -14,28 +25,111 @@ def py_cpu_nms(boxes, scores, thresh): scores = scores areas = (x2 - x1 + 1) * (y2 - y1 + 1) + # 按照从小到大排序后返回下标,然后顺序取反,即从大到小对应的下标 + # order = scores.argsort()[::-1] _, order = torch.sort(scores, descending=True) keep = [] while len(order) > 0: i = order[0] keep.append(i) + # 求交叉面积intersection采用了这个非常巧妙的方法,自己画一下思考一下 + # xx1 = np.maximum(x1[i], x1[order[1:]]) + # yy1 = np.maximum(y1[i], y1[order[1:]]) + # xx2 = np.minimum(x2[i], x2[order[1:]]) + # yy2 = np.minimum(y2[i], y2[order[1:]]) xx1 = torch.max(x1[i], x1[order[1:]]) yy1 = torch.max(y1[i], y1[order[1:]]) xx2 = torch.min(x2[i], x2[order[1:]]) yy2 = torch.min(y2[i], y2[order[1:]]) - w = torch.max(torch.tensor(0.0), xx2 - xx1 + 1) - h = torch.max(torch.tensor(0.0), yy2 - yy1 + 1) - inter = w * h - ovr = inter / (areas[i] + areas[order[1:]] - inter) + # print('xy: ', xx1, '\n', yy1, '\n', xx2, '\n', yy2) + w = torch.max(torch.tensor(0.0), xx2 - xx1 + 1) # 计算w + h = torch.max(torch.tensor(0.0), yy2 - yy1 + 1) # 计算h + inter = w * h # 交叉面积 + # A交B/A并B + ovr = inter / (areas[i] + areas[order[1:]] - inter) + """ + 保留重叠面积小于threshold的+ + np.where的返回值是tuple + 第一个维度是x的list,第二个维度是y的list + 这里因为输入是1维,因此就取0就好 + """ inds = torch.where(ovr <= thresh)[0] order = order[inds + 1] res_keep = torch.tensor(keep) + end_time = time.time() + print('nms cost: ', end_time - start_time) + return res_keep +def batched_nms_npu(boxes, scores, iou_threshold): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Parameters + ---------- + boxes : Tensor[N, 4] + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + idxs : Tensor[N] + indices of the categories for each one of the boxes. 
+ iou_threshold : float + discards all overlapping boxes + with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices of + the elements that have been kept by NMS, sorted + in decreasing order of scoresyong + """ + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + + ''' + npu_nms_with_mask function detail + box_scores tensor (N,8),N为候选框个数,8为候选框坐标与置信度得分 + iou_threshold float IOU阈值 + selected_boxes tensor (N,5),返回过滤后并排过序的候选框,N为输出候选框个数,5为坐标与置信度得分 + selected_idx tensor 排过序的box在输入box列表中的位置索引 + selected_mask tensor 当前候选框是否可用的标志 + ''' + _, _, keep_mask = \ + torch.npu_nms_with_mask( + torch.cat([boxes, scores[..., None]], 1), iou_threshold) + return keep_mask + + +def batched_nms( + boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float +): + """ + Same as torchvision.ops.boxes.batched_nms, but safer. + """ + assert boxes.shape[-1] == 4 + # TODO may need better strategy. + # Investigate after having a fully-cuda NMS op. + if len(boxes) < 40000: + return batched_nms_npu(boxes, scores, idxs, iou_threshold) + + result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()): + mask = (idxs == id).nonzero().view(-1) + keep = nms(boxes[mask], scores[mask], iou_threshold) + result_mask[mask[keep]] = True + keep = result_mask.nonzero().view(-1) + keep = keep[scores[keep].argsort(descending=True)] + return keep + -nms = py_cpu_nms +nms = py_cpu_nms #batched_nms_npu diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py index c68a45f3db..cc32b7b22e 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py @@ -55,8 +55,15 @@ class ROIAlign(nn.Module): self.sampling_ratio = sampling_ratio def forward(self, input, rois): - return torch.npu_roi_align(input, rois, self.spatial_scale, self.output_size, self.output_size, - self.sample_num, 0) + return torch.npu_roi_align(input, rois, self.spatial_scale, self.output_size, self.output_size, self.sample_num, 0) + #return roi_align( + # input, rois, self.output_size, self.spatial_scale, self.sampling_ratio + #) + # orig_type = input.dtype + # input = input.type(torch.float32) + # res = roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) + + # return res.type(orig_type) def __repr__(self): tmpstr = self.__class__.__name__ + "(" diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py index 50345b79cd..053a835628 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py @@ -3,35 +3,6 @@ from torch import nn from torch.autograd import Function from torch.autograd.function import once_differentiable import torch.nn.functional as F -from maskrcnn_benchmark import _C - - -class _SigmoidFocalLoss(Function): - @staticmethod - def forward(ctx, logits, targets, num_classes, gamma, alpha): - ctx.save_for_backward(logits, targets); - ctx.num_classes = num_classes - ctx.gamma = gamma - ctx.alpha = alpha - - losses = _C.sigmoid_focalloss_forward( - logits, targets, 
num_classes, gamma, alpha - ) - return losses - - @staticmethod - @once_differentiable - def backward(ctx, d_loss): - logits, targets = ctx.saved_tensors - num_classes = ctx.num_classes - gamma = ctx.gamma - alpha = ctx.alpha - d_loss = d_loss.contiguous() - d_logits = _C.sigmoid_focalloss_backward( - logits, targets, d_loss, num_classes, gamma, alpha - ) - return d_logits, None, None, None, None - def py_sigmoid_focal_loss(pred, target, @@ -64,9 +35,6 @@ def py_sigmoid_focal_loss(pred, return loss -sigmoid_focalloss = _SigmoidFocalLoss.apply - - class SigmoidFocalLoss(nn.Module): def __init__(self, num_classes, gamma, alpha): super(SigmoidFocalLoss, self).__init__() @@ -75,14 +43,31 @@ class SigmoidFocalLoss(nn.Module): self.alpha = alpha def forward(self, logits, targets): - num_classes = logits.size(1) + 1 - valid = (targets != -1)[:, None] - targets = F.one_hot(targets.abs(), num_classes=num_classes) - targets = targets[:, 1:num_classes] - loss = py_sigmoid_focal_loss( - logits, targets, self.gamma, self.alpha - ) - loss = loss * valid + # loss = sigmoid_focalloss( + # logits.cpu(), targets.cpu(), self.num_classes, self.gamma, self.alpha + # ) + # return loss.sum().npu() + + # orig_type = logits.dtype + # logits = logits.float() + # loss = sigmoid_focalloss( + # logits.cpu(), targets.cpu(), self.num_classes, self.gamma, self.alpha + # ) + # return loss.sum().type(orig_type).npu() + + if torch.cuda.is_available(): + loss = sigmoid_focalloss( + logits, targets, self.num_classes, self.gamma, self.alpha + ) + else: + num_classes = logits.size(1) + 1 + valid = (targets != -1)[:, None] + targets = F.one_hot(targets.abs(), num_classes=num_classes) + targets = targets[:, 1:num_classes] + loss = py_sigmoid_focal_loss( + logits, targets, self.gamma, self.alpha + ) + loss = loss * valid return loss.sum() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py index 859b538cea..4b2399af95 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py @@ -2,9 +2,8 @@ import torch import numpy as np - class SmoothL1Loss(torch.nn.Module): - def __init__(self, beta=1. / 9): + def __init__(self, beta=1. 
/9): super(SmoothL1Loss, self).__init__() self.beta = beta @@ -18,9 +17,9 @@ def smooth_l1_loss(input, target, alpha=0.5, gamma=1.5, beta=1.0, size_average=T very similar to the smooth_l1_loss from pytorch, but with the extra beta parameter """ - + diff = torch.abs(input - target) - b = np.e ** (gamma / alpha) - 1 + b = np.e**(gamma / alpha) - 1 cond = diff < beta neg_cond = (~cond) loss = (alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff) * cond.half() -- Gitee From 71dc8e1f000e7e7d003095f72f6dda2dd40c2571 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Thu, 21 Apr 2022 14:29:55 +0800 Subject: [PATCH 16/20] fix something --- .../maskrcnn_benchmark/layers/__init__.py | 4 ++-- .../RetinaMask/maskrcnn_benchmark/layers/nms.py | 13 +------------ .../maskrcnn_benchmark/layers/sigmoid_focal_loss.py | 12 ------------ 3 files changed, 3 insertions(+), 26 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py index f18f01dd06..0aff023bd1 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/__init__.py @@ -12,7 +12,7 @@ from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss from .sigmoid_focal_loss import SigmoidFocalLoss from .adjust_smooth_l1_loss import AdjustSmoothL1Loss -__all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", - "smooth_l1_loss", "SmoothL1Loss", "Conv2d", "ConvTranspose2d", +__all__ = ["nms", "roi_align", "ROIAlign", "smooth_l1_loss", + "SmoothL1Loss", "Conv2d", "ConvTranspose2d", "interpolate", "FrozenBatchNorm2d", "SigmoidFocalLoss", "AdjustSmoothL1Loss"] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py index 521a911a04..e38f195013 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py @@ -5,13 +5,6 @@ import time import torch import numpy as np from typing import List - -# nms = _C.nms - -# nms.__doc__ = """ -# This function performs Non-maximum suppresion""" - - def py_cpu_nms(boxes, scores, thresh): boxes = boxes.cpu() scores = scores.cpu() @@ -34,10 +27,6 @@ def py_cpu_nms(boxes, scores, thresh): i = order[0] keep.append(i) # 求交叉面积intersection采用了这个非常巧妙的方法,自己画一下思考一下 - # xx1 = np.maximum(x1[i], x1[order[1:]]) - # yy1 = np.maximum(y1[i], y1[order[1:]]) - # xx2 = np.minimum(x2[i], x2[order[1:]]) - # yy2 = np.minimum(y2[i], y2[order[1:]]) xx1 = torch.max(x1[i], x1[order[1:]]) yy1 = torch.max(y1[i], y1[order[1:]]) xx2 = torch.min(x2[i], x2[order[1:]]) @@ -132,4 +121,4 @@ def batched_nms( return keep -nms = py_cpu_nms #batched_nms_npu +nms = py_cpu_nms diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py index 053a835628..639faa7c43 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py @@ -43,18 +43,6 @@ class SigmoidFocalLoss(nn.Module): self.alpha = alpha def forward(self, logits, targets): - # loss = sigmoid_focalloss( - # logits.cpu(), targets.cpu(), self.num_classes, self.gamma, self.alpha - # ) - # return 
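For reference alongside the `py_sigmoid_focal_loss` path used in `SigmoidFocalLoss.forward` above, a minimal sketch of the standard per-element sigmoid focal loss (no reduction). This is the textbook formulation, not the repository's API; the function name is illustrative.

```python
import torch
import torch.nn.functional as F

def sigmoid_focal_loss_ref(pred, target, gamma=2.0, alpha=0.25):
    # Focal loss on logits: down-weight easy examples by (1 - p_t) ** gamma.
    target = target.to(pred.dtype)
    prob = pred.sigmoid()
    ce = F.binary_cross_entropy_with_logits(pred, target, reduction="none")
    p_t = prob * target + (1 - prob) * (1 - target)
    alpha_t = alpha * target + (1 - alpha) * (1 - target)
    return alpha_t * (1 - p_t) ** gamma * ce
```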
loss.sum().npu() - - # orig_type = logits.dtype - # logits = logits.float() - # loss = sigmoid_focalloss( - # logits.cpu(), targets.cpu(), self.num_classes, self.gamma, self.alpha - # ) - # return loss.sum().type(orig_type).npu() - if torch.cuda.is_available(): loss = sigmoid_focalloss( logits, targets, self.num_classes, self.gamma, self.alpha -- Gitee From a524117f3e0a0be515c5dbdbb31a6c1c7ac16207 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Thu, 21 Apr 2022 18:13:32 +0800 Subject: [PATCH 17/20] fix some of the warnings --- .../maskrcnn_benchmark/data/datasets/coco.py | 1 - .../data/samplers/distributed.py | 2 +- .../maskrcnn_benchmark/engine/inference.py | 8 ++----- .../maskrcnn_benchmark/engine/trainer.py | 9 ++++---- .../maskrcnn_benchmark/layers/misc.py | 12 +++++------ .../maskrcnn_benchmark/layers/nms.py | 8 +++---- .../maskrcnn_benchmark/layers/roi_align.py | 15 ++++++------- .../maskrcnn_benchmark/layers/roi_pool.py | 16 +++++++------- .../layers/sigmoid_focal_loss.py | 21 +++++++------------ .../layers/smooth_l1_loss.py | 16 +++++++------- .../modeling/backbone/fpn.py | 1 - .../modeling/backbone/resnet.py | 1 - .../maskrcnn_benchmark/modeling/box_coder.py | 4 ++-- 13 files changed, 52 insertions(+), 62 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py index fbfdf641bf..37de632013 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/datasets/coco.py @@ -38,7 +38,6 @@ class COCODataset(torchvision.datasets.coco.CocoDetection): img, anno = super(COCODataset, self).__getitem__(idx) # filter crowd annotations - # TODO might be better to add an extra field anno = [obj for obj in anno if obj["iscrowd"] == 0] boxes = [obj["bbox"] for obj in anno] diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py index 632443c62e..878f006801 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/data/samplers/distributed.py @@ -1,7 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Code is copy-pasted exactly as in torch.utils.data.distributed, # with a modification in the import to use the deprecated backend -# FIXME remove this once c10d fixes the bug it has import math import torch import torch.distributed as dist @@ -24,6 +23,7 @@ class DistributedSampler(Sampler): """ def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): + shuffle = shuffle if num_replicas is None: if not dist.is_available(): raise RuntimeError("Requires distributed package to be available") diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py index 49855938d1..30b59d8c0c 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py @@ -87,7 +87,6 @@ def prepare_for_coco_segmentation(predictions, dataset): if len(prediction) == 0: continue - # TODO replace with get_img_info? 
image_width = dataset.coco.imgs[original_id]["width"] image_height = dataset.coco.imgs[original_id]["height"] prediction = prediction.resize((image_width, image_height)) @@ -157,13 +156,11 @@ def evaluate_box_proposals( for image_id, prediction in enumerate(tqdm(predictions)): original_id = dataset.id_to_img_map[image_id] - # TODO replace with get_img_info? image_width = dataset.coco.imgs[original_id]["width"] image_height = dataset.coco.imgs[original_id]["height"] prediction = prediction.resize((image_width, image_height)) # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation inds = prediction.get_field("objectness").sort(descending=True)[1] prediction = prediction[inds] @@ -316,7 +313,6 @@ class COCOResults(object): res[metric] = s[idx] def __repr__(self): - # TODO make it pretty return repr(self.results) @@ -361,7 +357,7 @@ def inference( ) logger = logging.getLogger("maskrcnn_benchmark.inference") dataset = data_loader.dataset - logger.info("Start evaluation on {} images".format(len(dataset))) + logger.info("Start evaluation on %d images" % (len(dataset))) start_time = time.time() predictions = compute_on_dataset(model, data_loader, device) # wait for all processes to complete before measuring the time @@ -369,7 +365,7 @@ def inference( total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=total_time)) logger.info( - "Total inference time: {} ({} s / img per device, on {} devices)".format( + "Total inference time: %s (%.4f s / img per device, on %d devices)" % ( total_time_str, total_time * num_devices / len(dataset), num_devices ) ) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py index ac753d2876..92b1b549c0 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -57,6 +57,7 @@ def do_train( start_training_time = time.time() end = time.time() get_fps = GetFPS(500) + iteration = 0 for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if local_rank == 0: @@ -113,10 +114,10 @@ def do_train( ) ) if iteration % checkpoint_period == 0 and iteration > 0: - checkpointer.save("model_{:07d}".format(iteration + 1), **arguments) - checkpointer.save("model_{:07d}".format(iteration), **arguments) + checkpointer.save("model_%07d" % (iteration + 1), **arguments) + checkpointer.save("model_%07d".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) if local_rank == 0: - logger.info("Total training time: {}".format(total_time_str)) - logger.info("step_fps: {:.4f}".format(get_fps.mean_fps)) + logger.info("Total training time: %s".format(total_time_str)) + logger.info("step_fps: %.4f" % get_fps.mean_fps) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py index 61f6610032..18bfdbf38c 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/misc.py @@ -65,11 +65,11 @@ class ConvTranspose2d(torch.nn.ConvTranspose2d): def interpolate( - input, size=None, scale_factor=None, mode="nearest", align_corners=None + _input, size=None, scale_factor=None, 
mode="nearest", align_corners=None ): - if input.numel() > 0: + if _input.numel() > 0: return torch.nn.functional.interpolate( - input, size, scale_factor, mode, align_corners + _input, size, scale_factor, mode, align_corners ) def _check_size_scale_factor(dim): @@ -94,9 +94,9 @@ def interpolate( scale_factors = _ntuple(dim)(scale_factor) # math.floor might return float in py2.7 return [ - int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + int(math.floor(_input.size(i + 2) * scale_factors[i])) for i in range(dim) ] output_shape = tuple(_output_size(2)) - output_shape = input.shape[:-2] + output_shape - return _NewEmptyTensorOp.apply(input, output_shape) + output_shape = _input.shape[:-2] + output_shape + return _NewEmptyTensorOp.apply(_input, output_shape) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py index e38f195013..de5e71eb30 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py @@ -5,6 +5,8 @@ import time import torch import numpy as np from typing import List + + def py_cpu_nms(boxes, scores, thresh): boxes = boxes.cpu() scores = scores.cpu() @@ -55,6 +57,7 @@ def py_cpu_nms(boxes, scores, thresh): return res_keep + def batched_nms_npu(boxes, scores, iou_threshold): """ Performs non-maximum suppression in a batched fashion. @@ -99,14 +102,11 @@ def batched_nms_npu(boxes, scores, iou_threshold): return keep_mask -def batched_nms( - boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float -): +def batched_nms(boxes, scores, idxs, iou_threshold): """ Same as torchvision.ops.boxes.batched_nms, but safer. """ assert boxes.shape[-1] == 4 - # TODO may need better strategy. # Investigate after having a fully-cuda NMS op. 
if len(boxes) < 40000: return batched_nms_npu(boxes, scores, idxs, iou_threshold) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py index cc32b7b22e..23aa35150e 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_align.py @@ -10,14 +10,14 @@ from maskrcnn_benchmark import _C class _ROIAlign(Function): @staticmethod - def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): + def forward(ctx, input_, roi, output_size, spatial_scale, sampling_ratio): ctx.save_for_backward(roi) ctx.output_size = _pair(output_size) ctx.spatial_scale = spatial_scale ctx.sampling_ratio = sampling_ratio - ctx.input_shape = input.size() + ctx.input_shape = input_.size() output = _C.roi_align_forward( - input.cpu(), roi.cpu(), spatial_scale, output_size[0], output_size[1], sampling_ratio + input_.cpu(), roi.cpu(), spatial_scale, output_size[0], output_size[1], sampling_ratio ) return output.npu() @@ -54,11 +54,12 @@ class ROIAlign(nn.Module): self.spatial_scale = spatial_scale self.sampling_ratio = sampling_ratio - def forward(self, input, rois): - return torch.npu_roi_align(input, rois, self.spatial_scale, self.output_size, self.output_size, self.sample_num, 0) - #return roi_align( + def forward(self, _input, rois): + return torch.npu_roi_align(_input, rois, self.spatial_scale, self.output_size, + self.output_size, self.sample_num, 0) + # return roi_align( # input, rois, self.output_size, self.spatial_scale, self.sampling_ratio - #) + # ) # orig_type = input.dtype # input = input.type(torch.float32) # res = roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py index c0e42756ee..ee07f5a5c7 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/roi_pool.py @@ -10,26 +10,26 @@ from maskrcnn_benchmark import _C class _ROIPool(Function): @staticmethod - def forward(ctx, input, roi, output_size, spatial_scale): + def forward(ctx, _input, roi, output_size, spatial_scale): ctx.output_size = _pair(output_size) ctx.spatial_scale = spatial_scale - ctx.input_shape = input.size() + ctx.input_shape = _input.size() output, argmax = _C.roi_pool_forward( - input, roi, spatial_scale, output_size[0], output_size[1] + _input, roi, spatial_scale, output_size[0], output_size[1] ) - ctx.save_for_backward(input, roi, argmax) + ctx.save_for_backward(_input, roi, argmax) return output @staticmethod @once_differentiable def backward(ctx, grad_output): - input, rois, argmax = ctx.saved_tensors + _input, rois, argmax = ctx.saved_tensors output_size = ctx.output_size spatial_scale = ctx.spatial_scale bs, ch, h, w = ctx.input_shape grad_input = _C.roi_pool_backward( grad_output, - input, + _input, rois, argmax, spatial_scale, @@ -52,8 +52,8 @@ class ROIPool(nn.Module): self.output_size = output_size self.spatial_scale = spatial_scale - def forward(self, input, rois): - return roi_pool(input, rois, self.output_size, self.spatial_scale) + def forward(self, input_, rois): + return roi_pool(input_, rois, self.output_size, self.spatial_scale) def __repr__(self): tmpstr = self.__class__.__name__ + 
"(" diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py index 639faa7c43..bae9510de5 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/sigmoid_focal_loss.py @@ -43,19 +43,14 @@ class SigmoidFocalLoss(nn.Module): self.alpha = alpha def forward(self, logits, targets): - if torch.cuda.is_available(): - loss = sigmoid_focalloss( - logits, targets, self.num_classes, self.gamma, self.alpha - ) - else: - num_classes = logits.size(1) + 1 - valid = (targets != -1)[:, None] - targets = F.one_hot(targets.abs(), num_classes=num_classes) - targets = targets[:, 1:num_classes] - loss = py_sigmoid_focal_loss( - logits, targets, self.gamma, self.alpha - ) - loss = loss * valid + num_classes = logits.size(1) + 1 + valid = (targets != -1)[:, None] + targets = F.one_hot(targets.abs(), num_classes=num_classes) + targets = targets[:, 1:num_classes] + loss = py_sigmoid_focal_loss( + logits, targets, self.gamma, self.alpha + ) + loss = loss * valid return loss.sum() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py index 4b2399af95..b02718fcc3 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/smooth_l1_loss.py @@ -2,24 +2,24 @@ import torch import numpy as np + class SmoothL1Loss(torch.nn.Module): - def __init__(self, beta=1. /9): + def __init__(self, beta=1. / 9): super(SmoothL1Loss, self).__init__() self.beta = beta - def forward(self, input, target, size_average=True): - return smooth_l1_loss(input, target, size_average=size_average) + def forward(self, input_, target, size_average=True): + return smooth_l1_loss(input_, target, size_average=size_average) -# TODO maybe push this to nn? 
-def smooth_l1_loss(input, target, alpha=0.5, gamma=1.5, beta=1.0, size_average=True): +def smooth_l1_loss(_input, target, alpha=0.5, gamma=1.5, beta=1.0, size_average=True): """ very similar to the smooth_l1_loss from pytorch, but with the extra beta parameter """ - - diff = torch.abs(input - target) - b = np.e**(gamma / alpha) - 1 + + diff = torch.abs(_input - target) + b = np.e ** (gamma / alpha) - 1 cond = diff < beta neg_cond = (~cond) loss = (alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff) * cond.half() diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py index 0916755c90..d7f719ffc6 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/fpn.py @@ -78,7 +78,6 @@ class FPN(nn.Module): if len(inner_block): inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") inner_lateral = getattr(self, inner_block)(feature) - # TODO use size instead of scale to make it robust to different sizes # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], # mode='bilinear', align_corners=False) last_inner = inner_lateral + inner_top_down diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py index 5f129a2a23..324ceb69a4 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/backbone/resnet.py @@ -228,7 +228,6 @@ class BottleneckWithFixedBatchNorm(nn.Module): bias=False, ) self.bn1 = FrozenBatchNorm2d(bottleneck_channels) - # TODO: specify init for the above self.conv2 = Conv2d( bottleneck_channels, diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py index 46a4acb324..f26c369126 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/modeling/box_coder.py @@ -29,7 +29,7 @@ class BoxCoder(object): proposals (Tensor): boxes to be encoded """ - TO_REMOVE = 1 # TODO remove + TO_REMOVE = 1 ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths @@ -61,7 +61,7 @@ class BoxCoder(object): boxes = boxes.to(rel_codes.dtype) - TO_REMOVE = 1 # TODO remove + TO_REMOVE = 1 widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE ctr_x = boxes[:, 0] + 0.5 * widths -- Gitee From 6fa313a78b8480f09710cab8b0174fe3148425db Mon Sep 17 00:00:00 2001 From: Savion_G Date: Thu, 21 Apr 2022 18:39:54 +0800 Subject: [PATCH 18/20] fix something --- .../maskrcnn_benchmark/engine/inference.py | 3 +- .../maskrcnn_benchmark/engine/trainer.py | 4 +- .../maskrcnn_benchmark/layers/nms.py | 46 ++++--------------- 3 files changed, 11 insertions(+), 42 deletions(-) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py index 30b59d8c0c..f0a4059ba9 100644 --- 
a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/inference.py @@ -43,7 +43,6 @@ def prepare_for_coco_detection(predictions, dataset): if len(prediction) == 0: continue - # TODO replace with get_img_info? image_width = dataset.coco.imgs[original_id]["width"] image_height = dataset.coco.imgs[original_id]["height"] prediction = prediction.resize((image_width, image_height)) @@ -357,7 +356,7 @@ def inference( ) logger = logging.getLogger("maskrcnn_benchmark.inference") dataset = data_loader.dataset - logger.info("Start evaluation on %d images" % (len(dataset))) + logger.info("Start evaluation on %d images" % len(dataset)) start_time = time.time() predictions = compute_on_dataset(model, data_loader, device) # wait for all processes to complete before measuring the time diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py index 92b1b549c0..4b8e90dc52 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/engine/trainer.py @@ -115,9 +115,9 @@ def do_train( ) if iteration % checkpoint_period == 0 and iteration > 0: checkpointer.save("model_%07d" % (iteration + 1), **arguments) - checkpointer.save("model_%07d".format(iteration), **arguments) + checkpointer.save("model_%07d" % iteration, **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) if local_rank == 0: - logger.info("Total training time: %s".format(total_time_str)) + logger.info("Total training time: %s" % total_time_str) logger.info("step_fps: %.4f" % get_fps.mean_fps) diff --git a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py index de5e71eb30..b92bece18c 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py +++ b/PyTorch/contrib/cv/detection/RetinaMask/maskrcnn_benchmark/layers/nms.py @@ -1,18 +1,12 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
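A tiny usage sketch, not part of this patch, for the pure-PyTorch `py_cpu_nms` fallback kept in this module; the boxes are made up. With the `+1` area convention used here, the first two boxes overlap with IoU ≈ 0.70, so only indices 0 and 2 survive at a 0.5 threshold.

```python
import torch

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [1.0, 1.0, 11.0, 11.0],
                      [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = py_cpu_nms(boxes, scores, thresh=0.5)  # tensor([0, 2])
```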
-# from ._utils import _C -# from maskrcnn_benchmark import _C -import time import torch -import numpy as np from typing import List def py_cpu_nms(boxes, scores, thresh): boxes = boxes.cpu() scores = scores.cpu() - start_time = time.time() - """Pure Python NMS baseline.""" x1 = boxes[:, 0] y1 = boxes[:, 1] x2 = boxes[:, 2] @@ -20,41 +14,27 @@ def py_cpu_nms(boxes, scores, thresh): scores = scores areas = (x2 - x1 + 1) * (y2 - y1 + 1) - # 按照从小到大排序后返回下标,然后顺序取反,即从大到小对应的下标 - # order = scores.argsort()[::-1] _, order = torch.sort(scores, descending=True) keep = [] while len(order) > 0: i = order[0] keep.append(i) - # 求交叉面积intersection采用了这个非常巧妙的方法,自己画一下思考一下 xx1 = torch.max(x1[i], x1[order[1:]]) yy1 = torch.max(y1[i], y1[order[1:]]) xx2 = torch.min(x2[i], x2[order[1:]]) yy2 = torch.min(y2[i], y2[order[1:]]) - # print('xy: ', xx1, '\n', yy1, '\n', xx2, '\n', yy2) - - w = torch.max(torch.tensor(0.0), xx2 - xx1 + 1) # 计算w - h = torch.max(torch.tensor(0.0), yy2 - yy1 + 1) # 计算h - inter = w * h # 交叉面积 - # A交B/A并B + w = torch.max(torch.tensor(0.0), xx2 - xx1 + 1) + h = torch.max(torch.tensor(0.0), yy2 - yy1 + 1) + inter = w * h ovr = inter / (areas[i] + areas[order[1:]] - inter) - """ - 保留重叠面积小于threshold的+ - np.where的返回值是tuple - 第一个维度是x的list,第二个维度是y的list - 这里因为输入是1维,因此就取0就好 - """ + inds = torch.where(ovr <= thresh)[0] order = order[inds + 1] res_keep = torch.tensor(keep) - end_time = time.time() - print('nms cost: ', end_time - start_time) - return res_keep @@ -88,14 +68,6 @@ def batched_nms_npu(boxes, scores, iou_threshold): if boxes.numel() == 0: return torch.empty((0,), dtype=torch.int64, device=boxes.device) - ''' - npu_nms_with_mask function detail - box_scores tensor (N,8),N为候选框个数,8为候选框坐标与置信度得分 - iou_threshold float IOU阈值 - selected_boxes tensor (N,5),返回过滤后并排过序的候选框,N为输出候选框个数,5为坐标与置信度得分 - selected_idx tensor 排过序的box在输入box列表中的位置索引 - selected_mask tensor 当前候选框是否可用的标志 - ''' _, _, keep_mask = \ torch.npu_nms_with_mask( torch.cat([boxes, scores[..., None]], 1), iou_threshold) @@ -103,17 +75,15 @@ def batched_nms_npu(boxes, scores, iou_threshold): def batched_nms(boxes, scores, idxs, iou_threshold): - """ - Same as torchvision.ops.boxes.batched_nms, but safer. - """ + assert boxes.shape[-1] == 4 # Investigate after having a fully-cuda NMS op. 
if len(boxes) < 40000: - return batched_nms_npu(boxes, scores, idxs, iou_threshold) + return batched_nms_npu(boxes, scores, iou_threshold) result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) - for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()): - mask = (idxs == id).nonzero().view(-1) + for _id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()): + mask = (idxs == _id).nonzero().view(-1) keep = nms(boxes[mask], scores[mask], iou_threshold) result_mask[mask[keep]] = True keep = result_mask.nonzero().view(-1) -- Gitee From 3d40496fc2234aabd431fcff1cd6536954ed76b1 Mon Sep 17 00:00:00 2001 From: Savion_G Date: Fri, 13 May 2022 18:15:53 +0800 Subject: [PATCH 19/20] first commit --- .../contrib/cv/detection/RetinaMask/README.md | 262 +++++++ .../cv/detection/RetinaMask/RetinaMask.patch | 692 ++++++++++++++++++ .../cv/detection/RetinaMask/requirements.txt | 9 + .../cv/detection/RetinaMask/test/env_310.sh | 6 + .../detection/RetinaMask/test/infer_om_310.sh | 30 + .../RetinaMask/test/infer_onnx_T4.sh | 13 + .../cv/detection/RetinaMask/test/onnx2om.sh | 16 + .../detection/RetinaMask/test/postprocess.sh | 19 + .../detection/RetinaMask/test/preprocess.sh | 16 + .../cv/detection/RetinaMask/test/pth2onnx.sh | 22 + .../RetinaMask/tools/RetinaMask_eval_onnx.py | 92 +++ .../tools/RetinaMask_postprocess.py | 78 ++ .../RetinaMask/tools/RetinaMask_preprocess.py | 75 ++ .../RetinaMask/tools/RetinaMask_pth2onnx.py | 75 ++ .../detection/RetinaMask/tools/cast_onnx.py | 44 ++ .../cv/detection/RetinaMask/tools/utils.py | 400 ++++++++++ 16 files changed, 1849 insertions(+) create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/README.md create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/RetinaMask.patch create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/requirements.txt create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/env_310.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_om_310.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_onnx_T4.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/onnx2om.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/postprocess.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/preprocess.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/test/pth2onnx.sh create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_eval_onnx.py create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_postprocess.py create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_preprocess.py create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_pth2onnx.py create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/cast_onnx.py create mode 100644 ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/utils.py diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/README.md b/ACL_PyTorch/contrib/cv/detection/RetinaMask/README.md new file mode 100644 index 0000000000..5b65b4fba5 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -0,0 +1,262 @@ +# RetinaMask推理指导 +- 1 [模型地址](#1-模型地址) +- [2 环境依赖](#2-环境依赖) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 
离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 om离线推理精度统计](#61-om离线推理精度统计) + - [6.2 onnx精度](#62-onnx精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 310性能数据](#71-310性能数据) + - [7.2 T4性能数据](#72-T4性能数据) + - [7.3 性能对比](#73-性能对比) + + + +## 1 模型地址 + +- GPU 代码地址 [GitHub - chengyangfu/retinamask: RetinaMask](https://github.com/chengyangfu/retinamask) + +- NPU 代码地址 [PyTorch/contrib/cv/detection/RetinaMask · Ascend/ModelZoo-PyTorch - 码云 - 开源中国 (gitee.com)](https://gitee.com/ascend/modelzoo/tree/master/built-in/PyTorch/Official/cv/image_object_detection/Faster_Mask_RCNN_for_PyTorch) + + + +## 2 环境依赖 + +### 2.1 深度学习框架 +``` +torch==1.6.0 +torchvision==0.7.0 +onnx==1.8.1 +``` + +### 2.2 python第三方库 + +``` +matplotlib==3.5.2 +onnx-simplifier==0.3.9 +opencv-python==4.5.5.64 +pycocotools==2.0 +tqdm==4.64.0 +yacs==0.1.8 + +pip install -r ./requirements.txt +``` + + + +## 3 模型转换 + +### 3.1 pth转onnx模型 + +- 通过打补丁的方式修改`RetinaMask`: + +```shell +git apply ./RetinaMask.patch +``` + +- 主要修改说明: + +> 1.更改原版输入格式`boxlist`为正常的`tensor`,各处对应修改 +> +> 2.`onnx`不支持`nms`,去除`nms`操作 +> +> 3.减小计算压力,`rpn`的`topK`输出结果从4000个降低至1000个 +> +> 4.`torch.npu_roi_align`不支持转换`onnx`,用`torchvision.ops.roialign`替换。另外与`torch.npu_roi_align`保持一致,`aligned=False` + +- 获取`pth`权重: + +使用的是`retinamask`在`8p 910`训练`20000steps`的权重`npu_8P_model_0020001.pth` + +- `pth`转`onnx`模型: + +```shell +bash ./test/pth2onnx.sh --cfg_path="./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml" --pth_path="./npu_8P_model_0020001.pth" --onnx_path="./weights/npu_8P_model_0020001_bs1_sim.onnx" --save_dir="./weights" +``` +### 3.2 onnx转om模型 + +- `onnx`修改说明: + +> `Concat_742`节点的`input_0`与`input_1`在转换`om`的过程中,被转换为`float16`与`float32`,引起报错。因此将`onnx`模型手动插入`cast`节点,均转换至`float16`以避免问题 + +- `onnx`转`om`模型 + +```shell +bash ./test/onnx2om.sh --input_shape="input:1,3,1344,1344" --onnx_path="./weights/npu_8P_model_0020001_bs1_sim_cast.onnx" --om_path="./weights/npu_8P_model_0020001_bs1" +``` + + +## 4 数据集预处理 + +- 数据集使用[COCO官网](https://cocodataset.org/#download)的`coco2017`的`5000`张验证集进行测试,图片与标签分别存放在`/opt/npu/coco/val2017/`与`/opt/npu/coco/annotations/instances_val2017.json` + + +- 数据集预处理,生成`bin`与`info` + +```shell +bash ./test/preprocess.sh --image_src_path="/opt/npu/coco/val2017/" --bin_file_path="./bins/" --bin_info_name="retinamask_coco2017.info" +``` + + + +## 5 离线推理 + +### 5.1 benchmark工具概述 + +`benchmark`工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在`Ascend310`上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) + +### 5.2 离线推理 +- ln -s benckmark.x86: + +```shell +ln -s path_to_benchmark.x86_64 ./ +``` +- 执行离线推理 + +```shell +bash ./test/infer_om_310.sh --device_id=0 --batch_size=1 --input_shape=1344 --om_path="./weights/npu_8P_model_0020001_bs1.om" --input_text_path="./retinamask_coco2017.info" --coco_path="/opt/npu/coco" --result_path="./result" +``` +- 输出结果默认保存在当前目录`result/dumpOutput_device0`,模型有四个输出,每个输入对应的输出对应四个`bin`文件 + +| outputs | shape | dtype | info | +| :-------- | ------------------ | ------- | ------ | +| xxx_1.bin | 1000 * 4 | float32 | bboxes | +| xxx_2.bin | 1000 | int32 | labels | +| xxx_3.bin | 1000 | float32 | scores | +| xxx_4.bin | 1000 * 1 * 28 * 28 | float32 | masks | + + + +## 6 精度对比 + +### 6.1 om离线推理精度统计 + +- 后处理统计`map`精度 + +```shell +bash ./test/postprocess.sh --device_id=0 --input_text_path="./retinamask_coco2017.info" --coco_path="/opt/npu/coco" --result_path="./result" +``` +```shell +bbox_info: 
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.279 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.443 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.296 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.144 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.312 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.347 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.266 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.421 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.435 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.249 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.468 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.565 + +segm_info: + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.248 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.415 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.259 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.120 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.284 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.318 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.240 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.376 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.388 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.212 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.429 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.508 +``` + +### 6.2 npu推理精度 + +- 相关更改说明: + +> 为提升运行速度,npu代码的输入`shape`分为`(1344,960)`与`(960,1344)`两种情况。转换模型时为简便处理,统一使用`(1,3,1344,1344)`的输入`shape`,并统一预处理方法。 + +- 测试`pth`精度 + +```python +python tools/test_net.py +``` + +```shell +Evaluate annotation type *bbox* +DONE (t=39.81s). +Accumulating evaluation results... +DONE (t=11.35s). + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.271 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.429 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.288 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.126 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.294 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.357 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.261 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.411 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.423 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.239 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.456 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.538 + +Evaluate annotation type *segm* +DONE (t=42.63s). +Accumulating evaluation results... +DONE (t=11.29s). 
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.252 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.414 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.264 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.107 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.278 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.340 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.243 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.376 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.387 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.205 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.424 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.503 +``` +### 6.3 精度对比 +`npu`推理精度`bbox: 0.271`,`segm:0.252`,`om`推理精度`bbox:0.279`,`segm:0.248`,精度下降在1个点以内,精度达标 + + + +## 7 性能对比 + +### 7.1 310性能数据 + +确保`device`空闲,`benchmark.x86_64`输出结果如下: + +``` +[e2e] throughputRate: 0.674726, latency: 7.41041e+06 +[data read] throughputRate: 0.709269, moduleLatency: 1409.9 +[preprocess] throughputRate: 0.694304, moduleLatency: 1440.29 +[infer] throughputRate: 0.675258, Interface throughputRate: 0.69984, moduleLatency: 1478.81 +[post] throughputRate: 0.675257, moduleLatency: 1480.92 +``` +`retinamask`不支持多`batch` + + +### 7.2 T4性能数据 + +`onnx`包含`nms`与`roialign`算子,因此不能使用`TensorRT`测试性能数据,故在`T4`机器上使用`onnx`在线推理测试性能数据: + +```shell +bash ./test/infer_onnx_T4.sh --weight_path="./weights/npu_8P_model_0020001_bs1_sim.onnx" --coco_path="/data/savion/Datasets/COCO2017/coco" +``` + +`T4`性能: + +```shell +FPS: 1.2889 +``` +### 7.3 性能对比 +`310`单卡4个`device`,`benchmark`测试的是一个`device`。`T4`一个设备相当于4个`device`,测试的是整个设备。 + +对比性能,`0.69984 * 4 > 1.2889`,`npu`性能超过`T4` 。 diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/RetinaMask.patch b/ACL_PyTorch/contrib/cv/detection/RetinaMask/RetinaMask.patch new file mode 100644 index 0000000000..78973d4a6c --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/RetinaMask.patch @@ -0,0 +1,692 @@ +diff --git a/README.md b/README.md +deleted file mode 100644 +index e6474ac..0000000 +--- a/README.md ++++ /dev/null +@@ -1,78 +0,0 @@ +-## Before running +- +-- install numactl: +- +-``` +-apt-get install numactl # for Ubuntu +-yum install numactl # for CentOS +-``` +- +-- get R-50.pkl: +- +-``` +-mkdir -p /root/.torch/models/ +-wget https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl +-mv R-50.pkl /root/.torch/models/ +-``` +- +-- ln -s dataset: +- +-``` +-mkdir ./dataset +-ln -snf path_to_coco ./dataset/coco +-``` +- +-- other requirements: +- +-``` +-pip3 install torchvision==0.2.1 +- +-# other recommended requirements +-apex==0.1+ascend.20220315 +-torch==1.5.0+ascend.post5.20220315 +-``` +- +-- source env and build: +- +-``` +-source test/env_npu.sh +-``` +- +- +- +-## Running +- +-- To train: +- +-``` +-# 1p train full +-bash test/train_full_1p.sh --data_path=./dataset/ +- +-# 1p train perf +-bash test/train_performance_1p.sh --data_path=./dataset/ +- +-# 8p train full +-bash test/train_full_8p.sh --data_path=./dataset/ +- +-# 8p train perf +-bash test/train_performance_8p.sh --data_path=./dataset/ +-``` +- +-- To evaluate: +- +-``` +-bash test/train_eval_1p.sh --data_path=./dataset/ --weight_path=./model_0044999.pth # for example +-``` +- +- +- +-## Result +- +-1p batch_size == 8,8p batch_size == 64 +- +-| NAME 
| Steps | BBOX-MAP | SEGM-MAP | FPS | +-| :----: | :----: | :------: | :------: | :--: | +-| GPU-1p | 360000 | - | - | 8.7 | +-| GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | +-| NPU-1p | 400 | - | - | 4.6 | +-| NPU-8p | 20000 | 28.8 | 25.7 | 34.8 | +diff --git a/maskrcnn_benchmark/config/defaults.py b/maskrcnn_benchmark/config/defaults.py +index 57db416..3c279c8 100644 +--- a/maskrcnn_benchmark/config/defaults.py ++++ b/maskrcnn_benchmark/config/defaults.py +@@ -36,7 +36,7 @@ _C.MODEL.WEIGHT = "" + # ----------------------------------------------------------------------------- + _C.INPUT = CN() + # Size of the fixed shape +-_C.INPUT.FIX_SHAPE = (1344, 1344) ++_C.INPUT.FIX_SHAPE = 1344 + # Size of the smallest side of the image during training + _C.INPUT.MIN_SIZE_TRAIN = (800,) # 800 + # Maximum size of the side of the image during training +diff --git a/maskrcnn_benchmark/layers/npu_roi_align.py b/maskrcnn_benchmark/layers/npu_roi_align.py +index 90d3168..91ffd6f 100644 +--- a/maskrcnn_benchmark/layers/npu_roi_align.py ++++ b/maskrcnn_benchmark/layers/npu_roi_align.py +@@ -54,12 +54,41 @@ class _ROIAlign(Function): + return grad_input, None, None, None, None, None + + +-roi_align = _ROIAlign.apply ++class RoiExtractor(torch.autograd.Function): ++ @staticmethod ++ def forward(self, feats, rois, aligned=0, finest_scale=56, pooled_height=14, pooled_width=14, ++ pool_mode='avg', roi_scale_factor=0, sample_num=0, spatial_scale=[0.125, ]): ++ """ ++ feats (torch.Tensor): feats in shape (batch, 256, H, W). ++ rois (torch.Tensor): rois in shape (k, 5). ++ return: ++ roi_feats (torch.Tensor): (k, 256, pooled_width, pooled_width) ++ """ ++ ++ # phony implementation for shape inference ++ k = rois.shape[0] ++ roi_feats = torch.ones(k, 256, pooled_height, pooled_width) ++ return roi_feats ++ ++ @staticmethod ++ def symbolic(g, feats, rois, aligned=0, finest_scale=56, pooled_height=14, pooled_width=14): ++ # TODO: support tensor list type for feats ++ # f_tensors = sym_help._unpack_list(feats) ++ roi_feats = g.op('RoiExtractor', feats, rois, aligned_i=0, finest_scale_i=56, ++ pooled_height_i=pooled_height, pooled_width_i=pooled_width, ++ pool_mode_s='avg', roi_scale_factor_i=0, sample_num_i=0, ++ spatial_scale_f=[0.125, ], outputs=1) ++ return roi_feats ++ ++ ++from torchvision.ops import roi_align ++ ++roi_align_ = roi_align + + + # NOTE: torchvision's RoIAlign has a different default aligned=False + class ROIAlign(nn.Module): +- def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): ++ def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=False): + """ROIAlign using npu api. + + Origin implement from detectron2 is +@@ -108,10 +137,19 @@ class ROIAlign(nn.Module): + rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 
+ """ + assert rois.dim() == 2 and rois.size(1) == 5 +- return roi_align( +- input_tensor.float(), rois, self.output_size, +- self.spatial_scale, self.sampling_ratio, self.aligned +- ) ++ ++ # if torch.onnx.is_in_onnx_export(): ++ # return RoiExtractor.apply(input_tensor.float(), rois, 0, 56, self.output_size[0], self.output_size[1]) ++ # else: ++ # return roi_align_( ++ # input_tensor.float(), rois, self.output_size, ++ # self.spatial_scale, self.sampling_ratio, self.aligned ++ # ) ++ ++ res = roi_align_(input_tensor.float(), rois, self.output_size, self.spatial_scale, self.sampling_ratio, ++ self.aligned) ++ ++ return res + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" +diff --git a/maskrcnn_benchmark/modeling/box_coder.py b/maskrcnn_benchmark/modeling/box_coder.py +index f26c369..7d2f07f 100644 +--- a/maskrcnn_benchmark/modeling/box_coder.py ++++ b/maskrcnn_benchmark/modeling/box_coder.py +@@ -68,10 +68,10 @@ class BoxCoder(object): + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights +- dx = rel_codes[:, 0::4] / wx +- dy = rel_codes[:, 1::4] / wy +- dw = rel_codes[:, 2::4] / ww +- dh = rel_codes[:, 3::4] / wh ++ dx = torch.true_divide(rel_codes[:, 0::4], wx) ++ dy = torch.true_divide(rel_codes[:, 1::4], wy) ++ dw = torch.true_divide(rel_codes[:, 2::4], ww) ++ dh = torch.true_divide(rel_codes[:, 3::4], wh) + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.bbox_xform_clip) +@@ -82,14 +82,10 @@ class BoxCoder(object): + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + +- pred_boxes = torch.zeros_like(rel_codes) +- # x1 +- pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w +- # y1 +- pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h +- # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) +- pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 +- # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) +- pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 ++ tmp1 = pred_ctr_x - 0.5 * pred_w ++ tmp2 = pred_ctr_y - 0.5 * pred_h ++ tmp3 = pred_ctr_x + 0.5 * pred_w - 1 ++ tmp4 = pred_ctr_y + 0.5 * pred_h - 1 ++ pred_boxes = torch.cat((tmp1, tmp2, tmp3, tmp4), dim=1) + + return pred_boxes +diff --git a/maskrcnn_benchmark/modeling/detector/retinanet.py b/maskrcnn_benchmark/modeling/detector/retinanet.py +index 4f2fa7f..1ab47c8 100644 +--- a/maskrcnn_benchmark/modeling/detector/retinanet.py ++++ b/maskrcnn_benchmark/modeling/detector/retinanet.py +@@ -48,15 +48,15 @@ class RetinaNet(nn.Module): + """ + if self.training and targets is None: + raise ValueError("In training mode, targets should be passed") +- images = to_image_list(images) +- features = self.backbone(images.tensors) ++ features = self.backbone(images) + + # Retina RPN Output + rpn_features = features + if self.cfg.RETINANET.BACKBONE == "p2p7": + rpn_features = features[1:] + +- (anchors, detections), detector_losses = self.rpn(images, rpn_features, targets) ++ image_sizes = [images.shape[-2:] for _ in range(images.shape[0])] ++ (anchors, detections), detector_losses = self.rpn(image_sizes, rpn_features) + + if self.training: + losses = {} +@@ -89,25 +89,6 @@ class RetinaNet(nn.Module): + return losses + else: + if self.mask: +- proposals = [] +- for image_detections in detections: +- num_of_detections = image_detections.bbox.shape[0] +- if num_of_detections > self.cfg.RETINANET.NUM_MASKS_TEST > 0: +- cls_scores = image_detections.get_field("scores") +- cls_scores = cls_scores.type(torch.float32) +- _, keep = 
torch.topk( +- cls_scores, +- self.cfg.RETINANET.NUM_MASKS_TEST, +- largest=True +- ) +- image_detections = image_detections[keep] +- +- proposals.append(image_detections) +- +- if self.cfg.MODEL.SPARSE_MASK_ON: +- x, detections, mask_losses = self.mask( +- features, proposals, targets +- ) +- else: +- x, detections, mask_losses = self.mask(features, proposals, targets) ++ detections, masks = self.mask(features, detections) ++ return detections, masks + return detections +diff --git a/maskrcnn_benchmark/modeling/poolers.py b/maskrcnn_benchmark/modeling/poolers.py +index bed784d..8c76d62 100644 +--- a/maskrcnn_benchmark/modeling/poolers.py ++++ b/maskrcnn_benchmark/modeling/poolers.py +@@ -1,6 +1,8 @@ + # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + import torch + ++import numpy as np ++ + from torch import nn + from maskrcnn_benchmark.layers import ROIAlign + from .utils import cat +@@ -26,18 +28,19 @@ class LevelMapper(object): + self.lvl0 = canonical_level + self.eps = eps + +- def __call__(self, boxlists): ++ def __call__(self, bboxes): + """ + Arguments: + boxlists (list[BoxList]) + """ + # Compute level ids +- s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists])) ++ s = torch.sqrt(cat([(bbox[0][:, 2] - bbox[0][:, 0] + 1) * (bbox[0][:, 3] - bbox[0][:, 1] + 1) ++ for bbox in bboxes])) + + # Eqn.(1) in FPN paper + target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps)) + target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) +- return target_lvls.to(torch.int64) - self.k_min ++ return target_lvls - self.k_min + + + class Pooler(nn.Module): +@@ -76,16 +79,18 @@ class Pooler(nn.Module): + ) + + def convert_to_roi_format(self, boxes): +- concat_boxes = cat([b.bbox for b in boxes], dim=0) ++ num_box = [b[0].shape[0] for b in boxes] ++ concat_boxes = cat([b[0] for b in boxes], dim=0) + device, dtype = concat_boxes.device, concat_boxes.dtype + ids = cat( + [ +- torch.full((len(b), 1), i, dtype=dtype, device=device) ++ torch.full((num_box[i], 1), i, dtype=dtype, device=device) + for i, b in enumerate(boxes) + ], + dim=0, + ) + rois = torch.cat([ids, concat_boxes], dim=1) ++ + return rois + + def forward(self, x, boxes): +@@ -104,7 +109,7 @@ class Pooler(nn.Module): + + levels = self.map_levels(boxes) + +- num_rois = len(rois) ++ num_rois = rois.shape[0] + num_channels = x[0].shape[1] + output_size = self.output_size[0] + +@@ -117,14 +122,9 @@ class Pooler(nn.Module): + + for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)): + idx_in_level = levels == level +- +- rois_per_level = rois[idx_in_level] +- +- num_rois_per_level = len(rois_per_level) +- max_len = len(rois) +- fix_shape_rois = rois_per_level.new_zeros([max_len, 5]) +- fix_shape_rois[:num_rois_per_level] = rois_per_level +- fix_shape_res = pooler(per_level_feature, fix_shape_rois) +- result[idx_in_level] = fix_shape_res[:num_rois_per_level] ++ pooler_res = pooler(per_level_feature, rois) ++ idx_in_level = idx_in_level[:, None, None, None] ++ res = torch.mul(idx_in_level, pooler_res) ++ result += res + + return result +diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py +index f97f992..325cb99 100644 +--- a/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py ++++ b/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py +@@ -4,6 +4,7 @@ import torch + from PIL import Image + from torch import nn + ++from maskrcnn_benchmark.modeling.utils import cat + 
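A short sketch, under assumed shapes, of the level-masking idea introduced in the Pooler.forward change above: rather than gathering a dynamic per-level subset of ROIs, every ROI is pooled at every FPN level, and each level's output is zeroed with a broadcast mask and accumulated, keeping all tensor shapes static for export. Function and variable names here are illustrative.

import torch

def pool_over_levels(per_level_feats, rois, levels, poolers):
    # per_level_feats: list of (N, C, H_l, W_l) feature maps, one per FPN level
    # rois: (K, 5) boxes with batch index; levels: (K,) assigned level per ROI
    result = None
    for level, (feat, pooler) in enumerate(zip(per_level_feats, poolers)):
        pooled = pooler(feat, rois)                    # pool *all* K rois at this level
        keep = (levels == level)[:, None, None, None]  # broadcast mask over C, H, W
        term = pooled * keep                           # zero out rois assigned elsewhere
        result = term if result is None else result + term
    return result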
from maskrcnn_benchmark.structures.bounding_box import BoxList + + +@@ -38,26 +39,18 @@ class MaskPostProcessor(nn.Module): + + # select masks coresponding to the predicted classes + num_masks = x.shape[0] +- labels = [bbox.get_field("labels") for bbox in boxes] +- labels = torch.cat(labels) ++ labels = [bbox[1] for bbox in boxes] ++ labels = cat(labels) + index = torch.arange(num_masks, device=labels.device) + mask_prob = mask_prob[index.long(), labels.long()][:, None] + + if self.masker: + mask_prob = self.masker(mask_prob, boxes) + +- boxes_per_image = [len(box) for box in boxes] +- mask_prob = mask_prob.split(boxes_per_image, dim=0) ++ # boxes_per_image = [len(box[0]) for box in boxes] ++ # mask_prob = mask_prob.split(boxes_per_image, dim=0) + +- results = [] +- for prob, box in zip(mask_prob, boxes): +- bbox = BoxList(box.bbox, box.size, mode="xyxy") +- for field in box.fields(): +- bbox.add_field(field, box.get_field(field)) +- bbox.add_field("mask", prob) +- results.append(bbox) +- +- return results ++ return boxes, mask_prob + + + class MaskPostProcessorCOCOFormat(MaskPostProcessor): +diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py +index 6089195..3966ae7 100644 +--- a/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py ++++ b/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py +@@ -1,6 +1,7 @@ + # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + import math + import torch ++from torch.nn import functional as F + from maskrcnn_benchmark.structures.bounding_box import BoxList + + from .roi_mask_feature_extractors import make_roi_mask_feature_extractor +@@ -34,22 +35,23 @@ def keep_only_positive_boxes(boxes): + + + def extra_proposals(proposals): ++ outputs = [] + for proposal in proposals: +- cur_count = len(proposal) +- boxes = proposal.bbox +- labels = proposal.get_field('labels') ++ cur_count = proposal[1].shape[0] ++ boxes = proposal[0] ++ labels = proposal[1] ++ scores = proposal[2] + + box_count = 180 + if cur_count > box_count: + box_count = int(math.ceil(cur_count / 45)) * 45 +- new_boxes = boxes.new_zeros((box_count, 4), dtype=torch.float) +- new_labels = boxes.new_full((box_count,), fill_value=-1, dtype=torch.int) +- new_boxes[:cur_count] = boxes +- new_labels[:cur_count] = labels ++ pad_diff = box_count - cur_count ++ new_boxes = F.pad(boxes, (0, 0, 0, pad_diff), value=0) ++ new_labels = F.pad(labels, (0, pad_diff), value=-1) ++ new_scores = F.pad(scores, (0, pad_diff), value=0) + +- proposal.bbox = new_boxes +- proposal.add_field('labels', new_labels) +- return proposals ++ outputs.append([new_boxes, new_labels, new_scores]) ++ return outputs + + + class ROIMaskHead(torch.nn.Module): +@@ -80,7 +82,7 @@ class ROIMaskHead(torch.nn.Module): + if self.training: + # during training, only focus on positive boxes + all_proposals = proposals +- proposals = extra_proposals(proposals) ++ # proposals = extra_proposals(proposals) + + if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: + x = features +@@ -92,7 +94,7 @@ class ROIMaskHead(torch.nn.Module): + + if not self.training: + result = self.post_processor(mask_logits, proposals) +- return x, result, {} ++ return result + + loss_mask = self.loss_evaluator(proposals, mask_logits, targets) + +diff --git a/maskrcnn_benchmark/modeling/rpn/anchor_generator.py b/maskrcnn_benchmark/modeling/rpn/anchor_generator.py +index ef09c13..014e25f 100644 +--- 
a/maskrcnn_benchmark/modeling/rpn/anchor_generator.py ++++ b/maskrcnn_benchmark/modeling/rpn/anchor_generator.py +@@ -108,19 +108,14 @@ class AnchorGenerator(nn.Module): + inds_inside = torch.ones(anchors.shape[0], dtype=torch.uint8, device=device) + boxlist.add_field("visibility", inds_inside) + +- def forward(self, image_list, feature_maps): +- grid_height, grid_width = feature_maps[0].shape[-2:] ++ def forward(self, image_sizes, feature_maps): + grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] + anchors_over_all_feature_maps = self.grid_anchors(grid_sizes) + anchors = [] +- for i, (image_height, image_width) in enumerate(image_list.image_sizes): ++ for i, (image_height, image_width) in enumerate(image_sizes): + anchors_in_image = [] + for anchors_per_feature_map in anchors_over_all_feature_maps: +- boxlist = BoxList( +- anchors_per_feature_map, (image_width, image_height), mode="xyxy" +- ) +- self.add_visibility_to(boxlist) +- anchors_in_image.append(boxlist) ++ anchors_in_image.append(anchors_per_feature_map) + anchors.append(anchors_in_image) + return anchors + +diff --git a/maskrcnn_benchmark/modeling/rpn/retinanet.py b/maskrcnn_benchmark/modeling/rpn/retinanet.py +index b5c17df..b846895 100644 +--- a/maskrcnn_benchmark/modeling/rpn/retinanet.py ++++ b/maskrcnn_benchmark/modeling/rpn/retinanet.py +@@ -186,10 +186,10 @@ class RetinaNetModule(torch.nn.Module): + + def _forward_test(self, anchors, box_cls, box_regression): + N = int(box_cls[0].size(0)) +- A = int(box_regression[0].size(1) / 4) +- C = int(box_cls[0].size(1) / A) ++ A = torch.floor_divide(box_regression[0].size(1), 4) ++ C = torch.floor_divide(box_cls[0].size(1), A) + anchors_size = [anchor_list[0].size for anchor_list in anchors] +- anchors_bbox = [[anchor.bbox for anchor in anchor_list] for anchor_list in anchors] ++ anchors_bbox = [[anchor for anchor in anchor_list] for anchor_list in anchors] + anchors_per_img = [torch.cat(anchor_list, 0) for anchor_list in anchors_bbox] + + box_cls = self.permute_and_concat(box_cls, C) +diff --git a/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py b/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py +index 99932ec..5d19482 100644 +--- a/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py ++++ b/maskrcnn_benchmark/modeling/rpn/retinanet_infer.py +@@ -1,51 +1,6 @@ + import torch + + from maskrcnn_benchmark.modeling.box_coder import BoxCoder +-from maskrcnn_benchmark.structures.bounding_box import BoxList +-from maskrcnn_benchmark.structures.boxlist_ops import remove_small_boxes +- +- +-def batched_nms(boxes, scores, max_output_size, iou_threshold, scores_threshold): +- """ +- Performs non-maximum suppression in a batched fashion. +- +- Each index value correspond to a category, and NMS +- will not be applied between elements of different categories. +- +- Parameters +- ---------- +- boxes : Tensor[N, 4] +- boxes where NMS will be performed. They +- are expected to be in (x1, y1, x2, y2) format +- scores : Tensor[N] +- scores for each one of the boxes +- idxs : Tensor[N] +- indices of the categories for each one of the boxes. 
+- iou_threshold : float +- discards all overlapping boxes +- with IoU > iou_threshold +- +- Returns +- ------- +- keep : Tensor +- int64 tensor with the indices of +- the elements that have been kept by NMS, sorted +- in decreasing order of scores +- """ +- num_classes = scores.size(1) +- num_boxes = scores.size(0) +- multi_bboxes = boxes.reshape(1, num_boxes, -1, 4) +- multi_scores = scores.reshape(1, num_boxes, num_classes) +- nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch.npu_batch_nms(multi_bboxes.half(), multi_scores.half(), +- scores_threshold, +- iou_threshold, max_output_size, +- max_output_size) +- nmsed_boxes = nmsed_boxes.reshape(nmsed_boxes.shape[1:]) +- nmsed_scores = nmsed_scores.reshape(nmsed_scores.shape[1]) +- nmsed_classes = nmsed_classes.reshape(nmsed_classes.shape[1]) +- nmsed_num = nmsed_num.item() +- +- return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num + + + class RetinaNetPostProcessor(torch.nn.Module): +@@ -83,74 +38,6 @@ class RetinaNetPostProcessor(torch.nn.Module): + box_coder = BoxCoder(weights=(10., 10., 5., 5.)) + self.box_coder = box_coder + +- def forward_for_single_feature_map(self, anchors, box_cls, box_regression, +- pre_nms_thresh): +- """ +- Arguments: +- anchors: list[BoxList] +- box_cls: tensor of size N, A * C, H, W +- box_regression: tensor of size N, A * 4, H, W +- """ +- device = box_cls.device +- N, _, H, W = box_cls.shape +- A = int(box_regression.size(1) / 4) +- C = int(box_cls.size(1) / A) +- +- # put in the same format as anchors +- box_cls = box_cls.permute(0, 2, 3, 1) +- box_cls = box_cls.reshape(N, -1, C) +- box_cls = box_cls.sigmoid().cpu().float() +- +- box_regression = box_regression.permute(0, 2, 3, 1) +- box_regression = box_regression.reshape(N, -1, 4).cpu().float() +- +- num_anchors = A * H * W +- +- results = [[] for _ in range(N)] +- candidate_inds = box_cls > pre_nms_thresh +- if candidate_inds.sum().item() == 0: +- empty_boxlists = [] +- for a in anchors: +- empty_boxlist = BoxList(torch.zeros(1, 4).cpu().float(), a.size) +- empty_boxlist.add_field( +- "labels", torch.LongTensor([-1]).cpu()) +- empty_boxlist.add_field( +- "scores", torch.Tensor([0]).cpu().float()) +- empty_boxlists.append(empty_boxlist) +- return empty_boxlists +- +- pre_nms_top_n = candidate_inds.reshape(N, -1).sum(1) +- pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n) +- +- for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, +- per_candidate_inds, per_anchors) in enumerate( +- zip(box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors)): +- # Sort and select TopN +- per_box_cls = per_box_cls[per_candidate_inds] +- per_box_cls, top_k_indices = \ +- per_box_cls.topk(per_pre_nms_top_n, sorted=False) +- +- per_candidate_nonzeros = \ +- per_candidate_inds.nonzero()[top_k_indices, :] +- +- per_box_loc = per_candidate_nonzeros[:, 0] +- per_class = per_candidate_nonzeros[:, 1] +- per_class += 1 +- +- detections = self.box_coder.decode_cpu( +- per_box_regression[per_box_loc, :].view(-1, 4), +- per_anchors.bbox[per_box_loc, :].view(-1, 4) +- ) +- +- boxlist = BoxList(detections, per_anchors.size, mode="xyxy") +- boxlist.add_field("labels", per_class) +- boxlist.add_field("scores", per_box_cls) +- boxlist = boxlist.clip_to_image(remove_empty=False) +- boxlist = remove_small_boxes(boxlist, self.min_size) +- results[batch_idx] = boxlist +- +- return results +- + def forward(self, anchors_per_img, box_cls, box_regression, anchors_size, N, C, targets=None): + """ + Arguments: +@@ -164,7 +51,7 @@ class 
RetinaNetPostProcessor(torch.nn.Module): + """ + device = box_cls.device + box_cls = box_cls.sigmoid() +- k = self.pre_nms_top_n * 2 ++ k = self.pre_nms_top_n # * 4 + results = [] + for i in range(N): + cls_scores = box_cls[i] +@@ -176,10 +63,11 @@ class RetinaNetPostProcessor(torch.nn.Module): + achrs.view(-1, 4) + ) + if not self.training: +- k = k * 2 + scores, topk_inds = torch.topk(cls_scores.flatten(), k=k, largest=True) +- labels = topk_inds % C +- topk_inds = topk_inds // C ++ C = torch.tensor(C, dtype=torch.int32) ++ labels = topk_inds.int() % C ++ topk_inds = torch.floor_divide(topk_inds.int(), C).long() ++ labels = labels.int() + bboxes = bboxes[topk_inds] + else: + max_scores, labels = torch.max(cls_scores, 1) +@@ -188,31 +76,17 @@ class RetinaNetPostProcessor(torch.nn.Module): + scores = topk_scores + labels = labels[topk_inds] + if labels.numel() == 0: +- result = BoxList(bboxes.new_ones([1, 4]), anchor_size, mode="xyxy") +- result.add_field("scores", bboxes.new_zeros([1, ])) +- result.add_field("labels", bboxes.new_full((1,), -1, dtype=torch.long)) ++ result_boxes = bboxes.new_ones([1, 4]).to(device) ++ result_scores = bboxes.new_zeros([1, ]).to(device) ++ result_labels = (bboxes.new_ones((1,), dtype=torch.int32) * -1).to(device) + else: +- multi_scores = scores.new_zeros([k, C]) +- multi_bboxes = bboxes.new_zeros([k, 4]) + k = min(k, labels.numel()) +- multi_bboxes[:k] = bboxes[:k] +- indices = torch.arange(0, k).to(device) +- multi_scores[indices, labels[:k]] = scores[:k] +- +- nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = batched_nms(multi_bboxes, multi_scores, +- self.fpn_post_nms_top_n, +- iou_threshold=self.nms_thresh, +- scores_threshold=self.pre_nms_thresh) +- nmsed_classes = nmsed_classes + 1 +- result = BoxList(nmsed_boxes, anchor_size, mode="xyxy") +- result.add_field("scores", nmsed_scores) +- result.add_field("labels", nmsed_classes) +- result = result.clip_to_image(remove_empty=False) ++ result_boxes = bboxes[:k] ++ result_boxes = torch.clamp(result_boxes, 0, 1344) ++ result_scores = scores[:k] ++ result_labels = labels + 1 + +- result.bbox = result.bbox.to(device) +- result.add_field('labels', result.get_field('labels').to(device)) +- result.add_field('scores', result.get_field('scores').to(device)) +- results.append(result) ++ results.append([result_boxes, result_labels, result_scores]) + + return results + diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/requirements.txt b/ACL_PyTorch/contrib/cv/detection/RetinaMask/requirements.txt new file mode 100644 index 0000000000..78767bdca3 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/requirements.txt @@ -0,0 +1,9 @@ +matplotlib==3.5.2 +onnx==1.8.1 +onnx-simplifier==0.3.9 +opencv-python==4.5.5.64 +pycocotools==2.0 +torch==1.6.0 +torchvision==0.7.0 +tqdm==4.64.0 +yacs==0.1.8 diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/env_310.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/env_310.sh new file mode 100644 index 0000000000..bc4f4e945d --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/env_310.sh @@ -0,0 +1,6 @@ +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export 
ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_om_310.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_om_310.sh new file mode 100644 index 0000000000..ca2fca9a06 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_om_310.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + fi + if [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi + if [[ $para == --input_shape* ]];then + input_shape=`echo ${para#*=}` + fi + if [[ $para == --om_path* ]];then + om_path=`echo ${para#*=}` + fi + if [[ $para == --input_text_path* ]];then + input_text_path=`echo ${para#*=}` + fi + if [[ $para == --coco_path* ]];then + coco_path=`echo ${para#*=}` + fi + if [[ $para == --result_path* ]];then + result_path=`echo ${para#*=}` + fi +done + +./benchmark.x86_64 -model_type=vision -device_id=${device_id} -batch_size=${batch_size} -om_path=${om_path} -input_text_path=${input_text_path} -input_width=${input_shape} -input_height=${input_shape} -output_binary=True -useDvpp=False + +python ./tools/RetinaMask_postprocess.py --input_text_path=${input_text_path} --device=${device_id} --coco_path=${coco_path} --result_path=${result_path} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_onnx_T4.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_onnx_T4.sh new file mode 100644 index 0000000000..eac395790c --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/infer_onnx_T4.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +for para in $* +do + if [[ $para == --weight_path* ]];then + weight_path=`echo ${para#*=}` + fi + if [[ $para == --coco_path* ]];then + coco_path=`echo ${para#*=}` + fi +done + +python ./tools/RetinaMask_eval_onnx.py --weight_path=${weight_path} --coco_path=${coco_path} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/onnx2om.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/onnx2om.sh new file mode 100644 index 0000000000..bab26e9bdf --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/onnx2om.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +for para in $* +do + if [[ $para == --onnx_path* ]];then + onnx_path=`echo ${para#*=}` + fi + if [[ $para == --om_path* ]];then + om_path=`echo ${para#*=}` + fi + if [[ $para == --input_shape* ]];then + input_shape=`echo ${para#*=}` + fi +done + +atc --framework=5 --model=${onnx_path} --output=${om_path} --input_format=NCHW --input_shape=${input_shape} --log=debug --soc_version=Ascend310 diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/postprocess.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/postprocess.sh new file mode 100644 index 0000000000..176b679de9 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/postprocess.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + fi + if [[ $para == --input_text_path* ]];then + input_text_path=`echo ${para#*=}` + fi + if [[ $para == --coco_path* ]];then + coco_path=`echo ${para#*=}` + fi + if [[ $para == --result_path* ]];then + result_path=`echo ${para#*=}` + fi +done + +python ./tools/RetinaMask_postprocess.py --input_text_path=${input_text_path} --device=${device_id} --coco_path=${coco_path} --result_path=${result_path} \ No newline at end of file diff --git 
a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/preprocess.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/preprocess.sh new file mode 100644 index 0000000000..d8b76ec1c4 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/preprocess.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +for para in $* +do + if [[ $para == --image_src_path* ]];then + image_src_path=`echo ${para#*=}` + fi + if [[ $para == --bin_file_path* ]];then + bin_file_path=`echo ${para#*=}` + fi + if [[ $para == --bin_info_name* ]];then + bin_info_name=`echo ${para#*=}` + fi +done + +python ./tools/RetinaMask_preprocess.py --image_src_path=${image_src_path} --bin_file_path=${bin_file_path} --bin_info_name=${bin_info_name} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/pth2onnx.sh b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/pth2onnx.sh new file mode 100644 index 0000000000..0ad61f93e9 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/test/pth2onnx.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +for para in $* +do + if [[ $para == --cfg_path* ]];then + cfg_path=`echo ${para#*=}` + fi + if [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi + if [[ $para == --onnx_path* ]];then + onnx_path=`echo ${para#*=}` + fi + if [[ $para == --save_dir* ]];then + save_dir=`echo ${para#*=}` + fi +done + +python ./tools/RetinaMask_pth2onnx.py --cfg_path=${cfg_path} --weight_path=${pth_path} --save_dir=${save_dir} --simplify=True + +# cast node Concat_742 into FLOAT16 +python ./tools/cast_onnx.py --weight_path=${onnx_path} --save_dir=${save_dir} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_eval_onnx.py b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_eval_onnx.py new file mode 100644 index 0000000000..19b690262d --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_eval_onnx.py @@ -0,0 +1,92 @@ +import os +import sys +import time +import argparse +import numpy as np +import onnxruntime as ort +import pycocotools.coco as coco +import pycocotools.mask as mask_util + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.insert(0, cur_path + "/..") + +from tqdm import tqdm +from PIL import Image +from tools.utils import build_transforms, np_batched_nms, Masker, post_process, \ + convert_to_coco_format, evaluate_prediction + +if __name__ == '__main__': + # load configs + parser = argparse.ArgumentParser() + parser.add_argument("--weight_path", type=str, + default="../weights/npu_8P_model_0020001_bs1_sim.onnx") + parser.add_argument("--coco_path", type=str, + default=r"/opt/npu/coco") + + args = parser.parse_args() + + fix_shape = 1344 + trans = build_transforms(fix_shape) + # load model + onnx_session = ort.InferenceSession(args.weight_path, providers=['TensorrtExecutionProvider', + 'CUDAExecutionProvider']) + masker = Masker(threshold=0.5, padding=1) + + # load images + coco_dataset = coco.COCO(os.path.join(args.coco_path, 'annotations', 'instances_val2017.json')) + imgs_path = os.path.join(args.coco_path, 'val2017') + image_files = [] + for main_dir, sub_dir, files in os.walk(imgs_path): + for file in files: + if file.split('.')[-1] in ['jpg', 'png', 'bmp']: + image_files.append(os.path.join(main_dir, file)) + image_files.sort() + + # run + cost_time = [] + data_list = [] + for image_file in tqdm(image_files): + image_file = image_file.replace('\\', '/') + image_id = int(image_file.split('/')[-1].split('.')[0]) + + img = Image.open(image_file).convert('RGB') + 
ori_w, ori_h = img.size + long_side = ori_w if ori_w > ori_h else ori_h + ratio = fix_shape / long_side + dummy_input = trans(img) + + # onnx process + onnx_inputs = {onnx_session.get_inputs()[0].name: dummy_input.astype(np.float32)} + start_time = time.time() + onnx_output = onnx_session.run(None, onnx_inputs) + end_time = time.time() + + cost_time.append(end_time - start_time) + detections = onnx_output[:3] + masks = onnx_output[-1] + + # nms + bboxes, scores, labels = detections[0], detections[2], detections[1] + bboxes, labels, scores, masks = np_batched_nms(bboxes, scores, labels, masks, iou_threshold=0.4) + + # post + bboxes, labels, scores, masks = post_process(ori_h, ori_w, ratio, bboxes, labels, scores, masks) + masks = masker(masks, bboxes, ori_h, ori_w) + + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F").astype('uint8'))[0] + for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + data_list.extend(convert_to_coco_format(coco_dataset, image_id, bboxes, labels, scores, rles)) + + mean_cost_time = np.array(cost_time).mean() + fps = 1 / mean_cost_time + print('FPS: %.4f ' % fps) + + bbox_info, segm_info = evaluate_prediction(coco_dataset, data_list) + + print('bbox_info: \n', bbox_info) + print('segm_info: \n', segm_info) diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_postprocess.py b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_postprocess.py new file mode 100644 index 0000000000..501b4ebc15 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_postprocess.py @@ -0,0 +1,78 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License + +import os +import sys +import argparse +import numpy as np +import pycocotools.coco as coco +import pycocotools.mask as mask_util + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.insert(0, cur_path + "/..") + +from tqdm import tqdm +from PIL import Image +from tools.utils import np_batched_nms, post_process, convert_to_coco_format, evaluate_prediction, Masker + + +def display_results(info_file, results_path, coco_path, fix_shape): + with open(info_file, 'r') as f: + infos = f.readlines() + masker = Masker(threshold=0.5, padding=1) + coco_dataset = coco.COCO(os.path.join(coco_path, 'annotations', 'instances_val2017.json')) + data_list = [] + for info in tqdm(infos): + name = info.split(' ')[1].split('/')[-1].split('.')[0] + image_id = int(name) + image_file = os.path.join(coco_path, 'val2017', name + '.jpg') + img = Image.open(image_file).convert('RGB') + ori_w, ori_h = img.size + long_side = ori_w if ori_w > ori_h else ori_h + ratio = fix_shape / long_side + + bboxes_file, labels_file, scores_file, masks_file = \ + name + '_1.bin', name + '_2.bin', name + '_3.bin', name + '_4.bin' + bboxes = np.fromfile(os.path.join(results_path, bboxes_file), dtype=np.float32).reshape(-1, 4) + labels = np.fromfile(os.path.join(results_path, labels_file), dtype=np.int32) + scores = np.fromfile(os.path.join(results_path, scores_file), dtype=np.float32) + masks = np.fromfile(os.path.join(results_path, masks_file), dtype=np.float32).reshape(-1, 1, 28, 28) + + bboxes, labels, scores, masks = np_batched_nms(bboxes, scores, labels, masks, iou_threshold=0.4) + bboxes, labels, scores, masks = post_process(ori_h, ori_w, ratio, bboxes, labels, scores, masks) + + masks = masker(masks, bboxes, ori_h, ori_w) + rles = [mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F").astype('uint8'))[0] for mask in masks] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + data_list.extend(convert_to_coco_format(coco_dataset, image_id, bboxes, labels, scores, rles)) + + bbox_info, segm_info = evaluate_prediction(coco_dataset, data_list) + print('bbox_info: \n', bbox_info) + print('segm_info: \n', segm_info) + + +if __name__ == '__main__': + # cur_path = os.path.abspath(os.path.dirname(__file__)) + parser = argparse.ArgumentParser() + parser.add_argument("--input_text_path", type=str, default="../retinamask_coco2017.info") + parser.add_argument("--device", type=str, default="3") + parser.add_argument("--fix_shape", type=int, default=1344) + parser.add_argument("--coco_path", type=str, default="/opt/npu/coco") + parser.add_argument("--result_path", type=str, default="./result") + + args = parser.parse_args() + results_path = os.path.join(args.result_path, 'dumpOutput_device' + args.device) + display_results(args.input_text_path, results_path, args.coco_path, args.fix_shape) diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_preprocess.py b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_preprocess.py new file mode 100644 index 0000000000..a6912d801c --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_preprocess.py @@ -0,0 +1,75 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import argparse +import multiprocessing + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.insert(0, cur_path + "/..") + +from glob import glob +from PIL import Image +from tools.utils import build_transforms + +fix_shape = 1344 +trans = build_transforms(fix_shape) + + +def gen_input_bin(file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + image = Image.open(os.path.join(flags.image_src_path, file)).convert('RGB') + dummy_input = trans(image) + + dummy_input.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) + + +def preprocess(src_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! please ensure bin files generated.") + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') + parser.add_argument("--image_src_path", default="/opt/npu/coco/val2017/", help='image of dataset') + parser.add_argument("--bin_file_path", default="./bins/", help='Preprocessed image buffer') + parser.add_argument("--bin_info_name", default="retinamask_coco2017.info") + parser.add_argument("--input_size", default='1344', type=str, help='input tensor size') + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) + preprocess(flags.image_src_path) + + # gen bins_info + get_bin_info(flags.bin_file_path, flags.bin_info_name, flags.input_size, flags.input_size) diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_pth2onnx.py new file mode 100644 index 0000000000..15ca4ef606 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/RetinaMask_pth2onnx.py @@ -0,0 +1,75 @@ +import os +import sys +import onnx +import torch +import argparse +import warnings + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.insert(0, cur_path + "/..") +warnings.filterwarnings("ignore") + +from onnxsim import simplify +from collections import OrderedDict +from maskrcnn_benchmark.config import cfg +from maskrcnn_benchmark.modeling.detector import build_detection_model + + +def pth2onnx(): + parser = argparse.ArgumentParser() + parser.add_argument("--cfg_path", type=str, + default="../configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml") + parser.add_argument("--weight_path", type=str, default="../npu_8P_model_0020001.pth") + 
parser.add_argument("--save_dir", type=str, default="../weights") + parser.add_argument("--batch_size", type=int, default=1) + parser.add_argument("--simplify", type=bool, default=True) + + args = parser.parse_args() + cfg.merge_from_file(args.cfg_path) + cfg.freeze() + + if not os.path.exists(args.save_dir): + os.makedirs(args.save_dir) + name = args.weight_path.split('/')[-1].split('.')[0] + onnx_file = os.path.join(args.save_dir, name + '_' + 'bs' + str(args.batch_size) + '.onnx') + + device = torch.device('cpu') + model = build_detection_model(cfg) + model = model.to(device) + ckpt = torch.load(args.weight_path, map_location=device) + checkpoints = ckpt['model'] + new_checkpoints = OrderedDict() + for k, v in checkpoints.items(): + if k.startswith('module'): + k = k[7:] + new_checkpoints[k] = v + model.load_state_dict(new_checkpoints) + model.eval() + + dummy_input = torch.randn(args.batch_size, 3, 1344, 1344, dtype=torch.float32) + # r = model(dummy_input) + + input_names = ["input"] + output_names = ["bboxs", "labels", "scores", "masks"] + + torch.onnx.export(model, + dummy_input, + onnx_file, + input_names=input_names, + output_names=output_names, + opset_version=11, + verbose=True, + enable_onnx_checker=True) + print("************* Convert to ONNX model file SUCCESS! *************") + + if args.simplify: + sim_path = os.path.join(args.save_dir, name + '_' + 'bs' + str(args.batch_size) + '_sim.onnx') + onnx_model = onnx.load(onnx_file) + onnx_sim_model, check = simplify(onnx_model, check_n=1) + assert check, "Simplified ONNX model could not be validated" + onnx.save(onnx_sim_model, sim_path) + print('ONNX file simplified!') + + +if __name__ == '__main__': + pth2onnx() diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/cast_onnx.py b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/cast_onnx.py new file mode 100644 index 0000000000..f8600f0663 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/cast_onnx.py @@ -0,0 +1,44 @@ +import os +import onnx +import argparse + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("--weight_path", type=str, default="../weights/npu_8P_model_0020001_bs1_sim.onnx") + parser.add_argument("--save_dir", type=str, default="../weights") + args = parser.parse_args() + + onnx_model = onnx.load(args.weight_path) + graph = onnx_model.graph + node = graph.node + # search Concat_742 node id + for i in range(len(node)): + if node[i].name == 'Concat_742': + node_rise = node[i] + print(node_rise) + print(i) + # add new node, cast to float16 + new_scale_node = onnx.helper.make_node( + "Cast", + inputs=["bboxs"], + outputs=['Cast_bboxs'], + name="Cast_bboxs", + to=getattr(onnx.TensorProto, "FLOAT16") + ) + + new_scale_node2 = onnx.helper.make_node( + "Cast", + inputs=["1702"], + outputs=['Cast_742'], + name="Cast_742", + to=getattr(onnx.TensorProto, "FLOAT16") + ) + + graph.node.insert(400, new_scale_node) + graph.node.insert(400, new_scale_node2) + node[402].input[0] = "Cast_742" + node[402].input[1] = "Cast_bboxs" + + file_name = args.weight_path.split('/')[-1].split('.')[0] + onnx.save(onnx_model, os.path.join(args.save_dir, file_name + '_cast.onnx')) diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/utils.py b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/utils.py new file mode 100644 index 0000000000..d52f08e615 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/detection/RetinaMask/tools/utils.py @@ -0,0 +1,400 @@ +import io +import cv2 +import json +import tempfile +import contextlib 
+import numpy as np + +from PIL import Image +from pycocotools.cocoeval import COCOeval + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image): + for t in self.transforms: + image = t(image) + + return image + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + + return format_string + + +class Resize(object): + def __init__(self, fix_shape): + self.fix_shape = fix_shape + + # modified from torchvision to add support for max size + def get_size(self, image_size): + w, h = image_size + long_side = w if w > h else h + ratio = self.fix_shape / long_side + oh = min(int(h * ratio), self.fix_shape) + ow = min(int(ratio * w), self.fix_shape) + + return (ow, oh) + + def __call__(self, image): + size = self.get_size(image.size) + image = image.resize(size, Image.BILINEAR) + return image + + +class Normalize(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, image): + image = np.float32(image) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image -= self.mean + image /= self.std + + return image + + +class ImgPad(object): + def __init__(self, fix_shape): + self.fix_shape = fix_shape + + def _pad(self, image): + w, h, c = image.shape + image_t = np.zeros((self.fix_shape, self.fix_shape, c), dtype=np.float32) + image_t[0:w, 0:h, :] = image + image_t = image_t.transpose(2, 0, 1)[None] + + return image_t + + def __call__(self, image): + image = self._pad(image) + + return image + + +def build_transforms(fix_shape): + resize = Resize(fix_shape) + normalize_transform = Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.]) + pad = ImgPad(fix_shape) + + transform = Compose([resize, normalize_transform, pad]) + + return transform + + +def expand_boxes(boxes, scale): + w_half = (boxes[:, 2] - boxes[:, 0]) * .5 + h_half = (boxes[:, 3] - boxes[:, 1]) * .5 + x_c = (boxes[:, 2] + boxes[:, 0]) * .5 + y_c = (boxes[:, 3] + boxes[:, 1]) * .5 + + w_half *= scale + h_half *= scale + + boxes_exp = np.zeros_like(boxes) + boxes_exp[:, 0] = x_c - w_half + boxes_exp[:, 2] = x_c + w_half + boxes_exp[:, 1] = y_c - h_half + boxes_exp[:, 3] = y_c + h_half + return boxes_exp + + +def expand_masks(mask, padding): + N = mask.shape[0] + M = mask.shape[-1] + pad2 = 2 * padding + scale = float(M + pad2) / M + padded_mask = np.zeros((N, 1, M + pad2, M + pad2), dtype=np.float32) + padded_mask[:, :, padding:-padding, padding:-padding] = mask + return padded_mask, scale + + +def paste_mask_in_image(mask, box, im_h, im_w, thresh=0.5, padding=1): + padded_mask, scale = expand_masks(mask[None], padding=padding) + mask = padded_mask[0, 0] + box = expand_boxes(box[None], scale)[0] + + TO_REMOVE = 1 + w = box[2] - box[0] + TO_REMOVE + h = box[3] - box[1] + TO_REMOVE + w = round(max(w, 1)) + h = round(max(h, 1)) + + mask = cv2.resize(mask, (w, h)) + + if thresh >= 0: + mask = np.array(mask > thresh, dtype=np.uint8) + else: + # for visualization and debugging, we also + # allow it to return an unmodified mask + mask = (mask * 255).astype(np.uint8) + + box = [round(x) for x in box] + + im_mask = np.zeros((im_h, im_w), dtype=np.uint8) + x_0 = max(box[0], 0) + x_1 = min(box[2], im_w) + y_0 = max(box[1], 0) + y_1 = min(box[3], im_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])] + return im_mask + + +class Masker(object): + """ + 
Projects a set of masks in an image on the locations + specified by the bounding boxes + """ + + def __init__(self, threshold=0.5, padding=1): + self.threshold = threshold + self.padding = padding + + def forward_single_image(self, masks, boxes, h, w): + + im_w, im_h = w, h + res = [ + paste_mask_in_image(mask[0], box, im_h, im_w, self.threshold, self.padding) + for mask, box in zip(masks, boxes) + ] + if len(res) > 0: + res = np.stack(res, axis=0)[:, None] + else: + res = np.zeros((0, 1, masks.shape[-2], masks.shape[-1])) + return res + + def __call__(self, masks, boxes, h, w): + + result = self.forward_single_image(masks, boxes, h, w) + return result + + +def np_nms(boxes, scores, thresh): + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = np.argsort(scores)[::-1] + + keep = [] + while len(order) > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + res_keep = np.array(keep) + + return res_keep + + +def np_batched_nms(boxes, scores, labels, masks, iou_threshold=0.4, keep_num=100): + if not boxes.any(): + return np.array((0,), dtype=np.int64) + + max_coordinate = boxes.max() + + offsets = np.float32(labels) * (max_coordinate + 1) + boxes_for_nms = boxes + offsets[:, None] + keep = np_nms(boxes_for_nms, scores, iou_threshold) + + if len(keep) >= keep_num: + bboxes = boxes[keep][:keep_num] + labels = labels[keep][:keep_num] + scores = scores[keep][:keep_num] + masks = masks[keep][:keep_num] + else: + diff_num = keep_num - len(keep) + bboxes = np.concatenate([boxes[keep], np.zeros((diff_num, 4))], axis=0) + labels = np.concatenate([labels[keep], np.ones(diff_num, dtype=np.int64) * -1], axis=0) + scores = np.concatenate([scores[keep], np.zeros(diff_num, )], axis=0) + masks = np.concatenate([masks[keep], np.zeros((diff_num, 1, 28, 28))], axis=0) + + return bboxes, labels, scores, masks + + +def post_process(ori_h, ori_w, ratio, bboxes, labels, scores, masks=None): + bboxes /= ratio + bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, ori_w) + bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, ori_h) + + return bboxes, labels, scores, masks + + +def convert_to_coco_format(coco_dataset, image_id, bboxes, labels, scores, rles): + bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] + bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] + + data_list = [] + for i in range(len(labels)): + label = labels[i] + if label < 0: + continue + category_id = coco_dataset.dataset['categories'][label - 1]['id'] + pred_data = { + "image_id": int(image_id), + "category_id": category_id, + "bbox": bboxes[i].tolist(), + "score": float(scores[i]), + "segmentation": rles[i], + } # COCO json format + data_list.append(pred_data) + + return data_list + + +def evaluate_prediction(coco_dataset, data_dict): + annType = ["segm", "bbox", "keypoints"] + + # Evaluate the Dt (detection) json comparing with the ground truth + if len(data_dict) > 0: + cocoGt = coco_dataset + + _, tmp = tempfile.mkstemp() + json.dump(data_dict, open(tmp, "w")) + cocoDt = cocoGt.loadRes(tmp) + + coco_bbox_eval = COCOeval(cocoGt, cocoDt, annType[1]) + coco_bbox_eval.evaluate() + coco_bbox_eval.accumulate() + redirect_string_1 = io.StringIO() + with 
contextlib.redirect_stdout(redirect_string_1): + coco_bbox_eval.summarize() + bbox_info = redirect_string_1.getvalue() + + coco_segm_eval = COCOeval(cocoGt, cocoDt, annType[0]) + coco_segm_eval.evaluate() + coco_segm_eval.accumulate() + redirect_string_2 = io.StringIO() + with contextlib.redirect_stdout(redirect_string_2): + coco_segm_eval.summarize() + segm_info = redirect_string_2.getvalue() + + return bbox_info, segm_info + + +def draw_bbox_segm(image, bboxes, labels, scores, masks=None): + img = image.copy() + for i in range(len(labels)): + box = bboxes[i] + label = labels[i] + class_label = label_categories[label-1]['name'] + score = scores[i] + if masks is not None: + mask = masks[i][0] + if score > 0.5: + box = list(map(int, box)) + x1, y1, x2, y2 = box + cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), thickness=2) + cv2.putText(img, f'{class_label}_{score:.3f}', (box[0], box[1] + 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 0, 0), 2) + if masks is not None: + mask = (mask > 0.1).astype(np.uint8) * 255 + mask = cv2.resize(mask, (x2 - x1, y2 - y1), interpolation=cv2.INTER_CUBIC) + img[y1:y2, x1:x2, 0] = img[y1:y2, x1:x2, 0] * 0.3 + mask * 0.7 + + return img + + +label_categories = [{'supercategory': 'person', 'id': 1, 'name': 'person'}, + {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'}, + {'supercategory': 'vehicle', 'id': 3, 'name': 'car'}, + {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'}, + {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'}, + {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'}, + {'supercategory': 'vehicle', 'id': 7, 'name': 'train'}, + {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'}, + {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'}, + {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'}, + {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'}, + {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'}, + {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'}, + {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'}, + {'supercategory': 'animal', 'id': 16, 'name': 'bird'}, + {'supercategory': 'animal', 'id': 17, 'name': 'cat'}, + {'supercategory': 'animal', 'id': 18, 'name': 'dog'}, + {'supercategory': 'animal', 'id': 19, 'name': 'horse'}, + {'supercategory': 'animal', 'id': 20, 'name': 'sheep'}, + {'supercategory': 'animal', 'id': 21, 'name': 'cow'}, + {'supercategory': 'animal', 'id': 22, 'name': 'elephant'}, + {'supercategory': 'animal', 'id': 23, 'name': 'bear'}, + {'supercategory': 'animal', 'id': 24, 'name': 'zebra'}, + {'supercategory': 'animal', 'id': 25, 'name': 'giraffe'}, + {'supercategory': 'accessory', 'id': 27, 'name': 'backpack'}, + {'supercategory': 'accessory', 'id': 28, 'name': 'umbrella'}, + {'supercategory': 'accessory', 'id': 31, 'name': 'handbag'}, + {'supercategory': 'accessory', 'id': 32, 'name': 'tie'}, + {'supercategory': 'accessory', 'id': 33, 'name': 'suitcase'}, + {'supercategory': 'sports', 'id': 34, 'name': 'frisbee'}, + {'supercategory': 'sports', 'id': 35, 'name': 'skis'}, + {'supercategory': 'sports', 'id': 36, 'name': 'snowboard'}, + {'supercategory': 'sports', 'id': 37, 'name': 'sports ball'}, + {'supercategory': 'sports', 'id': 38, 'name': 'kite'}, + {'supercategory': 'sports', 'id': 39, 'name': 'baseball bat'}, + {'supercategory': 'sports', 'id': 40, 'name': 'baseball glove'}, + {'supercategory': 'sports', 'id': 41, 'name': 'skateboard'}, + {'supercategory': 'sports', 'id': 42, 'name': 'surfboard'}, + {'supercategory': 'sports', 'id': 43, 'name': 
'tennis racket'}, + {'supercategory': 'kitchen', 'id': 44, 'name': 'bottle'}, + {'supercategory': 'kitchen', 'id': 46, 'name': 'wine glass'}, + {'supercategory': 'kitchen', 'id': 47, 'name': 'cup'}, + {'supercategory': 'kitchen', 'id': 48, 'name': 'fork'}, + {'supercategory': 'kitchen', 'id': 49, 'name': 'knife'}, + {'supercategory': 'kitchen', 'id': 50, 'name': 'spoon'}, + {'supercategory': 'kitchen', 'id': 51, 'name': 'bowl'}, + {'supercategory': 'food', 'id': 52, 'name': 'banana'}, + {'supercategory': 'food', 'id': 53, 'name': 'apple'}, + {'supercategory': 'food', 'id': 54, 'name': 'sandwich'}, + {'supercategory': 'food', 'id': 55, 'name': 'orange'}, + {'supercategory': 'food', 'id': 56, 'name': 'broccoli'}, + {'supercategory': 'food', 'id': 57, 'name': 'carrot'}, + {'supercategory': 'food', 'id': 58, 'name': 'hot dog'}, + {'supercategory': 'food', 'id': 59, 'name': 'pizza'}, + {'supercategory': 'food', 'id': 60, 'name': 'donut'}, + {'supercategory': 'food', 'id': 61, 'name': 'cake'}, + {'supercategory': 'furniture', 'id': 62, 'name': 'chair'}, + {'supercategory': 'furniture', 'id': 63, 'name': 'couch'}, + {'supercategory': 'furniture', 'id': 64, 'name': 'potted plant'}, + {'supercategory': 'furniture', 'id': 65, 'name': 'bed'}, + {'supercategory': 'furniture', 'id': 67, 'name': 'dining table'}, + {'supercategory': 'furniture', 'id': 70, 'name': 'toilet'}, + {'supercategory': 'electronic', 'id': 72, 'name': 'tv'}, + {'supercategory': 'electronic', 'id': 73, 'name': 'laptop'}, + {'supercategory': 'electronic', 'id': 74, 'name': 'mouse'}, + {'supercategory': 'electronic', 'id': 75, 'name': 'remote'}, + {'supercategory': 'electronic', 'id': 76, 'name': 'keyboard'}, + {'supercategory': 'electronic', 'id': 77, 'name': 'cell phone'}, + {'supercategory': 'appliance', 'id': 78, 'name': 'microwave'}, + {'supercategory': 'appliance', 'id': 79, 'name': 'oven'}, + {'supercategory': 'appliance', 'id': 80, 'name': 'toaster'}, + {'supercategory': 'appliance', 'id': 81, 'name': 'sink'}, + {'supercategory': 'appliance', 'id': 82, 'name': 'refrigerator'}, + {'supercategory': 'indoor', 'id': 84, 'name': 'book'}, + {'supercategory': 'indoor', 'id': 85, 'name': 'clock'}, + {'supercategory': 'indoor', 'id': 86, 'name': 'vase'}, + {'supercategory': 'indoor', 'id': 87, 'name': 'scissors'}, + {'supercategory': 'indoor', 'id': 88, 'name': 'teddy bear'}, + {'supercategory': 'indoor', 'id': 89, 'name': 'hair drier'}, + {'supercategory': 'indoor', 'id': 90, 'name': 'toothbrush'}] -- Gitee From 18284d0707089ba33ef4334c9e5a93577e7d88bf Mon Sep 17 00:00:00 2001 From: Savion_G Date: Fri, 13 May 2022 10:44:46 +0000 Subject: [PATCH 20/20] =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D=20ACL=5FPyTor?= =?UTF-8?q?ch/contrib/cv/detection/RetinaMask/README.md=20=E4=B8=BA=20ACL?= =?UTF-8?q?=5FPyTorch/contrib/cv/detection/RetinaMask/README=5Finfer.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/detection/RetinaMask/{README.md => README_infer.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ACL_PyTorch/contrib/cv/detection/RetinaMask/{README.md => README_infer.md} (100%) diff --git a/ACL_PyTorch/contrib/cv/detection/RetinaMask/README.md b/ACL_PyTorch/contrib/cv/detection/RetinaMask/README_infer.md similarity index 100% rename from ACL_PyTorch/contrib/cv/detection/RetinaMask/README.md rename to ACL_PyTorch/contrib/cv/detection/RetinaMask/README_infer.md -- Gitee
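A minimal usage sketch of how the helpers added in tools/utils.py fit together for one image, mirroring the flow of RetinaMask_postprocess.py and RetinaMask_eval_onnx.py; the image size and the stand-in raw outputs below are assumptions for illustration, and the script is expected to run from the repo root so that tools/ is importable.

import numpy as np
from tools.utils import np_batched_nms, post_process, Masker

# assumed original image size and the 1344-long-side resize ratio used at preprocess time
ori_w, ori_h = 640, 480
ratio = 1344 / max(ori_w, ori_h)

# stand-ins for the model's raw per-image outputs (boxes in the 1344x1344 input space)
bboxes = np.array([[ 25.0,  84.0,  630.0, 1008.0],
                   [1050.0, 210.0, 1344.0, 1260.0]], dtype=np.float32)
scores = np.array([0.92, 0.55], dtype=np.float32)
labels = np.array([1, 3], dtype=np.int32)
masks = np.random.rand(2, 1, 28, 28).astype(np.float32)

bboxes, labels, scores, masks = np_batched_nms(bboxes, scores, labels, masks, iou_threshold=0.4)
bboxes, labels, scores, masks = post_process(ori_h, ori_w, ratio, bboxes, labels, scores, masks)
pasted = Masker(threshold=0.5, padding=1)(masks, bboxes, ori_h, ori_w)
# the pasted masks are then RLE-encoded with mask_util.encode and passed to
# convert_to_coco_format / evaluate_prediction, as in RetinaMask_postprocess.py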